1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * than "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
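/*
 * Illustrative sketch, kept under #if 0 so it is not built: how a saved
 * group of two eval maps would be laid out in the array described above.
 * The array name is hypothetical.
 */
#if 0
static union trace_eval_map_item eval_items_sketch[4] = {
	[0] = { .head = { .mod = NULL, .length = 2 } },	/* built-in, 2 maps */
	/* [1] and [2] hold the two saved struct trace_eval_map entries */
	[3] = { .tail = { .next = NULL } },	/* -> next saved array, if any */
};
#endif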
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static int __init boot_snapshot(char *str)
234 {
235         snapshot_at_boot = true;
236         boot_alloc_snapshot(str);
237         return 1;
238 }
239 __setup("ftrace_boot_snapshot", boot_snapshot);
240
241
242 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
243
244 static int __init set_trace_boot_options(char *str)
245 {
246         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
247         return 1;
248 }
249 __setup("trace_options=", set_trace_boot_options);
250
251 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
252 static char *trace_boot_clock __initdata;
253
254 static int __init set_trace_boot_clock(char *str)
255 {
256         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
257         trace_boot_clock = trace_boot_clock_buf;
258         return 1;
259 }
260 __setup("trace_clock=", set_trace_boot_clock);
261
262 static int __init set_tracepoint_printk(char *str)
263 {
264         /* Ignore the "tp_printk_stop_on_boot" param */
265         if (*str == '_')
266                 return 0;
267
268         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
269                 tracepoint_printk = 1;
270         return 1;
271 }
272 __setup("tp_printk", set_tracepoint_printk);
273
274 static int __init set_tracepoint_printk_stop(char *str)
275 {
276         tracepoint_printk_stop_on_boot = true;
277         return 1;
278 }
279 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
280
281 unsigned long long ns2usecs(u64 nsec)
282 {
283         nsec += 500;
284         do_div(nsec, 1000);
285         return nsec;
286 }
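/*
 * Worked example: the +500 rounds to the nearest microsecond before the
 * divide, so ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */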
287
288 static void
289 trace_process_export(struct trace_export *export,
290                struct ring_buffer_event *event, int flag)
291 {
292         struct trace_entry *entry;
293         unsigned int size = 0;
294
295         if (export->flags & flag) {
296                 entry = ring_buffer_event_data(event);
297                 size = ring_buffer_event_length(event);
298                 export->write(export, entry, size);
299         }
300 }
301
302 static DEFINE_MUTEX(ftrace_export_lock);
303
304 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
305
306 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
307 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
309
310 static inline void ftrace_exports_enable(struct trace_export *export)
311 {
312         if (export->flags & TRACE_EXPORT_FUNCTION)
313                 static_branch_inc(&trace_function_exports_enabled);
314
315         if (export->flags & TRACE_EXPORT_EVENT)
316                 static_branch_inc(&trace_event_exports_enabled);
317
318         if (export->flags & TRACE_EXPORT_MARKER)
319                 static_branch_inc(&trace_marker_exports_enabled);
320 }
321
322 static inline void ftrace_exports_disable(struct trace_export *export)
323 {
324         if (export->flags & TRACE_EXPORT_FUNCTION)
325                 static_branch_dec(&trace_function_exports_enabled);
326
327         if (export->flags & TRACE_EXPORT_EVENT)
328                 static_branch_dec(&trace_event_exports_enabled);
329
330         if (export->flags & TRACE_EXPORT_MARKER)
331                 static_branch_dec(&trace_marker_exports_enabled);
332 }
333
334 static void ftrace_exports(struct ring_buffer_event *event, int flag)
335 {
336         struct trace_export *export;
337
338         preempt_disable_notrace();
339
340         export = rcu_dereference_raw_check(ftrace_exports_list);
341         while (export) {
342                 trace_process_export(export, event, flag);
343                 export = rcu_dereference_raw_check(export->next);
344         }
345
346         preempt_enable_notrace();
347 }
348
349 static inline void
350 add_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         rcu_assign_pointer(export->next, *list);
353         /*
354          * We are adding the export to the list, but another
355          * CPU might be walking that list. We need to make sure
356          * the export->next pointer is valid before another CPU sees
357          * the export pointer included in the list.
358          */
359         rcu_assign_pointer(*list, export);
360 }
361
362 static inline int
363 rm_trace_export(struct trace_export **list, struct trace_export *export)
364 {
365         struct trace_export **p;
366
367         for (p = list; *p != NULL; p = &(*p)->next)
368                 if (*p == export)
369                         break;
370
371         if (*p != export)
372                 return -1;
373
374         rcu_assign_pointer(*p, (*p)->next);
375
376         return 0;
377 }
378
379 static inline void
380 add_ftrace_export(struct trace_export **list, struct trace_export *export)
381 {
382         ftrace_exports_enable(export);
383
384         add_trace_export(list, export);
385 }
386
387 static inline int
388 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
389 {
390         int ret;
391
392         ret = rm_trace_export(list, export);
393         ftrace_exports_disable(export);
394
395         return ret;
396 }
397
398 int register_ftrace_export(struct trace_export *export)
399 {
400         if (WARN_ON_ONCE(!export->write))
401                 return -1;
402
403         mutex_lock(&ftrace_export_lock);
404
405         add_ftrace_export(&ftrace_exports_list, export);
406
407         mutex_unlock(&ftrace_export_lock);
408
409         return 0;
410 }
411 EXPORT_SYMBOL_GPL(register_ftrace_export);
412
413 int unregister_ftrace_export(struct trace_export *export)
414 {
415         int ret;
416
417         mutex_lock(&ftrace_export_lock);
418
419         ret = rm_ftrace_export(&ftrace_exports_list, export);
420
421         mutex_unlock(&ftrace_export_lock);
422
423         return ret;
424 }
425 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
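/*
 * Usage sketch, kept under #if 0 so it is not built: how a module might
 * hook into the exporter list above.  The callback and variable names are
 * hypothetical; only struct trace_export, register_ftrace_export() and
 * unregister_ftrace_export() are real interfaces.
 */
#if 0
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int len)
{
	/* push the raw trace entry to an external sink (not shown) */
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,	/* also: _EVENT, _MARKER */
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);	/* 0 on success */
}

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}
#endif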
426
427 /* trace_flags holds trace_options default values */
428 #define TRACE_DEFAULT_FLAGS                                             \
429         (FUNCTION_DEFAULT_FLAGS |                                       \
430          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
431          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
432          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
433          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
434          TRACE_ITER_HASH_PTR)
435
436 /* trace_options that are only supported by global_trace */
437 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
438                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
439
440 /* trace_flags that are default zero for instances */
441 #define ZEROED_TRACE_FLAGS \
442         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
443
444 /*
445  * The global_trace is the descriptor that holds the top-level tracing
446  * buffers for the live tracing.
447  */
448 static struct trace_array global_trace = {
449         .trace_flags = TRACE_DEFAULT_FLAGS,
450 };
451
452 LIST_HEAD(ftrace_trace_arrays);
453
454 int trace_array_get(struct trace_array *this_tr)
455 {
456         struct trace_array *tr;
457         int ret = -ENODEV;
458
459         mutex_lock(&trace_types_lock);
460         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
461                 if (tr == this_tr) {
462                         tr->ref++;
463                         ret = 0;
464                         break;
465                 }
466         }
467         mutex_unlock(&trace_types_lock);
468
469         return ret;
470 }
471
472 static void __trace_array_put(struct trace_array *this_tr)
473 {
474         WARN_ON(!this_tr->ref);
475         this_tr->ref--;
476 }
477
478 /**
479  * trace_array_put - Decrement the reference counter for this trace array.
480  * @this_tr : pointer to the trace array
481  *
482  * NOTE: Use this when we no longer need the trace array returned by
483  * trace_array_get_by_name(). This ensures the trace array can be later
484  * destroyed.
485  *
486  */
487 void trace_array_put(struct trace_array *this_tr)
488 {
489         if (!this_tr)
490                 return;
491
492         mutex_lock(&trace_types_lock);
493         __trace_array_put(this_tr);
494         mutex_unlock(&trace_types_lock);
495 }
496 EXPORT_SYMBOL_GPL(trace_array_put);
497
498 int tracing_check_open_get_tr(struct trace_array *tr)
499 {
500         int ret;
501
502         ret = security_locked_down(LOCKDOWN_TRACEFS);
503         if (ret)
504                 return ret;
505
506         if (tracing_disabled)
507                 return -ENODEV;
508
509         if (tr && trace_array_get(tr) < 0)
510                 return -ENODEV;
511
512         return 0;
513 }
514
515 int call_filter_check_discard(struct trace_event_call *call, void *rec,
516                               struct trace_buffer *buffer,
517                               struct ring_buffer_event *event)
518 {
519         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
520             !filter_match_preds(call->filter, rec)) {
521                 __trace_event_discard_commit(buffer, event);
522                 return 1;
523         }
524
525         return 0;
526 }
527
528 /**
529  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
530  * @filtered_pids: The list of pids to check
531  * @search_pid: The PID to find in @filtered_pids
532  *
533  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
534  */
535 bool
536 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
537 {
538         return trace_pid_list_is_set(filtered_pids, search_pid);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * When adding a task, the task is only added if @self is defined and
577  * @self is also included in @pid_list. This happens on fork, where tasks
578  * should only be added when the parent is listed. If @self is NULL, then
579  * the @task pid will be removed from the list, which would happen on
580  * exit of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* "self" is set for forks, and NULL for exits */
596         if (self)
597                 trace_pid_list_set(pid_list, task->pid);
598         else
599                 trace_pid_list_clear(pid_list, task->pid);
600 }
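/*
 * Usage sketch, kept under #if 0 so it is not built: the two call sites
 * described in the comment above.  The wrapper names are hypothetical.
 */
#if 0
/* On fork: the child is added only if the parent is already listed. */
static void sketch_on_fork(struct trace_pid_list *pid_list,
			   struct task_struct *parent,
			   struct task_struct *child)
{
	trace_filter_add_remove_task(pid_list, parent, child);
}

/* On exit: the task is unconditionally removed from the list. */
static void sketch_on_exit(struct trace_pid_list *pid_list,
			   struct task_struct *task)
{
	trace_filter_add_remove_task(pid_list, NULL, task);
}
#endif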
601
602 /**
603  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
604  * @pid_list: The pid list to show
605  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
606  * @pos: The position of the file
607  *
608  * This is used by the seq_file "next" operation to iterate the pids
609  * listed in a trace_pid_list structure.
610  *
611  * Returns the pid+1 as we want to display pid of zero, but NULL would
612  * stop the iteration.
613  */
614 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
615 {
616         long pid = (unsigned long)v;
617         unsigned int next;
618
619         (*pos)++;
620
621         /* pid already is +1 of the actual previous bit */
622         if (trace_pid_list_next(pid_list, pid, &next) < 0)
623                 return NULL;
624
625         pid = next;
626
627         /* Return pid + 1 to allow zero to be represented */
628         return (void *)(pid + 1);
629 }
630
631 /**
632  * trace_pid_start - Used for seq_file to start reading pid lists
633  * @pid_list: The pid list to show
634  * @pos: The position of the file
635  *
636  * This is used by seq_file "start" operation to start the iteration
637  * of listing pids.
638  *
639  * Returns the pid+1 as we want to display pid of zero, but NULL would
640  * stop the iteration.
641  */
642 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
643 {
644         unsigned long pid;
645         unsigned int first;
646         loff_t l = 0;
647
648         if (trace_pid_list_first(pid_list, &first) < 0)
649                 return NULL;
650
651         pid = first;
652
653         /* Return pid + 1 so that zero can be the exit value */
654         for (pid++; pid && l < *pos;
655              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
656                 ;
657         return (void *)pid;
658 }
659
660 /**
661  * trace_pid_show - show the current pid in seq_file processing
662  * @m: The seq_file structure to write into
663  * @v: A void pointer of the pid (+1) value to display
664  *
665  * Can be directly used by seq_file operations to display the current
666  * pid value.
667  */
668 int trace_pid_show(struct seq_file *m, void *v)
669 {
670         unsigned long pid = (unsigned long)v - 1;
671
672         seq_printf(m, "%lu\n", pid);
673         return 0;
674 }
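/*
 * Usage sketch, kept under #if 0 so it is not built: wiring the three
 * helpers above into a seq_file.  The names are hypothetical, and the
 * locking that real users take around the pid_list in their ->start()
 * and ->stop() callbacks is omitted.
 */
#if 0
static void *pid_sketch_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* set at open time */

	return trace_pid_start(pid_list, pos);
}

static void *pid_sketch_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(m->private, v, pos);
}

static void pid_sketch_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations pid_sketch_seq_ops = {
	.start	= pid_sketch_start,
	.next	= pid_sketch_next,
	.stop	= pid_sketch_stop,
	.show	= trace_pid_show,
};
#endif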
675
676 /* 128 should be much more than enough */
677 #define PID_BUF_SIZE            127
678
679 int trace_pid_write(struct trace_pid_list *filtered_pids,
680                     struct trace_pid_list **new_pid_list,
681                     const char __user *ubuf, size_t cnt)
682 {
683         struct trace_pid_list *pid_list;
684         struct trace_parser parser;
685         unsigned long val;
686         int nr_pids = 0;
687         ssize_t read = 0;
688         ssize_t ret;
689         loff_t pos;
690         pid_t pid;
691
692         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
693                 return -ENOMEM;
694
695         /*
696          * Always create a new array. The write is an all-or-nothing
697          * operation: new pids from the user go into a fresh array,
698          * and if the operation fails, the current list is left
699          * unmodified.
700          */
701         pid_list = trace_pid_list_alloc();
702         if (!pid_list) {
703                 trace_parser_put(&parser);
704                 return -ENOMEM;
705         }
706
707         if (filtered_pids) {
708                 /* copy the current bits to the new max */
709                 ret = trace_pid_list_first(filtered_pids, &pid);
710                 while (!ret) {
711                         trace_pid_list_set(pid_list, pid);
712                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
713                         nr_pids++;
714                 }
715         }
716
717         ret = 0;
718         while (cnt > 0) {
719
720                 pos = 0;
721
722                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
723                 if (ret < 0)
724                         break;
725
726                 read += ret;
727                 ubuf += ret;
728                 cnt -= ret;
729
730                 if (!trace_parser_loaded(&parser))
731                         break;
732
733                 ret = -EINVAL;
734                 if (kstrtoul(parser.buffer, 0, &val))
735                         break;
736
737                 pid = (pid_t)val;
738
739                 if (trace_pid_list_set(pid_list, pid) < 0) {
740                         ret = -1;
741                         break;
742                 }
743                 nr_pids++;
744
745                 trace_parser_clear(&parser);
746                 ret = 0;
747         }
748         trace_parser_put(&parser);
749
750         if (ret < 0) {
751                 trace_pid_list_free(pid_list);
752                 return ret;
753         }
754
755         if (!nr_pids) {
756                 /* Cleared the list of pids */
757                 trace_pid_list_free(pid_list);
758                 pid_list = NULL;
759         }
760
761         *new_pid_list = pid_list;
762
763         return read;
764 }
765
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
767 {
768         u64 ts;
769
770         /* Early boot up does not have a buffer yet */
771         if (!buf->buffer)
772                 return trace_clock_local();
773
774         ts = ring_buffer_time_stamp(buf->buffer);
775         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
776
777         return ts;
778 }
779
780 u64 ftrace_now(int cpu)
781 {
782         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
783 }
784
785 /**
786  * tracing_is_enabled - Show if global_trace has been enabled
787  *
788  * Shows if the global trace has been enabled or not. It uses the
789  * mirror flag "buffer_disabled" so it can be used in fast paths, such
790  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
791  * need to know the accurate state, use tracing_is_on() which is a little
792  * slower, but accurate.
793  */
794 int tracing_is_enabled(void)
795 {
796         /*
797          * For quick access (irqsoff uses this in fast path), just
798          * return the mirror variable of the state of the ring buffer.
799          * It's a little racy, but we don't really care.
800          */
801         smp_rmb();
802         return !global_trace.buffer_disabled;
803 }
804
805 /*
806  * trace_buf_size is the size in bytes that is allocated
807  * for a buffer. Note, the number of bytes is always rounded
808  * to page size.
809  *
810  * This number is purposely set to a low value of 16384:
811  * if a dump on oops happens, it is much appreciated not to
812  * have to wait for all that output. Anyway, this is both
813  * boot-time and run-time configurable.
814  */
815 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
816
817 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer            *trace_types __read_mostly;
821
822 /*
823  * trace_types_lock is used to protect the trace_types list.
824  */
825 DEFINE_MUTEX(trace_types_lock);
826
827 /*
828  * Serialize access to the ring buffer.
829  *
830  * The ring buffer serializes readers, but that is only low-level protection.
831  * The validity of the events (returned by ring_buffer_peek(), etc.)
832  * is not protected by the ring buffer.
833  *
834  * The content of events may become garbage if we allow other processes to
835  * consume these events concurrently:
836  *   A) the page of the consumed events may become a normal page
837  *      (not a reader page) in the ring buffer, and this page will be
838  *      rewritten by the event producer.
839  *   B) the page of the consumed events may become a page for splice_read,
840  *      and this page will be returned to the system.
841  *
842  * These primitives allow multiple processes to access different CPU ring
843  * buffers concurrently.
844  *
845  * These primitives don't distinguish read-only and read-consume access.
846  * Multiple read-only accesses are also serialized.
847  */
848
849 #ifdef CONFIG_SMP
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852
853 static inline void trace_access_lock(int cpu)
854 {
855         if (cpu == RING_BUFFER_ALL_CPUS) {
856                 /* gain it for accessing the whole ring buffer. */
857                 down_write(&all_cpu_access_lock);
858         } else {
859                 /* gain it for accessing a cpu ring buffer. */
860
861                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862                 down_read(&all_cpu_access_lock);
863
864                 /* Secondly block other access to this @cpu ring buffer. */
865                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
866         }
867 }
868
869 static inline void trace_access_unlock(int cpu)
870 {
871         if (cpu == RING_BUFFER_ALL_CPUS) {
872                 up_write(&all_cpu_access_lock);
873         } else {
874                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875                 up_read(&all_cpu_access_lock);
876         }
877 }
878
879 static inline void trace_access_lock_init(void)
880 {
881         int cpu;
882
883         for_each_possible_cpu(cpu)
884                 mutex_init(&per_cpu(cpu_access_lock, cpu));
885 }
886
887 #else
888
889 static DEFINE_MUTEX(access_lock);
890
891 static inline void trace_access_lock(int cpu)
892 {
893         (void)cpu;
894         mutex_lock(&access_lock);
895 }
896
897 static inline void trace_access_unlock(int cpu)
898 {
899         (void)cpu;
900         mutex_unlock(&access_lock);
901 }
902
903 static inline void trace_access_lock_init(void)
904 {
905 }
906
907 #endif
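/*
 * Usage sketch, kept under #if 0 so it is not built: how readers pair the
 * helpers above.  A reader touching a single CPU buffer takes the shared
 * side plus that CPU's mutex; a reader touching all CPUs takes the
 * exclusive side.  Function names are hypothetical.
 */
#if 0
static void sketch_read_one_cpu(int cpu)
{
	trace_access_lock(cpu);
	/* ... peek at or consume events from @cpu's buffer ... */
	trace_access_unlock(cpu);
}

static void sketch_read_all_cpus(void)
{
	trace_access_lock(RING_BUFFER_ALL_CPUS);
	/* ... walk every per-CPU buffer ... */
	trace_access_unlock(RING_BUFFER_ALL_CPUS);
}
#endif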
908
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                  unsigned int trace_ctx,
912                                  int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914                                       struct trace_buffer *buffer,
915                                       unsigned int trace_ctx,
916                                       int skip, struct pt_regs *regs);
917
918 #else
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                         unsigned int trace_ctx,
921                                         int skip, struct pt_regs *regs)
922 {
923 }
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925                                       struct trace_buffer *buffer,
926                                       unsigned long trace_ctx,
927                                       int skip, struct pt_regs *regs)
928 {
929 }
930
931 #endif
932
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935                   int type, unsigned int trace_ctx)
936 {
937         struct trace_entry *ent = ring_buffer_event_data(event);
938
939         tracing_generic_entry_update(ent, type, trace_ctx);
940 }
941
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
944                           int type,
945                           unsigned long len,
946                           unsigned int trace_ctx)
947 {
948         struct ring_buffer_event *event;
949
950         event = ring_buffer_lock_reserve(buffer, len);
951         if (event != NULL)
952                 trace_event_setup(event, type, trace_ctx);
953
954         return event;
955 }
956
957 void tracer_tracing_on(struct trace_array *tr)
958 {
959         if (tr->array_buffer.buffer)
960                 ring_buffer_record_on(tr->array_buffer.buffer);
961         /*
962          * This flag is looked at when buffers haven't been allocated
963          * yet, or by some tracers (like irqsoff) that just want to
964          * know if the ring buffer has been disabled, but can handle
965          * races where it gets disabled while we still do a record.
966          * As the check is in the fast path of the tracers, it is more
967          * important to be fast than accurate.
968          */
969         tr->buffer_disabled = 0;
970         /* Make the flag seen by readers */
971         smp_wmb();
972 }
973
974 /**
975  * tracing_on - enable tracing buffers
976  *
977  * This function enables tracing buffers that may have been
978  * disabled with tracing_off.
979  */
980 void tracing_on(void)
981 {
982         tracer_tracing_on(&global_trace);
983 }
984 EXPORT_SYMBOL_GPL(tracing_on);
985
986
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 {
990         __this_cpu_write(trace_taskinfo_save, true);
991
992         /* If this is the temp buffer, we need to commit fully */
993         if (this_cpu_read(trace_buffered_event) == event) {
994                 /* Length is in event->array[0] */
995                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996                 /* Release the temp buffer */
997                 this_cpu_dec(trace_buffered_event_cnt);
998                 /* ring_buffer_unlock_commit() enables preemption */
999                 preempt_enable_notrace();
1000         } else
1001                 ring_buffer_unlock_commit(buffer);
1002 }
1003
1004 /**
1005  * __trace_puts - write a constant string into the trace buffer.
1006  * @ip:    The address of the caller
1007  * @str:   The constant string to write
1008  * @size:  The size of the string.
1009  */
1010 int __trace_puts(unsigned long ip, const char *str, int size)
1011 {
1012         struct ring_buffer_event *event;
1013         struct trace_buffer *buffer;
1014         struct print_entry *entry;
1015         unsigned int trace_ctx;
1016         int alloc;
1017
1018         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019                 return 0;
1020
1021         if (unlikely(tracing_selftest_running || tracing_disabled))
1022                 return 0;
1023
1024         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1025
1026         trace_ctx = tracing_gen_ctx();
1027         buffer = global_trace.array_buffer.buffer;
1028         ring_buffer_nest_start(buffer);
1029         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1030                                             trace_ctx);
1031         if (!event) {
1032                 size = 0;
1033                 goto out;
1034         }
1035
1036         entry = ring_buffer_event_data(event);
1037         entry->ip = ip;
1038
1039         memcpy(&entry->buf, str, size);
1040
1041         /* Add a newline if necessary */
1042         if (entry->buf[size - 1] != '\n') {
1043                 entry->buf[size] = '\n';
1044                 entry->buf[size + 1] = '\0';
1045         } else
1046                 entry->buf[size] = '\0';
1047
1048         __buffer_unlock_commit(buffer, event);
1049         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1050  out:
1051         ring_buffer_nest_end(buffer);
1052         return size;
1053 }
1054 EXPORT_SYMBOL_GPL(__trace_puts);
1055
1056 /**
1057  * __trace_bputs - write the pointer to a constant string into trace buffer
1058  * @ip:    The address of the caller
1059  * @str:   The constant string to write to the buffer to
1060  */
1061 int __trace_bputs(unsigned long ip, const char *str)
1062 {
1063         struct ring_buffer_event *event;
1064         struct trace_buffer *buffer;
1065         struct bputs_entry *entry;
1066         unsigned int trace_ctx;
1067         int size = sizeof(struct bputs_entry);
1068         int ret = 0;
1069
1070         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1071                 return 0;
1072
1073         if (unlikely(tracing_selftest_running || tracing_disabled))
1074                 return 0;
1075
1076         trace_ctx = tracing_gen_ctx();
1077         buffer = global_trace.array_buffer.buffer;
1078
1079         ring_buffer_nest_start(buffer);
1080         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1081                                             trace_ctx);
1082         if (!event)
1083                 goto out;
1084
1085         entry = ring_buffer_event_data(event);
1086         entry->ip                       = ip;
1087         entry->str                      = str;
1088
1089         __buffer_unlock_commit(buffer, event);
1090         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1091
1092         ret = 1;
1093  out:
1094         ring_buffer_nest_end(buffer);
1095         return ret;
1096 }
1097 EXPORT_SYMBOL_GPL(__trace_bputs);
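/*
 * Usage sketch, kept under #if 0 so it is not built: writing a literal
 * string into the top-level buffer.  Callers normally use the trace_puts()
 * helper macro rather than calling __trace_puts() directly; the direct
 * call is shown only to make the arguments explicit.
 */
#if 0
static void sketch_mark_slow_path(void)
{
	static const char msg[] = "hit the slow path\n";

	__trace_puts(_THIS_IP_, msg, strlen(msg));
}
#endif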
1098
1099 #ifdef CONFIG_TRACER_SNAPSHOT
1100 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1101                                            void *cond_data)
1102 {
1103         struct tracer *tracer = tr->current_trace;
1104         unsigned long flags;
1105
1106         if (in_nmi()) {
1107                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1108                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1109                 return;
1110         }
1111
1112         if (!tr->allocated_snapshot) {
1113                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1114                 internal_trace_puts("*** stopping trace here!   ***\n");
1115                 tracing_off();
1116                 return;
1117         }
1118
1119         /* Note, snapshot can not be used when the tracer uses it */
1120         if (tracer->use_max_tr) {
1121                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1122                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1123                 return;
1124         }
1125
1126         local_irq_save(flags);
1127         update_max_tr(tr, current, smp_processor_id(), cond_data);
1128         local_irq_restore(flags);
1129 }
1130
1131 void tracing_snapshot_instance(struct trace_array *tr)
1132 {
1133         tracing_snapshot_instance_cond(tr, NULL);
1134 }
1135
1136 /**
1137  * tracing_snapshot - take a snapshot of the current buffer.
1138  *
1139  * This causes a swap between the snapshot buffer and the current live
1140  * tracing buffer. You can use this to take snapshots of the live
1141  * trace when some condition is triggered, but continue to trace.
1142  *
1143  * Note, make sure to allocate the snapshot beforehand, either with
1144  * tracing_snapshot_alloc(), or manually with:
1145  *   echo 1 > /sys/kernel/debug/tracing/snapshot
1146  *
1147  * If the snapshot buffer is not allocated, this will stop tracing,
1148  * basically making a permanent snapshot.
1149  */
1150 void tracing_snapshot(void)
1151 {
1152         struct trace_array *tr = &global_trace;
1153
1154         tracing_snapshot_instance(tr);
1155 }
1156 EXPORT_SYMBOL_GPL(tracing_snapshot);
1157
1158 /**
1159  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1160  * @tr:         The tracing instance to snapshot
1161  * @cond_data:  The data to be tested conditionally, and possibly saved
1162  *
1163  * This is the same as tracing_snapshot() except that the snapshot is
1164  * conditional - the snapshot will only happen if the
1165  * cond_snapshot.update() implementation receiving the cond_data
1166  * returns true, which means that the trace array's cond_snapshot
1167  * update() operation used the cond_data to determine whether the
1168  * snapshot should be taken, and if it was, presumably saved it along
1169  * with the snapshot.
1170  */
1171 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1172 {
1173         tracing_snapshot_instance_cond(tr, cond_data);
1174 }
1175 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1176
1177 /**
1178  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1179  * @tr:         The tracing instance
1180  *
1181  * When the user enables a conditional snapshot using
1182  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1183  * with the snapshot.  This accessor is used to retrieve it.
1184  *
1185  * Should not be called from cond_snapshot.update(), since it takes
1186  * the tr->max_lock lock, which the code calling
1187  * cond_snapshot.update() has already done.
1188  *
1189  * Returns the cond_data associated with the trace array's snapshot.
1190  */
1191 void *tracing_cond_snapshot_data(struct trace_array *tr)
1192 {
1193         void *cond_data = NULL;
1194
1195         local_irq_disable();
1196         arch_spin_lock(&tr->max_lock);
1197
1198         if (tr->cond_snapshot)
1199                 cond_data = tr->cond_snapshot->cond_data;
1200
1201         arch_spin_unlock(&tr->max_lock);
1202         local_irq_enable();
1203
1204         return cond_data;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207
1208 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1209                                         struct array_buffer *size_buf, int cpu_id);
1210 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1211
1212 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1213 {
1214         int ret;
1215
1216         if (!tr->allocated_snapshot) {
1217
1218                 /* allocate spare buffer */
1219                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1220                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1221                 if (ret < 0)
1222                         return ret;
1223
1224                 tr->allocated_snapshot = true;
1225         }
1226
1227         return 0;
1228 }
1229
1230 static void free_snapshot(struct trace_array *tr)
1231 {
1232         /*
1233          * We don't free the ring buffer. Instead, we resize it because
1234          * the max_tr ring buffer has some state (e.g. ring->clock) and
1235          * we want to preserve it.
1236          */
1237         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1238         set_buffer_entries(&tr->max_buffer, 1);
1239         tracing_reset_online_cpus(&tr->max_buffer);
1240         tr->allocated_snapshot = false;
1241 }
1242
1243 /**
1244  * tracing_alloc_snapshot - allocate snapshot buffer.
1245  *
1246  * This only allocates the snapshot buffer if it isn't already
1247  * allocated - it doesn't also take a snapshot.
1248  *
1249  * This is meant to be used in cases where the snapshot buffer needs
1250  * to be set up for events that can't sleep but need to be able to
1251  * trigger a snapshot.
1252  */
1253 int tracing_alloc_snapshot(void)
1254 {
1255         struct trace_array *tr = &global_trace;
1256         int ret;
1257
1258         ret = tracing_alloc_snapshot_instance(tr);
1259         WARN_ON(ret < 0);
1260
1261         return ret;
1262 }
1263 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1264
1265 /**
1266  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1267  *
1268  * This is similar to tracing_snapshot(), but it will allocate the
1269  * snapshot buffer if it isn't already allocated. Use this only
1270  * where it is safe to sleep, as the allocation may sleep.
1271  *
1272  * This causes a swap between the snapshot buffer and the current live
1273  * tracing buffer. You can use this to take snapshots of the live
1274  * trace when some condition is triggered, but continue to trace.
1275  */
1276 void tracing_snapshot_alloc(void)
1277 {
1278         int ret;
1279
1280         ret = tracing_alloc_snapshot();
1281         if (ret < 0)
1282                 return;
1283
1284         tracing_snapshot();
1285 }
1286 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
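/*
 * Usage sketch, kept under #if 0 so it is not built: allocate the snapshot
 * buffer once from a context that may sleep, then take snapshots later from
 * contexts that cannot.  Function names are hypothetical.
 */
#if 0
static int sketch_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep, 0 on success */
}

static void sketch_on_interesting_event(void)
{
	tracing_snapshot();	/* swaps the live buffer with the snapshot */
}
#endif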
1287
1288 /**
1289  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1290  * @tr:         The tracing instance
1291  * @cond_data:  User data to associate with the snapshot
1292  * @update:     Implementation of the cond_snapshot update function
1293  *
1294  * Check whether the conditional snapshot for the given instance has
1295  * already been enabled, or if the current tracer is already using a
1296  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1297  * save the cond_data and update function inside.
1298  *
1299  * Returns 0 if successful, error otherwise.
1300  */
1301 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1302                                  cond_update_fn_t update)
1303 {
1304         struct cond_snapshot *cond_snapshot;
1305         int ret = 0;
1306
1307         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1308         if (!cond_snapshot)
1309                 return -ENOMEM;
1310
1311         cond_snapshot->cond_data = cond_data;
1312         cond_snapshot->update = update;
1313
1314         mutex_lock(&trace_types_lock);
1315
1316         ret = tracing_alloc_snapshot_instance(tr);
1317         if (ret)
1318                 goto fail_unlock;
1319
1320         if (tr->current_trace->use_max_tr) {
1321                 ret = -EBUSY;
1322                 goto fail_unlock;
1323         }
1324
1325         /*
1326          * The cond_snapshot can only change to NULL without the
1327          * trace_types_lock. We don't care if we race with it going
1328          * to NULL, but we want to make sure that it's not set to
1329          * something other than NULL when we get here, which we can
1330          * do safely with only holding the trace_types_lock and not
1331          * having to take the max_lock.
1332          */
1333         if (tr->cond_snapshot) {
1334                 ret = -EBUSY;
1335                 goto fail_unlock;
1336         }
1337
1338         local_irq_disable();
1339         arch_spin_lock(&tr->max_lock);
1340         tr->cond_snapshot = cond_snapshot;
1341         arch_spin_unlock(&tr->max_lock);
1342         local_irq_enable();
1343
1344         mutex_unlock(&trace_types_lock);
1345
1346         return ret;
1347
1348  fail_unlock:
1349         mutex_unlock(&trace_types_lock);
1350         kfree(cond_snapshot);
1351         return ret;
1352 }
1353 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
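/*
 * Usage sketch, kept under #if 0 so it is not built: a hypothetical
 * threshold check wired up as the cond_snapshot update callback.  The
 * cond_data seen by the callback is whatever the caller later hands to
 * tracing_snapshot_cond().
 */
#if 0
static bool sketch_cond_update(struct trace_array *tr, void *cond_data)
{
	/* Take the snapshot only when the caller-supplied value is large. */
	return cond_data && *(unsigned long *)cond_data > 100;
}

static int sketch_cond_setup(struct trace_array *tr)
{
	return tracing_snapshot_cond_enable(tr, NULL, sketch_cond_update);
}

static void sketch_cond_trigger(struct trace_array *tr, unsigned long val)
{
	tracing_snapshot_cond(tr, &val);	/* callback decides */
}
#endif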
1354
1355 /**
1356  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1357  * @tr:         The tracing instance
1358  *
1359  * Check whether the conditional snapshot for the given instance is
1360  * enabled; if so, free the cond_snapshot associated with it,
1361  * otherwise return -EINVAL.
1362  *
1363  * Returns 0 if successful, error otherwise.
1364  */
1365 int tracing_snapshot_cond_disable(struct trace_array *tr)
1366 {
1367         int ret = 0;
1368
1369         local_irq_disable();
1370         arch_spin_lock(&tr->max_lock);
1371
1372         if (!tr->cond_snapshot)
1373                 ret = -EINVAL;
1374         else {
1375                 kfree(tr->cond_snapshot);
1376                 tr->cond_snapshot = NULL;
1377         }
1378
1379         arch_spin_unlock(&tr->max_lock);
1380         local_irq_enable();
1381
1382         return ret;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1385 #else
1386 void tracing_snapshot(void)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot);
1391 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1396 int tracing_alloc_snapshot(void)
1397 {
1398         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1399         return -ENODEV;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1402 void tracing_snapshot_alloc(void)
1403 {
1404         /* Give warning */
1405         tracing_snapshot();
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1408 void *tracing_cond_snapshot_data(struct trace_array *tr)
1409 {
1410         return NULL;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1413 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1414 {
1415         return -ENODEV;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1418 int tracing_snapshot_cond_disable(struct trace_array *tr)
1419 {
1420         return false;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1423 #define free_snapshot(tr)       do { } while (0)
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff) that just want to
1433          * know if the ring buffer has been disabled, but can handle
1434          * races where it gets disabled while we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
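/*
 * Usage sketch, kept under #if 0 so it is not built: freeze the ring
 * buffers right after a suspect event so the interesting data is not
 * overwritten, then re-enable recording once it has been inspected.
 */
#if 0
static void sketch_capture_window(void)
{
	if (!tracing_is_on())
		return;

	tracing_off();		/* stop recording, keep buffer contents */
	/* ... read out or dump the trace ... */
	tracing_on();		/* resume recording */
}
#endif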
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by  space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * The parser is not finished with the last write,
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
1676
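/*
 * Illustrative sketch (not part of the original file): how the three parser
 * helpers above are meant to be used together by a tracefs write handler.
 * The name example_token_write() is hypothetical; the call sequence mirrors
 * the set_ftrace_* write paths that consume whitespace-separated tokens.
 *
 *	static ssize_t example_token_write(struct file *filp,
 *					   const char __user *ubuf,
 *					   size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 256))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			pr_info("token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */
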
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695
1696 #ifdef CONFIG_TRACER_MAX_TRACE
1697 static const struct file_operations tracing_max_lat_fops;
1698
1699 #ifdef LATENCY_FS_NOTIFY
1700
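/*
 * tracing_max_latency can be updated from deep within __schedule() or
 * do_idle(), where queueing work directly could deadlock.  The user-space
 * notification is therefore deferred twice: latency_fsnotify() queues an
 * irq_work, whose handler queues a regular work item on fsnotify_wq, and
 * only that work item calls fsnotify_inode() from a preemptible context.
 */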
1701 static struct workqueue_struct *fsnotify_wq;
1702
1703 static void latency_fsnotify_workfn(struct work_struct *work)
1704 {
1705         struct trace_array *tr = container_of(work, struct trace_array,
1706                                               fsnotify_work);
1707         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1708 }
1709
1710 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1711 {
1712         struct trace_array *tr = container_of(iwork, struct trace_array,
1713                                               fsnotify_irqwork);
1714         queue_work(fsnotify_wq, &tr->fsnotify_work);
1715 }
1716
1717 static void trace_create_maxlat_file(struct trace_array *tr,
1718                                      struct dentry *d_tracer)
1719 {
1720         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1721         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1722         tr->d_max_latency = trace_create_file("tracing_max_latency",
1723                                               TRACE_MODE_WRITE,
1724                                               d_tracer, &tr->max_latency,
1725                                               &tracing_max_lat_fops);
1726 }
1727
1728 __init static int latency_fsnotify_init(void)
1729 {
1730         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1731                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1732         if (!fsnotify_wq) {
1733                 pr_err("Unable to allocate tr_max_lat_wq\n");
1734                 return -ENOMEM;
1735         }
1736         return 0;
1737 }
1738
1739 late_initcall_sync(latency_fsnotify_init);
1740
1741 void latency_fsnotify(struct trace_array *tr)
1742 {
1743         if (!fsnotify_wq)
1744                 return;
1745         /*
1746          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1747          * possible that we are called from __schedule() or do_idle(), which
1748          * could cause a deadlock.
1749          */
1750         irq_work_queue(&tr->fsnotify_irqwork);
1751 }
1752
1753 #else /* !LATENCY_FS_NOTIFY */
1754
1755 #define trace_create_maxlat_file(tr, d_tracer)                          \
1756         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1757                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1758
1759 #endif
1760
1761 /*
1762  * Copy the new maximum trace into the separate maximum-trace
1763  * structure. (this way the maximum trace is permanently saved,
1764  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1765  */
1766 static void
1767 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1768 {
1769         struct array_buffer *trace_buf = &tr->array_buffer;
1770         struct array_buffer *max_buf = &tr->max_buffer;
1771         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1772         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1773
1774         max_buf->cpu = cpu;
1775         max_buf->time_start = data->preempt_timestamp;
1776
1777         max_data->saved_latency = tr->max_latency;
1778         max_data->critical_start = data->critical_start;
1779         max_data->critical_end = data->critical_end;
1780
1781         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1782         max_data->pid = tsk->pid;
1783         /*
1784          * If tsk == current, then use current_uid(), as that does not use
1785          * RCU. The irq tracer can be called out of RCU scope.
1786          */
1787         if (tsk == current)
1788                 max_data->uid = current_uid();
1789         else
1790                 max_data->uid = task_uid(tsk);
1791
1792         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1793         max_data->policy = tsk->policy;
1794         max_data->rt_priority = tsk->rt_priority;
1795
1796         /* record this task's comm */
1797         tracing_record_cmdline(tsk);
1798         latency_fsnotify(tr);
1799 }
1800
1801 /**
1802  * update_max_tr - snapshot all trace buffers from @tr into its max buffer
1803  * @tr: trace array whose buffers are snapshotted
1804  * @tsk: the task with the latency
1805  * @cpu: The cpu that initiated the trace.
1806  * @cond_data: User data associated with a conditional snapshot
1807  *
1808  * Flip the buffers between the @tr and the max_tr and record information
1809  * about which task was the cause of this latency.
1810  */
1811 void
1812 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1813               void *cond_data)
1814 {
1815         if (tr->stop_count)
1816                 return;
1817
1818         WARN_ON_ONCE(!irqs_disabled());
1819
1820         if (!tr->allocated_snapshot) {
1821                 /* Only the nop tracer should hit this when disabling */
1822                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1823                 return;
1824         }
1825
1826         arch_spin_lock(&tr->max_lock);
1827
1828         /* Inherit the recordable setting from array_buffer */
1829         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1830                 ring_buffer_record_on(tr->max_buffer.buffer);
1831         else
1832                 ring_buffer_record_off(tr->max_buffer.buffer);
1833
1834 #ifdef CONFIG_TRACER_SNAPSHOT
1835         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1836                 arch_spin_unlock(&tr->max_lock);
1837                 return;
1838         }
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844         arch_spin_unlock(&tr->max_lock);
1845 }
1846
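/*
 * Illustrative sketch (not part of the original file): the way a latency
 * tracer is expected to use update_max_tr() when it measures a new worst
 * case.  'delta' and 'cpu' are placeholders for values the tracer computes
 * itself; the wakeup latency tracer is the closest real user.
 *
 *	if (delta > tr->max_latency && likely(!is_tracing_stopped())) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, cpu, NULL);
 *	}
 */
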
1847 /**
1848  * update_max_tr_single - only copy one trace over, and reset the rest
1849  * @tr: tracer
1850  * @tr: trace array with the CPU buffer to swap
1851  * @cpu: the cpu of the buffer to copy.
1852  *
1853  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1854  */
1855 void
1856 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1857 {
1858         int ret;
1859
1860         if (tr->stop_count)
1861                 return;
1862
1863         WARN_ON_ONCE(!irqs_disabled());
1864         if (!tr->allocated_snapshot) {
1865                 /* Only the nop tracer should hit this when disabling */
1866                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1867                 return;
1868         }
1869
1870         arch_spin_lock(&tr->max_lock);
1871
1872         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1873
1874         if (ret == -EBUSY) {
1875                 /*
1876                  * We failed to swap the buffer due to a commit taking
1877                  * place on this CPU. We fail to record, but we reset
1878                  * the max trace buffer (no one writes directly to it)
1879                  * and flag that it failed.
1880                  */
1881                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1882                         "Failed to swap buffers due to commit in progress\n");
1883         }
1884
1885         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1886
1887         __update_max_tr(tr, tsk, cpu);
1888         arch_spin_unlock(&tr->max_lock);
1889 }
1890
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static, they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftests yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017                  * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* Disable other selftests, since running this tracer would break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
2146
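/*
 * Illustrative sketch (not part of the original file): the minimal shape of
 * a tracer plugin registered through register_tracer().  All example_*
 * names are hypothetical; trace_nop.c contains the simplest real instance
 * of this pattern.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */
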
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus_unlocked(void)
2184 {
2185         struct trace_array *tr;
2186
2187         lockdep_assert_held(&trace_types_lock);
2188
2189         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2190                 if (!tr->clear_trace)
2191                         continue;
2192                 tr->clear_trace = false;
2193                 tracing_reset_online_cpus(&tr->array_buffer);
2194 #ifdef CONFIG_TRACER_MAX_TRACE
2195                 tracing_reset_online_cpus(&tr->max_buffer);
2196 #endif
2197         }
2198 }
2199
2200 void tracing_reset_all_online_cpus(void)
2201 {
2202         mutex_lock(&trace_types_lock);
2203         tracing_reset_all_online_cpus_unlocked();
2204         mutex_unlock(&trace_types_lock);
2205 }
2206
2207 /*
2208  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2209  * is the tgid last observed corresponding to pid=i.
2210  */
2211 static int *tgid_map;
2212
2213 /* The maximum valid index into tgid_map. */
2214 static size_t tgid_map_max;
2215
2216 #define SAVED_CMDLINES_DEFAULT 128
2217 #define NO_CMDLINE_MAP UINT_MAX
2218 /*
2219  * Preemption must be disabled before acquiring trace_cmdline_lock.
2220  * The various trace_arrays' max_lock must be acquired in a context
2221  * where interrupts are disabled.
2222  */
2223 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2224 struct saved_cmdlines_buffer {
2225         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2226         unsigned *map_cmdline_to_pid;
2227         unsigned cmdline_num;
2228         int cmdline_idx;
2229         char *saved_cmdlines;
2230 };
2231 static struct saved_cmdlines_buffer *savedcmd;
2232
2233 static inline char *get_saved_cmdlines(int idx)
2234 {
2235         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2236 }
2237
2238 static inline void set_cmdline(int idx, const char *cmdline)
2239 {
2240         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2241 }
2242
2243 static int allocate_cmdlines_buffer(unsigned int val,
2244                                     struct saved_cmdlines_buffer *s)
2245 {
2246         s->map_cmdline_to_pid = kmalloc_array(val,
2247                                               sizeof(*s->map_cmdline_to_pid),
2248                                               GFP_KERNEL);
2249         if (!s->map_cmdline_to_pid)
2250                 return -ENOMEM;
2251
2252         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2253         if (!s->saved_cmdlines) {
2254                 kfree(s->map_cmdline_to_pid);
2255                 return -ENOMEM;
2256         }
2257
2258         s->cmdline_idx = 0;
2259         s->cmdline_num = val;
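        /*
         * NO_CMDLINE_MAP is UINT_MAX, so memset()ing the maps with that
         * value fills every byte with 0xff and marks each slot as unused.
         */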
2260         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2261                sizeof(s->map_pid_to_cmdline));
2262         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2263                val * sizeof(*s->map_cmdline_to_pid));
2264
2265         return 0;
2266 }
2267
2268 static int trace_create_savedcmd(void)
2269 {
2270         int ret;
2271
2272         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2273         if (!savedcmd)
2274                 return -ENOMEM;
2275
2276         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2277         if (ret < 0) {
2278                 kfree(savedcmd);
2279                 savedcmd = NULL;
2280                 return -ENOMEM;
2281         }
2282
2283         return 0;
2284 }
2285
2286 int is_tracing_stopped(void)
2287 {
2288         return global_trace.stop_count;
2289 }
2290
2291 /**
2292  * tracing_start - quick start of the tracer
2293  *
2294  * If tracing is enabled but was stopped by tracing_stop,
2295  * this will start the tracer back up.
2296  */
2297 void tracing_start(void)
2298 {
2299         struct trace_buffer *buffer;
2300         unsigned long flags;
2301
2302         if (tracing_disabled)
2303                 return;
2304
2305         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2306         if (--global_trace.stop_count) {
2307                 if (global_trace.stop_count < 0) {
2308                         /* Someone screwed up their debugging */
2309                         WARN_ON_ONCE(1);
2310                         global_trace.stop_count = 0;
2311                 }
2312                 goto out;
2313         }
2314
2315         /* Prevent the buffers from switching */
2316         arch_spin_lock(&global_trace.max_lock);
2317
2318         buffer = global_trace.array_buffer.buffer;
2319         if (buffer)
2320                 ring_buffer_record_enable(buffer);
2321
2322 #ifdef CONFIG_TRACER_MAX_TRACE
2323         buffer = global_trace.max_buffer.buffer;
2324         if (buffer)
2325                 ring_buffer_record_enable(buffer);
2326 #endif
2327
2328         arch_spin_unlock(&global_trace.max_lock);
2329
2330  out:
2331         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2332 }
2333
2334 static void tracing_start_tr(struct trace_array *tr)
2335 {
2336         struct trace_buffer *buffer;
2337         unsigned long flags;
2338
2339         if (tracing_disabled)
2340                 return;
2341
2342         /* If global, we need to also start the max tracer */
2343         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2344                 return tracing_start();
2345
2346         raw_spin_lock_irqsave(&tr->start_lock, flags);
2347
2348         if (--tr->stop_count) {
2349                 if (tr->stop_count < 0) {
2350                         /* Someone screwed up their debugging */
2351                         WARN_ON_ONCE(1);
2352                         tr->stop_count = 0;
2353                 }
2354                 goto out;
2355         }
2356
2357         buffer = tr->array_buffer.buffer;
2358         if (buffer)
2359                 ring_buffer_record_enable(buffer);
2360
2361  out:
2362         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2363 }
2364
2365 /**
2366  * tracing_stop - quick stop of the tracer
2367  *
2368  * Lightweight way to stop tracing. Use in conjunction with
2369  * tracing_start.
2370  */
2371 void tracing_stop(void)
2372 {
2373         struct trace_buffer *buffer;
2374         unsigned long flags;
2375
2376         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377         if (global_trace.stop_count++)
2378                 goto out;
2379
2380         /* Prevent the buffers from switching */
2381         arch_spin_lock(&global_trace.max_lock);
2382
2383         buffer = global_trace.array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387 #ifdef CONFIG_TRACER_MAX_TRACE
2388         buffer = global_trace.max_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_disable(buffer);
2391 #endif
2392
2393         arch_spin_unlock(&global_trace.max_lock);
2394
2395  out:
2396         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2397 }
2398
2399 static void tracing_stop_tr(struct trace_array *tr)
2400 {
2401         struct trace_buffer *buffer;
2402         unsigned long flags;
2403
2404         /* If global, we need to also stop the max tracer */
2405         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2406                 return tracing_stop();
2407
2408         raw_spin_lock_irqsave(&tr->start_lock, flags);
2409         if (tr->stop_count++)
2410                 goto out;
2411
2412         buffer = tr->array_buffer.buffer;
2413         if (buffer)
2414                 ring_buffer_record_disable(buffer);
2415
2416  out:
2417         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2418 }
2419
2420 static int trace_save_cmdline(struct task_struct *tsk)
2421 {
2422         unsigned tpid, idx;
2423
2424         /* treat recording of idle task as a success */
2425         if (!tsk->pid)
2426                 return 1;
2427
2428         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2429
2430         /*
2431          * It's not the end of the world if we don't get
2432          * the lock, but we also don't want to spin
2433          * nor do we want to disable interrupts,
2434          * so if we miss here, then better luck next time.
2435          *
2436          * This is called from within the scheduler and wakeup paths, so
2437          * interrupts had better be disabled and the run queue lock held.
2438          */
2439         lockdep_assert_preemption_disabled();
2440         if (!arch_spin_trylock(&trace_cmdline_lock))
2441                 return 0;
2442
2443         idx = savedcmd->map_pid_to_cmdline[tpid];
2444         if (idx == NO_CMDLINE_MAP) {
2445                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2446
2447                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2448                 savedcmd->cmdline_idx = idx;
2449         }
2450
2451         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2452         set_cmdline(idx, tsk->comm);
2453
2454         arch_spin_unlock(&trace_cmdline_lock);
2455
2456         return 1;
2457 }
2458
2459 static void __trace_find_cmdline(int pid, char comm[])
2460 {
2461         unsigned map;
2462         int tpid;
2463
2464         if (!pid) {
2465                 strcpy(comm, "<idle>");
2466                 return;
2467         }
2468
2469         if (WARN_ON_ONCE(pid < 0)) {
2470                 strcpy(comm, "<XXX>");
2471                 return;
2472         }
2473
2474         tpid = pid & (PID_MAX_DEFAULT - 1);
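        /*
         * The cmdline cache is indexed by the low bits of the pid, so two
         * pids can collide on the same slot.  Only trust the cached comm if
         * the reverse map still points back at this pid.
         */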
2475         map = savedcmd->map_pid_to_cmdline[tpid];
2476         if (map != NO_CMDLINE_MAP) {
2477                 tpid = savedcmd->map_cmdline_to_pid[map];
2478                 if (tpid == pid) {
2479                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2480                         return;
2481                 }
2482         }
2483         strcpy(comm, "<...>");
2484 }
2485
2486 void trace_find_cmdline(int pid, char comm[])
2487 {
2488         preempt_disable();
2489         arch_spin_lock(&trace_cmdline_lock);
2490
2491         __trace_find_cmdline(pid, comm);
2492
2493         arch_spin_unlock(&trace_cmdline_lock);
2494         preempt_enable();
2495 }
2496
2497 static int *trace_find_tgid_ptr(int pid)
2498 {
2499         /*
2500          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2501          * if we observe a non-NULL tgid_map then we also observe the correct
2502          * tgid_map_max.
2503          */
2504         int *map = smp_load_acquire(&tgid_map);
2505
2506         if (unlikely(!map || pid > tgid_map_max))
2507                 return NULL;
2508
2509         return &map[pid];
2510 }
2511
2512 int trace_find_tgid(int pid)
2513 {
2514         int *ptr = trace_find_tgid_ptr(pid);
2515
2516         return ptr ? *ptr : 0;
2517 }
2518
2519 static int trace_save_tgid(struct task_struct *tsk)
2520 {
2521         int *ptr;
2522
2523         /* treat recording of idle task as a success */
2524         if (!tsk->pid)
2525                 return 1;
2526
2527         ptr = trace_find_tgid_ptr(tsk->pid);
2528         if (!ptr)
2529                 return 0;
2530
2531         *ptr = tsk->tgid;
2532         return 1;
2533 }
2534
2535 static bool tracing_record_taskinfo_skip(int flags)
2536 {
2537         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2538                 return true;
2539         if (!__this_cpu_read(trace_taskinfo_save))
2540                 return true;
2541         return false;
2542 }
2543
2544 /**
2545  * tracing_record_taskinfo - record the task info of a task
2546  *
2547  * @task:  task to record
2548  * @flags: TRACE_RECORD_CMDLINE for recording comm
2549  *         TRACE_RECORD_TGID for recording tgid
2550  */
2551 void tracing_record_taskinfo(struct task_struct *task, int flags)
2552 {
2553         bool done;
2554
2555         if (tracing_record_taskinfo_skip(flags))
2556                 return;
2557
2558         /*
2559          * Record as much task information as possible. If some fail, continue
2560          * to try to record the others.
2561          */
2562         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2563         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2564
2565         /* If recording any information failed, leave the per-CPU flag set and retry soon. */
2566         if (!done)
2567                 return;
2568
2569         __this_cpu_write(trace_taskinfo_save, false);
2570 }
2571
2572 /**
2573  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2574  *
2575  * @prev: previous task during sched_switch
2576  * @next: next task during sched_switch
2577  * @flags: TRACE_RECORD_CMDLINE for recording comm
2578  *         TRACE_RECORD_TGID for recording tgid
2579  */
2580 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2581                                           struct task_struct *next, int flags)
2582 {
2583         bool done;
2584
2585         if (tracing_record_taskinfo_skip(flags))
2586                 return;
2587
2588         /*
2589          * Record as much task information as possible. If some fail, continue
2590          * to try to record the others.
2591          */
2592         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2593         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2594         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2595         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2596
2597         /* If recording any information failed, leave the per-CPU flag set and retry soon. */
2598         if (!done)
2599                 return;
2600
2601         __this_cpu_write(trace_taskinfo_save, false);
2602 }
2603
2604 /* Helpers to record a specific task information */
2605 void tracing_record_cmdline(struct task_struct *task)
2606 {
2607         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2608 }
2609
2610 void tracing_record_tgid(struct task_struct *task)
2611 {
2612         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2613 }
2614
2615 /*
2616  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2617  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2618  * simplifies those functions and keeps them in sync.
2619  */
2620 enum print_line_t trace_handle_return(struct trace_seq *s)
2621 {
2622         return trace_seq_has_overflowed(s) ?
2623                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2624 }
2625 EXPORT_SYMBOL_GPL(trace_handle_return);
2626
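/*
 * Illustrative sketch (not part of the original file): a trace_event output
 * callback using trace_handle_return() to report trace_seq overflow.  The
 * example_output() name and the printed text are hypothetical.
 *
 *	static enum print_line_t example_output(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */
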
2627 static unsigned short migration_disable_value(void)
2628 {
2629 #if defined(CONFIG_SMP)
2630         return current->migration_disabled;
2631 #else
2632         return 0;
2633 #endif
2634 }
2635
2636 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2637 {
2638         unsigned int trace_flags = irqs_status;
2639         unsigned int pc;
2640
2641         pc = preempt_count();
2642
2643         if (pc & NMI_MASK)
2644                 trace_flags |= TRACE_FLAG_NMI;
2645         if (pc & HARDIRQ_MASK)
2646                 trace_flags |= TRACE_FLAG_HARDIRQ;
2647         if (in_serving_softirq())
2648                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2649         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2650                 trace_flags |= TRACE_FLAG_BH_OFF;
2651
2652         if (tif_need_resched())
2653                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2654         if (test_preempt_need_resched())
2655                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
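        /*
         * Pack everything into one word: bits 0-3 hold the preemption depth
         * (clamped to 15), bits 4-7 the migration-disable depth (clamped to
         * 15), and bits 16 and up the TRACE_FLAG_* bits computed above.
         */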
2656         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2657                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2658 }
2659
2660 struct ring_buffer_event *
2661 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2662                           int type,
2663                           unsigned long len,
2664                           unsigned int trace_ctx)
2665 {
2666         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2667 }
2668
2669 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2670 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2671 static int trace_buffered_event_ref;
2672
2673 /**
2674  * trace_buffered_event_enable - enable buffering events
2675  *
2676  * When events are being filtered, it is quicker to use a temporary
2677  * buffer to write the event data into if there's a likely chance
2678  * that it will not be committed. The discard of the ring buffer
2679  * that it will not be committed. Discarding an event from the ring
2680  * buffer is slower than committing one, and much slower than copying
2681  * into a temporary buffer and committing only on a match.
2682  *
2683  * When an event is to be filtered, allocate per-CPU buffers to
2684  * write the event data into; if the event is filtered and discarded,
2685  * it is simply dropped, otherwise the entire data is committed
2686  * in one shot.
2687 void trace_buffered_event_enable(void)
2688 {
2689         struct ring_buffer_event *event;
2690         struct page *page;
2691         int cpu;
2692
2693         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2694
2695         if (trace_buffered_event_ref++)
2696                 return;
2697
2698         for_each_tracing_cpu(cpu) {
2699                 page = alloc_pages_node(cpu_to_node(cpu),
2700                                         GFP_KERNEL | __GFP_NORETRY, 0);
2701                 if (!page)
2702                         goto failed;
2703
2704                 event = page_address(page);
2705                 memset(event, 0, sizeof(*event));
2706
2707                 per_cpu(trace_buffered_event, cpu) = event;
2708
2709                 preempt_disable();
2710                 if (cpu == smp_processor_id() &&
2711                     __this_cpu_read(trace_buffered_event) !=
2712                     per_cpu(trace_buffered_event, cpu))
2713                         WARN_ON_ONCE(1);
2714                 preempt_enable();
2715         }
2716
2717         return;
2718  failed:
2719         trace_buffered_event_disable();
2720 }
2721
2722 static void enable_trace_buffered_event(void *data)
2723 {
2724         /* Probably not needed, but do it anyway */
2725         smp_rmb();
2726         this_cpu_dec(trace_buffered_event_cnt);
2727 }
2728
2729 static void disable_trace_buffered_event(void *data)
2730 {
2731         this_cpu_inc(trace_buffered_event_cnt);
2732 }
2733
2734 /**
2735  * trace_buffered_event_disable - disable buffering events
2736  *
2737  * When a filter is removed, it is faster to not use the buffered
2738  * events, and to commit directly into the ring buffer. Free up
2739  * the temp buffers when there are no more users. This requires
2740  * special synchronization with current events.
2741  */
2742 void trace_buffered_event_disable(void)
2743 {
2744         int cpu;
2745
2746         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2747
2748         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2749                 return;
2750
2751         if (--trace_buffered_event_ref)
2752                 return;
2753
2754         preempt_disable();
2755         /* For each CPU, mark the temp buffer as busy so new events bypass it. */
2756         smp_call_function_many(tracing_buffer_mask,
2757                                disable_trace_buffered_event, NULL, 1);
2758         preempt_enable();
2759
2760         /* Wait for all current users to finish */
2761         synchronize_rcu();
2762
2763         for_each_tracing_cpu(cpu) {
2764                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2765                 per_cpu(trace_buffered_event, cpu) = NULL;
2766         }
2767         /*
2768          * Make sure trace_buffered_event is NULL before clearing
2769          * trace_buffered_event_cnt.
2770          */
2771         smp_wmb();
2772
2773         preempt_disable();
2774         /* Drop the per-CPU disable count now that the buffers are freed */
2775         smp_call_function_many(tracing_buffer_mask,
2776                                enable_trace_buffered_event, NULL, 1);
2777         preempt_enable();
2778 }
2779
2780 static struct trace_buffer *temp_buffer;
2781
2782 struct ring_buffer_event *
2783 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2784                           struct trace_event_file *trace_file,
2785                           int type, unsigned long len,
2786                           unsigned int trace_ctx)
2787 {
2788         struct ring_buffer_event *entry;
2789         struct trace_array *tr = trace_file->tr;
2790         int val;
2791
2792         *current_rb = tr->array_buffer.buffer;
2793
2794         if (!tr->no_filter_buffering_ref &&
2795             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2796                 preempt_disable_notrace();
2797                 /*
2798                  * Filtering is on, so try to use the per cpu buffer first.
2799                  * This buffer will simulate a ring_buffer_event,
2800                  * where the type_len is zero and the array[0] will
2801                  * hold the full length.
2802                  * (see include/linux/ring_buffer.h for details on
2803                  *  how the ring_buffer_event is structured).
2804                  *
2805                  * Using a temp buffer during filtering and copying it
2806                  * on a matched filter is quicker than writing directly
2807                  * into the ring buffer and then discarding it when
2808                  * it doesn't match. That is because the discard
2809                  * requires several atomic operations to get right.
2810                  * Copying on a match and doing nothing on a failed match
2811                  * is still quicker than writing directly (no copy on a match)
2812                  * but then having to discard from the ring buffer on a failed match.
2813                  */
2814                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2815                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2816
2817                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2818
2819                         /*
2820                          * Preemption is disabled, but interrupts and NMIs
2821                          * can still come in now. If that happens after
2822                          * the above increment, then it will have to go
2823                          * back to the old method of allocating the event
2824                          * on the ring buffer, and if the filter fails, it
2825                          * will have to call ring_buffer_discard_commit()
2826                          * to remove it.
2827                          *
2828                          * Need to also check the unlikely case that the
2829                          * length is bigger than the temp buffer size.
2830                          * If that happens, then the reserve is pretty much
2831                          * guaranteed to fail, as the ring buffer currently
2832                          * only allows events less than a page. But that may
2833                          * change in the future, so let the ring buffer reserve
2834                          * handle the failure in that case.
2835                          */
2836                         if (val == 1 && likely(len <= max_len)) {
2837                                 trace_event_setup(entry, type, trace_ctx);
2838                                 entry->array[0] = len;
2839                                 /* Return with preemption disabled */
2840                                 return entry;
2841                         }
2842                         this_cpu_dec(trace_buffered_event_cnt);
2843                 }
2844                 /* __trace_buffer_lock_reserve() disables preemption */
2845                 preempt_enable_notrace();
2846         }
2847
2848         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2849                                             trace_ctx);
2850         /*
2851          * If tracing is off, but we have triggers enabled,
2852          * we still need to look at the event data. Use the temp_buffer
2853          * to store the trace event for the trigger to use. It's recursion
2854          * safe and will not be recorded anywhere.
2855          */
2856         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2857                 *current_rb = temp_buffer;
2858                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2859                                                     trace_ctx);
2860         }
2861         return entry;
2862 }
2863 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2864
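/*
 * Illustrative sketch (not part of the original file, and only an
 * approximation of the generated event code): the reserve/commit pattern
 * that static trace events follow.  trace_event_buffer_reserve() is the
 * wrapper that eventually lands in the function above; example_entry and
 * its 'value' field are hypothetical.
 *
 *	struct trace_event_buffer fbuffer;
 *	struct example_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->value = value;
 *	trace_event_buffer_commit(&fbuffer);
 */
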
2865 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2866 static DEFINE_MUTEX(tracepoint_printk_mutex);
2867
2868 static void output_printk(struct trace_event_buffer *fbuffer)
2869 {
2870         struct trace_event_call *event_call;
2871         struct trace_event_file *file;
2872         struct trace_event *event;
2873         unsigned long flags;
2874         struct trace_iterator *iter = tracepoint_print_iter;
2875
2876         /* We should never get here if iter is NULL */
2877         if (WARN_ON_ONCE(!iter))
2878                 return;
2879
2880         event_call = fbuffer->trace_file->event_call;
2881         if (!event_call || !event_call->event.funcs ||
2882             !event_call->event.funcs->trace)
2883                 return;
2884
2885         file = fbuffer->trace_file;
2886         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2887             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2888              !filter_match_preds(file->filter, fbuffer->entry)))
2889                 return;
2890
2891         event = &fbuffer->trace_file->event_call->event;
2892
2893         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2894         trace_seq_init(&iter->seq);
2895         iter->ent = fbuffer->entry;
2896         event_call->event.funcs->trace(iter, 0, event);
2897         trace_seq_putc(&iter->seq, 0);
2898         printk("%s", iter->seq.buffer);
2899
2900         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2901 }
2902
2903 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2904                              void *buffer, size_t *lenp,
2905                              loff_t *ppos)
2906 {
2907         int save_tracepoint_printk;
2908         int ret;
2909
2910         mutex_lock(&tracepoint_printk_mutex);
2911         save_tracepoint_printk = tracepoint_printk;
2912
2913         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2914
2915         /*
2916          * This will force exiting early, as tracepoint_printk
2917          * is always zero when tracepoint_print_iter is not allocated.
2918          */
2919         if (!tracepoint_print_iter)
2920                 tracepoint_printk = 0;
2921
2922         if (save_tracepoint_printk == tracepoint_printk)
2923                 goto out;
2924
2925         if (tracepoint_printk)
2926                 static_key_enable(&tracepoint_printk_key.key);
2927         else
2928                 static_key_disable(&tracepoint_printk_key.key);
2929
2930  out:
2931         mutex_unlock(&tracepoint_printk_mutex);
2932
2933         return ret;
2934 }
2935
2936 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2937 {
2938         enum event_trigger_type tt = ETT_NONE;
2939         struct trace_event_file *file = fbuffer->trace_file;
2940
2941         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2942                         fbuffer->entry, &tt))
2943                 goto discard;
2944
2945         if (static_key_false(&tracepoint_printk_key.key))
2946                 output_printk(fbuffer);
2947
2948         if (static_branch_unlikely(&trace_event_exports_enabled))
2949                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2950
2951         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2952                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2953
2954 discard:
2955         if (tt)
2956                 event_triggers_post_call(file, tt);
2957
2958 }
2959 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2960
2961 /*
2962  * Skip 3:
2963  *
2964  *   trace_buffer_unlock_commit_regs()
2965  *   trace_event_buffer_commit()
2966  *   trace_event_raw_event_xxx()
2967  */
2968 # define STACK_SKIP 3
2969
2970 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2971                                      struct trace_buffer *buffer,
2972                                      struct ring_buffer_event *event,
2973                                      unsigned int trace_ctx,
2974                                      struct pt_regs *regs)
2975 {
2976         __buffer_unlock_commit(buffer, event);
2977
2978         /*
2979          * If regs is not set, then skip the necessary functions.
2980          * Note, we can still get here via blktrace, wakeup tracer
2981          * and mmiotrace, but that's ok if they lose a function or
2982          * two. They are not that meaningful.
2983          */
2984         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2985         ftrace_trace_userstack(tr, buffer, trace_ctx);
2986 }
2987
2988 /*
2989  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2990  */
2991 void
2992 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2993                                    struct ring_buffer_event *event)
2994 {
2995         __buffer_unlock_commit(buffer, event);
2996 }
2997
2998 void
2999 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3000                parent_ip, unsigned int trace_ctx)
3001 {
3002         struct trace_event_call *call = &event_function;
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004         struct ring_buffer_event *event;
3005         struct ftrace_entry *entry;
3006
3007         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3008                                             trace_ctx);
3009         if (!event)
3010                 return;
3011         entry   = ring_buffer_event_data(event);
3012         entry->ip                       = ip;
3013         entry->parent_ip                = parent_ip;
3014
3015         if (!call_filter_check_discard(call, entry, buffer, event)) {
3016                 if (static_branch_unlikely(&trace_function_exports_enabled))
3017                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3018                 __buffer_unlock_commit(buffer, event);
3019         }
3020 }
3021
3022 #ifdef CONFIG_STACKTRACE
3023
3024 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3025 #define FTRACE_KSTACK_NESTING   4
3026
3027 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3028
3029 struct ftrace_stack {
3030         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3031 };
3032
3033
3034 struct ftrace_stacks {
3035         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3036 };
3037
3038 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3039 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3040
3041 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3042                                  unsigned int trace_ctx,
3043                                  int skip, struct pt_regs *regs)
3044 {
3045         struct trace_event_call *call = &event_kernel_stack;
3046         struct ring_buffer_event *event;
3047         unsigned int size, nr_entries;
3048         struct ftrace_stack *fstack;
3049         struct stack_entry *entry;
3050         int stackidx;
3051
3052         /*
3053          * Add one, for this function and the call to stack_trace_save().
3054          * If regs is set, then these functions will not be in the way.
3055          */
3056 #ifndef CONFIG_UNWINDER_ORC
3057         if (!regs)
3058                 skip++;
3059 #endif
3060
3061         preempt_disable_notrace();
3062
3063         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3064
3065         /* This should never happen. If it does, yell once and skip */
3066         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3067                 goto out;
3068
3069         /*
3070          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3071          * interrupt will either see the value pre increment or post
3072          * increment. If the interrupt happens pre increment it will have
3073          * restored the counter when it returns.  We just need a barrier to
3074          * keep gcc from moving things around.
3075          */
3076         barrier();
3077
3078         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3079         size = ARRAY_SIZE(fstack->calls);
3080
3081         if (regs) {
3082                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3083                                                    size, skip);
3084         } else {
3085                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3086         }
3087
3088         size = nr_entries * sizeof(unsigned long);
3089         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3090                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3091                                     trace_ctx);
3092         if (!event)
3093                 goto out;
3094         entry = ring_buffer_event_data(event);
3095
3096         memcpy(&entry->caller, fstack->calls, size);
3097         entry->size = nr_entries;
3098
3099         if (!call_filter_check_discard(call, entry, buffer, event))
3100                 __buffer_unlock_commit(buffer, event);
3101
3102  out:
3103         /* Again, don't let gcc optimize things here */
3104         barrier();
3105         __this_cpu_dec(ftrace_stack_reserve);
3106         preempt_enable_notrace();
3107
3108 }
3109
3110 static inline void ftrace_trace_stack(struct trace_array *tr,
3111                                       struct trace_buffer *buffer,
3112                                       unsigned int trace_ctx,
3113                                       int skip, struct pt_regs *regs)
3114 {
3115         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3116                 return;
3117
3118         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3119 }
3120
3121 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3122                    int skip)
3123 {
3124         struct trace_buffer *buffer = tr->array_buffer.buffer;
3125
3126         if (rcu_is_watching()) {
3127                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3128                 return;
3129         }
3130
3131         /*
3132          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3133          * but if the above rcu_is_watching() failed, then the NMI
3134          * triggered someplace critical, and ct_irq_enter() should
3135          * not be called from NMI.
3136          */
3137         if (unlikely(in_nmi()))
3138                 return;
3139
3140         ct_irq_enter_irqson();
3141         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3142         ct_irq_exit_irqson();
3143 }
3144
3145 /**
3146  * trace_dump_stack - record a stack back trace in the trace buffer
3147  * @skip: Number of functions to skip (helper handlers)
3148  */
3149 void trace_dump_stack(int skip)
3150 {
3151         if (tracing_disabled || tracing_selftest_running)
3152                 return;
3153
3154 #ifndef CONFIG_UNWINDER_ORC
3155         /* Skip 1 to skip this function. */
3156         skip++;
3157 #endif
3158         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3159                              tracing_gen_ctx(), skip, NULL);
3160 }
3161 EXPORT_SYMBOL_GPL(trace_dump_stack);
3162
3163 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3164 static DEFINE_PER_CPU(int, user_stack_count);
3165
3166 static void
3167 ftrace_trace_userstack(struct trace_array *tr,
3168                        struct trace_buffer *buffer, unsigned int trace_ctx)
3169 {
3170         struct trace_event_call *call = &event_user_stack;
3171         struct ring_buffer_event *event;
3172         struct userstack_entry *entry;
3173
3174         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3175                 return;
3176
3177         /*
3178          * NMIs cannot handle page faults, even with fixups.
3179          * Saving the user stack can (and often does) fault.
3180          */
3181         if (unlikely(in_nmi()))
3182                 return;
3183
3184         /*
3185          * prevent recursion, since the user stack tracing may
3186          * trigger other kernel events.
3187          */
3188         preempt_disable();
3189         if (__this_cpu_read(user_stack_count))
3190                 goto out;
3191
3192         __this_cpu_inc(user_stack_count);
3193
3194         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3195                                             sizeof(*entry), trace_ctx);
3196         if (!event)
3197                 goto out_drop_count;
3198         entry   = ring_buffer_event_data(event);
3199
3200         entry->tgid             = current->tgid;
3201         memset(&entry->caller, 0, sizeof(entry->caller));
3202
3203         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3204         if (!call_filter_check_discard(call, entry, buffer, event))
3205                 __buffer_unlock_commit(buffer, event);
3206
3207  out_drop_count:
3208         __this_cpu_dec(user_stack_count);
3209  out:
3210         preempt_enable();
3211 }
3212 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3213 static void ftrace_trace_userstack(struct trace_array *tr,
3214                                    struct trace_buffer *buffer,
3215                                    unsigned int trace_ctx)
3216 {
3217 }
3218 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3219
3220 #endif /* CONFIG_STACKTRACE */
3221
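/*
 * A 64-bit timestamp delta does not fit in the packed func_repeats_entry,
 * so it is stored as two 32-bit halves; the output side reassembles it as
 * ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */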
3222 static inline void
3223 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3224                           unsigned long long delta)
3225 {
3226         entry->bottom_delta_ts = delta & U32_MAX;
3227         entry->top_delta_ts = (delta >> 32);
3228 }
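
/*
 * A small worked example of the split above (illustrative only): a delta
 * of 0x0000000500000001 is stored as bottom_delta_ts = 0x00000001 and
 * top_delta_ts = 0x00000005, and a reader reconstructs it with:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */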
3229
3230 void trace_last_func_repeats(struct trace_array *tr,
3231                              struct trace_func_repeats *last_info,
3232                              unsigned int trace_ctx)
3233 {
3234         struct trace_buffer *buffer = tr->array_buffer.buffer;
3235         struct func_repeats_entry *entry;
3236         struct ring_buffer_event *event;
3237         u64 delta;
3238
3239         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3240                                             sizeof(*entry), trace_ctx);
3241         if (!event)
3242                 return;
3243
3244         delta = ring_buffer_event_time_stamp(buffer, event) -
3245                 last_info->ts_last_call;
3246
3247         entry = ring_buffer_event_data(event);
3248         entry->ip = last_info->ip;
3249         entry->parent_ip = last_info->parent_ip;
3250         entry->count = last_info->count;
3251         func_repeats_set_delta_ts(entry, delta);
3252
3253         __buffer_unlock_commit(buffer, event);
3254 }
3255
3256 /* created for use with alloc_percpu */
3257 struct trace_buffer_struct {
3258         int nesting;
3259         char buffer[4][TRACE_BUF_SIZE];
3260 };
3261
3262 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3263
3264 /*
3265  * This allows for lockless recording.  If we're nested too deeply, then
3266  * this returns NULL.
3267  */
3268 static char *get_trace_buf(void)
3269 {
3270         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3271
3272         if (!trace_percpu_buffer || buffer->nesting >= 4)
3273                 return NULL;
3274
3275         buffer->nesting++;
3276
3277         /* Interrupts must see nesting incremented before we use the buffer */
3278         barrier();
3279         return &buffer->buffer[buffer->nesting - 1][0];
3280 }
3281
3282 static void put_trace_buf(void)
3283 {
3284         /* Don't let the decrement of nesting leak before this */
3285         barrier();
3286         this_cpu_dec(trace_percpu_buffer->nesting);
3287 }
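
/*
 * Typical usage pattern of the two helpers above, as used by
 * trace_vbprintk()/__trace_array_vprintk() below (illustrative sketch):
 *
 *	char *tbuf;
 *
 *	preempt_disable_notrace();
 *	tbuf = get_trace_buf();
 *	if (tbuf) {
 *		// format at most TRACE_BUF_SIZE bytes into tbuf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The nesting counter lets up to four contexts on a CPU (e.g. task,
 * softirq, irq, NMI) each use their own buffer without locking.
 */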
3288
3289 static int alloc_percpu_trace_buffer(void)
3290 {
3291         struct trace_buffer_struct __percpu *buffers;
3292
3293         if (trace_percpu_buffer)
3294                 return 0;
3295
3296         buffers = alloc_percpu(struct trace_buffer_struct);
3297         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3298                 return -ENOMEM;
3299
3300         trace_percpu_buffer = buffers;
3301         return 0;
3302 }
3303
3304 static int buffers_allocated;
3305
3306 void trace_printk_init_buffers(void)
3307 {
3308         if (buffers_allocated)
3309                 return;
3310
3311         if (alloc_percpu_trace_buffer())
3312                 return;
3313
3314         /* trace_printk() is for debug use only. Don't use it in production. */
3315
3316         pr_warn("\n");
3317         pr_warn("**********************************************************\n");
3318         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3319         pr_warn("**                                                      **\n");
3320         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3321         pr_warn("**                                                      **\n");
3322         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3323         pr_warn("** unsafe for production use.                           **\n");
3324         pr_warn("**                                                      **\n");
3325         pr_warn("** If you see this message and you are not debugging    **\n");
3326         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3327         pr_warn("**                                                      **\n");
3328         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3329         pr_warn("**********************************************************\n");
3330
3331         /* Expand the buffers to the set size */
3332         tracing_update_buffers();
3333
3334         buffers_allocated = 1;
3335
3336         /*
3337          * trace_printk_init_buffers() can be called by modules.
3338          * If that happens, then we need to start cmdline recording
3339          * directly here. If the global_trace.buffer is already
3340          * allocated here, then this was called by module code.
3341          */
3342         if (global_trace.array_buffer.buffer)
3343                 tracing_start_cmdline_record();
3344 }
3345 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3346
3347 void trace_printk_start_comm(void)
3348 {
3349         /* Start tracing comms if trace printk is set */
3350         if (!buffers_allocated)
3351                 return;
3352         tracing_start_cmdline_record();
3353 }
3354
3355 static void trace_printk_start_stop_comm(int enabled)
3356 {
3357         if (!buffers_allocated)
3358                 return;
3359
3360         if (enabled)
3361                 tracing_start_cmdline_record();
3362         else
3363                 tracing_stop_cmdline_record();
3364 }
3365
3366 /**
3367  * trace_vbprintk - write binary msg to tracing buffer
3368  * @ip:    The address of the caller
3369  * @fmt:   The string format to write to the buffer
3370  * @args:  Arguments for @fmt
3371  */
3372 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3373 {
3374         struct trace_event_call *call = &event_bprint;
3375         struct ring_buffer_event *event;
3376         struct trace_buffer *buffer;
3377         struct trace_array *tr = &global_trace;
3378         struct bprint_entry *entry;
3379         unsigned int trace_ctx;
3380         char *tbuffer;
3381         int len = 0, size;
3382
3383         if (unlikely(tracing_selftest_running || tracing_disabled))
3384                 return 0;
3385
3386         /* Don't pollute graph traces with trace_vprintk internals */
3387         pause_graph_tracing();
3388
3389         trace_ctx = tracing_gen_ctx();
3390         preempt_disable_notrace();
3391
3392         tbuffer = get_trace_buf();
3393         if (!tbuffer) {
3394                 len = 0;
3395                 goto out_nobuffer;
3396         }
3397
3398         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3399
3400         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3401                 goto out_put;
3402
3403         size = sizeof(*entry) + sizeof(u32) * len;
3404         buffer = tr->array_buffer.buffer;
3405         ring_buffer_nest_start(buffer);
3406         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3407                                             trace_ctx);
3408         if (!event)
3409                 goto out;
3410         entry = ring_buffer_event_data(event);
3411         entry->ip                       = ip;
3412         entry->fmt                      = fmt;
3413
3414         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3415         if (!call_filter_check_discard(call, entry, buffer, event)) {
3416                 __buffer_unlock_commit(buffer, event);
3417                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3418         }
3419
3420 out:
3421         ring_buffer_nest_end(buffer);
3422 out_put:
3423         put_trace_buf();
3424
3425 out_nobuffer:
3426         preempt_enable_notrace();
3427         unpause_graph_tracing();
3428
3429         return len;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_vbprintk);
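
/*
 * Illustrative note: a debugging call such as
 *
 *	trace_printk("irq %d took %llu ns\n", irq, delta);
 *
 * with a constant format string is typically routed through the
 * trace_printk() macro to trace_vbprintk() above, which records only the
 * format pointer plus the binary arguments rather than the formatted text.
 */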
3432
3433 __printf(3, 0)
3434 static int
3435 __trace_array_vprintk(struct trace_buffer *buffer,
3436                       unsigned long ip, const char *fmt, va_list args)
3437 {
3438         struct trace_event_call *call = &event_print;
3439         struct ring_buffer_event *event;
3440         int len = 0, size;
3441         struct print_entry *entry;
3442         unsigned int trace_ctx;
3443         char *tbuffer;
3444
3445         if (tracing_disabled || tracing_selftest_running)
3446                 return 0;
3447
3448         /* Don't pollute graph traces with trace_vprintk internals */
3449         pause_graph_tracing();
3450
3451         trace_ctx = tracing_gen_ctx();
3452         preempt_disable_notrace();
3453
3454
3455         tbuffer = get_trace_buf();
3456         if (!tbuffer) {
3457                 len = 0;
3458                 goto out_nobuffer;
3459         }
3460
3461         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3462
3463         size = sizeof(*entry) + len + 1;
3464         ring_buffer_nest_start(buffer);
3465         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3466                                             trace_ctx);
3467         if (!event)
3468                 goto out;
3469         entry = ring_buffer_event_data(event);
3470         entry->ip = ip;
3471
3472         memcpy(&entry->buf, tbuffer, len + 1);
3473         if (!call_filter_check_discard(call, entry, buffer, event)) {
3474                 __buffer_unlock_commit(buffer, event);
3475                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3476         }
3477
3478 out:
3479         ring_buffer_nest_end(buffer);
3480         put_trace_buf();
3481
3482 out_nobuffer:
3483         preempt_enable_notrace();
3484         unpause_graph_tracing();
3485
3486         return len;
3487 }
3488
3489 __printf(3, 0)
3490 int trace_array_vprintk(struct trace_array *tr,
3491                         unsigned long ip, const char *fmt, va_list args)
3492 {
3493         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3494 }
3495
3496 /**
3497  * trace_array_printk - Print a message to a specific instance
3498  * @tr: The instance trace_array descriptor
3499  * @ip: The instruction pointer that this is called from.
3500  * @fmt: The format to print (printf format)
3501  *
3502  * If a subsystem sets up its own instance, they have the right to
3503  * printk strings into their tracing instance buffer using this
3504  * function. Note, this function will not write into the top level
3505  * buffer (use trace_printk() for that), as writing into the top level
3506  * buffer should only have events that can be individually disabled.
3507  * trace_printk() is only used for debugging a kernel, and should not
3508  * be ever incorporated in normal use.
3509  *
3510  * trace_array_printk() can be used, as it will not add noise to the
3511  * top level tracing buffer.
3512  *
3513  * Note, trace_array_init_printk() must be called on @tr before this
3514  * can be used.
3515  */
3516 __printf(3, 0)
3517 int trace_array_printk(struct trace_array *tr,
3518                        unsigned long ip, const char *fmt, ...)
3519 {
3520         int ret;
3521         va_list ap;
3522
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return 0;
3529
3530         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3531                 return 0;
3532
3533         va_start(ap, fmt);
3534         ret = trace_array_vprintk(tr, ip, fmt, ap);
3535         va_end(ap);
3536         return ret;
3537 }
3538 EXPORT_SYMBOL_GPL(trace_array_printk);
3539
3540 /**
3541  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3542  * @tr: The trace array to initialize the buffers for
3543  *
3544  * As trace_array_printk() only writes into instances, they are OK to
3545  * have in the kernel (unlike trace_printk()). This needs to be called
3546  * before trace_array_printk() can be used on a trace_array.
3547  */
3548 int trace_array_init_printk(struct trace_array *tr)
3549 {
3550         if (!tr)
3551                 return -ENOENT;
3552
3553         /* This is only allowed for created instances */
3554         if (tr == &global_trace)
3555                 return -EINVAL;
3556
3557         return alloc_percpu_trace_buffer();
3558 }
3559 EXPORT_SYMBOL_GPL(trace_array_init_printk);
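
/*
 * Illustrative sketch of the expected calling sequence for a subsystem
 * instance (error handling elided; "myinst" is a made-up instance name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("myinst");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */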
3560
3561 __printf(3, 4)
3562 int trace_array_printk_buf(struct trace_buffer *buffer,
3563                            unsigned long ip, const char *fmt, ...)
3564 {
3565         int ret;
3566         va_list ap;
3567
3568         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3569                 return 0;
3570
3571         va_start(ap, fmt);
3572         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3573         va_end(ap);
3574         return ret;
3575 }
3576
3577 __printf(2, 0)
3578 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3579 {
3580         return trace_array_vprintk(&global_trace, ip, fmt, args);
3581 }
3582 EXPORT_SYMBOL_GPL(trace_vprintk);
3583
3584 static void trace_iterator_increment(struct trace_iterator *iter)
3585 {
3586         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3587
3588         iter->idx++;
3589         if (buf_iter)
3590                 ring_buffer_iter_advance(buf_iter);
3591 }
3592
3593 static struct trace_entry *
3594 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3595                 unsigned long *lost_events)
3596 {
3597         struct ring_buffer_event *event;
3598         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3599
3600         if (buf_iter) {
3601                 event = ring_buffer_iter_peek(buf_iter, ts);
3602                 if (lost_events)
3603                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3604                                 (unsigned long)-1 : 0;
3605         } else {
3606                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3607                                          lost_events);
3608         }
3609
3610         if (event) {
3611                 iter->ent_size = ring_buffer_event_length(event);
3612                 return ring_buffer_event_data(event);
3613         }
3614         iter->ent_size = 0;
3615         return NULL;
3616 }
3617
3618 static struct trace_entry *
3619 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3620                   unsigned long *missing_events, u64 *ent_ts)
3621 {
3622         struct trace_buffer *buffer = iter->array_buffer->buffer;
3623         struct trace_entry *ent, *next = NULL;
3624         unsigned long lost_events = 0, next_lost = 0;
3625         int cpu_file = iter->cpu_file;
3626         u64 next_ts = 0, ts;
3627         int next_cpu = -1;
3628         int next_size = 0;
3629         int cpu;
3630
3631         /*
3632          * If we are in a per_cpu trace file, don't bother iterating over
3633          * all CPUs; just peek at that CPU directly.
3634          */
3635         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3636                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3637                         return NULL;
3638                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3639                 if (ent_cpu)
3640                         *ent_cpu = cpu_file;
3641
3642                 return ent;
3643         }
3644
3645         for_each_tracing_cpu(cpu) {
3646
3647                 if (ring_buffer_empty_cpu(buffer, cpu))
3648                         continue;
3649
3650                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3651
3652                 /*
3653                  * Pick the entry with the smallest timestamp:
3654                  */
3655                 if (ent && (!next || ts < next_ts)) {
3656                         next = ent;
3657                         next_cpu = cpu;
3658                         next_ts = ts;
3659                         next_lost = lost_events;
3660                         next_size = iter->ent_size;
3661                 }
3662         }
3663
3664         iter->ent_size = next_size;
3665
3666         if (ent_cpu)
3667                 *ent_cpu = next_cpu;
3668
3669         if (ent_ts)
3670                 *ent_ts = next_ts;
3671
3672         if (missing_events)
3673                 *missing_events = next_lost;
3674
3675         return next;
3676 }
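
/*
 * In the RING_BUFFER_ALL_CPUS case the loop above is effectively a k-way
 * merge: each per-CPU buffer is already time ordered, so repeatedly
 * picking the peeked entry with the smallest timestamp yields a globally
 * ordered stream.
 */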
3677
3678 #define STATIC_FMT_BUF_SIZE     128
3679 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3680
3681 static char *trace_iter_expand_format(struct trace_iterator *iter)
3682 {
3683         char *tmp;
3684
3685         /*
3686          * iter->tr is NULL when used with tp_printk, in which case
3687          * this can be called where it is not safe to call krealloc().
3688          */
3689         if (!iter->tr || iter->fmt == static_fmt_buf)
3690                 return NULL;
3691
3692         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3693                        GFP_KERNEL);
3694         if (tmp) {
3695                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3696                 iter->fmt = tmp;
3697         }
3698
3699         return tmp;
3700 }
3701
3702 /* Returns true if the string is safe to dereference from an event */
3703 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3704                            bool star, int len)
3705 {
3706         unsigned long addr = (unsigned long)str;
3707         struct trace_event *trace_event;
3708         struct trace_event_call *event;
3709
3710         /* Ignore strings with no length */
3711         if (star && !len)
3712                 return true;
3713
3714         /* OK if part of the event data */
3715         if ((addr >= (unsigned long)iter->ent) &&
3716             (addr < (unsigned long)iter->ent + iter->ent_size))
3717                 return true;
3718
3719         /* OK if part of the temp seq buffer */
3720         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3721             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3722                 return true;
3723
3724         /* Core rodata can not be freed */
3725         if (is_kernel_rodata(addr))
3726                 return true;
3727
3728         if (trace_is_tracepoint_string(str))
3729                 return true;
3730
3731         /*
3732          * Now this could be a module event, referencing core module
3733          * data, which is OK.
3734          */
3735         if (!iter->ent)
3736                 return false;
3737
3738         trace_event = ftrace_find_event(iter->ent->type);
3739         if (!trace_event)
3740                 return false;
3741
3742         event = container_of(trace_event, struct trace_event_call, event);
3743         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3744                 return false;
3745
3746         /* Would rather have rodata, but this will suffice */
3747         if (within_module_core(addr, event->module))
3748                 return true;
3749
3750         return false;
3751 }
3752
3753 static const char *show_buffer(struct trace_seq *s)
3754 {
3755         struct seq_buf *seq = &s->seq;
3756
3757         seq_buf_terminate(seq);
3758
3759         return seq->buffer;
3760 }
3761
3762 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3763
3764 static int test_can_verify_check(const char *fmt, ...)
3765 {
3766         char buf[16];
3767         va_list ap;
3768         int ret;
3769
3770         /*
3771          * The verifier depends on vsnprintf() modifying the va_list
3772          * passed to it, i.e. on the va_list being passed by reference.
3773          * Some architectures (like x86_32) pass it by value, which means
3774          * that vsnprintf() does not modify the caller's va_list, and the
3775          * verifier would then need to understand all the values that
3776          * vsnprintf can use. If the va_list is passed by value, the
3777          * verifier is disabled.
3778          */
3779         va_start(ap, fmt);
3780         vsnprintf(buf, 16, "%d", ap);
3781         ret = va_arg(ap, int);
3782         va_end(ap);
3783
3784         return ret;
3785 }
3786
3787 static void test_can_verify(void)
3788 {
3789         if (!test_can_verify_check("%d %d", 0, 1)) {
3790                 pr_info("trace event string verifier disabled\n");
3791                 static_branch_inc(&trace_no_verify);
3792         }
3793 }
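
/*
 * Worked example of the probe above: test_can_verify_check("%d %d", 0, 1)
 * lets vsnprintf() consume the first integer (0) via "%d". Where va_list
 * is passed by reference, the following va_arg() returns the second
 * integer (1) and the verifier stays enabled; where it is passed by
 * value, va_arg() sees the first integer (0) again and the verifier is
 * disabled.
 */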
3794
3795 /**
3796  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3797  * @iter: The iterator that holds the seq buffer and the event being printed
3798  * @fmt: The format used to print the event
3799  * @ap: The va_list holding the data to print from @fmt.
3800  *
3801  * This writes the data into the @iter->seq buffer using the data from
3802  * @fmt and @ap. If the format has a %s, then the source of the string
3803  * is examined to make sure it is safe to print, otherwise it will
3804  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3805  * pointer.
3806  */
3807 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3808                          va_list ap)
3809 {
3810         const char *p = fmt;
3811         const char *str;
3812         int i, j;
3813
3814         if (WARN_ON_ONCE(!fmt))
3815                 return;
3816
3817         if (static_branch_unlikely(&trace_no_verify))
3818                 goto print;
3819
3820         /* Don't bother checking when doing a ftrace_dump() */
3821         if (iter->fmt == static_fmt_buf)
3822                 goto print;
3823
3824         while (*p) {
3825                 bool star = false;
3826                 int len = 0;
3827
3828                 j = 0;
3829
3830                 /* We only care about %s and variants */
3831                 for (i = 0; p[i]; i++) {
3832                         if (i + 1 >= iter->fmt_size) {
3833                                 /*
3834                                  * If we can't expand the copy buffer,
3835                                  * just print it.
3836                                  */
3837                                 if (!trace_iter_expand_format(iter))
3838                                         goto print;
3839                         }
3840
3841                         if (p[i] == '\\' && p[i+1]) {
3842                                 i++;
3843                                 continue;
3844                         }
3845                         if (p[i] == '%') {
3846                                 /* Need to test cases like %08.*s */
3847                                 for (j = 1; p[i+j]; j++) {
3848                                         if (isdigit(p[i+j]) ||
3849                                             p[i+j] == '.')
3850                                                 continue;
3851                                         if (p[i+j] == '*') {
3852                                                 star = true;
3853                                                 continue;
3854                                         }
3855                                         break;
3856                                 }
3857                                 if (p[i+j] == 's')
3858                                         break;
3859                                 star = false;
3860                         }
3861                         j = 0;
3862                 }
3863                 /* If no %s found then just print normally */
3864                 if (!p[i])
3865                         break;
3866
3867                 /* Copy up to the %s, and print that */
3868                 strncpy(iter->fmt, p, i);
3869                 iter->fmt[i] = '\0';
3870                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3871
3872                 /*
3873                  * If iter->seq is full, the above call no longer guarantees
3874                  * that ap is in sync with fmt processing, and further calls
3875                  * to va_arg() can return wrong positional arguments.
3876                  *
3877                  * Ensure that ap is no longer used in this case.
3878                  */
3879                 if (iter->seq.full) {
3880                         p = "";
3881                         break;
3882                 }
3883
3884                 if (star)
3885                         len = va_arg(ap, int);
3886
3887                 /* The ap now points to the string data of the %s */
3888                 str = va_arg(ap, const char *);
3889
3890                 /*
3891                  * If you hit this warning, it is likely that the
3892                  * trace event in question used %s on a string that
3893                  * was saved at the time of the event, but may not be
3894                  * around when the trace is read. Use __string(),
3895                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3896                  * instead. See samples/trace_events/trace-events-sample.h
3897                  * for reference.
3898                  */
3899                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3900                               "fmt: '%s' current_buffer: '%s'",
3901                               fmt, show_buffer(&iter->seq))) {
3902                         int ret;
3903
3904                         /* Try to safely read the string */
3905                         if (star) {
3906                                 if (len + 1 > iter->fmt_size)
3907                                         len = iter->fmt_size - 1;
3908                                 if (len < 0)
3909                                         len = 0;
3910                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3911                                 iter->fmt[len] = 0;
3912                                 star = false;
3913                         } else {
3914                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3915                                                                   iter->fmt_size);
3916                         }
3917                         if (ret < 0)
3918                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3919                         else
3920                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3921                                                  str, iter->fmt);
3922                         str = "[UNSAFE-MEMORY]";
3923                         strcpy(iter->fmt, "%s");
3924                 } else {
3925                         strncpy(iter->fmt, p + i, j + 1);
3926                         iter->fmt[j+1] = '\0';
3927                 }
3928                 if (star)
3929                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3930                 else
3931                         trace_seq_printf(&iter->seq, iter->fmt, str);
3932
3933                 p += i + j + 1;
3934         }
3935  print:
3936         if (*p)
3937                 trace_seq_vprintf(&iter->seq, p, ap);
3938 }
3939
3940 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3941 {
3942         const char *p, *new_fmt;
3943         char *q;
3944
3945         if (WARN_ON_ONCE(!fmt))
3946                 return fmt;
3947
3948         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3949                 return fmt;
3950
3951         p = fmt;
3952         new_fmt = q = iter->fmt;
3953         while (*p) {
3954                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3955                         if (!trace_iter_expand_format(iter))
3956                                 return fmt;
3957
3958                         q += iter->fmt - new_fmt;
3959                         new_fmt = iter->fmt;
3960                 }
3961
3962                 *q++ = *p++;
3963
3964                 /* Replace %p with %px */
3965                 if (p[-1] == '%') {
3966                         if (p[0] == '%') {
3967                                 *q++ = *p++;
3968                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3969                                 *q++ = *p++;
3970                                 *q++ = 'x';
3971                         }
3972                 }
3973         }
3974         *q = '\0';
3975
3976         return new_fmt;
3977 }
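
/*
 * Illustrative example of the rewrite above: with hash-ptr disabled a
 * format such as
 *
 *	"skb=%p len=%d mac=%pM\n"
 *
 * is emitted as
 *
 *	"skb=%px len=%d mac=%pM\n"
 *
 * Only a bare %p is touched; %% and extended specifiers like %pM are
 * left alone, since the character following 'p' is alphanumeric.
 */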
3978
3979 #define STATIC_TEMP_BUF_SIZE    128
3980 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3981
3982 /* Find the next real entry, without updating the iterator itself */
3983 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3984                                           int *ent_cpu, u64 *ent_ts)
3985 {
3986         /* __find_next_entry will reset ent_size */
3987         int ent_size = iter->ent_size;
3988         struct trace_entry *entry;
3989
3990         /*
3991          * If called from ftrace_dump(), then the iter->temp buffer
3992          * will be the static_temp_buf and not created from kmalloc.
3993          * If the entry size is greater than the buffer, we cannot
3994          * save it. Just return NULL in that case. This is only
3995          * used to add markers when two consecutive events' time
3996          * stamps have a large delta. See trace_print_lat_context().
3997          */
3998         if (iter->temp == static_temp_buf &&
3999             STATIC_TEMP_BUF_SIZE < ent_size)
4000                 return NULL;
4001
4002         /*
4003          * The __find_next_entry() may call peek_next_entry(), which may
4004          * call ring_buffer_peek() that may make the contents of iter->ent
4005          * undefined. Need to copy iter->ent now.
4006          */
4007         if (iter->ent && iter->ent != iter->temp) {
4008                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4009                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4010                         void *temp;
4011                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4012                         if (!temp)
4013                                 return NULL;
4014                         kfree(iter->temp);
4015                         iter->temp = temp;
4016                         iter->temp_size = iter->ent_size;
4017                 }
4018                 memcpy(iter->temp, iter->ent, iter->ent_size);
4019                 iter->ent = iter->temp;
4020         }
4021         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4022         /* Put back the original ent_size */
4023         iter->ent_size = ent_size;
4024
4025         return entry;
4026 }
4027
4028 /* Find the next real entry, and increment the iterator to the next entry */
4029 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4030 {
4031         iter->ent = __find_next_entry(iter, &iter->cpu,
4032                                       &iter->lost_events, &iter->ts);
4033
4034         if (iter->ent)
4035                 trace_iterator_increment(iter);
4036
4037         return iter->ent ? iter : NULL;
4038 }
4039
4040 static void trace_consume(struct trace_iterator *iter)
4041 {
4042         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4043                             &iter->lost_events);
4044 }
4045
4046 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4047 {
4048         struct trace_iterator *iter = m->private;
4049         int i = (int)*pos;
4050         void *ent;
4051
4052         WARN_ON_ONCE(iter->leftover);
4053
4054         (*pos)++;
4055
4056         /* can't go backwards */
4057         if (iter->idx > i)
4058                 return NULL;
4059
4060         if (iter->idx < 0)
4061                 ent = trace_find_next_entry_inc(iter);
4062         else
4063                 ent = iter;
4064
4065         while (ent && iter->idx < i)
4066                 ent = trace_find_next_entry_inc(iter);
4067
4068         iter->pos = *pos;
4069
4070         return ent;
4071 }
4072
4073 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4074 {
4075         struct ring_buffer_iter *buf_iter;
4076         unsigned long entries = 0;
4077         u64 ts;
4078
4079         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4080
4081         buf_iter = trace_buffer_iter(iter, cpu);
4082         if (!buf_iter)
4083                 return;
4084
4085         ring_buffer_iter_reset(buf_iter);
4086
4087         /*
4088          * With the max latency tracers, we could have the case
4089          * that a reset never took place on a CPU. This is evident
4090          * when the timestamp is before the start of the buffer.
4091          */
4092         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4093                 if (ts >= iter->array_buffer->time_start)
4094                         break;
4095                 entries++;
4096                 ring_buffer_iter_advance(buf_iter);
4097         }
4098
4099         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4100 }
4101
4102 /*
4103  * The current tracer is copied to avoid taking a global lock
4104  * all around.
4105  */
4106 static void *s_start(struct seq_file *m, loff_t *pos)
4107 {
4108         struct trace_iterator *iter = m->private;
4109         struct trace_array *tr = iter->tr;
4110         int cpu_file = iter->cpu_file;
4111         void *p = NULL;
4112         loff_t l = 0;
4113         int cpu;
4114
4115         /*
4116          * Copy the tracer to avoid using a global lock all around.
4117          * iter->trace is a copy of current_trace, so the pointer to the
4118          * name may be compared instead of using strcmp(), as iter->trace->name
4119          * will point to the same string as current_trace->name.
4120          */
4121         mutex_lock(&trace_types_lock);
4122         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4123                 *iter->trace = *tr->current_trace;
4124         mutex_unlock(&trace_types_lock);
4125
4126 #ifdef CONFIG_TRACER_MAX_TRACE
4127         if (iter->snapshot && iter->trace->use_max_tr)
4128                 return ERR_PTR(-EBUSY);
4129 #endif
4130
4131         if (*pos != iter->pos) {
4132                 iter->ent = NULL;
4133                 iter->cpu = 0;
4134                 iter->idx = -1;
4135
4136                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4137                         for_each_tracing_cpu(cpu)
4138                                 tracing_iter_reset(iter, cpu);
4139                 } else
4140                         tracing_iter_reset(iter, cpu_file);
4141
4142                 iter->leftover = 0;
4143                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4144                         ;
4145
4146         } else {
4147                 /*
4148                  * If we overflowed the seq_file before, then we want
4149                  * to just reuse the trace_seq buffer again.
4150                  */
4151                 if (iter->leftover)
4152                         p = iter;
4153                 else {
4154                         l = *pos - 1;
4155                         p = s_next(m, p, &l);
4156                 }
4157         }
4158
4159         trace_event_read_lock();
4160         trace_access_lock(cpu_file);
4161         return p;
4162 }
4163
4164 static void s_stop(struct seq_file *m, void *p)
4165 {
4166         struct trace_iterator *iter = m->private;
4167
4168 #ifdef CONFIG_TRACER_MAX_TRACE
4169         if (iter->snapshot && iter->trace->use_max_tr)
4170                 return;
4171 #endif
4172
4173         trace_access_unlock(iter->cpu_file);
4174         trace_event_read_unlock();
4175 }
4176
4177 static void
4178 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4179                       unsigned long *entries, int cpu)
4180 {
4181         unsigned long count;
4182
4183         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4184         /*
4185          * If this buffer has skipped entries, then we hold all
4186          * entries for the trace and we need to ignore the
4187          * ones before the time stamp.
4188          */
4189         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4190                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4191                 /* total is the same as the entries */
4192                 *total = count;
4193         } else
4194                 *total = count +
4195                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4196         *entries = count;
4197 }
4198
4199 static void
4200 get_total_entries(struct array_buffer *buf,
4201                   unsigned long *total, unsigned long *entries)
4202 {
4203         unsigned long t, e;
4204         int cpu;
4205
4206         *total = 0;
4207         *entries = 0;
4208
4209         for_each_tracing_cpu(cpu) {
4210                 get_total_entries_cpu(buf, &t, &e, cpu);
4211                 *total += t;
4212                 *entries += e;
4213         }
4214 }
4215
4216 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4217 {
4218         unsigned long total, entries;
4219
4220         if (!tr)
4221                 tr = &global_trace;
4222
4223         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4224
4225         return entries;
4226 }
4227
4228 unsigned long trace_total_entries(struct trace_array *tr)
4229 {
4230         unsigned long total, entries;
4231
4232         if (!tr)
4233                 tr = &global_trace;
4234
4235         get_total_entries(&tr->array_buffer, &total, &entries);
4236
4237         return entries;
4238 }
4239
4240 static void print_lat_help_header(struct seq_file *m)
4241 {
4242         seq_puts(m, "#                    _------=> CPU#            \n"
4243                     "#                   / _-----=> irqs-off/BH-disabled\n"
4244                     "#                  | / _----=> need-resched    \n"
4245                     "#                  || / _---=> hardirq/softirq \n"
4246                     "#                  ||| / _--=> preempt-depth   \n"
4247                     "#                  |||| / _-=> migrate-disable \n"
4248                     "#                  ||||| /     delay           \n"
4249                     "#  cmd     pid     |||||| time  |   caller     \n"
4250                     "#     \\   /        ||||||  \\    |    /       \n");
4251 }
4252
4253 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4254 {
4255         unsigned long total;
4256         unsigned long entries;
4257
4258         get_total_entries(buf, &total, &entries);
4259         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4260                    entries, total, num_online_cpus());
4261         seq_puts(m, "#\n");
4262 }
4263
4264 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4265                                    unsigned int flags)
4266 {
4267         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4268
4269         print_event_info(buf, m);
4270
4271         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4272         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4273 }
4274
4275 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4276                                        unsigned int flags)
4277 {
4278         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4279         static const char space[] = "            ";
4280         int prec = tgid ? 12 : 2;
4281
4282         print_event_info(buf, m);
4283
4284         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4285         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4286         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4287         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4288         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4289         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4290         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4291         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4292 }
4293
4294 void
4295 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4296 {
4297         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4298         struct array_buffer *buf = iter->array_buffer;
4299         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4300         struct tracer *type = iter->trace;
4301         unsigned long entries;
4302         unsigned long total;
4303         const char *name = type->name;
4304
4305         get_total_entries(buf, &total, &entries);
4306
4307         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4308                    name, UTS_RELEASE);
4309         seq_puts(m, "# -----------------------------------"
4310                  "---------------------------------\n");
4311         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4312                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4313                    nsecs_to_usecs(data->saved_latency),
4314                    entries,
4315                    total,
4316                    buf->cpu,
4317                    preempt_model_none()      ? "server" :
4318                    preempt_model_voluntary() ? "desktop" :
4319                    preempt_model_full()      ? "preempt" :
4320                    preempt_model_rt()        ? "preempt_rt" :
4321                    "unknown",
4322                    /* These are reserved for later use */
4323                    0, 0, 0, 0);
4324 #ifdef CONFIG_SMP
4325         seq_printf(m, " #P:%d)\n", num_online_cpus());
4326 #else
4327         seq_puts(m, ")\n");
4328 #endif
4329         seq_puts(m, "#    -----------------\n");
4330         seq_printf(m, "#    | task: %.16s-%d "
4331                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4332                    data->comm, data->pid,
4333                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4334                    data->policy, data->rt_priority);
4335         seq_puts(m, "#    -----------------\n");
4336
4337         if (data->critical_start) {
4338                 seq_puts(m, "#  => started at: ");
4339                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4340                 trace_print_seq(m, &iter->seq);
4341                 seq_puts(m, "\n#  => ended at:   ");
4342                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4343                 trace_print_seq(m, &iter->seq);
4344                 seq_puts(m, "\n#\n");
4345         }
4346
4347         seq_puts(m, "#\n");
4348 }
4349
4350 static void test_cpu_buff_start(struct trace_iterator *iter)
4351 {
4352         struct trace_seq *s = &iter->seq;
4353         struct trace_array *tr = iter->tr;
4354
4355         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4356                 return;
4357
4358         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4359                 return;
4360
4361         if (cpumask_available(iter->started) &&
4362             cpumask_test_cpu(iter->cpu, iter->started))
4363                 return;
4364
4365         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4366                 return;
4367
4368         if (cpumask_available(iter->started))
4369                 cpumask_set_cpu(iter->cpu, iter->started);
4370
4371         /* Don't print started cpu buffer for the first entry of the trace */
4372         if (iter->idx > 1)
4373                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4374                                 iter->cpu);
4375 }
4376
4377 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4378 {
4379         struct trace_array *tr = iter->tr;
4380         struct trace_seq *s = &iter->seq;
4381         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4382         struct trace_entry *entry;
4383         struct trace_event *event;
4384
4385         entry = iter->ent;
4386
4387         test_cpu_buff_start(iter);
4388
4389         event = ftrace_find_event(entry->type);
4390
4391         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4392                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4393                         trace_print_lat_context(iter);
4394                 else
4395                         trace_print_context(iter);
4396         }
4397
4398         if (trace_seq_has_overflowed(s))
4399                 return TRACE_TYPE_PARTIAL_LINE;
4400
4401         if (event)
4402                 return event->funcs->trace(iter, sym_flags, event);
4403
4404         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4405
4406         return trace_handle_return(s);
4407 }
4408
4409 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4410 {
4411         struct trace_array *tr = iter->tr;
4412         struct trace_seq *s = &iter->seq;
4413         struct trace_entry *entry;
4414         struct trace_event *event;
4415
4416         entry = iter->ent;
4417
4418         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4419                 trace_seq_printf(s, "%d %d %llu ",
4420                                  entry->pid, iter->cpu, iter->ts);
4421
4422         if (trace_seq_has_overflowed(s))
4423                 return TRACE_TYPE_PARTIAL_LINE;
4424
4425         event = ftrace_find_event(entry->type);
4426         if (event)
4427                 return event->funcs->raw(iter, 0, event);
4428
4429         trace_seq_printf(s, "%d ?\n", entry->type);
4430
4431         return trace_handle_return(s);
4432 }
4433
4434 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4435 {
4436         struct trace_array *tr = iter->tr;
4437         struct trace_seq *s = &iter->seq;
4438         unsigned char newline = '\n';
4439         struct trace_entry *entry;
4440         struct trace_event *event;
4441
4442         entry = iter->ent;
4443
4444         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4445                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4446                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4447                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4448                 if (trace_seq_has_overflowed(s))
4449                         return TRACE_TYPE_PARTIAL_LINE;
4450         }
4451
4452         event = ftrace_find_event(entry->type);
4453         if (event) {
4454                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4455                 if (ret != TRACE_TYPE_HANDLED)
4456                         return ret;
4457         }
4458
4459         SEQ_PUT_FIELD(s, newline);
4460
4461         return trace_handle_return(s);
4462 }
4463
4464 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4465 {
4466         struct trace_array *tr = iter->tr;
4467         struct trace_seq *s = &iter->seq;
4468         struct trace_entry *entry;
4469         struct trace_event *event;
4470
4471         entry = iter->ent;
4472
4473         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4474                 SEQ_PUT_FIELD(s, entry->pid);
4475                 SEQ_PUT_FIELD(s, iter->cpu);
4476                 SEQ_PUT_FIELD(s, iter->ts);
4477                 if (trace_seq_has_overflowed(s))
4478                         return TRACE_TYPE_PARTIAL_LINE;
4479         }
4480
4481         event = ftrace_find_event(entry->type);
4482         return event ? event->funcs->binary(iter, 0, event) :
4483                 TRACE_TYPE_HANDLED;
4484 }
4485
4486 int trace_empty(struct trace_iterator *iter)
4487 {
4488         struct ring_buffer_iter *buf_iter;
4489         int cpu;
4490
4491         /* If we are looking at one CPU buffer, only check that one */
4492         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4493                 cpu = iter->cpu_file;
4494                 buf_iter = trace_buffer_iter(iter, cpu);
4495                 if (buf_iter) {
4496                         if (!ring_buffer_iter_empty(buf_iter))
4497                                 return 0;
4498                 } else {
4499                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4500                                 return 0;
4501                 }
4502                 return 1;
4503         }
4504
4505         for_each_tracing_cpu(cpu) {
4506                 buf_iter = trace_buffer_iter(iter, cpu);
4507                 if (buf_iter) {
4508                         if (!ring_buffer_iter_empty(buf_iter))
4509                                 return 0;
4510                 } else {
4511                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4512                                 return 0;
4513                 }
4514         }
4515
4516         return 1;
4517 }
4518
4519 /*  Called with trace_event_read_lock() held. */
4520 enum print_line_t print_trace_line(struct trace_iterator *iter)
4521 {
4522         struct trace_array *tr = iter->tr;
4523         unsigned long trace_flags = tr->trace_flags;
4524         enum print_line_t ret;
4525
4526         if (iter->lost_events) {
4527                 if (iter->lost_events == (unsigned long)-1)
4528                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4529                                          iter->cpu);
4530                 else
4531                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4532                                          iter->cpu, iter->lost_events);
4533                 if (trace_seq_has_overflowed(&iter->seq))
4534                         return TRACE_TYPE_PARTIAL_LINE;
4535         }
4536
4537         if (iter->trace && iter->trace->print_line) {
4538                 ret = iter->trace->print_line(iter);
4539                 if (ret != TRACE_TYPE_UNHANDLED)
4540                         return ret;
4541         }
4542
4543         if (iter->ent->type == TRACE_BPUTS &&
4544                         trace_flags & TRACE_ITER_PRINTK &&
4545                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4546                 return trace_print_bputs_msg_only(iter);
4547
4548         if (iter->ent->type == TRACE_BPRINT &&
4549                         trace_flags & TRACE_ITER_PRINTK &&
4550                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4551                 return trace_print_bprintk_msg_only(iter);
4552
4553         if (iter->ent->type == TRACE_PRINT &&
4554                         trace_flags & TRACE_ITER_PRINTK &&
4555                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4556                 return trace_print_printk_msg_only(iter);
4557
4558         if (trace_flags & TRACE_ITER_BIN)
4559                 return print_bin_fmt(iter);
4560
4561         if (trace_flags & TRACE_ITER_HEX)
4562                 return print_hex_fmt(iter);
4563
4564         if (trace_flags & TRACE_ITER_RAW)
4565                 return print_raw_fmt(iter);
4566
4567         return print_trace_fmt(iter);
4568 }
4569
4570 void trace_latency_header(struct seq_file *m)
4571 {
4572         struct trace_iterator *iter = m->private;
4573         struct trace_array *tr = iter->tr;
4574
4575         /* print nothing if the buffers are empty */
4576         if (trace_empty(iter))
4577                 return;
4578
4579         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4580                 print_trace_header(m, iter);
4581
4582         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4583                 print_lat_help_header(m);
4584 }
4585
4586 void trace_default_header(struct seq_file *m)
4587 {
4588         struct trace_iterator *iter = m->private;
4589         struct trace_array *tr = iter->tr;
4590         unsigned long trace_flags = tr->trace_flags;
4591
4592         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4593                 return;
4594
4595         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4596                 /* print nothing if the buffers are empty */
4597                 if (trace_empty(iter))
4598                         return;
4599                 print_trace_header(m, iter);
4600                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4601                         print_lat_help_header(m);
4602         } else {
4603                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4604                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4605                                 print_func_help_header_irq(iter->array_buffer,
4606                                                            m, trace_flags);
4607                         else
4608                                 print_func_help_header(iter->array_buffer, m,
4609                                                        trace_flags);
4610                 }
4611         }
4612 }
4613
4614 static void test_ftrace_alive(struct seq_file *m)
4615 {
4616         if (!ftrace_is_dead())
4617                 return;
4618         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4619                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4620 }
4621
4622 #ifdef CONFIG_TRACER_MAX_TRACE
4623 static void show_snapshot_main_help(struct seq_file *m)
4624 {
4625         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4626                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4627                     "#                      Takes a snapshot of the main buffer.\n"
4628                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4629                     "#                      (Doesn't have to be '2' works with any number that\n"
4630                     "#                       is not a '0' or '1')\n");
4631 }
4632
4633 static void show_snapshot_percpu_help(struct seq_file *m)
4634 {
4635         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4636 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4637         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4638                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4639 #else
4640         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4641                     "#                     Must use main snapshot file to allocate.\n");
4642 #endif
4643         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4644                     "#                      (Doesn't have to be '2' works with any number that\n"
4645                     "#                       is not a '0' or '1')\n");
4646 }
4647
4648 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4649 {
4650         if (iter->tr->allocated_snapshot)
4651                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4652         else
4653                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4654
4655         seq_puts(m, "# Snapshot commands:\n");
4656         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4657                 show_snapshot_main_help(m);
4658         else
4659                 show_snapshot_percpu_help(m);
4660 }
4661 #else
4662 /* Should never be called */
4663 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4664 #endif
4665
4666 static int s_show(struct seq_file *m, void *v)
4667 {
4668         struct trace_iterator *iter = v;
4669         int ret;
4670
4671         if (iter->ent == NULL) {
4672                 if (iter->tr) {
4673                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4674                         seq_puts(m, "#\n");
4675                         test_ftrace_alive(m);
4676                 }
4677                 if (iter->snapshot && trace_empty(iter))
4678                         print_snapshot_help(m, iter);
4679                 else if (iter->trace && iter->trace->print_header)
4680                         iter->trace->print_header(m);
4681                 else
4682                         trace_default_header(m);
4683
4684         } else if (iter->leftover) {
4685                 /*
4686                  * If we filled the seq_file buffer earlier, we
4687                  * want to just show it now.
4688                  */
4689                 ret = trace_print_seq(m, &iter->seq);
4690
4691                 /* ret should this time be zero, but you never know */
4692                 iter->leftover = ret;
4693
4694         } else {
4695                 print_trace_line(iter);
4696                 ret = trace_print_seq(m, &iter->seq);
4697                 /*
4698                  * If we overflow the seq_file buffer, then it will
4699                  * ask us for this data again at start up.
4700                  * Use that instead.
4701                  *  ret is 0 if seq_file write succeeded.
4702                  *        -1 otherwise.
4703                  */
4704                 iter->leftover = ret;
4705         }
4706
4707         return 0;
4708 }
4709
4710 /*
4711  * Should be used after trace_array_get(), trace_types_lock
4712  * ensures that i_cdev was already initialized.
4713  */
4714 static inline int tracing_get_cpu(struct inode *inode)
4715 {
4716         if (inode->i_cdev) /* See trace_create_cpu_file() */
4717                 return (long)inode->i_cdev - 1;
4718         return RING_BUFFER_ALL_CPUS;
4719 }
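
/*
 * trace_create_cpu_file() stores (cpu + 1) in i_cdev so that the default
 * value of 0 can mean "not a per-cpu file"; the "- 1" above undoes that
 * encoding (e.g. i_cdev == (void *)1 maps back to CPU 0).
 */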
4720
4721 static const struct seq_operations tracer_seq_ops = {
4722         .start          = s_start,
4723         .next           = s_next,
4724         .stop           = s_stop,
4725         .show           = s_show,
4726 };
4727
4728 static struct trace_iterator *
4729 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4730 {
4731         struct trace_array *tr = inode->i_private;
4732         struct trace_iterator *iter;
4733         int cpu;
4734
4735         if (tracing_disabled)
4736                 return ERR_PTR(-ENODEV);
4737
4738         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4739         if (!iter)
4740                 return ERR_PTR(-ENOMEM);
4741
4742         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4743                                     GFP_KERNEL);
4744         if (!iter->buffer_iter)
4745                 goto release;
4746
4747         /*
4748          * trace_find_next_entry() may need to save off iter->ent.
4749          * It will place it into the iter->temp buffer. As most
4750          * events are less than 128 bytes, allocate a buffer of that size.
4751          * If one is greater, then trace_find_next_entry() will
4752          * allocate a new buffer to adjust for the bigger iter->ent.
4753          * It's not critical if it fails to get allocated here.
4754          */
4755         iter->temp = kmalloc(128, GFP_KERNEL);
4756         if (iter->temp)
4757                 iter->temp_size = 128;
4758
4759         /*
4760          * trace_event_printf() may need to modify given format
4761          * string to replace %p with %px so that it shows real address
4762          * string to replace %p with %px so that it shows the real address
4763          * instead of a hash value. However, that is only needed for event
4764          * tracing; other tracers may not need it. Defer the allocation
4765          */
4766         iter->fmt = NULL;
4767         iter->fmt_size = 0;
4768
4769         /*
4770          * We make a copy of the current tracer to avoid concurrent
4771          * changes on it while we are reading.
4772          */
4773         mutex_lock(&trace_types_lock);
4774         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4775         if (!iter->trace)
4776                 goto fail;
4777
4778         *iter->trace = *tr->current_trace;
4779
4780         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4781                 goto fail;
4782
4783         iter->tr = tr;
4784
4785 #ifdef CONFIG_TRACER_MAX_TRACE
4786         /* Currently only the top directory has a snapshot */
4787         if (tr->current_trace->print_max || snapshot)
4788                 iter->array_buffer = &tr->max_buffer;
4789         else
4790 #endif
4791                 iter->array_buffer = &tr->array_buffer;
4792         iter->snapshot = snapshot;
4793         iter->pos = -1;
4794         iter->cpu_file = tracing_get_cpu(inode);
4795         mutex_init(&iter->mutex);
4796
4797         /* Notify the tracer early; before we stop tracing. */
4798         if (iter->trace->open)
4799                 iter->trace->open(iter);
4800
4801         /* Annotate start of buffers if we had overruns */
4802         if (ring_buffer_overruns(iter->array_buffer->buffer))
4803                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4804
4805         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4806         if (trace_clocks[tr->clock_id].in_ns)
4807                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4808
4809         /*
4810          * If pause-on-trace is enabled, then stop the trace while
4811          * dumping, unless this is the "snapshot" file
4812          */
4813         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4814                 tracing_stop_tr(tr);
4815
4816         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4817                 for_each_tracing_cpu(cpu) {
4818                         iter->buffer_iter[cpu] =
4819                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4820                                                          cpu, GFP_KERNEL);
4821                 }
4822                 ring_buffer_read_prepare_sync();
4823                 for_each_tracing_cpu(cpu) {
4824                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                         tracing_iter_reset(iter, cpu);
4826                 }
4827         } else {
4828                 cpu = iter->cpu_file;
4829                 iter->buffer_iter[cpu] =
4830                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4831                                                  cpu, GFP_KERNEL);
4832                 ring_buffer_read_prepare_sync();
4833                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4834                 tracing_iter_reset(iter, cpu);
4835         }
4836
4837         mutex_unlock(&trace_types_lock);
4838
4839         return iter;
4840
4841  fail:
4842         mutex_unlock(&trace_types_lock);
4843         kfree(iter->trace);
4844         kfree(iter->temp);
4845         kfree(iter->buffer_iter);
4846 release:
4847         seq_release_private(inode, file);
4848         return ERR_PTR(-ENOMEM);
4849 }
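
/*
 * Note: tracing_open() below calls this with snapshot == false for the
 * "trace" file; the snapshot file's open routine (tracing_snapshot_open(),
 * elsewhere in this file) passes snapshot == true, which makes the iterator
 * walk tr->max_buffer instead of the live buffer (see the
 * CONFIG_TRACER_MAX_TRACE block above).
 */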
4850
4851 int tracing_open_generic(struct inode *inode, struct file *filp)
4852 {
4853         int ret;
4854
4855         ret = tracing_check_open_get_tr(NULL);
4856         if (ret)
4857                 return ret;
4858
4859         filp->private_data = inode->i_private;
4860         return 0;
4861 }
4862
4863 bool tracing_is_disabled(void)
4864 {
4865         return (tracing_disabled) ? true : false;
4866 }
4867
4868 /*
4869  * Open and update trace_array ref count.
4870  * Must have the current trace_array passed to it.
4871  */
4872 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4873 {
4874         struct trace_array *tr = inode->i_private;
4875         int ret;
4876
4877         ret = tracing_check_open_get_tr(tr);
4878         if (ret)
4879                 return ret;
4880
4881         filp->private_data = inode->i_private;
4882
4883         return 0;
4884 }
4885
4886 static int tracing_mark_open(struct inode *inode, struct file *filp)
4887 {
4888         stream_open(inode, filp);
4889         return tracing_open_generic_tr(inode, filp);
4890 }
4891
4892 static int tracing_release(struct inode *inode, struct file *file)
4893 {
4894         struct trace_array *tr = inode->i_private;
4895         struct seq_file *m = file->private_data;
4896         struct trace_iterator *iter;
4897         int cpu;
4898
4899         if (!(file->f_mode & FMODE_READ)) {
4900                 trace_array_put(tr);
4901                 return 0;
4902         }
4903
4904         /* Writes do not use seq_file */
4905         iter = m->private;
4906         mutex_lock(&trace_types_lock);
4907
4908         for_each_tracing_cpu(cpu) {
4909                 if (iter->buffer_iter[cpu])
4910                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4911         }
4912
4913         if (iter->trace && iter->trace->close)
4914                 iter->trace->close(iter);
4915
4916         if (!iter->snapshot && tr->stop_count)
4917                 /* reenable tracing if it was previously enabled */
4918                 tracing_start_tr(tr);
4919
4920         __trace_array_put(tr);
4921
4922         mutex_unlock(&trace_types_lock);
4923
4924         mutex_destroy(&iter->mutex);
4925         free_cpumask_var(iter->started);
4926         kfree(iter->fmt);
4927         kfree(iter->temp);
4928         kfree(iter->trace);
4929         kfree(iter->buffer_iter);
4930         seq_release_private(inode, file);
4931
4932         return 0;
4933 }
4934
4935 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4936 {
4937         struct trace_array *tr = inode->i_private;
4938
4939         trace_array_put(tr);
4940         return 0;
4941 }
4942
4943 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946
4947         trace_array_put(tr);
4948
4949         return single_release(inode, file);
4950 }
4951
4952 static int tracing_open(struct inode *inode, struct file *file)
4953 {
4954         struct trace_array *tr = inode->i_private;
4955         struct trace_iterator *iter;
4956         int ret;
4957
4958         ret = tracing_check_open_get_tr(tr);
4959         if (ret)
4960                 return ret;
4961
4962         /* If this file was open for write, then erase contents */
4963         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4964                 int cpu = tracing_get_cpu(inode);
4965                 struct array_buffer *trace_buf = &tr->array_buffer;
4966
4967 #ifdef CONFIG_TRACER_MAX_TRACE
4968                 if (tr->current_trace->print_max)
4969                         trace_buf = &tr->max_buffer;
4970 #endif
4971
4972                 if (cpu == RING_BUFFER_ALL_CPUS)
4973                         tracing_reset_online_cpus(trace_buf);
4974                 else
4975                         tracing_reset_cpu(trace_buf, cpu);
4976         }
4977
4978         if (file->f_mode & FMODE_READ) {
4979                 iter = __tracing_open(inode, file, false);
4980                 if (IS_ERR(iter))
4981                         ret = PTR_ERR(iter);
4982                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4983                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4984         }
4985
4986         if (ret < 0)
4987                 trace_array_put(tr);
4988
4989         return ret;
4990 }
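
/*
 * Example of the O_TRUNC path above: "echo > trace" from a shell opens the
 * file write-only with O_TRUNC, so it clears the selected buffer without
 * producing any output, matching the readme_msg description further down.
 */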
4991
4992 /*
4993  * Some tracers are not suitable for instance buffers.
4994  * A tracer is always available for the global array (toplevel)
4995  * or if it explicitly states that it is.
4996  */
4997 static bool
4998 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4999 {
5000         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5001 }
5002
5003 /* Find the next tracer that this trace array may use */
5004 static struct tracer *
5005 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5006 {
5007         while (t && !trace_ok_for_array(t, tr))
5008                 t = t->next;
5009
5010         return t;
5011 }
5012
5013 static void *
5014 t_next(struct seq_file *m, void *v, loff_t *pos)
5015 {
5016         struct trace_array *tr = m->private;
5017         struct tracer *t = v;
5018
5019         (*pos)++;
5020
5021         if (t)
5022                 t = get_tracer_for_array(tr, t->next);
5023
5024         return t;
5025 }
5026
5027 static void *t_start(struct seq_file *m, loff_t *pos)
5028 {
5029         struct trace_array *tr = m->private;
5030         struct tracer *t;
5031         loff_t l = 0;
5032
5033         mutex_lock(&trace_types_lock);
5034
5035         t = get_tracer_for_array(tr, trace_types);
5036         for (; t && l < *pos; t = t_next(m, t, &l))
5037                 ;
5038
5039         return t;
5040 }
5041
5042 static void t_stop(struct seq_file *m, void *p)
5043 {
5044         mutex_unlock(&trace_types_lock);
5045 }
5046
5047 static int t_show(struct seq_file *m, void *v)
5048 {
5049         struct tracer *t = v;
5050
5051         if (!t)
5052                 return 0;
5053
5054         seq_puts(m, t->name);
5055         if (t->next)
5056                 seq_putc(m, ' ');
5057         else
5058                 seq_putc(m, '\n');
5059
5060         return 0;
5061 }
5062
5063 static const struct seq_operations show_traces_seq_ops = {
5064         .start          = t_start,
5065         .next           = t_next,
5066         .stop           = t_stop,
5067         .show           = t_show,
5068 };
5069
5070 static int show_traces_open(struct inode *inode, struct file *file)
5071 {
5072         struct trace_array *tr = inode->i_private;
5073         struct seq_file *m;
5074         int ret;
5075
5076         ret = tracing_check_open_get_tr(tr);
5077         if (ret)
5078                 return ret;
5079
5080         ret = seq_open(file, &show_traces_seq_ops);
5081         if (ret) {
5082                 trace_array_put(tr);
5083                 return ret;
5084         }
5085
5086         m = file->private_data;
5087         m->private = tr;
5088
5089         return 0;
5090 }
5091
5092 static int show_traces_release(struct inode *inode, struct file *file)
5093 {
5094         struct trace_array *tr = inode->i_private;
5095
5096         trace_array_put(tr);
5097         return seq_release(inode, file);
5098 }
5099
5100 static ssize_t
5101 tracing_write_stub(struct file *filp, const char __user *ubuf,
5102                    size_t count, loff_t *ppos)
5103 {
5104         return count;
5105 }
5106
5107 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5108 {
5109         int ret;
5110
5111         if (file->f_mode & FMODE_READ)
5112                 ret = seq_lseek(file, offset, whence);
5113         else
5114                 file->f_pos = ret = 0;
5115
5116         return ret;
5117 }
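
/*
 * Readers go through seq_file, so their offsets are handled by seq_lseek();
 * write-only opens (e.g. "echo > trace") have no seq_file state and simply
 * have their file position reset to zero here.
 */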
5118
5119 static const struct file_operations tracing_fops = {
5120         .open           = tracing_open,
5121         .read           = seq_read,
5122         .write          = tracing_write_stub,
5123         .llseek         = tracing_lseek,
5124         .release        = tracing_release,
5125 };
5126
5127 static const struct file_operations show_traces_fops = {
5128         .open           = show_traces_open,
5129         .read           = seq_read,
5130         .llseek         = seq_lseek,
5131         .release        = show_traces_release,
5132 };
5133
5134 static ssize_t
5135 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5136                      size_t count, loff_t *ppos)
5137 {
5138         struct trace_array *tr = file_inode(filp)->i_private;
5139         char *mask_str;
5140         int len;
5141
5142         len = snprintf(NULL, 0, "%*pb\n",
5143                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5144         mask_str = kmalloc(len, GFP_KERNEL);
5145         if (!mask_str)
5146                 return -ENOMEM;
5147
5148         len = snprintf(mask_str, len, "%*pb\n",
5149                        cpumask_pr_args(tr->tracing_cpumask));
5150         if (len >= count) {
5151                 count = -EINVAL;
5152                 goto out_err;
5153         }
5154         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5155
5156 out_err:
5157         kfree(mask_str);
5158
5159         return count;
5160 }
5161
5162 int tracing_set_cpumask(struct trace_array *tr,
5163                         cpumask_var_t tracing_cpumask_new)
5164 {
5165         int cpu;
5166
5167         if (!tr)
5168                 return -EINVAL;
5169
5170         local_irq_disable();
5171         arch_spin_lock(&tr->max_lock);
5172         for_each_tracing_cpu(cpu) {
5173                 /*
5174                  * Increase/decrease the disabled counter if we are
5175                  * about to flip a bit in the cpumask:
5176                  */
5177                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5178                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5179                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5180                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5181                 }
5182                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5183                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5184                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5185                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5186                 }
5187         }
5188         arch_spin_unlock(&tr->max_lock);
5189         local_irq_enable();
5190
5191         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5192
5193         return 0;
5194 }
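
/*
 * Sketch of how this is driven from the tracing_cpumask file (the mask is
 * parsed by cpumask_parse_user() below, i.e. the usual hex bitmap format):
 *
 *   # echo 3 > tracing_cpumask    - trace only CPUs 0 and 1
 *   # cat tracing_cpumask         - show the current mask
 */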
5195
5196 static ssize_t
5197 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5198                       size_t count, loff_t *ppos)
5199 {
5200         struct trace_array *tr = file_inode(filp)->i_private;
5201         cpumask_var_t tracing_cpumask_new;
5202         int err;
5203
5204         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5205                 return -ENOMEM;
5206
5207         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5208         if (err)
5209                 goto err_free;
5210
5211         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5212         if (err)
5213                 goto err_free;
5214
5215         free_cpumask_var(tracing_cpumask_new);
5216
5217         return count;
5218
5219 err_free:
5220         free_cpumask_var(tracing_cpumask_new);
5221
5222         return err;
5223 }
5224
5225 static const struct file_operations tracing_cpumask_fops = {
5226         .open           = tracing_open_generic_tr,
5227         .read           = tracing_cpumask_read,
5228         .write          = tracing_cpumask_write,
5229         .release        = tracing_release_generic_tr,
5230         .llseek         = generic_file_llseek,
5231 };
5232
5233 static int tracing_trace_options_show(struct seq_file *m, void *v)
5234 {
5235         struct tracer_opt *trace_opts;
5236         struct trace_array *tr = m->private;
5237         u32 tracer_flags;
5238         int i;
5239
5240         mutex_lock(&trace_types_lock);
5241         tracer_flags = tr->current_trace->flags->val;
5242         trace_opts = tr->current_trace->flags->opts;
5243
5244         for (i = 0; trace_options[i]; i++) {
5245                 if (tr->trace_flags & (1 << i))
5246                         seq_printf(m, "%s\n", trace_options[i]);
5247                 else
5248                         seq_printf(m, "no%s\n", trace_options[i]);
5249         }
5250
5251         for (i = 0; trace_opts[i].name; i++) {
5252                 if (tracer_flags & trace_opts[i].bit)
5253                         seq_printf(m, "%s\n", trace_opts[i].name);
5254                 else
5255                         seq_printf(m, "no%s\n", trace_opts[i].name);
5256         }
5257         mutex_unlock(&trace_types_lock);
5258
5259         return 0;
5260 }
5261
5262 static int __set_tracer_option(struct trace_array *tr,
5263                                struct tracer_flags *tracer_flags,
5264                                struct tracer_opt *opts, int neg)
5265 {
5266         struct tracer *trace = tracer_flags->trace;
5267         int ret;
5268
5269         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5270         if (ret)
5271                 return ret;
5272
5273         if (neg)
5274                 tracer_flags->val &= ~opts->bit;
5275         else
5276                 tracer_flags->val |= opts->bit;
5277         return 0;
5278 }
5279
5280 /* Try to assign a tracer specific option */
5281 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5282 {
5283         struct tracer *trace = tr->current_trace;
5284         struct tracer_flags *tracer_flags = trace->flags;
5285         struct tracer_opt *opts = NULL;
5286         int i;
5287
5288         for (i = 0; tracer_flags->opts[i].name; i++) {
5289                 opts = &tracer_flags->opts[i];
5290
5291                 if (strcmp(cmp, opts->name) == 0)
5292                         return __set_tracer_option(tr, trace->flags, opts, neg);
5293         }
5294
5295         return -EINVAL;
5296 }
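
/*
 * These are the per-tracer flags that show up in trace_options next to the
 * global ones; for example, the function tracer provides a "func_stack_trace"
 * option, so "echo func_stack_trace > trace_options" ends up here.
 */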
5297
5298 /* Some tracers require overwrite to stay enabled */
5299 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5300 {
5301         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5302                 return -1;
5303
5304         return 0;
5305 }
5306
5307 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5308 {
5309         int *map;
5310
5311         if ((mask == TRACE_ITER_RECORD_TGID) ||
5312             (mask == TRACE_ITER_RECORD_CMD))
5313                 lockdep_assert_held(&event_mutex);
5314
5315         /* do nothing if flag is already set */
5316         if (!!(tr->trace_flags & mask) == !!enabled)
5317                 return 0;
5318
5319         /* Give the tracer a chance to approve the change */
5320         if (tr->current_trace->flag_changed)
5321                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5322                         return -EINVAL;
5323
5324         if (enabled)
5325                 tr->trace_flags |= mask;
5326         else
5327                 tr->trace_flags &= ~mask;
5328
5329         if (mask == TRACE_ITER_RECORD_CMD)
5330                 trace_event_enable_cmd_record(enabled);
5331
5332         if (mask == TRACE_ITER_RECORD_TGID) {
5333                 if (!tgid_map) {
5334                         tgid_map_max = pid_max;
5335                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5336                                        GFP_KERNEL);
5337
5338                         /*
5339                          * Pairs with smp_load_acquire() in
5340                          * trace_find_tgid_ptr() to ensure that if it observes
5341                          * the tgid_map we just allocated then it also observes
5342                          * the corresponding tgid_map_max value.
5343                          */
5344                         smp_store_release(&tgid_map, map);
5345                 }
5346                 if (!tgid_map) {
5347                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5348                         return -ENOMEM;
5349                 }
5350
5351                 trace_event_enable_tgid_record(enabled);
5352         }
5353
5354         if (mask == TRACE_ITER_EVENT_FORK)
5355                 trace_event_follow_fork(tr, enabled);
5356
5357         if (mask == TRACE_ITER_FUNC_FORK)
5358                 ftrace_pid_follow_fork(tr, enabled);
5359
5360         if (mask == TRACE_ITER_OVERWRITE) {
5361                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5362 #ifdef CONFIG_TRACER_MAX_TRACE
5363                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5364 #endif
5365         }
5366
5367         if (mask == TRACE_ITER_PRINTK) {
5368                 trace_printk_start_stop_comm(enabled);
5369                 trace_printk_control(enabled);
5370         }
5371
5372         return 0;
5373 }
5374
5375 int trace_set_options(struct trace_array *tr, char *option)
5376 {
5377         char *cmp;
5378         int neg = 0;
5379         int ret;
5380         size_t orig_len = strlen(option);
5381         int len;
5382
5383         cmp = strstrip(option);
5384
5385         len = str_has_prefix(cmp, "no");
5386         if (len)
5387                 neg = 1;
5388
5389         cmp += len;
5390
5391         mutex_lock(&event_mutex);
5392         mutex_lock(&trace_types_lock);
5393
5394         ret = match_string(trace_options, -1, cmp);
5395         /* If no option could be set, test the specific tracer options */
5396         if (ret < 0)
5397                 ret = set_tracer_option(tr, cmp, neg);
5398         else
5399                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5400
5401         mutex_unlock(&trace_types_lock);
5402         mutex_unlock(&event_mutex);
5403
5404         /*
5405          * If the first trailing whitespace is replaced with '\0' by strstrip,
5406          * turn it back into a space.
5407          */
5408         if (orig_len > strlen(option))
5409                 option[strlen(option)] = ' ';
5410
5411         return ret;
5412 }
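
/*
 * Example of the "no" handling above, using the sym-offset flag:
 *
 *   # echo sym-offset > trace_options      - set the flag
 *   # echo nosym-offset > trace_options    - clear it again
 */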
5413
5414 static void __init apply_trace_boot_options(void)
5415 {
5416         char *buf = trace_boot_options_buf;
5417         char *option;
5418
5419         while (true) {
5420                 option = strsep(&buf, ",");
5421
5422                 if (!option)
5423                         break;
5424
5425                 if (*option)
5426                         trace_set_options(&global_trace, option);
5427
5428                 /* Put back the comma to allow this to be called again */
5429                 if (buf)
5430                         *(buf - 1) = ',';
5431         }
5432 }
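
/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel command
 * line parameter, so booting with e.g. "trace_options=sym-offset,nooverwrite"
 * applies those options here, one comma-separated token at a time.
 */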
5433
5434 static ssize_t
5435 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5436                         size_t cnt, loff_t *ppos)
5437 {
5438         struct seq_file *m = filp->private_data;
5439         struct trace_array *tr = m->private;
5440         char buf[64];
5441         int ret;
5442
5443         if (cnt >= sizeof(buf))
5444                 return -EINVAL;
5445
5446         if (copy_from_user(buf, ubuf, cnt))
5447                 return -EFAULT;
5448
5449         buf[cnt] = 0;
5450
5451         ret = trace_set_options(tr, buf);
5452         if (ret < 0)
5453                 return ret;
5454
5455         *ppos += cnt;
5456
5457         return cnt;
5458 }
5459
5460 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5461 {
5462         struct trace_array *tr = inode->i_private;
5463         int ret;
5464
5465         ret = tracing_check_open_get_tr(tr);
5466         if (ret)
5467                 return ret;
5468
5469         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5470         if (ret < 0)
5471                 trace_array_put(tr);
5472
5473         return ret;
5474 }
5475
5476 static const struct file_operations tracing_iter_fops = {
5477         .open           = tracing_trace_options_open,
5478         .read           = seq_read,
5479         .llseek         = seq_lseek,
5480         .release        = tracing_single_release_tr,
5481         .write          = tracing_trace_options_write,
5482 };
5483
5484 static const char readme_msg[] =
5485         "tracing mini-HOWTO:\n\n"
5486         "# echo 0 > tracing_on : quick way to disable tracing\n"
5487         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5488         " Important files:\n"
5489         "  trace\t\t\t- The static contents of the buffer\n"
5490         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5491         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5492         "  current_tracer\t- function and latency tracers\n"
5493         "  available_tracers\t- list of configured tracers for current_tracer\n"
5494         "  error_log\t- error log for failed commands (that support it)\n"
5495         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5496         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5497         "  trace_clock\t\t- change the clock used to order events\n"
5498         "       local:   Per cpu clock but may not be synced across CPUs\n"
5499         "      global:   Synced across CPUs but slows tracing down.\n"
5500         "     counter:   Not a clock, but just an increment\n"
5501         "      uptime:   Jiffy counter from time of boot\n"
5502         "        perf:   Same clock that perf events use\n"
5503 #ifdef CONFIG_X86_64
5504         "     x86-tsc:   TSC cycle counter\n"
5505 #endif
5506         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5507         "       delta:   Delta difference against a buffer-wide timestamp\n"
5508         "    absolute:   Absolute (standalone) timestamp\n"
5509         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5510         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5511         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5512         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5513         "\t\t\t  Remove sub-buffer with rmdir\n"
5514         "  trace_options\t\t- Set format or modify how tracing happens\n"
5515         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5516         "\t\t\t  option name\n"
5517         "  saved_cmdlines_size\t- echo the number of comm-pid pairs to save in here\n"
5518 #ifdef CONFIG_DYNAMIC_FTRACE
5519         "\n  available_filter_functions - list of functions that can be filtered on\n"
5520         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5521         "\t\t\t  functions\n"
5522         "\t     accepts: func_full_name or glob-matching-pattern\n"
5523         "\t     modules: Can select a group via module\n"
5524         "\t      Format: :mod:<module-name>\n"
5525         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5526         "\t    triggers: a command to perform when function is hit\n"
5527         "\t      Format: <function>:<trigger>[:count]\n"
5528         "\t     trigger: traceon, traceoff\n"
5529         "\t\t      enable_event:<system>:<event>\n"
5530         "\t\t      disable_event:<system>:<event>\n"
5531 #ifdef CONFIG_STACKTRACE
5532         "\t\t      stacktrace\n"
5533 #endif
5534 #ifdef CONFIG_TRACER_SNAPSHOT
5535         "\t\t      snapshot\n"
5536 #endif
5537         "\t\t      dump\n"
5538         "\t\t      cpudump\n"
5539         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5540         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5541         "\t     The first one will disable tracing every time do_fault is hit\n"
5542         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5543         "\t       The first time do_trap is hit and it disables tracing, the\n"
5544         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5545         "\t       the counter will not decrement. It only decrements when the\n"
5546         "\t       trigger did work\n"
5547         "\t     To remove trigger without count:\n"
5548         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5549         "\t     To remove trigger with a count:\n"
5550         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5551         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5552         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5553         "\t    modules: Can select a group via module command :mod:\n"
5554         "\t    Does not accept triggers\n"
5555 #endif /* CONFIG_DYNAMIC_FTRACE */
5556 #ifdef CONFIG_FUNCTION_TRACER
5557         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5558         "\t\t    (function)\n"
5559         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5560         "\t\t    (function)\n"
5561 #endif
5562 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5563         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5564         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5565         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5566 #endif
5567 #ifdef CONFIG_TRACER_SNAPSHOT
5568         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5569         "\t\t\t  snapshot buffer. Read the contents for more\n"
5570         "\t\t\t  information\n"
5571 #endif
5572 #ifdef CONFIG_STACK_TRACER
5573         "  stack_trace\t\t- Shows the max stack trace when active\n"
5574         "  stack_max_size\t- Shows current max stack size that was traced\n"
5575         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5576         "\t\t\t  new trace)\n"
5577 #ifdef CONFIG_DYNAMIC_FTRACE
5578         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5579         "\t\t\t  traces\n"
5580 #endif
5581 #endif /* CONFIG_STACK_TRACER */
5582 #ifdef CONFIG_DYNAMIC_EVENTS
5583         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5584         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5585 #endif
5586 #ifdef CONFIG_KPROBE_EVENTS
5587         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5588         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5589 #endif
5590 #ifdef CONFIG_UPROBE_EVENTS
5591         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5592         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5593 #endif
5594 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5595         "\t  accepts: event-definitions (one definition per line)\n"
5596         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5597         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5598 #ifdef CONFIG_HIST_TRIGGERS
5599         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5600 #endif
5601         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5602         "\t           -:[<group>/][<event>]\n"
5603 #ifdef CONFIG_KPROBE_EVENTS
5604         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5605   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5606 #endif
5607 #ifdef CONFIG_UPROBE_EVENTS
5608   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5609 #endif
5610         "\t     args: <name>=fetcharg[:type]\n"
5611         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5612 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5613         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5614 #else
5615         "\t           $stack<index>, $stack, $retval, $comm,\n"
5616 #endif
5617         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5618         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5619         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5620         "\t           symstr, <type>\\[<array-size>\\]\n"
5621 #ifdef CONFIG_HIST_TRIGGERS
5622         "\t    field: <stype> <name>;\n"
5623         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5624         "\t           [unsigned] char/int/long\n"
5625 #endif
5626         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5627         "\t            of the <attached-group>/<attached-event>.\n"
5628 #endif
5629         "  events/\t\t- Directory containing all trace event subsystems:\n"
5630         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5631         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5632         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5633         "\t\t\t  events\n"
5634         "      filter\t\t- If set, only events passing filter are traced\n"
5635         "  events/<system>/<event>/\t- Directory containing control files for\n"
5636         "\t\t\t  <event>:\n"
5637         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5638         "      filter\t\t- If set, only events passing filter are traced\n"
5639         "      trigger\t\t- If set, a command to perform when event is hit\n"
5640         "\t    Format: <trigger>[:count][if <filter>]\n"
5641         "\t   trigger: traceon, traceoff\n"
5642         "\t            enable_event:<system>:<event>\n"
5643         "\t            disable_event:<system>:<event>\n"
5644 #ifdef CONFIG_HIST_TRIGGERS
5645         "\t            enable_hist:<system>:<event>\n"
5646         "\t            disable_hist:<system>:<event>\n"
5647 #endif
5648 #ifdef CONFIG_STACKTRACE
5649         "\t\t    stacktrace\n"
5650 #endif
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652         "\t\t    snapshot\n"
5653 #endif
5654 #ifdef CONFIG_HIST_TRIGGERS
5655         "\t\t    hist (see below)\n"
5656 #endif
5657         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5658         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5659         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5660         "\t                  events/block/block_unplug/trigger\n"
5661         "\t   The first disables tracing every time block_unplug is hit.\n"
5662         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5663         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5664         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5665         "\t   Like function triggers, the counter is only decremented if it\n"
5666         "\t    enabled or disabled tracing.\n"
5667         "\t   To remove a trigger without a count:\n"
5668         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5669         "\t   To remove a trigger with a count:\n"
5670         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5671         "\t   Filters can be ignored when removing a trigger.\n"
5672 #ifdef CONFIG_HIST_TRIGGERS
5673         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5674         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5675         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5676         "\t            [:values=<field1[,field2,...]>]\n"
5677         "\t            [:sort=<field1[,field2,...]>]\n"
5678         "\t            [:size=#entries]\n"
5679         "\t            [:pause][:continue][:clear]\n"
5680         "\t            [:name=histname1]\n"
5681         "\t            [:nohitcount]\n"
5682         "\t            [:<handler>.<action>]\n"
5683         "\t            [if <filter>]\n\n"
5684         "\t    Note, special fields can be used as well:\n"
5685         "\t            common_timestamp - to record current timestamp\n"
5686         "\t            common_cpu - to record the CPU the event happened on\n"
5687         "\n"
5688         "\t    A hist trigger variable can be:\n"
5689         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5690         "\t        - a reference to another variable e.g. y=$x,\n"
5691         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5692         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5693         "\n"
5694         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5695         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5696         "\t    variable reference, field or numeric literal.\n"
5697         "\n"
5698         "\t    When a matching event is hit, an entry is added to a hash\n"
5699         "\t    table using the key(s) and value(s) named, and the value of a\n"
5700         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5701         "\t    correspond to fields in the event's format description.  Keys\n"
5702         "\t    can be any field, or the special string 'stacktrace'.\n"
5703         "\t    Compound keys consisting of up to two fields can be specified\n"
5704         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5705         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5706         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5707         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5708         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5709         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5710         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5711         "\t    its histogram data will be shared with other triggers of the\n"
5712         "\t    same name, and trigger hits will update this common data.\n\n"
5713         "\t    Reading the 'hist' file for the event will dump the hash\n"
5714         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5715         "\t    triggers attached to an event, there will be a table for each\n"
5716         "\t    trigger in the output.  The table displayed for a named\n"
5717         "\t    trigger will be the same as any other instance having the\n"
5718         "\t    same name.  The default format used to display a given field\n"
5719         "\t    can be modified by appending any of the following modifiers\n"
5720         "\t    to the field name, as applicable:\n\n"
5721         "\t            .hex        display a number as a hex value\n"
5722         "\t            .sym        display an address as a symbol\n"
5723         "\t            .sym-offset display an address as a symbol and offset\n"
5724         "\t            .execname   display a common_pid as a program name\n"
5725         "\t            .syscall    display a syscall id as a syscall name\n"
5726         "\t            .log2       display log2 value rather than raw number\n"
5727         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5728         "\t            .usecs      display a common_timestamp in microseconds\n"
5729         "\t            .percent    display a number as a percentage value\n"
5730         "\t            .graph      display a bar-graph of a value\n\n"
5731         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5732         "\t    trigger or to start a hist trigger but not log any events\n"
5733         "\t    until told to do so.  'continue' can be used to start or\n"
5734         "\t    restart a paused hist trigger.\n\n"
5735         "\t    The 'clear' parameter will clear the contents of a running\n"
5736         "\t    hist trigger and leave its current paused/active state\n"
5737         "\t    unchanged.\n\n"
5738         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5739         "\t    raw hitcount in the histogram.\n\n"
5740         "\t    The enable_hist and disable_hist triggers can be used to\n"
5741         "\t    have one event conditionally start and stop another event's\n"
5742         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5743         "\t    the enable_event and disable_event triggers.\n\n"
5744         "\t    Hist trigger handlers and actions are executed whenever a\n"
5745         "\t    histogram entry is added or updated.  They take the form:\n\n"
5746         "\t        <handler>.<action>\n\n"
5747         "\t    The available handlers are:\n\n"
5748         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5749         "\t        onmax(var)               - invoke if var exceeds current max\n"
5750         "\t        onchange(var)            - invoke action if var changes\n\n"
5751         "\t    The available actions are:\n\n"
5752         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5753         "\t        save(field,...)                      - save current event fields\n"
5754 #ifdef CONFIG_TRACER_SNAPSHOT
5755         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5756 #endif
5757 #ifdef CONFIG_SYNTH_EVENTS
5758         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5759         "\t  Write into this file to define/undefine new synthetic events.\n"
5760         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5761 #endif
5762 #endif
5763 ;
5764
5765 static ssize_t
5766 tracing_readme_read(struct file *filp, char __user *ubuf,
5767                        size_t cnt, loff_t *ppos)
5768 {
5769         return simple_read_from_buffer(ubuf, cnt, ppos,
5770                                         readme_msg, strlen(readme_msg));
5771 }
5772
5773 static const struct file_operations tracing_readme_fops = {
5774         .open           = tracing_open_generic,
5775         .read           = tracing_readme_read,
5776         .llseek         = generic_file_llseek,
5777 };
5778
5779 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5780 {
5781         int pid = ++(*pos);
5782
5783         return trace_find_tgid_ptr(pid);
5784 }
5785
5786 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5787 {
5788         int pid = *pos;
5789
5790         return trace_find_tgid_ptr(pid);
5791 }
5792
5793 static void saved_tgids_stop(struct seq_file *m, void *v)
5794 {
5795 }
5796
5797 static int saved_tgids_show(struct seq_file *m, void *v)
5798 {
5799         int *entry = (int *)v;
5800         int pid = entry - tgid_map;
5801         int tgid = *entry;
5802
5803         if (tgid == 0)
5804                 return SEQ_SKIP;
5805
5806         seq_printf(m, "%d %d\n", pid, tgid);
5807         return 0;
5808 }
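
/*
 * tgid_map is indexed by pid (hence the pointer arithmetic above), so the
 * saved_tgids file prints a "<pid> <tgid>" line for every slot that has a
 * non-zero tgid recorded.
 */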
5809
5810 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5811         .start          = saved_tgids_start,
5812         .stop           = saved_tgids_stop,
5813         .next           = saved_tgids_next,
5814         .show           = saved_tgids_show,
5815 };
5816
5817 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5818 {
5819         int ret;
5820
5821         ret = tracing_check_open_get_tr(NULL);
5822         if (ret)
5823                 return ret;
5824
5825         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5826 }
5827
5828
5829 static const struct file_operations tracing_saved_tgids_fops = {
5830         .open           = tracing_saved_tgids_open,
5831         .read           = seq_read,
5832         .llseek         = seq_lseek,
5833         .release        = seq_release,
5834 };
5835
5836 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5837 {
5838         unsigned int *ptr = v;
5839
5840         if (*pos || m->count)
5841                 ptr++;
5842
5843         (*pos)++;
5844
5845         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5846              ptr++) {
5847                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5848                         continue;
5849
5850                 return ptr;
5851         }
5852
5853         return NULL;
5854 }
5855
5856 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5857 {
5858         void *v;
5859         loff_t l = 0;
5860
5861         preempt_disable();
5862         arch_spin_lock(&trace_cmdline_lock);
5863
5864         v = &savedcmd->map_cmdline_to_pid[0];
5865         while (l <= *pos) {
5866                 v = saved_cmdlines_next(m, v, &l);
5867                 if (!v)
5868                         return NULL;
5869         }
5870
5871         return v;
5872 }
5873
5874 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5875 {
5876         arch_spin_unlock(&trace_cmdline_lock);
5877         preempt_enable();
5878 }
5879
5880 static int saved_cmdlines_show(struct seq_file *m, void *v)
5881 {
5882         char buf[TASK_COMM_LEN];
5883         unsigned int *pid = v;
5884
5885         __trace_find_cmdline(*pid, buf);
5886         seq_printf(m, "%d %s\n", *pid, buf);
5887         return 0;
5888 }
5889
5890 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5891         .start          = saved_cmdlines_start,
5892         .next           = saved_cmdlines_next,
5893         .stop           = saved_cmdlines_stop,
5894         .show           = saved_cmdlines_show,
5895 };
5896
5897 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5898 {
5899         int ret;
5900
5901         ret = tracing_check_open_get_tr(NULL);
5902         if (ret)
5903                 return ret;
5904
5905         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5906 }
5907
5908 static const struct file_operations tracing_saved_cmdlines_fops = {
5909         .open           = tracing_saved_cmdlines_open,
5910         .read           = seq_read,
5911         .llseek         = seq_lseek,
5912         .release        = seq_release,
5913 };
5914
5915 static ssize_t
5916 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5917                                  size_t cnt, loff_t *ppos)
5918 {
5919         char buf[64];
5920         int r;
5921
5922         preempt_disable();
5923         arch_spin_lock(&trace_cmdline_lock);
5924         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5925         arch_spin_unlock(&trace_cmdline_lock);
5926         preempt_enable();
5927
5928         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5929 }
5930
5931 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5932 {
5933         kfree(s->saved_cmdlines);
5934         kfree(s->map_cmdline_to_pid);
5935         kfree(s);
5936 }
5937
5938 static int tracing_resize_saved_cmdlines(unsigned int val)
5939 {
5940         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5941
5942         s = kmalloc(sizeof(*s), GFP_KERNEL);
5943         if (!s)
5944                 return -ENOMEM;
5945
5946         if (allocate_cmdlines_buffer(val, s) < 0) {
5947                 kfree(s);
5948                 return -ENOMEM;
5949         }
5950
5951         preempt_disable();
5952         arch_spin_lock(&trace_cmdline_lock);
5953         savedcmd_temp = savedcmd;
5954         savedcmd = s;
5955         arch_spin_unlock(&trace_cmdline_lock);
5956         preempt_enable();
5957         free_saved_cmdlines_buffer(savedcmd_temp);
5958
5959         return 0;
5960 }
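
/*
 * This backs writes to the saved_cmdlines_size file, e.g.:
 *
 *   # echo 1024 > saved_cmdlines_size   - keep up to 1024 saved comm/pid pairs
 *
 * The old buffer is swapped out under trace_cmdline_lock and freed afterwards.
 */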
5961
5962 static ssize_t
5963 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5964                                   size_t cnt, loff_t *ppos)
5965 {
5966         unsigned long val;
5967         int ret;
5968
5969         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5970         if (ret)
5971                 return ret;
5972
5973         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5974         if (!val || val > PID_MAX_DEFAULT)
5975                 return -EINVAL;
5976
5977         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5978         if (ret < 0)
5979                 return ret;
5980
5981         *ppos += cnt;
5982
5983         return cnt;
5984 }
5985
5986 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5987         .open           = tracing_open_generic,
5988         .read           = tracing_saved_cmdlines_size_read,
5989         .write          = tracing_saved_cmdlines_size_write,
5990 };
5991
5992 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5993 static union trace_eval_map_item *
5994 update_eval_map(union trace_eval_map_item *ptr)
5995 {
5996         if (!ptr->map.eval_string) {
5997                 if (ptr->tail.next) {
5998                         ptr = ptr->tail.next;
5999                         /* Set ptr to the next real item (skip head) */
6000                         ptr++;
6001                 } else
6002                         return NULL;
6003         }
6004         return ptr;
6005 }
6006
6007 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6008 {
6009         union trace_eval_map_item *ptr = v;
6010
6011         /*
6012          * Paranoid! If ptr points to end, we don't want to increment past it.
6013          * This really should never happen.
6014          */
6015         (*pos)++;
6016         ptr = update_eval_map(ptr);
6017         if (WARN_ON_ONCE(!ptr))
6018                 return NULL;
6019
6020         ptr++;
6021         ptr = update_eval_map(ptr);
6022
6023         return ptr;
6024 }
6025
6026 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6027 {
6028         union trace_eval_map_item *v;
6029         loff_t l = 0;
6030
6031         mutex_lock(&trace_eval_mutex);
6032
6033         v = trace_eval_maps;
6034         if (v)
6035                 v++;
6036
6037         while (v && l < *pos) {
6038                 v = eval_map_next(m, v, &l);
6039         }
6040
6041         return v;
6042 }
6043
6044 static void eval_map_stop(struct seq_file *m, void *v)
6045 {
6046         mutex_unlock(&trace_eval_mutex);
6047 }
6048
6049 static int eval_map_show(struct seq_file *m, void *v)
6050 {
6051         union trace_eval_map_item *ptr = v;
6052
6053         seq_printf(m, "%s %ld (%s)\n",
6054                    ptr->map.eval_string, ptr->map.eval_value,
6055                    ptr->map.system);
6056
6057         return 0;
6058 }
6059
6060 static const struct seq_operations tracing_eval_map_seq_ops = {
6061         .start          = eval_map_start,
6062         .next           = eval_map_next,
6063         .stop           = eval_map_stop,
6064         .show           = eval_map_show,
6065 };
6066
6067 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6068 {
6069         int ret;
6070
6071         ret = tracing_check_open_get_tr(NULL);
6072         if (ret)
6073                 return ret;
6074
6075         return seq_open(filp, &tracing_eval_map_seq_ops);
6076 }
6077
6078 static const struct file_operations tracing_eval_map_fops = {
6079         .open           = tracing_eval_map_open,
6080         .read           = seq_read,
6081         .llseek         = seq_lseek,
6082         .release        = seq_release,
6083 };
6084
6085 static inline union trace_eval_map_item *
6086 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6087 {
6088         /* Return tail of array given the head */
6089         return ptr + ptr->head.length + 1;
6090 }
6091
6092 static void
6093 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6094                            int len)
6095 {
6096         struct trace_eval_map **stop;
6097         struct trace_eval_map **map;
6098         union trace_eval_map_item *map_array;
6099         union trace_eval_map_item *ptr;
6100
6101         stop = start + len;
6102
6103         /*
6104          * The trace_eval_maps contains the map plus a head and tail item,
6105          * where the head holds the module and length of array, and the
6106          * tail holds a pointer to the next list.
6107          */
6108         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6109         if (!map_array) {
6110                 pr_warn("Unable to allocate trace eval mapping\n");
6111                 return;
6112         }
6113
6114         mutex_lock(&trace_eval_mutex);
6115
6116         if (!trace_eval_maps)
6117                 trace_eval_maps = map_array;
6118         else {
6119                 ptr = trace_eval_maps;
6120                 for (;;) {
6121                         ptr = trace_eval_jmp_to_tail(ptr);
6122                         if (!ptr->tail.next)
6123                                 break;
6124                         ptr = ptr->tail.next;
6125
6126                 }
6127                 ptr->tail.next = map_array;
6128         }
6129         map_array->head.mod = mod;
6130         map_array->head.length = len;
6131         map_array++;
6132
6133         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6134                 map_array->map = **map;
6135                 map_array++;
6136         }
6137         memset(map_array, 0, sizeof(*map_array));
6138
6139         mutex_unlock(&trace_eval_mutex);
6140 }
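
/*
 * Resulting layout of each chunk appended to trace_eval_maps:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next chunk or NULL ]
 *
 * eval_map_start()/eval_map_next() skip the head entries and follow the tail
 * pointers when iterating for the eval_map file.
 */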
6141
6142 static void trace_create_eval_file(struct dentry *d_tracer)
6143 {
6144         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6145                           NULL, &tracing_eval_map_fops);
6146 }
6147
6148 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6149 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6150 static inline void trace_insert_eval_map_file(struct module *mod,
6151                               struct trace_eval_map **start, int len) { }
6152 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6153
6154 static void trace_insert_eval_map(struct module *mod,
6155                                   struct trace_eval_map **start, int len)
6156 {
6157         struct trace_eval_map **map;
6158
6159         if (len <= 0)
6160                 return;
6161
6162         map = start;
6163
6164         trace_event_eval_update(map, len);
6165
6166         trace_insert_eval_map_file(mod, start, len);
6167 }
6168
6169 static ssize_t
6170 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6171                        size_t cnt, loff_t *ppos)
6172 {
6173         struct trace_array *tr = filp->private_data;
6174         char buf[MAX_TRACER_SIZE+2];
6175         int r;
6176
6177         mutex_lock(&trace_types_lock);
6178         r = sprintf(buf, "%s\n", tr->current_trace->name);
6179         mutex_unlock(&trace_types_lock);
6180
6181         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6182 }
6183
6184 int tracer_init(struct tracer *t, struct trace_array *tr)
6185 {
6186         tracing_reset_online_cpus(&tr->array_buffer);
6187         return t->init(tr);
6188 }
6189
6190 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6191 {
6192         int cpu;
6193
6194         for_each_tracing_cpu(cpu)
6195                 per_cpu_ptr(buf->data, cpu)->entries = val;
6196 }
6197
6198 #ifdef CONFIG_TRACER_MAX_TRACE
6199 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6200 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6201                                         struct array_buffer *size_buf, int cpu_id)
6202 {
6203         int cpu, ret = 0;
6204
6205         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6206                 for_each_tracing_cpu(cpu) {
6207                         ret = ring_buffer_resize(trace_buf->buffer,
6208                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6209                         if (ret < 0)
6210                                 break;
6211                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6212                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6213                 }
6214         } else {
6215                 ret = ring_buffer_resize(trace_buf->buffer,
6216                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6217                 if (ret == 0)
6218                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6219                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6220         }
6221
6222         return ret;
6223 }
6224 #endif /* CONFIG_TRACER_MAX_TRACE */
6225
6226 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6227                                         unsigned long size, int cpu)
6228 {
6229         int ret;
6230
6231         /*
6232          * If kernel or user changes the size of the ring buffer
6233          * we use the size that was given, and we can forget about
6234          * expanding it later.
6235          */
6236         ring_buffer_expanded = true;
6237
6238         /* May be called before buffers are initialized */
6239         if (!tr->array_buffer.buffer)
6240                 return 0;
6241
6242         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6243         if (ret < 0)
6244                 return ret;
6245
6246 #ifdef CONFIG_TRACER_MAX_TRACE
6247         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6248             !tr->current_trace->use_max_tr)
6249                 goto out;
6250
6251         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6252         if (ret < 0) {
6253                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6254                                                      &tr->array_buffer, cpu);
6255                 if (r < 0) {
6256                         /*
6257                          * AARGH! We are now left with a max buffer
6258                          * of a different size!
6259                          * The max buffer is our "snapshot" buffer.
6260                          * When a tracer needs a snapshot (one of the
6261                          * latency tracers), it swaps the max buffer
6262                          * with the saved snapshot. We succeeded in
6263                          * updating the size of the main buffer, but failed to
6264                          * update the size of the max buffer. But when we tried
6265                          * to reset the main buffer to the original size, we
6266                          * failed there too. This is very unlikely to
6267                          * happen, but if it does, warn and kill all
6268                          * tracing.
6269                          */
6270                         WARN_ON(1);
6271                         tracing_disabled = 1;
6272                 }
6273                 return ret;
6274         }
6275
6276         if (cpu == RING_BUFFER_ALL_CPUS)
6277                 set_buffer_entries(&tr->max_buffer, size);
6278         else
6279                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6280
6281  out:
6282 #endif /* CONFIG_TRACER_MAX_TRACE */
6283
6284         if (cpu == RING_BUFFER_ALL_CPUS)
6285                 set_buffer_entries(&tr->array_buffer, size);
6286         else
6287                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6288
6289         return ret;
6290 }
6291
6292 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6293                                   unsigned long size, int cpu_id)
6294 {
6295         int ret;
6296
6297         mutex_lock(&trace_types_lock);
6298
6299         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6300                 /* make sure this cpu is enabled in the mask */
6301                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6302                         ret = -EINVAL;
6303                         goto out;
6304                 }
6305         }
6306
6307         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6308         if (ret < 0)
6309                 ret = -ENOMEM;
6310
6311 out:
6312         mutex_unlock(&trace_types_lock);
6313
6314         return ret;
6315 }
6316
6317
6318 /**
6319  * tracing_update_buffers - used by the tracing facility to expand the ring buffers
6320  *
6321  * To save memory on systems that have tracing configured in but never
6322  * use it, the ring buffers start out at a minimum size. Once a user
6323  * starts to use the tracing facility, the buffers need to grow to
6324  * their default size.
6325  *
6326  * This function is to be called when a tracer is about to be used.
6327  */
6328 int tracing_update_buffers(void)
6329 {
6330         int ret = 0;
6331
6332         mutex_lock(&trace_types_lock);
6333         if (!ring_buffer_expanded)
6334                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6335                                                 RING_BUFFER_ALL_CPUS);
6336         mutex_unlock(&trace_types_lock);
6337
6338         return ret;
6339 }
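
/*
 * Minimal usage sketch, assuming an in-kernel caller that is about to
 * enable a tracer; the surrounding error handling is hypothetical:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	// ring buffers are now at least trace_buf_size per CPU
 */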
6340
6341 struct trace_option_dentry;
6342
6343 static void
6344 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6345
6346 /*
6347  * Used to clear out the tracer before deletion of an instance.
6348  * Must have trace_types_lock held.
6349  */
6350 static void tracing_set_nop(struct trace_array *tr)
6351 {
6352         if (tr->current_trace == &nop_trace)
6353                 return;
6354
6355         tr->current_trace->enabled--;
6356
6357         if (tr->current_trace->reset)
6358                 tr->current_trace->reset(tr);
6359
6360         tr->current_trace = &nop_trace;
6361 }
6362
6363 static bool tracer_options_updated;
6364
6365 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6366 {
6367         /* Only enable if the directory has been created already. */
6368         if (!tr->dir)
6369                 return;
6370
6371         /* Only create trace option files after update_tracer_options finishes */
6372         if (!tracer_options_updated)
6373                 return;
6374
6375         create_trace_option_files(tr, t);
6376 }
6377
6378 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6379 {
6380         struct tracer *t;
6381 #ifdef CONFIG_TRACER_MAX_TRACE
6382         bool had_max_tr;
6383 #endif
6384         int ret = 0;
6385
6386         mutex_lock(&trace_types_lock);
6387
6388         if (!ring_buffer_expanded) {
6389                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6390                                                 RING_BUFFER_ALL_CPUS);
6391                 if (ret < 0)
6392                         goto out;
6393                 ret = 0;
6394         }
6395
6396         for (t = trace_types; t; t = t->next) {
6397                 if (strcmp(t->name, buf) == 0)
6398                         break;
6399         }
6400         if (!t) {
6401                 ret = -EINVAL;
6402                 goto out;
6403         }
6404         if (t == tr->current_trace)
6405                 goto out;
6406
6407 #ifdef CONFIG_TRACER_SNAPSHOT
6408         if (t->use_max_tr) {
6409                 local_irq_disable();
6410                 arch_spin_lock(&tr->max_lock);
6411                 if (tr->cond_snapshot)
6412                         ret = -EBUSY;
6413                 arch_spin_unlock(&tr->max_lock);
6414                 local_irq_enable();
6415                 if (ret)
6416                         goto out;
6417         }
6418 #endif
6419         /* Some tracers won't work when started from the kernel command line */
6420         if (system_state < SYSTEM_RUNNING && t->noboot) {
6421                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6422                         t->name);
6423                 goto out;
6424         }
6425
6426         /* Some tracers are only allowed for the top level buffer */
6427         if (!trace_ok_for_array(t, tr)) {
6428                 ret = -EINVAL;
6429                 goto out;
6430         }
6431
6432         /* If trace pipe files are being read, we can't change the tracer */
6433         if (tr->trace_ref) {
6434                 ret = -EBUSY;
6435                 goto out;
6436         }
6437
6438         trace_branch_disable();
6439
6440         tr->current_trace->enabled--;
6441
6442         if (tr->current_trace->reset)
6443                 tr->current_trace->reset(tr);
6444
6445 #ifdef CONFIG_TRACER_MAX_TRACE
6446         had_max_tr = tr->current_trace->use_max_tr;
6447
6448         /* Current trace needs to be nop_trace before synchronize_rcu */
6449         tr->current_trace = &nop_trace;
6450
6451         if (had_max_tr && !t->use_max_tr) {
6452                 /*
6453                  * We need to make sure that the update_max_tr sees that
6454                  * current_trace changed to nop_trace to keep it from
6455                  * swapping the buffers after we resize it.
6456                  * The update_max_tr is called with interrupts disabled,
6457                  * so a synchronize_rcu() is sufficient.
6458                  */
6459                 synchronize_rcu();
6460                 free_snapshot(tr);
6461         }
6462
6463         if (t->use_max_tr && !tr->allocated_snapshot) {
6464                 ret = tracing_alloc_snapshot_instance(tr);
6465                 if (ret < 0)
6466                         goto out;
6467         }
6468 #else
6469         tr->current_trace = &nop_trace;
6470 #endif
6471
6472         if (t->init) {
6473                 ret = tracer_init(t, tr);
6474                 if (ret)
6475                         goto out;
6476         }
6477
6478         tr->current_trace = t;
6479         tr->current_trace->enabled++;
6480         trace_branch_enable(tr);
6481  out:
6482         mutex_unlock(&trace_types_lock);
6483
6484         return ret;
6485 }
6486
6487 static ssize_t
6488 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6489                         size_t cnt, loff_t *ppos)
6490 {
6491         struct trace_array *tr = filp->private_data;
6492         char buf[MAX_TRACER_SIZE+1];
6493         char *name;
6494         size_t ret;
6495         int err;
6496
6497         ret = cnt;
6498
6499         if (cnt > MAX_TRACER_SIZE)
6500                 cnt = MAX_TRACER_SIZE;
6501
6502         if (copy_from_user(buf, ubuf, cnt))
6503                 return -EFAULT;
6504
6505         buf[cnt] = 0;
6506
6507         name = strim(buf);
6508
6509         err = tracing_set_tracer(tr, name);
6510         if (err)
6511                 return err;
6512
6513         *ppos += ret;
6514
6515         return ret;
6516 }
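
/*
 * Userspace usage sketch, assuming tracefs is mounted at the default
 * /sys/kernel/tracing: selecting a tracer through the "current_tracer"
 * file handled by tracing_set_trace_write() above.
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "nop\n", 4);	// trailing newline is stripped by strim()
 *		close(fd);
 *	}
 */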
6517
6518 static ssize_t
6519 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6520                    size_t cnt, loff_t *ppos)
6521 {
6522         char buf[64];
6523         int r;
6524
6525         r = snprintf(buf, sizeof(buf), "%ld\n",
6526                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6527         if (r > sizeof(buf))
6528                 r = sizeof(buf);
6529         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6530 }
6531
6532 static ssize_t
6533 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6534                     size_t cnt, loff_t *ppos)
6535 {
6536         unsigned long val;
6537         int ret;
6538
6539         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6540         if (ret)
6541                 return ret;
6542
6543         *ptr = val * 1000;
6544
6545         return cnt;
6546 }
6547
6548 static ssize_t
6549 tracing_thresh_read(struct file *filp, char __user *ubuf,
6550                     size_t cnt, loff_t *ppos)
6551 {
6552         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6553 }
6554
6555 static ssize_t
6556 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6557                      size_t cnt, loff_t *ppos)
6558 {
6559         struct trace_array *tr = filp->private_data;
6560         int ret;
6561
6562         mutex_lock(&trace_types_lock);
6563         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6564         if (ret < 0)
6565                 goto out;
6566
6567         if (tr->current_trace->update_thresh) {
6568                 ret = tr->current_trace->update_thresh(tr);
6569                 if (ret < 0)
6570                         goto out;
6571         }
6572
6573         ret = cnt;
6574 out:
6575         mutex_unlock(&trace_types_lock);
6576
6577         return ret;
6578 }
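
/*
 * Usage sketch, assuming the default tracefs mount point: "tracing_thresh"
 * is read and written in microseconds; tracing_nsecs_write() above converts
 * the value to nanoseconds before storing it.
 *
 *	int fd = open("/sys/kernel/tracing/tracing_thresh", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "100\n", 4);	// only record latencies above 100 usecs
 *		close(fd);
 *	}
 */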
6579
6580 #ifdef CONFIG_TRACER_MAX_TRACE
6581
6582 static ssize_t
6583 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6584                      size_t cnt, loff_t *ppos)
6585 {
6586         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6587 }
6588
6589 static ssize_t
6590 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6591                       size_t cnt, loff_t *ppos)
6592 {
6593         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6594 }
6595
6596 #endif
6597
6598 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6599 {
6600         struct trace_array *tr = inode->i_private;
6601         struct trace_iterator *iter;
6602         int ret;
6603
6604         ret = tracing_check_open_get_tr(tr);
6605         if (ret)
6606                 return ret;
6607
6608         mutex_lock(&trace_types_lock);
6609
6610         /* create a buffer to store the information to pass to userspace */
6611         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6612         if (!iter) {
6613                 ret = -ENOMEM;
6614                 __trace_array_put(tr);
6615                 goto out;
6616         }
6617
6618         trace_seq_init(&iter->seq);
6619         iter->trace = tr->current_trace;
6620
6621         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6622                 ret = -ENOMEM;
6623                 goto fail;
6624         }
6625
6626         /* trace pipe does not show start of buffer */
6627         cpumask_setall(iter->started);
6628
6629         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6630                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6631
6632         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6633         if (trace_clocks[tr->clock_id].in_ns)
6634                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6635
6636         iter->tr = tr;
6637         iter->array_buffer = &tr->array_buffer;
6638         iter->cpu_file = tracing_get_cpu(inode);
6639         mutex_init(&iter->mutex);
6640         filp->private_data = iter;
6641
6642         if (iter->trace->pipe_open)
6643                 iter->trace->pipe_open(iter);
6644
6645         nonseekable_open(inode, filp);
6646
6647         tr->trace_ref++;
6648 out:
6649         mutex_unlock(&trace_types_lock);
6650         return ret;
6651
6652 fail:
6653         kfree(iter);
6654         __trace_array_put(tr);
6655         mutex_unlock(&trace_types_lock);
6656         return ret;
6657 }
6658
6659 static int tracing_release_pipe(struct inode *inode, struct file *file)
6660 {
6661         struct trace_iterator *iter = file->private_data;
6662         struct trace_array *tr = inode->i_private;
6663
6664         mutex_lock(&trace_types_lock);
6665
6666         tr->trace_ref--;
6667
6668         if (iter->trace->pipe_close)
6669                 iter->trace->pipe_close(iter);
6670
6671         mutex_unlock(&trace_types_lock);
6672
6673         free_cpumask_var(iter->started);
6674         kfree(iter->fmt);
6675         mutex_destroy(&iter->mutex);
6676         kfree(iter);
6677
6678         trace_array_put(tr);
6679
6680         return 0;
6681 }
6682
6683 static __poll_t
6684 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6685 {
6686         struct trace_array *tr = iter->tr;
6687
6688         /* Iterators are static; they should be filled or empty */
6689         if (trace_buffer_iter(iter, iter->cpu_file))
6690                 return EPOLLIN | EPOLLRDNORM;
6691
6692         if (tr->trace_flags & TRACE_ITER_BLOCK)
6693                 /*
6694                  * Always select as readable when in blocking mode
6695                  */
6696                 return EPOLLIN | EPOLLRDNORM;
6697         else
6698                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6699                                              filp, poll_table, iter->tr->buffer_percent);
6700 }
6701
6702 static __poll_t
6703 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6704 {
6705         struct trace_iterator *iter = filp->private_data;
6706
6707         return trace_poll(iter, filp, poll_table);
6708 }
6709
6710 /* Must be called with iter->mutex held. */
6711 static int tracing_wait_pipe(struct file *filp)
6712 {
6713         struct trace_iterator *iter = filp->private_data;
6714         int ret;
6715
6716         while (trace_empty(iter)) {
6717
6718                 if ((filp->f_flags & O_NONBLOCK)) {
6719                         return -EAGAIN;
6720                 }
6721
6722                 /*
6723                  * We block until we read something and tracing is disabled.
6724                  * We still block if tracing is disabled, but we have never
6725                  * read anything. This allows a user to cat this file, and
6726                  * then enable tracing. But after we have read something,
6727                  * we give an EOF when tracing is again disabled.
6728                  *
6729                  * iter->pos will be 0 if we haven't read anything.
6730                  */
6731                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6732                         break;
6733
6734                 mutex_unlock(&iter->mutex);
6735
6736                 ret = wait_on_pipe(iter, 0);
6737
6738                 mutex_lock(&iter->mutex);
6739
6740                 if (ret)
6741                         return ret;
6742         }
6743
6744         return 1;
6745 }
6746
6747 /*
6748  * Consumer reader.
6749  */
6750 static ssize_t
6751 tracing_read_pipe(struct file *filp, char __user *ubuf,
6752                   size_t cnt, loff_t *ppos)
6753 {
6754         struct trace_iterator *iter = filp->private_data;
6755         ssize_t sret;
6756
6757         /*
6758          * Avoid more than one consumer on a single file descriptor.
6759          * This is just a matter of trace coherency; the ring buffer itself
6760          * is protected.
6761          */
6762         mutex_lock(&iter->mutex);
6763
6764         /* return any leftover data */
6765         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6766         if (sret != -EBUSY)
6767                 goto out;
6768
6769         trace_seq_init(&iter->seq);
6770
6771         if (iter->trace->read) {
6772                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6773                 if (sret)
6774                         goto out;
6775         }
6776
6777 waitagain:
6778         sret = tracing_wait_pipe(filp);
6779         if (sret <= 0)
6780                 goto out;
6781
6782         /* stop when tracing is finished */
6783         if (trace_empty(iter)) {
6784                 sret = 0;
6785                 goto out;
6786         }
6787
6788         if (cnt >= PAGE_SIZE)
6789                 cnt = PAGE_SIZE - 1;
6790
6791         /* reset all but tr, trace, and overruns */
6792         trace_iterator_reset(iter);
6793         cpumask_clear(iter->started);
6794         trace_seq_init(&iter->seq);
6795
6796         trace_event_read_lock();
6797         trace_access_lock(iter->cpu_file);
6798         while (trace_find_next_entry_inc(iter) != NULL) {
6799                 enum print_line_t ret;
6800                 int save_len = iter->seq.seq.len;
6801
6802                 ret = print_trace_line(iter);
6803                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6804                         /*
6805                          * If one print_trace_line() fills the entire trace_seq in one shot,
6806                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6807                          * In this case, we need to consume it, otherwise the loop will peek
6808                          * this event next time, resulting in an infinite loop.
6809                          */
6810                         if (save_len == 0) {
6811                                 iter->seq.full = 0;
6812                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6813                                 trace_consume(iter);
6814                                 break;
6815                         }
6816
6817                         /* In other cases, don't print partial lines */
6818                         iter->seq.seq.len = save_len;
6819                         break;
6820                 }
6821                 if (ret != TRACE_TYPE_NO_CONSUME)
6822                         trace_consume(iter);
6823
6824                 if (trace_seq_used(&iter->seq) >= cnt)
6825                         break;
6826
6827                 /*
6828                  * If the full flag is set, we reached the trace_seq buffer
6829                  * size and should have left via the partial output condition
6830                  * above. One of the trace_seq_* functions is not used properly.
6831                  */
6832                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6833                           iter->ent->type);
6834         }
6835         trace_access_unlock(iter->cpu_file);
6836         trace_event_read_unlock();
6837
6838         /* Now copy what we have to the user */
6839         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6840         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6841                 trace_seq_init(&iter->seq);
6842
6843         /*
6844          * If there was nothing to send to user, in spite of consuming trace
6845          * entries, go back to wait for more entries.
6846          */
6847         if (sret == -EBUSY)
6848                 goto waitagain;
6849
6850 out:
6851         mutex_unlock(&iter->mutex);
6852
6853         return sret;
6854 }
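
/*
 * Userspace usage sketch (error handling elided, default tracefs mount
 * assumed): "trace_pipe" is a consuming, blocking reader backed by
 * tracing_read_pipe() above, so a plain read loop is enough.
 *
 *	char buf[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		fwrite(buf, 1, n, stdout);
 *	close(fd);
 */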
6855
6856 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6857                                      unsigned int idx)
6858 {
6859         __free_page(spd->pages[idx]);
6860 }
6861
6862 static size_t
6863 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6864 {
6865         size_t count;
6866         int save_len;
6867         int ret;
6868
6869         /* Seq buffer is page-sized, exactly what we need. */
6870         for (;;) {
6871                 save_len = iter->seq.seq.len;
6872                 ret = print_trace_line(iter);
6873
6874                 if (trace_seq_has_overflowed(&iter->seq)) {
6875                         iter->seq.seq.len = save_len;
6876                         break;
6877                 }
6878
6879                 /*
6880                  * This should not be hit, because it should only
6881                  * be set if the iter->seq overflowed. But check it
6882                  * anyway to be safe.
6883                  */
6884                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6885                         iter->seq.seq.len = save_len;
6886                         break;
6887                 }
6888
6889                 count = trace_seq_used(&iter->seq) - save_len;
6890                 if (rem < count) {
6891                         rem = 0;
6892                         iter->seq.seq.len = save_len;
6893                         break;
6894                 }
6895
6896                 if (ret != TRACE_TYPE_NO_CONSUME)
6897                         trace_consume(iter);
6898                 rem -= count;
6899                 if (!trace_find_next_entry_inc(iter))   {
6900                         rem = 0;
6901                         iter->ent = NULL;
6902                         break;
6903                 }
6904         }
6905
6906         return rem;
6907 }
6908
6909 static ssize_t tracing_splice_read_pipe(struct file *filp,
6910                                         loff_t *ppos,
6911                                         struct pipe_inode_info *pipe,
6912                                         size_t len,
6913                                         unsigned int flags)
6914 {
6915         struct page *pages_def[PIPE_DEF_BUFFERS];
6916         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6917         struct trace_iterator *iter = filp->private_data;
6918         struct splice_pipe_desc spd = {
6919                 .pages          = pages_def,
6920                 .partial        = partial_def,
6921                 .nr_pages       = 0, /* This gets updated below. */
6922                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6923                 .ops            = &default_pipe_buf_ops,
6924                 .spd_release    = tracing_spd_release_pipe,
6925         };
6926         ssize_t ret;
6927         size_t rem;
6928         unsigned int i;
6929
6930         if (splice_grow_spd(pipe, &spd))
6931                 return -ENOMEM;
6932
6933         mutex_lock(&iter->mutex);
6934
6935         if (iter->trace->splice_read) {
6936                 ret = iter->trace->splice_read(iter, filp,
6937                                                ppos, pipe, len, flags);
6938                 if (ret)
6939                         goto out_err;
6940         }
6941
6942         ret = tracing_wait_pipe(filp);
6943         if (ret <= 0)
6944                 goto out_err;
6945
6946         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6947                 ret = -EFAULT;
6948                 goto out_err;
6949         }
6950
6951         trace_event_read_lock();
6952         trace_access_lock(iter->cpu_file);
6953
6954         /* Fill as many pages as possible. */
6955         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6956                 spd.pages[i] = alloc_page(GFP_KERNEL);
6957                 if (!spd.pages[i])
6958                         break;
6959
6960                 rem = tracing_fill_pipe_page(rem, iter);
6961
6962                 /* Copy the data into the page, so we can start over. */
6963                 ret = trace_seq_to_buffer(&iter->seq,
6964                                           page_address(spd.pages[i]),
6965                                           trace_seq_used(&iter->seq));
6966                 if (ret < 0) {
6967                         __free_page(spd.pages[i]);
6968                         break;
6969                 }
6970                 spd.partial[i].offset = 0;
6971                 spd.partial[i].len = trace_seq_used(&iter->seq);
6972
6973                 trace_seq_init(&iter->seq);
6974         }
6975
6976         trace_access_unlock(iter->cpu_file);
6977         trace_event_read_unlock();
6978         mutex_unlock(&iter->mutex);
6979
6980         spd.nr_pages = i;
6981
6982         if (i)
6983                 ret = splice_to_pipe(pipe, &spd);
6984         else
6985                 ret = 0;
6986 out:
6987         splice_shrink_spd(&spd);
6988         return ret;
6989
6990 out_err:
6991         mutex_unlock(&iter->mutex);
6992         goto out;
6993 }
6994
6995 static ssize_t
6996 tracing_entries_read(struct file *filp, char __user *ubuf,
6997                      size_t cnt, loff_t *ppos)
6998 {
6999         struct inode *inode = file_inode(filp);
7000         struct trace_array *tr = inode->i_private;
7001         int cpu = tracing_get_cpu(inode);
7002         char buf[64];
7003         int r = 0;
7004         ssize_t ret;
7005
7006         mutex_lock(&trace_types_lock);
7007
7008         if (cpu == RING_BUFFER_ALL_CPUS) {
7009                 int cpu, buf_size_same;
7010                 unsigned long size;
7011
7012                 size = 0;
7013                 buf_size_same = 1;
7014                 /* check if all cpu sizes are the same */
7015                 for_each_tracing_cpu(cpu) {
7016                         /* fill in the size from first enabled cpu */
7017                         if (size == 0)
7018                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7019                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7020                                 buf_size_same = 0;
7021                                 break;
7022                         }
7023                 }
7024
7025                 if (buf_size_same) {
7026                         if (!ring_buffer_expanded)
7027                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7028                                             size >> 10,
7029                                             trace_buf_size >> 10);
7030                         else
7031                                 r = sprintf(buf, "%lu\n", size >> 10);
7032                 } else
7033                         r = sprintf(buf, "X\n");
7034         } else
7035                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7036
7037         mutex_unlock(&trace_types_lock);
7038
7039         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7040         return ret;
7041 }
7042
7043 static ssize_t
7044 tracing_entries_write(struct file *filp, const char __user *ubuf,
7045                       size_t cnt, loff_t *ppos)
7046 {
7047         struct inode *inode = file_inode(filp);
7048         struct trace_array *tr = inode->i_private;
7049         unsigned long val;
7050         int ret;
7051
7052         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7053         if (ret)
7054                 return ret;
7055
7056         /* must have at least 1 entry */
7057         if (!val)
7058                 return -EINVAL;
7059
7060         /* value is in KB */
7061         val <<= 10;
7062         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7063         if (ret < 0)
7064                 return ret;
7065
7066         *ppos += cnt;
7067
7068         return cnt;
7069 }
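
/*
 * Usage sketch, assuming the default tracefs mount point: "buffer_size_kb"
 * is written in KiB; tracing_entries_write() above shifts the value by 10
 * before resizing the ring buffer.
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096\n", 5);	// request 4 MiB of entries per CPU
 *		close(fd);
 *	}
 */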
7070
7071 static ssize_t
7072 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7073                                 size_t cnt, loff_t *ppos)
7074 {
7075         struct trace_array *tr = filp->private_data;
7076         char buf[64];
7077         int r, cpu;
7078         unsigned long size = 0, expanded_size = 0;
7079
7080         mutex_lock(&trace_types_lock);
7081         for_each_tracing_cpu(cpu) {
7082                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7083                 if (!ring_buffer_expanded)
7084                         expanded_size += trace_buf_size >> 10;
7085         }
7086         if (ring_buffer_expanded)
7087                 r = sprintf(buf, "%lu\n", size);
7088         else
7089                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7090         mutex_unlock(&trace_types_lock);
7091
7092         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7093 }
7094
7095 static ssize_t
7096 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7097                           size_t cnt, loff_t *ppos)
7098 {
7099         /*
7100          * There is no need to read what the user has written; this function
7101          * exists only so that "echo" does not return an error when used.
7102          */
7103
7104         *ppos += cnt;
7105
7106         return cnt;
7107 }
7108
7109 static int
7110 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7111 {
7112         struct trace_array *tr = inode->i_private;
7113
7114         /* disable tracing ? */
7115         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7116                 tracer_tracing_off(tr);
7117         /* resize the ring buffer to 0 */
7118         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7119
7120         trace_array_put(tr);
7121
7122         return 0;
7123 }
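
/*
 * Usage sketch, assuming the default tracefs mount point: the write to
 * "free_buffer" is a no-op, and the shrink to zero entries (plus the
 * optional stop of tracing when TRACE_ITER_STOP_ON_FREE is set) happens
 * when the file is released.
 *
 *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);	// the ring buffer is freed here
 *	}
 */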
7124
7125 static ssize_t
7126 tracing_mark_write(struct file *filp, const char __user *ubuf,
7127                                         size_t cnt, loff_t *fpos)
7128 {
7129         struct trace_array *tr = filp->private_data;
7130         struct ring_buffer_event *event;
7131         enum event_trigger_type tt = ETT_NONE;
7132         struct trace_buffer *buffer;
7133         struct print_entry *entry;
7134         ssize_t written;
7135         int size;
7136         int len;
7137
7138 /* Used in tracing_mark_raw_write() as well */
7139 #define FAULTED_STR "<faulted>"
7140 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7141
7142         if (tracing_disabled)
7143                 return -EINVAL;
7144
7145         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7146                 return -EINVAL;
7147
7148         if (cnt > TRACE_BUF_SIZE)
7149                 cnt = TRACE_BUF_SIZE;
7150
7151         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7152
7153         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7154
7155         /* If less than "<faulted>", then make sure we can still add that */
7156         if (cnt < FAULTED_SIZE)
7157                 size += FAULTED_SIZE - cnt;
7158
7159         buffer = tr->array_buffer.buffer;
7160         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7161                                             tracing_gen_ctx());
7162         if (unlikely(!event))
7163                 /* Ring buffer disabled, return as if not open for write */
7164                 return -EBADF;
7165
7166         entry = ring_buffer_event_data(event);
7167         entry->ip = _THIS_IP_;
7168
7169         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7170         if (len) {
7171                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7172                 cnt = FAULTED_SIZE;
7173                 written = -EFAULT;
7174         } else
7175                 written = cnt;
7176
7177         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7178                 /* do not add \n before testing triggers, but add \0 */
7179                 entry->buf[cnt] = '\0';
7180                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7181         }
7182
7183         if (entry->buf[cnt - 1] != '\n') {
7184                 entry->buf[cnt] = '\n';
7185                 entry->buf[cnt + 1] = '\0';
7186         } else
7187                 entry->buf[cnt] = '\0';
7188
7189         if (static_branch_unlikely(&trace_marker_exports_enabled))
7190                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7191         __buffer_unlock_commit(buffer, event);
7192
7193         if (tt)
7194                 event_triggers_post_call(tr->trace_marker_file, tt);
7195
7196         return written;
7197 }
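
/*
 * Userspace usage sketch, assuming the default tracefs mount point:
 * annotating the trace from an application through "trace_marker", which
 * is backed by tracing_mark_write() above. Writes longer than
 * TRACE_BUF_SIZE are truncated.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "frame start\n", 12);
 *		close(fd);
 *	}
 */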
7198
7199 /* Limit it for now to 3K (including tag) */
7200 #define RAW_DATA_MAX_SIZE (1024*3)
7201
7202 static ssize_t
7203 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7204                                         size_t cnt, loff_t *fpos)
7205 {
7206         struct trace_array *tr = filp->private_data;
7207         struct ring_buffer_event *event;
7208         struct trace_buffer *buffer;
7209         struct raw_data_entry *entry;
7210         ssize_t written;
7211         int size;
7212         int len;
7213
7214 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7215
7216         if (tracing_disabled)
7217                 return -EINVAL;
7218
7219         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7220                 return -EINVAL;
7221
7222         /* The marker must at least have a tag id */
7223         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7224                 return -EINVAL;
7225
7226         if (cnt > TRACE_BUF_SIZE)
7227                 cnt = TRACE_BUF_SIZE;
7228
7229         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7230
7231         size = sizeof(*entry) + cnt;
7232         if (cnt < FAULT_SIZE_ID)
7233                 size += FAULT_SIZE_ID - cnt;
7234
7235         buffer = tr->array_buffer.buffer;
7236         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7237                                             tracing_gen_ctx());
7238         if (!event)
7239                 /* Ring buffer disabled, return as if not open for write */
7240                 return -EBADF;
7241
7242         entry = ring_buffer_event_data(event);
7243
7244         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7245         if (len) {
7246                 entry->id = -1;
7247                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7248                 written = -EFAULT;
7249         } else
7250                 written = cnt;
7251
7252         __buffer_unlock_commit(buffer, event);
7253
7254         return written;
7255 }
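
/*
 * Userspace usage sketch, assuming the default tracefs mount point:
 * "trace_marker_raw" expects a binary blob that begins with an int tag id,
 * as enforced by the size check above; the payload layout beyond the id is
 * up to the writer and is hypothetical here.
 *
 *	struct { int id; char payload[8]; } rec = { .id = 42, .payload = "example" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */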
7256
7257 static int tracing_clock_show(struct seq_file *m, void *v)
7258 {
7259         struct trace_array *tr = m->private;
7260         int i;
7261
7262         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7263                 seq_printf(m,
7264                         "%s%s%s%s", i ? " " : "",
7265                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7266                         i == tr->clock_id ? "]" : "");
7267         seq_putc(m, '\n');
7268
7269         return 0;
7270 }
7271
7272 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7273 {
7274         int i;
7275
7276         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7277                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7278                         break;
7279         }
7280         if (i == ARRAY_SIZE(trace_clocks))
7281                 return -EINVAL;
7282
7283         mutex_lock(&trace_types_lock);
7284
7285         tr->clock_id = i;
7286
7287         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7288
7289         /*
7290          * New clock may not be consistent with the previous clock.
7291          * Reset the buffer so that it doesn't have incomparable timestamps.
7292          */
7293         tracing_reset_online_cpus(&tr->array_buffer);
7294
7295 #ifdef CONFIG_TRACER_MAX_TRACE
7296         if (tr->max_buffer.buffer)
7297                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7298         tracing_reset_online_cpus(&tr->max_buffer);
7299 #endif
7300
7301         mutex_unlock(&trace_types_lock);
7302
7303         return 0;
7304 }
7305
7306 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7307                                    size_t cnt, loff_t *fpos)
7308 {
7309         struct seq_file *m = filp->private_data;
7310         struct trace_array *tr = m->private;
7311         char buf[64];
7312         const char *clockstr;
7313         int ret;
7314
7315         if (cnt >= sizeof(buf))
7316                 return -EINVAL;
7317
7318         if (copy_from_user(buf, ubuf, cnt))
7319                 return -EFAULT;
7320
7321         buf[cnt] = 0;
7322
7323         clockstr = strstrip(buf);
7324
7325         ret = tracing_set_clock(tr, clockstr);
7326         if (ret)
7327                 return ret;
7328
7329         *fpos += cnt;
7330
7331         return cnt;
7332 }
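
/*
 * Usage sketch, assuming the default tracefs mount point: reading
 * "trace_clock" lists the available clocks with the current one in
 * brackets; writing a clock name switches to it and resets the buffers,
 * as done in tracing_set_clock() above.
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono\n", 5);	// "mono" is one of the trace_clocks[] names
 *		close(fd);
 *	}
 */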
7333
7334 static int tracing_clock_open(struct inode *inode, struct file *file)
7335 {
7336         struct trace_array *tr = inode->i_private;
7337         int ret;
7338
7339         ret = tracing_check_open_get_tr(tr);
7340         if (ret)
7341                 return ret;
7342
7343         ret = single_open(file, tracing_clock_show, inode->i_private);
7344         if (ret < 0)
7345                 trace_array_put(tr);
7346
7347         return ret;
7348 }
7349
7350 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7351 {
7352         struct trace_array *tr = m->private;
7353
7354         mutex_lock(&trace_types_lock);
7355
7356         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7357                 seq_puts(m, "delta [absolute]\n");
7358         else
7359                 seq_puts(m, "[delta] absolute\n");
7360
7361         mutex_unlock(&trace_types_lock);
7362
7363         return 0;
7364 }
7365
7366 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7367 {
7368         struct trace_array *tr = inode->i_private;
7369         int ret;
7370
7371         ret = tracing_check_open_get_tr(tr);
7372         if (ret)
7373                 return ret;
7374
7375         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7376         if (ret < 0)
7377                 trace_array_put(tr);
7378
7379         return ret;
7380 }
7381
7382 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7383 {
7384         if (rbe == this_cpu_read(trace_buffered_event))
7385                 return ring_buffer_time_stamp(buffer);
7386
7387         return ring_buffer_event_time_stamp(buffer, rbe);
7388 }
7389
7390 /*
7391  * Set or disable using the per CPU trace_buffered_event when possible.
7392  */
7393 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7394 {
7395         int ret = 0;
7396
7397         mutex_lock(&trace_types_lock);
7398
7399         if (set && tr->no_filter_buffering_ref++)
7400                 goto out;
7401
7402         if (!set) {
7403                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7404                         ret = -EINVAL;
7405                         goto out;
7406                 }
7407
7408                 --tr->no_filter_buffering_ref;
7409         }
7410  out:
7411         mutex_unlock(&trace_types_lock);
7412
7413         return ret;
7414 }
7415
7416 struct ftrace_buffer_info {
7417         struct trace_iterator   iter;
7418         void                    *spare;
7419         unsigned int            spare_cpu;
7420         unsigned int            read;
7421 };
7422
7423 #ifdef CONFIG_TRACER_SNAPSHOT
7424 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7425 {
7426         struct trace_array *tr = inode->i_private;
7427         struct trace_iterator *iter;
7428         struct seq_file *m;
7429         int ret;
7430
7431         ret = tracing_check_open_get_tr(tr);
7432         if (ret)
7433                 return ret;
7434
7435         if (file->f_mode & FMODE_READ) {
7436                 iter = __tracing_open(inode, file, true);
7437                 if (IS_ERR(iter))
7438                         ret = PTR_ERR(iter);
7439         } else {
7440                 /* Writes still need the seq_file to hold the private data */
7441                 ret = -ENOMEM;
7442                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7443                 if (!m)
7444                         goto out;
7445                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7446                 if (!iter) {
7447                         kfree(m);
7448                         goto out;
7449                 }
7450                 ret = 0;
7451
7452                 iter->tr = tr;
7453                 iter->array_buffer = &tr->max_buffer;
7454                 iter->cpu_file = tracing_get_cpu(inode);
7455                 m->private = iter;
7456                 file->private_data = m;
7457         }
7458 out:
7459         if (ret < 0)
7460                 trace_array_put(tr);
7461
7462         return ret;
7463 }
7464
7465 static ssize_t
7466 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7467                        loff_t *ppos)
7468 {
7469         struct seq_file *m = filp->private_data;
7470         struct trace_iterator *iter = m->private;
7471         struct trace_array *tr = iter->tr;
7472         unsigned long val;
7473         int ret;
7474
7475         ret = tracing_update_buffers();
7476         if (ret < 0)
7477                 return ret;
7478
7479         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7480         if (ret)
7481                 return ret;
7482
7483         mutex_lock(&trace_types_lock);
7484
7485         if (tr->current_trace->use_max_tr) {
7486                 ret = -EBUSY;
7487                 goto out;
7488         }
7489
7490         local_irq_disable();
7491         arch_spin_lock(&tr->max_lock);
7492         if (tr->cond_snapshot)
7493                 ret = -EBUSY;
7494         arch_spin_unlock(&tr->max_lock);
7495         local_irq_enable();
7496         if (ret)
7497                 goto out;
7498
7499         switch (val) {
7500         case 0:
7501                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7502                         ret = -EINVAL;
7503                         break;
7504                 }
7505                 if (tr->allocated_snapshot)
7506                         free_snapshot(tr);
7507                 break;
7508         case 1:
7509 /* Only allow per-cpu swap if the ring buffer supports it */
7510 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7511                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7512                         ret = -EINVAL;
7513                         break;
7514                 }
7515 #endif
7516                 if (tr->allocated_snapshot)
7517                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7518                                         &tr->array_buffer, iter->cpu_file);
7519                 else
7520                         ret = tracing_alloc_snapshot_instance(tr);
7521                 if (ret < 0)
7522                         break;
7523                 local_irq_disable();
7524                 /* Now, we're going to swap */
7525                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7526                         update_max_tr(tr, current, smp_processor_id(), NULL);
7527                 else
7528                         update_max_tr_single(tr, current, iter->cpu_file);
7529                 local_irq_enable();
7530                 break;
7531         default:
7532                 if (tr->allocated_snapshot) {
7533                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7534                                 tracing_reset_online_cpus(&tr->max_buffer);
7535                         else
7536                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7537                 }
7538                 break;
7539         }
7540
7541         if (ret >= 0) {
7542                 *ppos += cnt;
7543                 ret = cnt;
7544         }
7545 out:
7546         mutex_unlock(&trace_types_lock);
7547         return ret;
7548 }
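
/*
 * Usage sketch, assuming the default tracefs mount point and
 * CONFIG_TRACER_SNAPSHOT: the "snapshot" file follows the switch above,
 * where 0 frees the snapshot buffer, 1 allocates it (if needed) and swaps
 * it with the live buffer, and any other value clears its contents.
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1\n", 2);	// take a snapshot now
 *		close(fd);
 *	}
 */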
7549
7550 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7551 {
7552         struct seq_file *m = file->private_data;
7553         int ret;
7554
7555         ret = tracing_release(inode, file);
7556
7557         if (file->f_mode & FMODE_READ)
7558                 return ret;
7559
7560         /* If write only, the seq_file is just a stub */
7561         if (m)
7562                 kfree(m->private);
7563         kfree(m);
7564
7565         return 0;
7566 }
7567
7568 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7569 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7570                                     size_t count, loff_t *ppos);
7571 static int tracing_buffers_release(struct inode *inode, struct file *file);
7572 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7573                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7574
7575 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7576 {
7577         struct ftrace_buffer_info *info;
7578         int ret;
7579
7580         /* The following checks for tracefs lockdown */
7581         ret = tracing_buffers_open(inode, filp);
7582         if (ret < 0)
7583                 return ret;
7584
7585         info = filp->private_data;
7586
7587         if (info->iter.trace->use_max_tr) {
7588                 tracing_buffers_release(inode, filp);
7589                 return -EBUSY;
7590         }
7591
7592         info->iter.snapshot = true;
7593         info->iter.array_buffer = &info->iter.tr->max_buffer;
7594
7595         return ret;
7596 }
7597
7598 #endif /* CONFIG_TRACER_SNAPSHOT */
7599
7600
7601 static const struct file_operations tracing_thresh_fops = {
7602         .open           = tracing_open_generic,
7603         .read           = tracing_thresh_read,
7604         .write          = tracing_thresh_write,
7605         .llseek         = generic_file_llseek,
7606 };
7607
7608 #ifdef CONFIG_TRACER_MAX_TRACE
7609 static const struct file_operations tracing_max_lat_fops = {
7610         .open           = tracing_open_generic,
7611         .read           = tracing_max_lat_read,
7612         .write          = tracing_max_lat_write,
7613         .llseek         = generic_file_llseek,
7614 };
7615 #endif
7616
7617 static const struct file_operations set_tracer_fops = {
7618         .open           = tracing_open_generic,
7619         .read           = tracing_set_trace_read,
7620         .write          = tracing_set_trace_write,
7621         .llseek         = generic_file_llseek,
7622 };
7623
7624 static const struct file_operations tracing_pipe_fops = {
7625         .open           = tracing_open_pipe,
7626         .poll           = tracing_poll_pipe,
7627         .read           = tracing_read_pipe,
7628         .splice_read    = tracing_splice_read_pipe,
7629         .release        = tracing_release_pipe,
7630         .llseek         = no_llseek,
7631 };
7632
7633 static const struct file_operations tracing_entries_fops = {
7634         .open           = tracing_open_generic_tr,
7635         .read           = tracing_entries_read,
7636         .write          = tracing_entries_write,
7637         .llseek         = generic_file_llseek,
7638         .release        = tracing_release_generic_tr,
7639 };
7640
7641 static const struct file_operations tracing_total_entries_fops = {
7642         .open           = tracing_open_generic_tr,
7643         .read           = tracing_total_entries_read,
7644         .llseek         = generic_file_llseek,
7645         .release        = tracing_release_generic_tr,
7646 };
7647
7648 static const struct file_operations tracing_free_buffer_fops = {
7649         .open           = tracing_open_generic_tr,
7650         .write          = tracing_free_buffer_write,
7651         .release        = tracing_free_buffer_release,
7652 };
7653
7654 static const struct file_operations tracing_mark_fops = {
7655         .open           = tracing_mark_open,
7656         .write          = tracing_mark_write,
7657         .release        = tracing_release_generic_tr,
7658 };
7659
7660 static const struct file_operations tracing_mark_raw_fops = {
7661         .open           = tracing_mark_open,
7662         .write          = tracing_mark_raw_write,
7663         .release        = tracing_release_generic_tr,
7664 };
7665
7666 static const struct file_operations trace_clock_fops = {
7667         .open           = tracing_clock_open,
7668         .read           = seq_read,
7669         .llseek         = seq_lseek,
7670         .release        = tracing_single_release_tr,
7671         .write          = tracing_clock_write,
7672 };
7673
7674 static const struct file_operations trace_time_stamp_mode_fops = {
7675         .open           = tracing_time_stamp_mode_open,
7676         .read           = seq_read,
7677         .llseek         = seq_lseek,
7678         .release        = tracing_single_release_tr,
7679 };
7680
7681 #ifdef CONFIG_TRACER_SNAPSHOT
7682 static const struct file_operations snapshot_fops = {
7683         .open           = tracing_snapshot_open,
7684         .read           = seq_read,
7685         .write          = tracing_snapshot_write,
7686         .llseek         = tracing_lseek,
7687         .release        = tracing_snapshot_release,
7688 };
7689
7690 static const struct file_operations snapshot_raw_fops = {
7691         .open           = snapshot_raw_open,
7692         .read           = tracing_buffers_read,
7693         .release        = tracing_buffers_release,
7694         .splice_read    = tracing_buffers_splice_read,
7695         .llseek         = no_llseek,
7696 };
7697
7698 #endif /* CONFIG_TRACER_SNAPSHOT */
7699
7700 /*
7701  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7702  * @filp: The active open file structure
7703  * @ubuf: The userspace provided buffer containing the value to write
7704  * @cnt: The maximum number of bytes to write
7705  * @ppos: The current "file" position
7706  *
7707  * This function implements the write interface for a struct trace_min_max_param.
7708  * The filp->private_data must point to a trace_min_max_param structure that
7709  * defines where to write the value, the min and the max acceptable values,
7710  * and a lock to protect the write.
7711  */
7712 static ssize_t
7713 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7714 {
7715         struct trace_min_max_param *param = filp->private_data;
7716         u64 val;
7717         int err;
7718
7719         if (!param)
7720                 return -EFAULT;
7721
7722         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7723         if (err)
7724                 return err;
7725
7726         if (param->lock)
7727                 mutex_lock(param->lock);
7728
7729         if (param->min && val < *param->min)
7730                 err = -EINVAL;
7731
7732         if (param->max && val > *param->max)
7733                 err = -EINVAL;
7734
7735         if (!err)
7736                 *param->val = val;
7737
7738         if (param->lock)
7739                 mutex_unlock(param->lock);
7740
7741         if (err)
7742                 return err;
7743
7744         return cnt;
7745 }
7746
7747 /*
7748  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7749  * @filp: The active open file structure
7750  * @ubuf: The userspace provided buffer to read value into
7751  * @cnt: The maximum number of bytes to read
7752  * @ppos: The current "file" position
7753  *
7754  * This function implements the read interface for a struct trace_min_max_param.
7755  * The filp->private_data must point to a trace_min_max_param struct with valid
7756  * data.
7757  */
7758 static ssize_t
7759 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7760 {
7761         struct trace_min_max_param *param = filp->private_data;
7762         char buf[U64_STR_SIZE];
7763         int len;
7764         u64 val;
7765
7766         if (!param)
7767                 return -EFAULT;
7768
7769         val = *param->val;
7770
7771         if (cnt > sizeof(buf))
7772                 cnt = sizeof(buf);
7773
7774         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7775
7776         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7777 }
7778
7779 const struct file_operations trace_min_max_fops = {
7780         .open           = tracing_open_generic,
7781         .read           = trace_min_max_read,
7782         .write          = trace_min_max_write,
7783 };
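
/*
 * Usage sketch for trace_min_max_fops; the field names follow the accesses
 * in trace_min_max_read()/trace_min_max_write() above, while the file name
 * and parent dentry are hypothetical and TRACE_MODE_WRITE is assumed to be
 * defined alongside TRACE_MODE_READ:
 *
 *	static u64 example_val;
 *	static u64 example_max = 1000;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= NULL,		// no locking needed in this example
 *		.val	= &example_val,
 *		.min	= NULL,		// no lower bound
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_limit", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */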
7784
7785 #define TRACING_LOG_ERRS_MAX    8
7786 #define TRACING_LOG_LOC_MAX     128
7787
7788 #define CMD_PREFIX "  Command: "
7789
7790 struct err_info {
7791         const char      **errs; /* ptr to loc-specific array of err strings */
7792         u8              type;   /* index into errs -> specific err string */
7793         u16             pos;    /* caret position */
7794         u64             ts;
7795 };
7796
7797 struct tracing_log_err {
7798         struct list_head        list;
7799         struct err_info         info;
7800         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7801         char                    *cmd;                     /* what caused err */
7802 };
7803
7804 static DEFINE_MUTEX(tracing_err_log_lock);
7805
7806 static struct tracing_log_err *alloc_tracing_log_err(int len)
7807 {
7808         struct tracing_log_err *err;
7809
7810         err = kzalloc(sizeof(*err), GFP_KERNEL);
7811         if (!err)
7812                 return ERR_PTR(-ENOMEM);
7813
7814         err->cmd = kzalloc(len, GFP_KERNEL);
7815         if (!err->cmd) {
7816                 kfree(err);
7817                 return ERR_PTR(-ENOMEM);
7818         }
7819
7820         return err;
7821 }
7822
7823 static void free_tracing_log_err(struct tracing_log_err *err)
7824 {
7825         kfree(err->cmd);
7826         kfree(err);
7827 }
7828
7829 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7830                                                    int len)
7831 {
7832         struct tracing_log_err *err;
7833         char *cmd;
7834
7835         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7836                 err = alloc_tracing_log_err(len);
7837                 if (PTR_ERR(err) != -ENOMEM)
7838                         tr->n_err_log_entries++;
7839
7840                 return err;
7841         }
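	/*
	 * The log is full: recycle the oldest entry, giving it a freshly
	 * sized cmd buffer for the new error.
	 */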
7842         cmd = kzalloc(len, GFP_KERNEL);
7843         if (!cmd)
7844                 return ERR_PTR(-ENOMEM);
7845         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7846         kfree(err->cmd);
7847         err->cmd = cmd;
7848         list_del(&err->list);
7849
7850         return err;
7851 }
7852
7853 /**
7854  * err_pos - find the position of a string within a command for error careting
7855  * @cmd: The tracing command that caused the error
7856  * @str: The string to position the caret at within @cmd
7857  *
7858  * Finds the position of the first occurrence of @str within @cmd.  The
7859  * return value can be passed to tracing_log_err() for caret placement
7860  * within @cmd.
7861  *
7862  * Returns the index within @cmd of the first occurrence of @str or 0
7863  * if @str was not found.
7864  */
7865 unsigned int err_pos(char *cmd, const char *str)
7866 {
7867         char *found;
7868
7869         if (WARN_ON(!strlen(cmd)))
7870                 return 0;
7871
7872         found = strstr(cmd, str);
7873         if (found)
7874                 return found - cmd;
7875
7876         return 0;
7877 }
7878
7879 /**
7880  * tracing_log_err - write an error to the tracing error log
7881  * @tr: The associated trace array for the error (NULL for top level array)
7882  * @loc: A string describing where the error occurred
7883  * @cmd: The tracing command that caused the error
7884  * @errs: The array of loc-specific static error strings
7885  * @type: The index into errs[], which produces the specific static err string
7886  * @pos: The position the caret should be placed in the cmd
7887  *
7888  * Writes an error into tracing/error_log of the form:
7889  *
7890  * <loc>: error: <text>
7891  *   Command: <cmd>
7892  *              ^
7893  *
7894  * tracing/error_log is a small log file containing the last
7895  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7896  * unless there has been a tracing error, and the error log can be
7897  * cleared, and its memory freed, by writing the empty string to it in
7898  * truncation mode, i.e. echo > tracing/error_log.
7899  *
7900  * NOTE: the @errs array along with the @type param are used to
7901  * produce a static error string - this string is not copied and saved
7902  * when the error is logged - only a pointer to it is saved.  See
7903  * existing callers for examples of how static strings are typically
7904  * defined for use with tracing_log_err().
7905  */
7906 void tracing_log_err(struct trace_array *tr,
7907                      const char *loc, const char *cmd,
7908                      const char **errs, u8 type, u16 pos)
7909 {
7910         struct tracing_log_err *err;
7911         int len = 0;
7912
7913         if (!tr)
7914                 tr = &global_trace;
7915
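	/*
	 * Size err->cmd for "\n" CMD_PREFIX "<cmd>\n" plus the terminating
	 * NUL; the sizeof()s of the string literals include their own NULs,
	 * which leaves a little slack.
	 */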
7916         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7917
7918         mutex_lock(&tracing_err_log_lock);
7919         err = get_tracing_log_err(tr, len);
7920         if (PTR_ERR(err) == -ENOMEM) {
7921                 mutex_unlock(&tracing_err_log_lock);
7922                 return;
7923         }
7924
7925         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7926         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7927
7928         err->info.errs = errs;
7929         err->info.type = type;
7930         err->info.pos = pos;
7931         err->info.ts = local_clock();
7932
7933         list_add_tail(&err->list, &tr->err_log);
7934         mutex_unlock(&tracing_err_log_lock);
7935 }
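
/*
 * A minimal sketch of how callers typically feed tracing_log_err() (the
 * names below are hypothetical, modelled on existing users such as the
 * histogram code): the loc-specific error strings live in one static
 * array, an enum indexes into it, and err_pos() supplies the caret
 * position within the offending command string.
 *
 *	enum { EXAMPLE_ERR_BAD_FIELD, EXAMPLE_ERR_TOO_MANY };
 *
 *	static const char *example_errs[] = {
 *		"Bad field name",
 *		"Too many arguments",
 *	};
 *
 *	static void example_report_bad_field(struct trace_array *tr,
 *					     char *cmd, const char *field)
 *	{
 *		tracing_log_err(tr, "example: parse", cmd, example_errs,
 *				EXAMPLE_ERR_BAD_FIELD, err_pos(cmd, field));
 *	}
 */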
7936
7937 static void clear_tracing_err_log(struct trace_array *tr)
7938 {
7939         struct tracing_log_err *err, *next;
7940
7941         mutex_lock(&tracing_err_log_lock);
7942         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7943                 list_del(&err->list);
7944                 free_tracing_log_err(err);
7945         }
7946
7947         tr->n_err_log_entries = 0;
7948         mutex_unlock(&tracing_err_log_lock);
7949 }
7950
7951 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7952 {
7953         struct trace_array *tr = m->private;
7954
7955         mutex_lock(&tracing_err_log_lock);
7956
7957         return seq_list_start(&tr->err_log, *pos);
7958 }
7959
7960 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7961 {
7962         struct trace_array *tr = m->private;
7963
7964         return seq_list_next(v, &tr->err_log, pos);
7965 }
7966
7967 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7968 {
7969         mutex_unlock(&tracing_err_log_lock);
7970 }
7971
7972 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7973 {
7974         u16 i;
7975
7976         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7977                 seq_putc(m, ' ');
7978         for (i = 0; i < pos; i++)
7979                 seq_putc(m, ' ');
7980         seq_puts(m, "^\n");
7981 }
7982
7983 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7984 {
7985         struct tracing_log_err *err = v;
7986
7987         if (err) {
7988                 const char *err_text = err->info.errs[err->info.type];
7989                 u64 sec = err->info.ts;
7990                 u32 nsec;
7991
7992                 nsec = do_div(sec, NSEC_PER_SEC);
7993                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7994                            err->loc, err_text);
7995                 seq_printf(m, "%s", err->cmd);
7996                 tracing_err_log_show_pos(m, err->info.pos);
7997         }
7998
7999         return 0;
8000 }
8001
8002 static const struct seq_operations tracing_err_log_seq_ops = {
8003         .start  = tracing_err_log_seq_start,
8004         .next   = tracing_err_log_seq_next,
8005         .stop   = tracing_err_log_seq_stop,
8006         .show   = tracing_err_log_seq_show
8007 };
8008
8009 static int tracing_err_log_open(struct inode *inode, struct file *file)
8010 {
8011         struct trace_array *tr = inode->i_private;
8012         int ret = 0;
8013
8014         ret = tracing_check_open_get_tr(tr);
8015         if (ret)
8016                 return ret;
8017
8018         /* If this file was opened for write, then erase contents */
8019         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8020                 clear_tracing_err_log(tr);
8021
8022         if (file->f_mode & FMODE_READ) {
8023                 ret = seq_open(file, &tracing_err_log_seq_ops);
8024                 if (!ret) {
8025                         struct seq_file *m = file->private_data;
8026                         m->private = tr;
8027                 } else {
8028                         trace_array_put(tr);
8029                 }
8030         }
8031         return ret;
8032 }
8033
8034 static ssize_t tracing_err_log_write(struct file *file,
8035                                      const char __user *buffer,
8036                                      size_t count, loff_t *ppos)
8037 {
8038         return count;
8039 }
8040
8041 static int tracing_err_log_release(struct inode *inode, struct file *file)
8042 {
8043         struct trace_array *tr = inode->i_private;
8044
8045         trace_array_put(tr);
8046
8047         if (file->f_mode & FMODE_READ)
8048                 seq_release(inode, file);
8049
8050         return 0;
8051 }
8052
8053 static const struct file_operations tracing_err_log_fops = {
8054         .open           = tracing_err_log_open,
8055         .write          = tracing_err_log_write,
8056         .read           = seq_read,
8057         .llseek         = seq_lseek,
8058         .release        = tracing_err_log_release,
8059 };
8060
8061 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8062 {
8063         struct trace_array *tr = inode->i_private;
8064         struct ftrace_buffer_info *info;
8065         int ret;
8066
8067         ret = tracing_check_open_get_tr(tr);
8068         if (ret)
8069                 return ret;
8070
8071         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8072         if (!info) {
8073                 trace_array_put(tr);
8074                 return -ENOMEM;
8075         }
8076
8077         mutex_lock(&trace_types_lock);
8078
8079         info->iter.tr           = tr;
8080         info->iter.cpu_file     = tracing_get_cpu(inode);
8081         info->iter.trace        = tr->current_trace;
8082         info->iter.array_buffer = &tr->array_buffer;
8083         info->spare             = NULL;
8084         /* Force reading ring buffer for first read */
8085         info->read              = (unsigned int)-1;
8086
8087         filp->private_data = info;
8088
8089         tr->trace_ref++;
8090
8091         mutex_unlock(&trace_types_lock);
8092
8093         ret = nonseekable_open(inode, filp);
8094         if (ret < 0)
8095                 trace_array_put(tr);
8096
8097         return ret;
8098 }
8099
8100 static __poll_t
8101 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8102 {
8103         struct ftrace_buffer_info *info = filp->private_data;
8104         struct trace_iterator *iter = &info->iter;
8105
8106         return trace_poll(iter, filp, poll_table);
8107 }
8108
8109 static ssize_t
8110 tracing_buffers_read(struct file *filp, char __user *ubuf,
8111                      size_t count, loff_t *ppos)
8112 {
8113         struct ftrace_buffer_info *info = filp->private_data;
8114         struct trace_iterator *iter = &info->iter;
8115         ssize_t ret = 0;
8116         ssize_t size;
8117
8118         if (!count)
8119                 return 0;
8120
8121 #ifdef CONFIG_TRACER_MAX_TRACE
8122         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8123                 return -EBUSY;
8124 #endif
8125
8126         if (!info->spare) {
8127                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8128                                                           iter->cpu_file);
8129                 if (IS_ERR(info->spare)) {
8130                         ret = PTR_ERR(info->spare);
8131                         info->spare = NULL;
8132                 } else {
8133                         info->spare_cpu = iter->cpu_file;
8134                 }
8135         }
8136         if (!info->spare)
8137                 return ret;
8138
8139         /* Do we have previous read data to read? */
8140         if (info->read < PAGE_SIZE)
8141                 goto read;
8142
8143  again:
8144         trace_access_lock(iter->cpu_file);
8145         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8146                                     &info->spare,
8147                                     count,
8148                                     iter->cpu_file, 0);
8149         trace_access_unlock(iter->cpu_file);
8150
8151         if (ret < 0) {
8152                 if (trace_empty(iter)) {
8153                         if ((filp->f_flags & O_NONBLOCK))
8154                                 return -EAGAIN;
8155
8156                         ret = wait_on_pipe(iter, 0);
8157                         if (ret)
8158                                 return ret;
8159
8160                         goto again;
8161                 }
8162                 return 0;
8163         }
8164
8165         info->read = 0;
8166  read:
8167         size = PAGE_SIZE - info->read;
8168         if (size > count)
8169                 size = count;
8170
8171         ret = copy_to_user(ubuf, info->spare + info->read, size);
8172         if (ret == size)
8173                 return -EFAULT;
8174
8175         size -= ret;
8176
8177         *ppos += size;
8178         info->read += size;
8179
8180         return size;
8181 }
8182
8183 static int tracing_buffers_release(struct inode *inode, struct file *file)
8184 {
8185         struct ftrace_buffer_info *info = file->private_data;
8186         struct trace_iterator *iter = &info->iter;
8187
8188         mutex_lock(&trace_types_lock);
8189
8190         iter->tr->trace_ref--;
8191
8192         __trace_array_put(iter->tr);
8193
8194         iter->wait_index++;
8195         /* Make sure the waiters see the new wait_index */
8196         smp_wmb();
8197
8198         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8199
8200         if (info->spare)
8201                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8202                                            info->spare_cpu, info->spare);
8203         kvfree(info);
8204
8205         mutex_unlock(&trace_types_lock);
8206
8207         return 0;
8208 }
8209
8210 struct buffer_ref {
8211         struct trace_buffer     *buffer;
8212         void                    *page;
8213         int                     cpu;
8214         refcount_t              refcount;
8215 };
8216
8217 static void buffer_ref_release(struct buffer_ref *ref)
8218 {
8219         if (!refcount_dec_and_test(&ref->refcount))
8220                 return;
8221         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8222         kfree(ref);
8223 }
8224
8225 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8226                                     struct pipe_buffer *buf)
8227 {
8228         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8229
8230         buffer_ref_release(ref);
8231         buf->private = 0;
8232 }
8233
8234 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8235                                 struct pipe_buffer *buf)
8236 {
8237         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8238
8239         if (refcount_read(&ref->refcount) > INT_MAX/2)
8240                 return false;
8241
8242         refcount_inc(&ref->refcount);
8243         return true;
8244 }
8245
8246 /* Pipe buffer operations for a buffer. */
8247 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8248         .release                = buffer_pipe_buf_release,
8249         .get                    = buffer_pipe_buf_get,
8250 };
8251
8252 /*
8253  * Callback from splice_to_pipe(), if we need to release some pages
8254  * at the end of the spd in case we errored out while filling the pipe.
8255  */
8256 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8257 {
8258         struct buffer_ref *ref =
8259                 (struct buffer_ref *)spd->partial[i].private;
8260
8261         buffer_ref_release(ref);
8262         spd->partial[i].private = 0;
8263 }
8264
8265 static ssize_t
8266 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8267                             struct pipe_inode_info *pipe, size_t len,
8268                             unsigned int flags)
8269 {
8270         struct ftrace_buffer_info *info = file->private_data;
8271         struct trace_iterator *iter = &info->iter;
8272         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8273         struct page *pages_def[PIPE_DEF_BUFFERS];
8274         struct splice_pipe_desc spd = {
8275                 .pages          = pages_def,
8276                 .partial        = partial_def,
8277                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8278                 .ops            = &buffer_pipe_buf_ops,
8279                 .spd_release    = buffer_spd_release,
8280         };
8281         struct buffer_ref *ref;
8282         int entries, i;
8283         ssize_t ret = 0;
8284
8285 #ifdef CONFIG_TRACER_MAX_TRACE
8286         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8287                 return -EBUSY;
8288 #endif
8289
8290         if (*ppos & (PAGE_SIZE - 1))
8291                 return -EINVAL;
8292
8293         if (len & (PAGE_SIZE - 1)) {
8294                 if (len < PAGE_SIZE)
8295                         return -EINVAL;
8296                 len &= PAGE_MASK;
8297         }
8298
8299         if (splice_grow_spd(pipe, &spd))
8300                 return -ENOMEM;
8301
8302  again:
8303         trace_access_lock(iter->cpu_file);
8304         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8305
8306         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8307                 struct page *page;
8308                 int r;
8309
8310                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8311                 if (!ref) {
8312                         ret = -ENOMEM;
8313                         break;
8314                 }
8315
8316                 refcount_set(&ref->refcount, 1);
8317                 ref->buffer = iter->array_buffer->buffer;
8318                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8319                 if (IS_ERR(ref->page)) {
8320                         ret = PTR_ERR(ref->page);
8321                         ref->page = NULL;
8322                         kfree(ref);
8323                         break;
8324                 }
8325                 ref->cpu = iter->cpu_file;
8326
8327                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8328                                           len, iter->cpu_file, 1);
8329                 if (r < 0) {
8330                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8331                                                    ref->page);
8332                         kfree(ref);
8333                         break;
8334                 }
8335
8336                 page = virt_to_page(ref->page);
8337
8338                 spd.pages[i] = page;
8339                 spd.partial[i].len = PAGE_SIZE;
8340                 spd.partial[i].offset = 0;
8341                 spd.partial[i].private = (unsigned long)ref;
8342                 spd.nr_pages++;
8343                 *ppos += PAGE_SIZE;
8344
8345                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8346         }
8347
8348         trace_access_unlock(iter->cpu_file);
8349         spd.nr_pages = i;
8350
8351         /* did we read anything? */
8352         if (!spd.nr_pages) {
8353                 long wait_index;
8354
8355                 if (ret)
8356                         goto out;
8357
8358                 ret = -EAGAIN;
8359                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8360                         goto out;
8361
8362                 wait_index = READ_ONCE(iter->wait_index);
8363
8364                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8365                 if (ret)
8366                         goto out;
8367
8368                 /* No need to wait after waking up when tracing is off */
8369                 if (!tracer_tracing_is_on(iter->tr))
8370                         goto out;
8371
8372                 /* Make sure we see the new wait_index */
8373                 smp_rmb();
8374                 if (wait_index != iter->wait_index)
8375                         goto out;
8376
8377                 goto again;
8378         }
8379
8380         ret = splice_to_pipe(pipe, &spd);
8381 out:
8382         splice_shrink_spd(&spd);
8383
8384         return ret;
8385 }
8386
8387 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8388 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8389 {
8390         struct ftrace_buffer_info *info = file->private_data;
8391         struct trace_iterator *iter = &info->iter;
8392
8393         if (cmd)
8394                 return -ENOIOCTLCMD;
8395
8396         mutex_lock(&trace_types_lock);
8397
8398         iter->wait_index++;
8399         /* Make sure the waiters see the new wait_index */
8400         smp_wmb();
8401
8402         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8403
8404         mutex_unlock(&trace_types_lock);
8405         return 0;
8406 }
8407
8408 static const struct file_operations tracing_buffers_fops = {
8409         .open           = tracing_buffers_open,
8410         .read           = tracing_buffers_read,
8411         .poll           = tracing_buffers_poll,
8412         .release        = tracing_buffers_release,
8413         .splice_read    = tracing_buffers_splice_read,
8414         .unlocked_ioctl = tracing_buffers_ioctl,
8415         .llseek         = no_llseek,
8416 };
8417
8418 static ssize_t
8419 tracing_stats_read(struct file *filp, char __user *ubuf,
8420                    size_t count, loff_t *ppos)
8421 {
8422         struct inode *inode = file_inode(filp);
8423         struct trace_array *tr = inode->i_private;
8424         struct array_buffer *trace_buf = &tr->array_buffer;
8425         int cpu = tracing_get_cpu(inode);
8426         struct trace_seq *s;
8427         unsigned long cnt;
8428         unsigned long long t;
8429         unsigned long usec_rem;
8430
8431         s = kmalloc(sizeof(*s), GFP_KERNEL);
8432         if (!s)
8433                 return -ENOMEM;
8434
8435         trace_seq_init(s);
8436
8437         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8438         trace_seq_printf(s, "entries: %ld\n", cnt);
8439
8440         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8441         trace_seq_printf(s, "overrun: %ld\n", cnt);
8442
8443         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8444         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8445
8446         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8447         trace_seq_printf(s, "bytes: %ld\n", cnt);
8448
8449         if (trace_clocks[tr->clock_id].in_ns) {
8450                 /* local or global for trace_clock */
8451                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8452                 usec_rem = do_div(t, USEC_PER_SEC);
8453                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8454                                                                 t, usec_rem);
8455
8456                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8457                 usec_rem = do_div(t, USEC_PER_SEC);
8458                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8459         } else {
8460                 /* counter or tsc mode for trace_clock */
8461                 trace_seq_printf(s, "oldest event ts: %llu\n",
8462                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8463
8464                 trace_seq_printf(s, "now ts: %llu\n",
8465                                 ring_buffer_time_stamp(trace_buf->buffer));
8466         }
8467
8468         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8469         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8470
8471         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8472         trace_seq_printf(s, "read events: %ld\n", cnt);
8473
8474         count = simple_read_from_buffer(ubuf, count, ppos,
8475                                         s->buffer, trace_seq_used(s));
8476
8477         kfree(s);
8478
8479         return count;
8480 }
8481
8482 static const struct file_operations tracing_stats_fops = {
8483         .open           = tracing_open_generic_tr,
8484         .read           = tracing_stats_read,
8485         .llseek         = generic_file_llseek,
8486         .release        = tracing_release_generic_tr,
8487 };
8488
8489 #ifdef CONFIG_DYNAMIC_FTRACE
8490
8491 static ssize_t
8492 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8493                   size_t cnt, loff_t *ppos)
8494 {
8495         ssize_t ret;
8496         char *buf;
8497         int r;
8498
8499         /* 256 should be plenty to hold the amount needed */
8500         buf = kmalloc(256, GFP_KERNEL);
8501         if (!buf)
8502                 return -ENOMEM;
8503
8504         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8505                       ftrace_update_tot_cnt,
8506                       ftrace_number_of_pages,
8507                       ftrace_number_of_groups);
8508
8509         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8510         kfree(buf);
8511         return ret;
8512 }
8513
8514 static const struct file_operations tracing_dyn_info_fops = {
8515         .open           = tracing_open_generic,
8516         .read           = tracing_read_dyn_info,
8517         .llseek         = generic_file_llseek,
8518 };
8519 #endif /* CONFIG_DYNAMIC_FTRACE */
8520
8521 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8522 static void
8523 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8524                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8525                 void *data)
8526 {
8527         tracing_snapshot_instance(tr);
8528 }
8529
8530 static void
8531 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8532                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8533                       void *data)
8534 {
8535         struct ftrace_func_mapper *mapper = data;
8536         long *count = NULL;
8537
8538         if (mapper)
8539                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8540
8541         if (count) {
8542
8543                 if (*count <= 0)
8544                         return;
8545
8546                 (*count)--;
8547         }
8548
8549         tracing_snapshot_instance(tr);
8550 }
8551
8552 static int
8553 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8554                       struct ftrace_probe_ops *ops, void *data)
8555 {
8556         struct ftrace_func_mapper *mapper = data;
8557         long *count = NULL;
8558
8559         seq_printf(m, "%ps:", (void *)ip);
8560
8561         seq_puts(m, "snapshot");
8562
8563         if (mapper)
8564                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8565
8566         if (count)
8567                 seq_printf(m, ":count=%ld\n", *count);
8568         else
8569                 seq_puts(m, ":unlimited\n");
8570
8571         return 0;
8572 }
8573
8574 static int
8575 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8576                      unsigned long ip, void *init_data, void **data)
8577 {
8578         struct ftrace_func_mapper *mapper = *data;
8579
8580         if (!mapper) {
8581                 mapper = allocate_ftrace_func_mapper();
8582                 if (!mapper)
8583                         return -ENOMEM;
8584                 *data = mapper;
8585         }
8586
8587         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8588 }
8589
8590 static void
8591 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8592                      unsigned long ip, void *data)
8593 {
8594         struct ftrace_func_mapper *mapper = data;
8595
8596         if (!ip) {
8597                 if (!mapper)
8598                         return;
8599                 free_ftrace_func_mapper(mapper, NULL);
8600                 return;
8601         }
8602
8603         ftrace_func_mapper_remove_ip(mapper, ip);
8604 }
8605
8606 static struct ftrace_probe_ops snapshot_probe_ops = {
8607         .func                   = ftrace_snapshot,
8608         .print                  = ftrace_snapshot_print,
8609 };
8610
8611 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8612         .func                   = ftrace_count_snapshot,
8613         .print                  = ftrace_snapshot_print,
8614         .init                   = ftrace_snapshot_init,
8615         .free                   = ftrace_snapshot_free,
8616 };
8617
8618 static int
8619 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8620                                char *glob, char *cmd, char *param, int enable)
8621 {
8622         struct ftrace_probe_ops *ops;
8623         void *count = (void *)-1;
8624         char *number;
8625         int ret;
8626
8627         if (!tr)
8628                 return -ENODEV;
8629
8630         /* hash funcs only work with set_ftrace_filter */
8631         if (!enable)
8632                 return -EINVAL;
8633
8634         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8635
8636         if (glob[0] == '!')
8637                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8638
8639         if (!param)
8640                 goto out_reg;
8641
8642         number = strsep(&param, ":");
8643
8644         if (!strlen(number))
8645                 goto out_reg;
8646
8647         /*
8648          * We use the callback data field (which is a pointer)
8649          * as our counter.
8650          */
8651         ret = kstrtoul(number, 0, (unsigned long *)&count);
8652         if (ret)
8653                 return ret;
8654
8655  out_reg:
8656         ret = tracing_alloc_snapshot_instance(tr);
8657         if (ret < 0)
8658                 goto out;
8659
8660         ret = register_ftrace_function_probe(glob, tr, ops, count);
8661
8662  out:
8663         return ret < 0 ? ret : 0;
8664 }
8665
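/*
 * ftrace_snapshot_cmd registers the "snapshot" command parsed by the
 * callback above.  As a usage illustration (the traced function is
 * arbitrary):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *
 * arms an unlimited snapshot trigger on schedule(), while
 *
 *	echo 'schedule:snapshot:3' > set_ftrace_filter
 *
 * limits it to three snapshots, and prefixing the line with '!' removes
 * the probe again.
 */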
8666 static struct ftrace_func_command ftrace_snapshot_cmd = {
8667         .name                   = "snapshot",
8668         .func                   = ftrace_trace_snapshot_callback,
8669 };
8670
8671 static __init int register_snapshot_cmd(void)
8672 {
8673         return register_ftrace_command(&ftrace_snapshot_cmd);
8674 }
8675 #else
8676 static inline __init int register_snapshot_cmd(void) { return 0; }
8677 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8678
8679 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8680 {
8681         if (WARN_ON(!tr->dir))
8682                 return ERR_PTR(-ENODEV);
8683
8684         /* Top directory uses NULL as the parent */
8685         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8686                 return NULL;
8687
8688         /* All sub buffers have a descriptor */
8689         return tr->dir;
8690 }
8691
8692 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8693 {
8694         struct dentry *d_tracer;
8695
8696         if (tr->percpu_dir)
8697                 return tr->percpu_dir;
8698
8699         d_tracer = tracing_get_dentry(tr);
8700         if (IS_ERR(d_tracer))
8701                 return NULL;
8702
8703         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8704
8705         MEM_FAIL(!tr->percpu_dir,
8706                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8707
8708         return tr->percpu_dir;
8709 }
8710
8711 static struct dentry *
8712 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8713                       void *data, long cpu, const struct file_operations *fops)
8714 {
8715         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8716
8717         if (ret) /* See tracing_get_cpu() */
8718                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8719         return ret;
8720 }
8721
8722 static void
8723 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8724 {
8725         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8726         struct dentry *d_cpu;
8727         char cpu_dir[30]; /* 30 characters should be more than enough */
8728
8729         if (!d_percpu)
8730                 return;
8731
8732         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8733         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8734         if (!d_cpu) {
8735                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8736                 return;
8737         }
8738
8739         /* per cpu trace_pipe */
8740         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8741                                 tr, cpu, &tracing_pipe_fops);
8742
8743         /* per cpu trace */
8744         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8745                                 tr, cpu, &tracing_fops);
8746
8747         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8748                                 tr, cpu, &tracing_buffers_fops);
8749
8750         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8751                                 tr, cpu, &tracing_stats_fops);
8752
8753         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8754                                 tr, cpu, &tracing_entries_fops);
8755
8756 #ifdef CONFIG_TRACER_SNAPSHOT
8757         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8758                                 tr, cpu, &snapshot_fops);
8759
8760         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8761                                 tr, cpu, &snapshot_raw_fops);
8762 #endif
8763 }
8764
8765 #ifdef CONFIG_FTRACE_SELFTEST
8766 /* Let selftest have access to static functions in this file */
8767 #include "trace_selftest.c"
8768 #endif
8769
8770 static ssize_t
8771 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8772                         loff_t *ppos)
8773 {
8774         struct trace_option_dentry *topt = filp->private_data;
8775         char *buf;
8776
8777         if (topt->flags->val & topt->opt->bit)
8778                 buf = "1\n";
8779         else
8780                 buf = "0\n";
8781
8782         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8783 }
8784
8785 static ssize_t
8786 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8787                          loff_t *ppos)
8788 {
8789         struct trace_option_dentry *topt = filp->private_data;
8790         unsigned long val;
8791         int ret;
8792
8793         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8794         if (ret)
8795                 return ret;
8796
8797         if (val != 0 && val != 1)
8798                 return -EINVAL;
8799
8800         if (!!(topt->flags->val & topt->opt->bit) != val) {
8801                 mutex_lock(&trace_types_lock);
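		/*
		 * The last argument of __set_tracer_option() is the "neg"
		 * (clear) sense of the option, hence the !val.
		 */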
8802                 ret = __set_tracer_option(topt->tr, topt->flags,
8803                                           topt->opt, !val);
8804                 mutex_unlock(&trace_types_lock);
8805                 if (ret)
8806                         return ret;
8807         }
8808
8809         *ppos += cnt;
8810
8811         return cnt;
8812 }
8813
8814
8815 static const struct file_operations trace_options_fops = {
8816         .open = tracing_open_generic,
8817         .read = trace_options_read,
8818         .write = trace_options_write,
8819         .llseek = generic_file_llseek,
8820 };
8821
8822 /*
8823  * In order to pass in both the trace_array descriptor and the index of
8824  * the flag that the trace option file represents, the trace_array has a
8825  * character array, trace_flags_index[], where each element holds the
8826  * index of the bit for the flag it represents: index[0] == 0,
8827  * index[1] == 1, etc.  The address of the element for a given flag is
8828  * what gets passed to that option file's read/write callbacks (see
8829  * create_trace_option_core_file() below).
8830  *
8831  * To extract both the index and the trace_array descriptor,
8832  * get_tr_index() uses the following algorithm.
8833  *
8834  *   idx = *ptr;
8835  *
8836  * Since ptr is the address of one element of the index array, the
8837  * value it points to is the index itself (remember index[1] == 1).
8838  *
8839  * Subtracting that index from ptr then gives the start of the array:
8840  *
8841  *   ptr - idx == &index[0]
8842  *
8843  * and a simple container_of() on that pointer gets us back to the
8844  * trace_array descriptor.
8845  */
8846 static void get_tr_index(void *data, struct trace_array **ptr,
8847                          unsigned int *pindex)
8848 {
8849         *pindex = *(unsigned char *)data;
8850
8851         *ptr = container_of(data - *pindex, struct trace_array,
8852                             trace_flags_index);
8853 }
8854
8855 static ssize_t
8856 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8857                         loff_t *ppos)
8858 {
8859         void *tr_index = filp->private_data;
8860         struct trace_array *tr;
8861         unsigned int index;
8862         char *buf;
8863
8864         get_tr_index(tr_index, &tr, &index);
8865
8866         if (tr->trace_flags & (1 << index))
8867                 buf = "1\n";
8868         else
8869                 buf = "0\n";
8870
8871         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8872 }
8873
8874 static ssize_t
8875 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8876                          loff_t *ppos)
8877 {
8878         void *tr_index = filp->private_data;
8879         struct trace_array *tr;
8880         unsigned int index;
8881         unsigned long val;
8882         int ret;
8883
8884         get_tr_index(tr_index, &tr, &index);
8885
8886         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8887         if (ret)
8888                 return ret;
8889
8890         if (val != 0 && val != 1)
8891                 return -EINVAL;
8892
8893         mutex_lock(&event_mutex);
8894         mutex_lock(&trace_types_lock);
8895         ret = set_tracer_flag(tr, 1 << index, val);
8896         mutex_unlock(&trace_types_lock);
8897         mutex_unlock(&event_mutex);
8898
8899         if (ret < 0)
8900                 return ret;
8901
8902         *ppos += cnt;
8903
8904         return cnt;
8905 }
8906
8907 static const struct file_operations trace_options_core_fops = {
8908         .open = tracing_open_generic,
8909         .read = trace_options_core_read,
8910         .write = trace_options_core_write,
8911         .llseek = generic_file_llseek,
8912 };
8913
8914 struct dentry *trace_create_file(const char *name,
8915                                  umode_t mode,
8916                                  struct dentry *parent,
8917                                  void *data,
8918                                  const struct file_operations *fops)
8919 {
8920         struct dentry *ret;
8921
8922         ret = tracefs_create_file(name, mode, parent, data, fops);
8923         if (!ret)
8924                 pr_warn("Could not create tracefs '%s' entry\n", name);
8925
8926         return ret;
8927 }
8928
8929
8930 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8931 {
8932         struct dentry *d_tracer;
8933
8934         if (tr->options)
8935                 return tr->options;
8936
8937         d_tracer = tracing_get_dentry(tr);
8938         if (IS_ERR(d_tracer))
8939                 return NULL;
8940
8941         tr->options = tracefs_create_dir("options", d_tracer);
8942         if (!tr->options) {
8943                 pr_warn("Could not create tracefs directory 'options'\n");
8944                 return NULL;
8945         }
8946
8947         return tr->options;
8948 }
8949
8950 static void
8951 create_trace_option_file(struct trace_array *tr,
8952                          struct trace_option_dentry *topt,
8953                          struct tracer_flags *flags,
8954                          struct tracer_opt *opt)
8955 {
8956         struct dentry *t_options;
8957
8958         t_options = trace_options_init_dentry(tr);
8959         if (!t_options)
8960                 return;
8961
8962         topt->flags = flags;
8963         topt->opt = opt;
8964         topt->tr = tr;
8965
8966         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8967                                         t_options, topt, &trace_options_fops);
8968
8969 }
8970
8971 static void
8972 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8973 {
8974         struct trace_option_dentry *topts;
8975         struct trace_options *tr_topts;
8976         struct tracer_flags *flags;
8977         struct tracer_opt *opts;
8978         int cnt;
8979         int i;
8980
8981         if (!tracer)
8982                 return;
8983
8984         flags = tracer->flags;
8985
8986         if (!flags || !flags->opts)
8987                 return;
8988
8989         /*
8990          * If this is an instance, only create flags for tracers
8991          * the instance may have.
8992          */
8993         if (!trace_ok_for_array(tracer, tr))
8994                 return;
8995
8996         for (i = 0; i < tr->nr_topts; i++) {
8997                 /* Make sure there are no duplicate flags. */
8998                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8999                         return;
9000         }
9001
9002         opts = flags->opts;
9003
9004         for (cnt = 0; opts[cnt].name; cnt++)
9005                 ;
9006
9007         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9008         if (!topts)
9009                 return;
9010
9011         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9012                             GFP_KERNEL);
9013         if (!tr_topts) {
9014                 kfree(topts);
9015                 return;
9016         }
9017
9018         tr->topts = tr_topts;
9019         tr->topts[tr->nr_topts].tracer = tracer;
9020         tr->topts[tr->nr_topts].topts = topts;
9021         tr->nr_topts++;
9022
9023         for (cnt = 0; opts[cnt].name; cnt++) {
9024                 create_trace_option_file(tr, &topts[cnt], flags,
9025                                          &opts[cnt]);
9026                 MEM_FAIL(topts[cnt].entry == NULL,
9027                           "Failed to create trace option: %s",
9028                           opts[cnt].name);
9029         }
9030 }
9031
9032 static struct dentry *
9033 create_trace_option_core_file(struct trace_array *tr,
9034                               const char *option, long index)
9035 {
9036         struct dentry *t_options;
9037
9038         t_options = trace_options_init_dentry(tr);
9039         if (!t_options)
9040                 return NULL;
9041
9042         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9043                                  (void *)&tr->trace_flags_index[index],
9044                                  &trace_options_core_fops);
9045 }
9046
9047 static void create_trace_options_dir(struct trace_array *tr)
9048 {
9049         struct dentry *t_options;
9050         bool top_level = tr == &global_trace;
9051         int i;
9052
9053         t_options = trace_options_init_dentry(tr);
9054         if (!t_options)
9055                 return;
9056
9057         for (i = 0; trace_options[i]; i++) {
9058                 if (top_level ||
9059                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9060                         create_trace_option_core_file(tr, trace_options[i], i);
9061         }
9062 }
9063
9064 static ssize_t
9065 rb_simple_read(struct file *filp, char __user *ubuf,
9066                size_t cnt, loff_t *ppos)
9067 {
9068         struct trace_array *tr = filp->private_data;
9069         char buf[64];
9070         int r;
9071
9072         r = tracer_tracing_is_on(tr);
9073         r = sprintf(buf, "%d\n", r);
9074
9075         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9076 }
9077
9078 static ssize_t
9079 rb_simple_write(struct file *filp, const char __user *ubuf,
9080                 size_t cnt, loff_t *ppos)
9081 {
9082         struct trace_array *tr = filp->private_data;
9083         struct trace_buffer *buffer = tr->array_buffer.buffer;
9084         unsigned long val;
9085         int ret;
9086
9087         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9088         if (ret)
9089                 return ret;
9090
9091         if (buffer) {
9092                 mutex_lock(&trace_types_lock);
9093                 if (!!val == tracer_tracing_is_on(tr)) {
9094                         val = 0; /* do nothing */
9095                 } else if (val) {
9096                         tracer_tracing_on(tr);
9097                         if (tr->current_trace->start)
9098                                 tr->current_trace->start(tr);
9099                 } else {
9100                         tracer_tracing_off(tr);
9101                         if (tr->current_trace->stop)
9102                                 tr->current_trace->stop(tr);
9103                         /* Wake up any waiters */
9104                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9105                 }
9106                 mutex_unlock(&trace_types_lock);
9107         }
9108
9109         (*ppos)++;
9110
9111         return cnt;
9112 }
9113
9114 static const struct file_operations rb_simple_fops = {
9115         .open           = tracing_open_generic_tr,
9116         .read           = rb_simple_read,
9117         .write          = rb_simple_write,
9118         .release        = tracing_release_generic_tr,
9119         .llseek         = default_llseek,
9120 };
9121
9122 static ssize_t
9123 buffer_percent_read(struct file *filp, char __user *ubuf,
9124                     size_t cnt, loff_t *ppos)
9125 {
9126         struct trace_array *tr = filp->private_data;
9127         char buf[64];
9128         int r;
9129
9130         r = tr->buffer_percent;
9131         r = sprintf(buf, "%d\n", r);
9132
9133         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9134 }
9135
9136 static ssize_t
9137 buffer_percent_write(struct file *filp, const char __user *ubuf,
9138                      size_t cnt, loff_t *ppos)
9139 {
9140         struct trace_array *tr = filp->private_data;
9141         unsigned long val;
9142         int ret;
9143
9144         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9145         if (ret)
9146                 return ret;
9147
9148         if (val > 100)
9149                 return -EINVAL;
9150
9151         tr->buffer_percent = val;
9152
9153         (*ppos)++;
9154
9155         return cnt;
9156 }
9157
9158 static const struct file_operations buffer_percent_fops = {
9159         .open           = tracing_open_generic_tr,
9160         .read           = buffer_percent_read,
9161         .write          = buffer_percent_write,
9162         .release        = tracing_release_generic_tr,
9163         .llseek         = default_llseek,
9164 };
9165
9166 static struct dentry *trace_instance_dir;
9167
9168 static void
9169 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9170
9171 static int
9172 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9173 {
9174         enum ring_buffer_flags rb_flags;
9175
9176         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9177
9178         buf->tr = tr;
9179
9180         buf->buffer = ring_buffer_alloc(size, rb_flags);
9181         if (!buf->buffer)
9182                 return -ENOMEM;
9183
9184         buf->data = alloc_percpu(struct trace_array_cpu);
9185         if (!buf->data) {
9186                 ring_buffer_free(buf->buffer);
9187                 buf->buffer = NULL;
9188                 return -ENOMEM;
9189         }
9190
9191         /* Allocate the first page for all buffers */
9192         set_buffer_entries(&tr->array_buffer,
9193                            ring_buffer_size(tr->array_buffer.buffer, 0));
9194
9195         return 0;
9196 }
9197
9198 static void free_trace_buffer(struct array_buffer *buf)
9199 {
9200         if (buf->buffer) {
9201                 ring_buffer_free(buf->buffer);
9202                 buf->buffer = NULL;
9203                 free_percpu(buf->data);
9204                 buf->data = NULL;
9205         }
9206 }
9207
9208 static int allocate_trace_buffers(struct trace_array *tr, int size)
9209 {
9210         int ret;
9211
9212         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9213         if (ret)
9214                 return ret;
9215
9216 #ifdef CONFIG_TRACER_MAX_TRACE
9217         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9218                                     allocate_snapshot ? size : 1);
9219         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9220                 free_trace_buffer(&tr->array_buffer);
9221                 return -ENOMEM;
9222         }
9223         tr->allocated_snapshot = allocate_snapshot;
9224
9225         /*
9226          * Only the top level trace array gets its snapshot allocated
9227          * from the kernel command line.
9228          */
9229         allocate_snapshot = false;
9230 #endif
9231
9232         return 0;
9233 }
9234
9235 static void free_trace_buffers(struct trace_array *tr)
9236 {
9237         if (!tr)
9238                 return;
9239
9240         free_trace_buffer(&tr->array_buffer);
9241
9242 #ifdef CONFIG_TRACER_MAX_TRACE
9243         free_trace_buffer(&tr->max_buffer);
9244 #endif
9245 }
9246
9247 static void init_trace_flags_index(struct trace_array *tr)
9248 {
9249         int i;
9250
9251         /* Used by the trace options files */
9252         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9253                 tr->trace_flags_index[i] = i;
9254 }
9255
9256 static void __update_tracer_options(struct trace_array *tr)
9257 {
9258         struct tracer *t;
9259
9260         for (t = trace_types; t; t = t->next)
9261                 add_tracer_options(tr, t);
9262 }
9263
9264 static void update_tracer_options(struct trace_array *tr)
9265 {
9266         mutex_lock(&trace_types_lock);
9267         tracer_options_updated = true;
9268         __update_tracer_options(tr);
9269         mutex_unlock(&trace_types_lock);
9270 }
9271
9272 /* Must have trace_types_lock held */
9273 struct trace_array *trace_array_find(const char *instance)
9274 {
9275         struct trace_array *tr, *found = NULL;
9276
9277         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9278                 if (tr->name && strcmp(tr->name, instance) == 0) {
9279                         found = tr;
9280                         break;
9281                 }
9282         }
9283
9284         return found;
9285 }
9286
9287 struct trace_array *trace_array_find_get(const char *instance)
9288 {
9289         struct trace_array *tr;
9290
9291         mutex_lock(&trace_types_lock);
9292         tr = trace_array_find(instance);
9293         if (tr)
9294                 tr->ref++;
9295         mutex_unlock(&trace_types_lock);
9296
9297         return tr;
9298 }
9299
9300 static int trace_array_create_dir(struct trace_array *tr)
9301 {
9302         int ret;
9303
9304         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9305         if (!tr->dir)
9306                 return -EINVAL;
9307
9308         ret = event_trace_add_tracer(tr->dir, tr);
9309         if (ret) {
9310                 tracefs_remove(tr->dir);
9311                 return ret;
9312         }
9313
9314         init_tracer_tracefs(tr, tr->dir);
9315         __update_tracer_options(tr);
9316
9317         return ret;
9318 }
9319
9320 static struct trace_array *trace_array_create(const char *name)
9321 {
9322         struct trace_array *tr;
9323         int ret;
9324
9325         ret = -ENOMEM;
9326         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9327         if (!tr)
9328                 return ERR_PTR(ret);
9329
9330         tr->name = kstrdup(name, GFP_KERNEL);
9331         if (!tr->name)
9332                 goto out_free_tr;
9333
9334         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9335                 goto out_free_tr;
9336
9337         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9338
9339         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9340
9341         raw_spin_lock_init(&tr->start_lock);
9342
9343         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9344
9345         tr->current_trace = &nop_trace;
9346
9347         INIT_LIST_HEAD(&tr->systems);
9348         INIT_LIST_HEAD(&tr->events);
9349         INIT_LIST_HEAD(&tr->hist_vars);
9350         INIT_LIST_HEAD(&tr->err_log);
9351
9352         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9353                 goto out_free_tr;
9354
9355         if (ftrace_allocate_ftrace_ops(tr) < 0)
9356                 goto out_free_tr;
9357
9358         ftrace_init_trace_array(tr);
9359
9360         init_trace_flags_index(tr);
9361
9362         if (trace_instance_dir) {
9363                 ret = trace_array_create_dir(tr);
9364                 if (ret)
9365                         goto out_free_tr;
9366         } else
9367                 __trace_early_add_events(tr);
9368
9369         list_add(&tr->list, &ftrace_trace_arrays);
9370
9371         tr->ref++;
9372
9373         return tr;
9374
9375  out_free_tr:
9376         ftrace_free_ftrace_ops(tr);
9377         free_trace_buffers(tr);
9378         free_cpumask_var(tr->tracing_cpumask);
9379         kfree(tr->name);
9380         kfree(tr);
9381
9382         return ERR_PTR(ret);
9383 }
9384
9385 static int instance_mkdir(const char *name)
9386 {
9387         struct trace_array *tr;
9388         int ret;
9389
9390         mutex_lock(&event_mutex);
9391         mutex_lock(&trace_types_lock);
9392
9393         ret = -EEXIST;
9394         if (trace_array_find(name))
9395                 goto out_unlock;
9396
9397         tr = trace_array_create(name);
9398
9399         ret = PTR_ERR_OR_ZERO(tr);
9400
9401 out_unlock:
9402         mutex_unlock(&trace_types_lock);
9403         mutex_unlock(&event_mutex);
9404         return ret;
9405 }
9406
9407 /**
9408  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9409  * @name: The name of the trace array to be looked up/created.
9410  *
9411  * Returns a pointer to the trace array with the given name, or NULL if
9412  * it cannot be created.
9413  *
9414  * NOTE: This function increments the reference counter associated with the
9415  * trace array returned. This makes sure it cannot be freed while in use.
9416  * Use trace_array_put() once the trace array is no longer needed.
9417  * If the trace_array is to be freed, trace_array_destroy() needs to
9418  * be called after the trace_array_put(), or simply let user space delete
9419  * it from the tracefs instances directory. But until the
9420  * trace_array_put() is called, user space cannot delete it.
9421  *
9422  */
9423 struct trace_array *trace_array_get_by_name(const char *name)
9424 {
9425         struct trace_array *tr;
9426
9427         mutex_lock(&event_mutex);
9428         mutex_lock(&trace_types_lock);
9429
9430         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9431                 if (tr->name && strcmp(tr->name, name) == 0)
9432                         goto out_unlock;
9433         }
9434
9435         tr = trace_array_create(name);
9436
9437         if (IS_ERR(tr))
9438                 tr = NULL;
9439 out_unlock:
9440         if (tr)
9441                 tr->ref++;
9442
9443         mutex_unlock(&trace_types_lock);
9444         mutex_unlock(&event_mutex);
9445         return tr;
9446 }
9447 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
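
/*
 * A minimal sketch of the expected calling pattern for a module that
 * creates its own instance and later tears it down (the instance name
 * and surrounding code are hypothetical):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	// ... use the instance ...
 *
 *	// Drop our reference; the instance itself keeps existing.
 *	trace_array_put(tr);
 *
 *	// Remove the instance entirely (only valid if nothing else
 *	// still holds a reference to it).
 *	trace_array_destroy(tr);
 */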
9448
9449 static int __remove_instance(struct trace_array *tr)
9450 {
9451         int i;
9452
9453         /* Reference counter for a newly created trace array = 1. */
9454         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9455                 return -EBUSY;
9456
9457         list_del(&tr->list);
9458
9459         /* Disable all the flags that were enabled coming in */
9460         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9461                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9462                         set_tracer_flag(tr, 1 << i, 0);
9463         }
9464
9465         tracing_set_nop(tr);
9466         clear_ftrace_function_probes(tr);
9467         event_trace_del_tracer(tr);
9468         ftrace_clear_pids(tr);
9469         ftrace_destroy_function_files(tr);
9470         tracefs_remove(tr->dir);
9471         free_percpu(tr->last_func_repeats);
9472         free_trace_buffers(tr);
9473
9474         for (i = 0; i < tr->nr_topts; i++) {
9475                 kfree(tr->topts[i].topts);
9476         }
9477         kfree(tr->topts);
9478
9479         free_cpumask_var(tr->tracing_cpumask);
9480         kfree(tr->name);
9481         kfree(tr);
9482
9483         return 0;
9484 }
9485
9486 int trace_array_destroy(struct trace_array *this_tr)
9487 {
9488         struct trace_array *tr;
9489         int ret;
9490
9491         if (!this_tr)
9492                 return -EINVAL;
9493
9494         mutex_lock(&event_mutex);
9495         mutex_lock(&trace_types_lock);
9496
9497         ret = -ENODEV;
9498
9499         /* Make sure the trace array exists before destroying it. */
9500         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9501                 if (tr == this_tr) {
9502                         ret = __remove_instance(tr);
9503                         break;
9504                 }
9505         }
9506
9507         mutex_unlock(&trace_types_lock);
9508         mutex_unlock(&event_mutex);
9509
9510         return ret;
9511 }
9512 EXPORT_SYMBOL_GPL(trace_array_destroy);
9513
9514 static int instance_rmdir(const char *name)
9515 {
9516         struct trace_array *tr;
9517         int ret;
9518
9519         mutex_lock(&event_mutex);
9520         mutex_lock(&trace_types_lock);
9521
9522         ret = -ENODEV;
9523         tr = trace_array_find(name);
9524         if (tr)
9525                 ret = __remove_instance(tr);
9526
9527         mutex_unlock(&trace_types_lock);
9528         mutex_unlock(&event_mutex);
9529
9530         return ret;
9531 }
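/*
 * instance_mkdir()/instance_rmdir() above back the tracefs "instances"
 * directory registered in create_trace_instances() below, so from user
 * space an instance is created and removed with a plain mkdir/rmdir,
 * e.g. (assuming tracefs is mounted in its usual place):
 *
 *         mkdir /sys/kernel/tracing/instances/foo
 *         rmdir /sys/kernel/tracing/instances/foo
 */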
9532
9533 static __init void create_trace_instances(struct dentry *d_tracer)
9534 {
9535         struct trace_array *tr;
9536
9537         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9538                                                          instance_mkdir,
9539                                                          instance_rmdir);
9540         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9541                 return;
9542
9543         mutex_lock(&event_mutex);
9544         mutex_lock(&trace_types_lock);
9545
9546         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9547                 if (!tr->name)
9548                         continue;
9549                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9550                              "Failed to create instance directory\n"))
9551                         break;
9552         }
9553
9554         mutex_unlock(&trace_types_lock);
9555         mutex_unlock(&event_mutex);
9556 }
9557
9558 static void
9559 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9560 {
9561         struct trace_event_file *file;
9562         int cpu;
9563
9564         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9565                         tr, &show_traces_fops);
9566
9567         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9568                         tr, &set_tracer_fops);
9569
9570         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9571                           tr, &tracing_cpumask_fops);
9572
9573         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9574                           tr, &tracing_iter_fops);
9575
9576         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9577                           tr, &tracing_fops);
9578
9579         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9580                           tr, &tracing_pipe_fops);
9581
9582         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9583                           tr, &tracing_entries_fops);
9584
9585         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9586                           tr, &tracing_total_entries_fops);
9587
9588         trace_create_file("free_buffer", 0200, d_tracer,
9589                           tr, &tracing_free_buffer_fops);
9590
9591         trace_create_file("trace_marker", 0220, d_tracer,
9592                           tr, &tracing_mark_fops);
9593
9594         file = __find_event_file(tr, "ftrace", "print");
9595         if (file && file->dir)
9596                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9597                                   file, &event_trigger_fops);
9598         tr->trace_marker_file = file;
9599
9600         trace_create_file("trace_marker_raw", 0220, d_tracer,
9601                           tr, &tracing_mark_raw_fops);
9602
9603         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9604                           &trace_clock_fops);
9605
9606         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9607                           tr, &rb_simple_fops);
9608
9609         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9610                           &trace_time_stamp_mode_fops);
9611
9612         tr->buffer_percent = 50;
9613
9614         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9615                         tr, &buffer_percent_fops);
9616
9617         create_trace_options_dir(tr);
9618
9619 #ifdef CONFIG_TRACER_MAX_TRACE
9620         trace_create_maxlat_file(tr, d_tracer);
9621 #endif
9622
9623         if (ftrace_create_function_files(tr, d_tracer))
9624                 MEM_FAIL(1, "Could not allocate function filter files");
9625
9626 #ifdef CONFIG_TRACER_SNAPSHOT
9627         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9628                           tr, &snapshot_fops);
9629 #endif
9630
9631         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9632                           tr, &tracing_err_log_fops);
9633
9634         for_each_tracing_cpu(cpu)
9635                 tracing_init_tracefs_percpu(tr, cpu);
9636
9637         ftrace_init_tracefs(tr, d_tracer);
9638 }
9639
9640 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9641 {
9642         struct vfsmount *mnt;
9643         struct file_system_type *type;
9644
9645         /*
9646          * To maintain backward compatibility for tools that mount
9647          * debugfs to get to the tracing facility, tracefs is automatically
9648          * mounted to the debugfs/tracing directory.
9649          */
9650         type = get_fs_type("tracefs");
9651         if (!type)
9652                 return NULL;
9653         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9654         put_filesystem(type);
9655         if (IS_ERR(mnt))
9656                 return NULL;
9657         mntget(mnt);
9658
9659         return mnt;
9660 }
9661
9662 /**
9663  * tracing_init_dentry - initialize top level trace array
9664  *
9665  * This is called when creating files or directories in the tracing
9666  * directory. It is called via fs_initcall() by any of the boot up code,
9667  * and returns 0 once the top level tracing directory is available.
9668  */
9669 int tracing_init_dentry(void)
9670 {
9671         struct trace_array *tr = &global_trace;
9672
9673         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9674                 pr_warn("Tracing disabled due to lockdown\n");
9675                 return -EPERM;
9676         }
9677
9678         /* The top level trace array uses NULL as parent */
9679         if (tr->dir)
9680                 return 0;
9681
9682         if (WARN_ON(!tracefs_initialized()))
9683                 return -ENODEV;
9684
9685         /*
9686          * As there may still be users that expect the tracing
9687          * files to exist in debugfs/tracing, we must automount
9688          * the tracefs file system there, so older tools still
9689          * work with the newer kernel.
9690          */
9691         tr->dir = debugfs_create_automount("tracing", NULL,
9692                                            trace_automount, NULL);
9693
9694         return 0;
9695 }
9696
9697 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9698 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9699
9700 static struct workqueue_struct *eval_map_wq __initdata;
9701 static struct work_struct eval_map_work __initdata;
9702 static struct work_struct tracerfs_init_work __initdata;
9703
9704 static void __init eval_map_work_func(struct work_struct *work)
9705 {
9706         int len;
9707
9708         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9709         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9710 }
9711
9712 static int __init trace_eval_init(void)
9713 {
9714         INIT_WORK(&eval_map_work, eval_map_work_func);
9715
9716         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9717         if (!eval_map_wq) {
9718                 pr_err("Unable to allocate eval_map_wq\n");
9719                 /* Fall back and do the work synchronously here */
9720                 eval_map_work_func(&eval_map_work);
9721                 return -ENOMEM;
9722         }
9723
9724         queue_work(eval_map_wq, &eval_map_work);
9725         return 0;
9726 }
9727
9728 subsys_initcall(trace_eval_init);
9729
9730 static int __init trace_eval_sync(void)
9731 {
9732         /* Make sure the eval map updates are finished */
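        /* (destroy_workqueue() drains any work still queued before tearing down) */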
9733         if (eval_map_wq)
9734                 destroy_workqueue(eval_map_wq);
9735         return 0;
9736 }
9737
9738 late_initcall_sync(trace_eval_sync);
9739
9740
9741 #ifdef CONFIG_MODULES
9742 static void trace_module_add_evals(struct module *mod)
9743 {
9744         if (!mod->num_trace_evals)
9745                 return;
9746
9747         /*
9748          * Modules with bad taint do not have events created, do
9749          * not bother with enums either.
9750          */
9751         if (trace_module_has_bad_taint(mod))
9752                 return;
9753
9754         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9755 }
9756
9757 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9758 static void trace_module_remove_evals(struct module *mod)
9759 {
9760         union trace_eval_map_item *map;
9761         union trace_eval_map_item **last = &trace_eval_maps;
9762
9763         if (!mod->num_trace_evals)
9764                 return;
9765
9766         mutex_lock(&trace_eval_mutex);
9767
9768         map = trace_eval_maps;
9769
9770         while (map) {
9771                 if (map->head.mod == mod)
9772                         break;
9773                 map = trace_eval_jmp_to_tail(map);
9774                 last = &map->tail.next;
9775                 map = map->tail.next;
9776         }
9777         if (!map)
9778                 goto out;
9779
9780         *last = trace_eval_jmp_to_tail(map)->tail.next;
9781         kfree(map);
9782  out:
9783         mutex_unlock(&trace_eval_mutex);
9784 }
9785 #else
9786 static inline void trace_module_remove_evals(struct module *mod) { }
9787 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9788
9789 static int trace_module_notify(struct notifier_block *self,
9790                                unsigned long val, void *data)
9791 {
9792         struct module *mod = data;
9793
9794         switch (val) {
9795         case MODULE_STATE_COMING:
9796                 trace_module_add_evals(mod);
9797                 break;
9798         case MODULE_STATE_GOING:
9799                 trace_module_remove_evals(mod);
9800                 break;
9801         }
9802
9803         return NOTIFY_OK;
9804 }
9805
9806 static struct notifier_block trace_module_nb = {
9807         .notifier_call = trace_module_notify,
9808         .priority = 0,
9809 };
9810 #endif /* CONFIG_MODULES */
9811
9812 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9813 {
9814
9815         event_trace_init();
9816
9817         init_tracer_tracefs(&global_trace, NULL);
9818         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9819
9820         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9821                         &global_trace, &tracing_thresh_fops);
9822
9823         trace_create_file("README", TRACE_MODE_READ, NULL,
9824                         NULL, &tracing_readme_fops);
9825
9826         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9827                         NULL, &tracing_saved_cmdlines_fops);
9828
9829         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9830                           NULL, &tracing_saved_cmdlines_size_fops);
9831
9832         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9833                         NULL, &tracing_saved_tgids_fops);
9834
9835         trace_create_eval_file(NULL);
9836
9837 #ifdef CONFIG_MODULES
9838         register_module_notifier(&trace_module_nb);
9839 #endif
9840
9841 #ifdef CONFIG_DYNAMIC_FTRACE
9842         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9843                         NULL, &tracing_dyn_info_fops);
9844 #endif
9845
9846         create_trace_instances(NULL);
9847
9848         update_tracer_options(&global_trace);
9849 }
9850
9851 static __init int tracer_init_tracefs(void)
9852 {
9853         int ret;
9854
9855         trace_access_lock_init();
9856
9857         ret = tracing_init_dentry();
9858         if (ret)
9859                 return 0;
9860
9861         if (eval_map_wq) {
9862                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9863                 queue_work(eval_map_wq, &tracerfs_init_work);
9864         } else {
9865                 tracer_init_tracefs_work_func(NULL);
9866         }
9867
9868         rv_init_interface();
9869
9870         return 0;
9871 }
9872
9873 fs_initcall(tracer_init_tracefs);
9874
9875 static int trace_die_panic_handler(struct notifier_block *self,
9876                                 unsigned long ev, void *unused);
9877
9878 static struct notifier_block trace_panic_notifier = {
9879         .notifier_call = trace_die_panic_handler,
9880         .priority = INT_MAX - 1,
9881 };
9882
9883 static struct notifier_block trace_die_notifier = {
9884         .notifier_call = trace_die_panic_handler,
9885         .priority = INT_MAX - 1,
9886 };
9887
9888 /*
9889  * The idea is to execute the following die/panic callback early, in order
9890  * to avoid showing irrelevant information in the trace (like other panic
9891  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9892  * warnings get disabled (to prevent potential log flooding).
9893  */
9894 static int trace_die_panic_handler(struct notifier_block *self,
9895                                 unsigned long ev, void *unused)
9896 {
9897         if (!ftrace_dump_on_oops)
9898                 return NOTIFY_DONE;
9899
9900         /* The die notifier requires DIE_OOPS to trigger */
9901         if (self == &trace_die_notifier && ev != DIE_OOPS)
9902                 return NOTIFY_DONE;
9903
9904         ftrace_dump(ftrace_dump_on_oops);
9905
9906         return NOTIFY_DONE;
9907 }
9908
9909 /*
9910  * printk is limited to a max of 1024 characters; we really don't need it
9911  * that big. Nothing should be printing 1000 characters anyway.
9912  */
9913 #define TRACE_MAX_PRINT         1000
9914
9915 /*
9916  * Define KERN_TRACE here so that we have one place to modify
9917  * it if we decide to change what log level the ftrace dump
9918  * should be at.
9919  */
9920 #define KERN_TRACE              KERN_EMERG
9921
9922 void
9923 trace_printk_seq(struct trace_seq *s)
9924 {
9925         /* Probably should print a warning here. */
9926         if (s->seq.len >= TRACE_MAX_PRINT)
9927                 s->seq.len = TRACE_MAX_PRINT;
9928
9929         /*
9930          * More paranoid code. Although the buffer size is set to
9931          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9932          * an extra layer of protection.
9933          */
9934         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9935                 s->seq.len = s->seq.size - 1;
9936
9937         /* should be zero terminated, but we are paranoid. */
9938         s->buffer[s->seq.len] = 0;
9939
9940         printk(KERN_TRACE "%s", s->buffer);
9941
9942         trace_seq_init(s);
9943 }
9944
9945 void trace_init_global_iter(struct trace_iterator *iter)
9946 {
9947         iter->tr = &global_trace;
9948         iter->trace = iter->tr->current_trace;
9949         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9950         iter->array_buffer = &global_trace.array_buffer;
9951
9952         if (iter->trace && iter->trace->open)
9953                 iter->trace->open(iter);
9954
9955         /* Annotate start of buffers if we had overruns */
9956         if (ring_buffer_overruns(iter->array_buffer->buffer))
9957                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9958
9959         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9960         if (trace_clocks[iter->tr->clock_id].in_ns)
9961                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9962
9963         /* Cannot use kmalloc for iter.temp and iter.fmt */
9964         iter->temp = static_temp_buf;
9965         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9966         iter->fmt = static_fmt_buf;
9967         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9968 }
9969
9970 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9971 {
9972         /* use static because iter can be a bit big for the stack */
9973         static struct trace_iterator iter;
9974         static atomic_t dump_running;
9975         struct trace_array *tr = &global_trace;
9976         unsigned int old_userobj;
9977         unsigned long flags;
9978         int cnt = 0, cpu;
9979
9980         /* Only allow one dump user at a time. */
9981         if (atomic_inc_return(&dump_running) != 1) {
9982                 atomic_dec(&dump_running);
9983                 return;
9984         }
9985
9986         /*
9987          * Always turn off tracing when we dump.
9988          * We don't need to show trace output of what happens
9989          * between multiple crashes.
9990          *
9991          * If the user does a sysrq-z, then they can re-enable
9992          * tracing with echo 1 > tracing_on.
9993          */
9994         tracing_off();
9995
9996         local_irq_save(flags);
9997
9998         /* Simulate the iterator */
9999         trace_init_global_iter(&iter);
10000
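        /*
         * Bump each CPU's disabled count so that no new events are
         * recorded into the per-CPU buffers while the dump walks them.
         */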
10001         for_each_tracing_cpu(cpu) {
10002                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10003         }
10004
10005         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10006
10007         /* don't look at user memory in panic mode */
10008         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10009
10010         switch (oops_dump_mode) {
10011         case DUMP_ALL:
10012                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10013                 break;
10014         case DUMP_ORIG:
10015                 iter.cpu_file = raw_smp_processor_id();
10016                 break;
10017         case DUMP_NONE:
10018                 goto out_enable;
10019         default:
10020                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10021                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10022         }
10023
10024         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10025
10026         /* Did function tracer already get disabled? */
10027         if (ftrace_is_dead()) {
10028                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10029                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10030         }
10031
10032         /*
10033          * We need to stop all tracing on all CPUs to read
10034          * the next buffer. This is a bit expensive, but is
10035          * not done often. We print all that we can read,
10036          * and then release the locks again.
10037          */
10038
10039         while (!trace_empty(&iter)) {
10040
10041                 if (!cnt)
10042                         printk(KERN_TRACE "---------------------------------\n");
10043
10044                 cnt++;
10045
10046                 trace_iterator_reset(&iter);
10047                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10048
10049                 if (trace_find_next_entry_inc(&iter) != NULL) {
10050                         int ret;
10051
10052                         ret = print_trace_line(&iter);
10053                         if (ret != TRACE_TYPE_NO_CONSUME)
10054                                 trace_consume(&iter);
10055                 }
10056                 touch_nmi_watchdog();
10057
10058                 trace_printk_seq(&iter.seq);
10059         }
10060
10061         if (!cnt)
10062                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10063         else
10064                 printk(KERN_TRACE "---------------------------------\n");
10065
10066  out_enable:
10067         tr->trace_flags |= old_userobj;
10068
10069         for_each_tracing_cpu(cpu) {
10070                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10071         }
10072         atomic_dec(&dump_running);
10073         local_irq_restore(flags);
10074 }
10075 EXPORT_SYMBOL_GPL(ftrace_dump);
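/*
 * Illustrative sketch (hypothetical caller): since ftrace_dump() is
 * exported, other kernel code can dump the trace buffers from a fatal
 * error path.  DUMP_ORIG dumps only the current CPU instead of all of
 * them:
 *
 *         if (something_went_badly_wrong)
 *                 ftrace_dump(DUMP_ALL);
 */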
10076
10077 #define WRITE_BUFSIZE  4096
10078
10079 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10080                                 size_t count, loff_t *ppos,
10081                                 int (*createfn)(const char *))
10082 {
10083         char *kbuf, *buf, *tmp;
10084         int ret = 0;
10085         size_t done = 0;
10086         size_t size;
10087
10088         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10089         if (!kbuf)
10090                 return -ENOMEM;
10091
10092         while (done < count) {
10093                 size = count - done;
10094
10095                 if (size >= WRITE_BUFSIZE)
10096                         size = WRITE_BUFSIZE - 1;
10097
10098                 if (copy_from_user(kbuf, buffer + done, size)) {
10099                         ret = -EFAULT;
10100                         goto out;
10101                 }
10102                 kbuf[size] = '\0';
10103                 buf = kbuf;
10104                 do {
10105                         tmp = strchr(buf, '\n');
10106                         if (tmp) {
10107                                 *tmp = '\0';
10108                                 size = tmp - buf + 1;
10109                         } else {
10110                                 size = strlen(buf);
10111                                 if (done + size < count) {
10112                                         if (buf != kbuf)
10113                                                 break;
10114                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10115                                         pr_warn("Line length is too long: Should be less than %d\n",
10116                                                 WRITE_BUFSIZE - 2);
10117                                         ret = -EINVAL;
10118                                         goto out;
10119                                 }
10120                         }
10121                         done += size;
10122
10123                         /* Remove comments */
10124                         tmp = strchr(buf, '#');
10125
10126                         if (tmp)
10127                                 *tmp = '\0';
10128
10129                         ret = createfn(buf);
10130                         if (ret)
10131                                 goto out;
10132                         buf += size;
10133
10134                 } while (done < count);
10135         }
10136         ret = done;
10137
10138 out:
10139         kfree(kbuf);
10140
10141         return ret;
10142 }
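/*
 * Illustrative sketch (hypothetical caller): a tracefs write handler is
 * expected to hand trace_parse_run_command() a createfn callback, which
 * receives one command line at a time with the trailing newline and any
 * '#' comment already stripped.  The names my_create_cmd and my_write
 * are made up and only show the calling convention; a real createfn
 * would parse raw_command and create whatever it describes:
 *
 *         static int my_create_cmd(const char *raw_command)
 *         {
 *                 return 0;
 *         }
 *
 *         static ssize_t my_write(struct file *file, const char __user *buffer,
 *                                 size_t count, loff_t *ppos)
 *         {
 *                 return trace_parse_run_command(file, buffer, count, ppos,
 *                                                my_create_cmd);
 *         }
 */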
10143
10144 __init static int tracer_alloc_buffers(void)
10145 {
10146         int ring_buf_size;
10147         int ret = -ENOMEM;
10148
10149
10150         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10151                 pr_warn("Tracing disabled due to lockdown\n");
10152                 return -EPERM;
10153         }
10154
10155         /*
10156          * Make sure we don't accidentally add more trace options
10157          * than we have bits for.
10158          */
10159         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10160
10161         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10162                 goto out;
10163
10164         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10165                 goto out_free_buffer_mask;
10166
10167         /* Only allocate trace_printk buffers if a trace_printk exists */
10168         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10169                 /* Must be called before global_trace.buffer is allocated */
10170                 trace_printk_init_buffers();
10171
10172         /* To save memory, keep the ring buffer size to its minimum */
10173         if (ring_buffer_expanded)
10174                 ring_buf_size = trace_buf_size;
10175         else
10176                 ring_buf_size = 1;
10177
10178         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10179         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10180
10181         raw_spin_lock_init(&global_trace.start_lock);
10182
10183         /*
10184          * The prepare callback allocates some memory for the ring buffer. We
10185          * don't free the buffer if the CPU goes down. If we were to free
10186          * the buffer, then the user would lose any trace that was in the
10187          * buffer. The memory will be removed once the "instance" is removed.
10188          */
10189         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10190                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10191                                       NULL);
10192         if (ret < 0)
10193                 goto out_free_cpumask;
10194         /* Used for event triggers */
10195         ret = -ENOMEM;
10196         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10197         if (!temp_buffer)
10198                 goto out_rm_hp_state;
10199
10200         if (trace_create_savedcmd() < 0)
10201                 goto out_free_temp_buffer;
10202
10203         /* TODO: make the number of buffers hot-pluggable with CPUs */
10204         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10205                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10206                 goto out_free_savedcmd;
10207         }
10208
10209         if (global_trace.buffer_disabled)
10210                 tracing_off();
10211
10212         if (trace_boot_clock) {
10213                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10214                 if (ret < 0)
10215                         pr_warn("Trace clock %s not defined, going back to default\n",
10216                                 trace_boot_clock);
10217         }
10218
10219         /*
10220          * register_tracer() might reference current_trace, so it
10221          * needs to be set before we register anything. This is
10222          * just a bootstrap of current_trace anyway.
10223          */
10224         global_trace.current_trace = &nop_trace;
10225
10226         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10227
10228         ftrace_init_global_array_ops(&global_trace);
10229
10230         init_trace_flags_index(&global_trace);
10231
10232         register_tracer(&nop_trace);
10233
10234         /* Function tracing may start here (via kernel command line) */
10235         init_function_trace();
10236
10237         /* All seems OK, enable tracing */
10238         tracing_disabled = 0;
10239
10240         atomic_notifier_chain_register(&panic_notifier_list,
10241                                        &trace_panic_notifier);
10242
10243         register_die_notifier(&trace_die_notifier);
10244
10245         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10246
10247         INIT_LIST_HEAD(&global_trace.systems);
10248         INIT_LIST_HEAD(&global_trace.events);
10249         INIT_LIST_HEAD(&global_trace.hist_vars);
10250         INIT_LIST_HEAD(&global_trace.err_log);
10251         list_add(&global_trace.list, &ftrace_trace_arrays);
10252
10253         apply_trace_boot_options();
10254
10255         register_snapshot_cmd();
10256
10257         test_can_verify();
10258
10259         return 0;
10260
10261 out_free_savedcmd:
10262         free_saved_cmdlines_buffer(savedcmd);
10263 out_free_temp_buffer:
10264         ring_buffer_free(temp_buffer);
10265 out_rm_hp_state:
10266         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10267 out_free_cpumask:
10268         free_cpumask_var(global_trace.tracing_cpumask);
10269 out_free_buffer_mask:
10270         free_cpumask_var(tracing_buffer_mask);
10271 out:
10272         return ret;
10273 }
10274
10275 void __init ftrace_boot_snapshot(void)
10276 {
10277         if (snapshot_at_boot) {
10278                 tracing_snapshot();
10279                 internal_trace_puts("** Boot snapshot taken **\n");
10280         }
10281 }
10282
10283 void __init early_trace_init(void)
10284 {
10285         if (tracepoint_printk) {
10286                 tracepoint_print_iter =
10287                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10288                 if (MEM_FAIL(!tracepoint_print_iter,
10289                              "Failed to allocate trace iterator\n"))
10290                         tracepoint_printk = 0;
10291                 else
10292                         static_key_enable(&tracepoint_printk_key.key);
10293         }
10294         tracer_alloc_buffers();
10295
10296         init_events();
10297 }
10298
10299 void __init trace_init(void)
10300 {
10301         trace_event_init();
10302 }
10303
10304 __init static void clear_boot_tracer(void)
10305 {
10306         /*
10307          * The default bootup tracer name is stored in an init section.
10308          * This function is called late in init. If the boot tracer was
10309          * never found and registered, clear the pointer out, to prevent
10310          * a later registration from accessing memory that is
10311          * about to be freed.
10312          */
10313         if (!default_bootup_tracer)
10314                 return;
10315
10316         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10317                default_bootup_tracer);
10318         default_bootup_tracer = NULL;
10319 }
10320
10321 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10322 __init static void tracing_set_default_clock(void)
10323 {
10324         /* sched_clock_stable() is determined in late_initcall */
10325         if (!trace_boot_clock && !sched_clock_stable()) {
10326                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10327                         pr_warn("Can not set tracing clock due to lockdown\n");
10328                         return;
10329                 }
10330
10331                 printk(KERN_WARNING
10332                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10333                        "If you want to keep using the local clock, then add:\n"
10334                        "  \"trace_clock=local\"\n"
10335                        "on the kernel command line\n");
10336                 tracing_set_clock(&global_trace, "global");
10337         }
10338 }
10339 #else
10340 static inline void tracing_set_default_clock(void) { }
10341 #endif
10342
10343 __init static int late_trace_init(void)
10344 {
10345         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10346                 static_key_disable(&tracepoint_printk_key.key);
10347                 tracepoint_printk = 0;
10348         }
10349
10350         tracing_set_default_clock();
10351         clear_boot_tracer();
10352         return 0;
10353 }
10354
10355 late_initcall_sync(late_trace_init);