1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is cleared to zero only when the
114  * initialization of the tracer is successful. That is the only place
115  * that clears it.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
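/*
 * Example (illustrative, not from the original file): the boot parameters
 * handled above can be combined on the kernel command line, e.g.
 *
 *	ftrace=function_graph trace_clock=global trace_options=stacktrace
 *	ftrace_dump_on_oops=orig_cpu tp_printk
 *
 * "function_graph" is just one of the built-in tracer names; "ftrace="
 * accepts any registered tracer name.
 */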
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
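/*
 * Example (illustrative): the "+ 500" above rounds to the nearest
 * microsecond rather than truncating, e.g. ns2usecs(1499) == 1 but
 * ns2usecs(1500) == 2.
 */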
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are inserting the export into the list, but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer inserted into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
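/*
 * Example (illustrative sketch, not part of the original file): a minimal
 * trace_export user in the style of the STM ftrace bridge. The names
 * my_export_write() and my_transport_write() are made up for the example;
 * only register_ftrace_export()/unregister_ftrace_export(), struct
 * trace_export and the TRACE_EXPORT_* flags come from the real API.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		my_transport_write(entry, size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */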
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * When adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork, where tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which happens on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (pid + 1, so that a pid of zero can be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
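/*
 * Example (illustrative sketch): trace_pid_start(), trace_pid_next() and
 * trace_pid_show() are meant to back a seq_file for a pid-filter file.
 * The wrapper names below are hypothetical; only the trace_pid_*()
 * helpers and struct seq_operations are real.
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,	// locate pid_list, call trace_pid_start()
 *		.next	= example_pid_next,	// call trace_pid_next() on that pid_list
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */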
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all-or-nothing
698          * operation: a new array is created whenever the user adds new
699          * pids. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
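/*
 * Example (illustrative): this helper backs the pid-filter files in tracefs,
 * so from user space the list is typically updated with something like
 *
 *	echo 123 456 > /sys/kernel/tracing/set_event_pid
 *
 * and an empty write (no pids parsed) clears the filter entirely.
 */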
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" so that it can be used in fast paths such
799  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If a dump on oops happens, it will be much appreciated
821  * not to have to wait for all that output. Anyway, this is
822  * configurable at both boot time and run time.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a linked list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * Serialize access to the ring buffer.
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (which are returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be rewritten
847  *      by the events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multiple processes to access different CPU ring
852  * buffers concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
912 static inline void trace_access_lock_init(void)
913 {
914 }
915
916 #endif
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929                                         unsigned int trace_ctx,
930                                         int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934                                       struct trace_buffer *buffer,
935                                       unsigned long trace_ctx,
936                                       int skip, struct pt_regs *regs)
937 {
938 }
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff), that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races of where it gets disabled but we still do a record.
975          * races where it gets disabled but we still do a record.
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
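/*
 * Example (illustrative): kernel code normally reaches __trace_puts() or
 * __trace_bputs() through the trace_puts() macro, which picks the backend
 * based on whether the string is a build-time constant:
 *
 *	trace_puts("reached the fast path\n");
 */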
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:         The tracing instance to snapshot
1168  * @cond_data:  The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot(), except that the snapshot is
1171  * conditional: the snapshot will only happen if the
1172  * cond_snapshot.update() implementation, when handed the cond_data,
1173  * returns true. In other words, the trace array's cond_snapshot
1174  * update() operation uses the cond_data to determine whether the
1175  * snapshot should be taken and, if so, presumably saves the cond_data
1176  * along with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180         tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                         struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer. Instead, we resize it because
1239          * the max_tr ring buffer has some state (e.g. ring->clock) and
1240          * we want to preserve it.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283         int ret;
1284
1285         ret = tracing_alloc_snapshot();
1286         if (ret < 0)
1287                 return;
1288
1289         tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
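/*
 * Example (illustrative): a typical debugging pattern is to allocate the
 * spare buffer once from a context that may sleep, then trigger swaps from
 * the hot path being debugged:
 *
 *	tracing_alloc_snapshot();	// or tracing_snapshot_alloc() if a
 *					// first snapshot is wanted right away
 *	...
 *	tracing_snapshot();		// swap live buffer and snapshot
 *
 * The captured data can then be read from the "snapshot" file in tracefs.
 */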
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
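/*
 * Example (illustrative sketch): the update() callback decides at
 * tracing_snapshot_cond() time whether the swap really happens. The names
 * my_cond_update(), my_data and the threshold field are made up here.
 *
 *	static bool my_cond_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_data *d = cond_data;
 *
 *		return d->value > d->threshold;	// snapshot only when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &data, my_cond_update);
 *	...
 *	tracing_snapshot_cond(tr, &data);	// may or may not snapshot
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */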
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * races where it gets disabled but we still do a record.
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
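/*
 * Example (illustrative): memparse() accepts the usual K/M/G suffixes, so the
 * per-CPU buffer size can be set at boot with e.g.
 *
 *	trace_buf_size=10M
 */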
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520         return nsecs / 1000;
1521 }
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534         TRACE_FLAGS
1535         NULL
1536 };
1537
1538 static struct {
1539         u64 (*func)(void);
1540         const char *name;
1541         int in_ns;              /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543         { trace_clock_local,            "local",        1 },
1544         { trace_clock_global,           "global",       1 },
1545         { trace_clock_counter,          "counter",      0 },
1546         { trace_clock_jiffies,          "uptime",       0 },
1547         { trace_clock,                  "perf",         1 },
1548         { ktime_get_mono_fast_ns,       "mono",         1 },
1549         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550         { ktime_get_boot_fast_ns,       "boot",         1 },
1551         ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598         size_t cnt, loff_t *ppos)
1599 {
1600         char ch;
1601         size_t read = 0;
1602         ssize_t ret;
1603
1604         if (!*ppos)
1605                 trace_parser_clear(parser);
1606
1607         ret = get_user(ch, ubuf++);
1608         if (ret)
1609                 goto out;
1610
1611         read++;
1612         cnt--;
1613
1614         /*
1615          * The parser is not finished with the last write,
1616          * continue reading the user input without skipping spaces.
1617          */
1618         if (!parser->cont) {
1619                 /* skip white space */
1620                 while (cnt && isspace(ch)) {
1621                         ret = get_user(ch, ubuf++);
1622                         if (ret)
1623                                 goto out;
1624                         read++;
1625                         cnt--;
1626                 }
1627
1628                 parser->idx = 0;
1629
1630                 /* only spaces were written */
1631                 if (isspace(ch) || !ch) {
1632                         *ppos += read;
1633                         ret = read;
1634                         goto out;
1635                 }
1636         }
1637
1638         /* read the non-space input */
1639         while (cnt && !isspace(ch) && ch) {
1640                 if (parser->idx < parser->size - 1)
1641                         parser->buffer[parser->idx++] = ch;
1642                 else {
1643                         ret = -EINVAL;
1644                         goto out;
1645                 }
1646                 ret = get_user(ch, ubuf++);
1647                 if (ret)
1648                         goto out;
1649                 read++;
1650                 cnt--;
1651         }
1652
1653         /* We either got finished input or we have to wait for another call. */
1654         if (isspace(ch) || !ch) {
1655                 parser->buffer[parser->idx] = 0;
1656                 parser->cont = false;
1657         } else if (parser->idx < parser->size - 1) {
1658                 parser->cont = true;
1659                 parser->buffer[parser->idx++] = ch;
1660                 /* Make sure the parsed string always terminates with '\0'. */
1661                 parser->buffer[parser->idx] = 0;
1662         } else {
1663                 ret = -EINVAL;
1664                 goto out;
1665         }
1666
1667         *ppos += read;
1668         ret = read;
1669
1670 out:
1671         return ret;
1672 }
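/*
 * Usage sketch (illustrative only): a tracefs ->write() handler usually
 * drives trace_get_user() one whitespace-separated token at a time.
 * The names SOME_TOKEN_MAX and handle_token() below are placeholders;
 * see ftrace_regex_write() in ftrace.c for a real caller.
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, SOME_TOKEN_MAX))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		handle_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */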
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
1691 unsigned long __read_mostly     tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
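/*
 * The resulting notification chain is:
 *   latency_fsnotify() -> irq_work -> latency_fsnotify_workfn_irq()
 *   -> queue_work() -> latency_fsnotify_workfn() -> fsnotify_inode(),
 * deferring the fsnotify call to a process context where it is safe.
 */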
1746
1747 /*
1748  * LATENCY_FS_NOTIFY above stands for:
1749  *  (CONFIG_TRACER_MAX_TRACE || CONFIG_HWLAT_TRACER) && CONFIG_FSNOTIFY
1750  */
1751 #else
1752
1753 #define trace_create_maxlat_file(tr, d_tracer)                          \
1754         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                           &tr->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this task's comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: trace array to snapshot
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: trace array to snapshot
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static; they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902
1903 struct trace_selftests {
1904         struct list_head                list;
1905         struct tracer                   *type;
1906 };
1907
1908 static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled, so
2014                  * let's make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045         return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051 static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /* allocate a dummy tracer_flags */
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* disable other selftests, since this tracer will break them. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
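/*
 * Registration sketch (illustrative only; "foo" and the callbacks are
 * placeholders): a tracer plugin typically registers itself from an
 * __init function, e.g.:
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_tracer_init,
 *		.reset	= foo_tracer_reset,
 *	};
 *
 *	static __init int init_foo_tracer(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_tracer);
 *
 * See the function or irqsoff tracers for real examples.
 */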
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209         unsigned *map_cmdline_to_pid;
2210         unsigned cmdline_num;
2211         int cmdline_idx;
2212         char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
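/*
 * savedcmd is a small, fixed-size cache of task comms:
 *  - map_pid_to_cmdline[] is indexed by pid & (PID_MAX_DEFAULT - 1) and
 *    holds the slot in saved_cmdlines[] (or NO_CMDLINE_MAP),
 *  - map_cmdline_to_pid[] is the reverse map, used to detect that a slot
 *    has since been reused by a different pid,
 *  - saved_cmdlines[] holds cmdline_num comms of TASK_COMM_LEN bytes,
 *    recycled round-robin via cmdline_idx.
 */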
2215
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227                                     struct saved_cmdlines_buffer *s)
2228 {
2229         s->map_cmdline_to_pid = kmalloc_array(val,
2230                                               sizeof(*s->map_cmdline_to_pid),
2231                                               GFP_KERNEL);
2232         if (!s->map_cmdline_to_pid)
2233                 return -ENOMEM;
2234
2235         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236         if (!s->saved_cmdlines) {
2237                 kfree(s->map_cmdline_to_pid);
2238                 return -ENOMEM;
2239         }
2240
2241         s->cmdline_idx = 0;
2242         s->cmdline_num = val;
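	/*
	 * NO_CMDLINE_MAP is UINT_MAX, so memset()ing every byte to 0xff
	 * leaves each 32-bit slot equal to NO_CMDLINE_MAP.
	 */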
2243         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244                sizeof(s->map_pid_to_cmdline));
2245         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246                val * sizeof(*s->map_cmdline_to_pid));
2247
2248         return 0;
2249 }
2250
2251 static int trace_create_savedcmd(void)
2252 {
2253         int ret;
2254
2255         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256         if (!savedcmd)
2257                 return -ENOMEM;
2258
2259         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260         if (ret < 0) {
2261                 kfree(savedcmd);
2262                 savedcmd = NULL;
2263                 return -ENOMEM;
2264         }
2265
2266         return 0;
2267 }
2268
2269 int is_tracing_stopped(void)
2270 {
2271         return global_trace.stop_count;
2272 }
2273
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282         struct trace_buffer *buffer;
2283         unsigned long flags;
2284
2285         if (tracing_disabled)
2286                 return;
2287
2288         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289         if (--global_trace.stop_count) {
2290                 if (global_trace.stop_count < 0) {
2291                         /* Someone screwed up their debugging */
2292                         WARN_ON_ONCE(1);
2293                         global_trace.stop_count = 0;
2294                 }
2295                 goto out;
2296         }
2297
2298         /* Prevent the buffers from switching */
2299         arch_spin_lock(&global_trace.max_lock);
2300
2301         buffer = global_trace.array_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306         buffer = global_trace.max_buffer.buffer;
2307         if (buffer)
2308                 ring_buffer_record_enable(buffer);
2309 #endif
2310
2311         arch_spin_unlock(&global_trace.max_lock);
2312
2313  out:
2314         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319         struct trace_buffer *buffer;
2320         unsigned long flags;
2321
2322         if (tracing_disabled)
2323                 return;
2324
2325         /* If global, we need to also start the max tracer */
2326         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                 return tracing_start();
2328
2329         raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331         if (--tr->stop_count) {
2332                 if (tr->stop_count < 0) {
2333                         /* Someone screwed up their debugging */
2334                         WARN_ON_ONCE(1);
2335                         tr->stop_count = 0;
2336                 }
2337                 goto out;
2338         }
2339
2340         buffer = tr->array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344  out:
2345         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Lightweight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356         struct trace_buffer *buffer;
2357         unsigned long flags;
2358
2359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360         if (global_trace.stop_count++)
2361                 goto out;
2362
2363         /* Prevent the buffers from switching */
2364         arch_spin_lock(&global_trace.max_lock);
2365
2366         buffer = global_trace.array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371         buffer = global_trace.max_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374 #endif
2375
2376         arch_spin_unlock(&global_trace.max_lock);
2377
2378  out:
2379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384         struct trace_buffer *buffer;
2385         unsigned long flags;
2386
2387         /* If global, we need to also stop the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_stop();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392         if (tr->stop_count++)
2393                 goto out;
2394
2395         buffer = tr->array_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_disable(buffer);
2398
2399  out:
2400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405         unsigned tpid, idx;
2406
2407         /* treat recording of idle task as a success */
2408         if (!tsk->pid)
2409                 return 1;
2410
2411         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413         /*
2414          * It's not the end of the world if we don't get
2415          * the lock, but we also don't want to spin
2416          * nor do we want to disable interrupts,
2417          * so if we miss here, then better luck next time.
2418          */
2419         if (!arch_spin_trylock(&trace_cmdline_lock))
2420                 return 0;
2421
2422         idx = savedcmd->map_pid_to_cmdline[tpid];
2423         if (idx == NO_CMDLINE_MAP) {
2424                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                 savedcmd->cmdline_idx = idx;
2428         }
2429
2430         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441         int tpid;
2442
2443         if (!pid) {
2444                 strcpy(comm, "<idle>");
2445                 return;
2446         }
2447
2448         if (WARN_ON_ONCE(pid < 0)) {
2449                 strcpy(comm, "<XXX>");
2450                 return;
2451         }
2452
2453         tpid = pid & (PID_MAX_DEFAULT - 1);
2454         map = savedcmd->map_pid_to_cmdline[tpid];
2455         if (map != NO_CMDLINE_MAP) {
2456                 tpid = savedcmd->map_cmdline_to_pid[map];
2457                 if (tpid == pid) {
2458                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                         return;
2460                 }
2461         }
2462         strcpy(comm, "<...>");
2463 }
2464
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467         preempt_disable();
2468         arch_spin_lock(&trace_cmdline_lock);
2469
2470         __trace_find_cmdline(pid, comm);
2471
2472         arch_spin_unlock(&trace_cmdline_lock);
2473         preempt_enable();
2474 }
2475
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478         /*
2479          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480          * if we observe a non-NULL tgid_map then we also observe the correct
2481          * tgid_map_max.
2482          */
2483         int *map = smp_load_acquire(&tgid_map);
2484
2485         if (unlikely(!map || pid > tgid_map_max))
2486                 return NULL;
2487
2488         return &map[pid];
2489 }
2490
2491 int trace_find_tgid(int pid)
2492 {
2493         int *ptr = trace_find_tgid_ptr(pid);
2494
2495         return ptr ? *ptr : 0;
2496 }
2497
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500         int *ptr;
2501
2502         /* treat recording of idle task as a success */
2503         if (!tsk->pid)
2504                 return 1;
2505
2506         ptr = trace_find_tgid_ptr(tsk->pid);
2507         if (!ptr)
2508                 return 0;
2509
2510         *ptr = tsk->tgid;
2511         return 1;
2512 }
2513
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                 return true;
2518         if (!__this_cpu_read(trace_taskinfo_save))
2519                 return true;
2520         return false;
2521 }
2522
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532         bool done;
2533
2534         if (tracing_record_taskinfo_skip(flags))
2535                 return;
2536
2537         /*
2538          * Record as much task information as possible. If some fail, continue
2539          * to try to record the others.
2540          */
2541         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544         /* If recording any information failed, retry again soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                           struct task_struct *next, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576         /* If recording any information failed, retry again soon. */
2577         if (!done)
2578                 return;
2579
2580         __this_cpu_write(trace_taskinfo_save, false);
2581 }
2582
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601         return trace_seq_has_overflowed(s) ?
2602                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
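/*
 * Typical use in an event's output callback (sketch; "foo" is a
 * placeholder, not a real event):
 *
 *	static enum print_line_t foo_print(struct trace_iterator *iter,
 *					   int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */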
2605
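/*
 * Build the packed trace_ctx value: the TRACE_FLAG_* bits computed below
 * go in the upper 16 bits and the low byte of preempt_count() in the
 * bottom bits, which tracing_generic_entry_update() later splits back
 * into entry->flags and entry->preempt_count.
 */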
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608         unsigned int trace_flags = irqs_status;
2609         unsigned int pc;
2610
2611         pc = preempt_count();
2612
2613         if (pc & NMI_MASK)
2614                 trace_flags |= TRACE_FLAG_NMI;
2615         if (pc & HARDIRQ_MASK)
2616                 trace_flags |= TRACE_FLAG_HARDIRQ;
2617         if (in_serving_softirq())
2618                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620         if (tif_need_resched())
2621                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622         if (test_preempt_need_resched())
2623                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624         return (trace_flags << 16) | (pc & 0xff);
2625 }
2626
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629                           int type,
2630                           unsigned long len,
2631                           unsigned int trace_ctx)
2632 {
2633         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per-CPU buffers to
2650  * write the event data into. If the event is then filtered and
2651  * discarded it is simply dropped; otherwise, the entire data is
2652  * committed in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656         struct ring_buffer_event *event;
2657         struct page *page;
2658         int cpu;
2659
2660         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662         if (trace_buffered_event_ref++)
2663                 return;
2664
2665         for_each_tracing_cpu(cpu) {
2666                 page = alloc_pages_node(cpu_to_node(cpu),
2667                                         GFP_KERNEL | __GFP_NORETRY, 0);
2668                 if (!page)
2669                         goto failed;
2670
2671                 event = page_address(page);
2672                 memset(event, 0, sizeof(*event));
2673
2674                 per_cpu(trace_buffered_event, cpu) = event;
2675
2676                 preempt_disable();
2677                 if (cpu == smp_processor_id() &&
2678                     __this_cpu_read(trace_buffered_event) !=
2679                     per_cpu(trace_buffered_event, cpu))
2680                         WARN_ON_ONCE(1);
2681                 preempt_enable();
2682         }
2683
2684         return;
2685  failed:
2686         trace_buffered_event_disable();
2687 }
2688
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691         /* Probably not needed, but do it anyway */
2692         smp_rmb();
2693         this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698         this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711         int cpu;
2712
2713         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                 return;
2717
2718         if (--trace_buffered_event_ref)
2719                 return;
2720
2721         preempt_disable();
2722         /* For each CPU, set the buffer as used. */
2723         smp_call_function_many(tracing_buffer_mask,
2724                                disable_trace_buffered_event, NULL, 1);
2725         preempt_enable();
2726
2727         /* Wait for all current users to finish */
2728         synchronize_rcu();
2729
2730         for_each_tracing_cpu(cpu) {
2731                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                 per_cpu(trace_buffered_event, cpu) = NULL;
2733         }
2734         /*
2735          * Make sure trace_buffered_event is NULL before clearing
2736          * trace_buffered_event_cnt.
2737          */
2738         smp_wmb();
2739
2740         preempt_disable();
2741         /* Do the work on each cpu */
2742         smp_call_function_many(tracing_buffer_mask,
2743                                enable_trace_buffered_event, NULL, 1);
2744         preempt_enable();
2745 }
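/*
 * Enable/disable are reference counted and must be called with
 * event_mutex held; a sketch of the expected pairing (hypothetical
 * caller):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install an event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and later, when the filter is removed:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */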
2746
2747 static struct trace_buffer *temp_buffer;
2748
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                           struct trace_event_file *trace_file,
2752                           int type, unsigned long len,
2753                           unsigned int trace_ctx)
2754 {
2755         struct ring_buffer_event *entry;
2756         struct trace_array *tr = trace_file->tr;
2757         int val;
2758
2759         *current_rb = tr->array_buffer.buffer;
2760
2761         if (!tr->no_filter_buffering_ref &&
2762             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763             (entry = this_cpu_read(trace_buffered_event))) {
2764                 /*
2765                  * Filtering is on, so try to use the per cpu buffer first.
2766                  * This buffer will simulate a ring_buffer_event,
2767                  * where the type_len is zero and the array[0] will
2768                  * hold the full length.
2769                  * (see include/linux/ring_buffer.h for details on
2770                  *  how the ring_buffer_event is structured).
2771                  *
2772                  * Using a temp buffer during filtering and copying it
2773                  * on a matched filter is quicker than writing directly
2774                  * into the ring buffer and then discarding it when
2775                  * it doesn't match. That is because the discard
2776                  * requires several atomic operations to get right.
2777                  * Copying on a match and doing nothing on a failed match
2778                  * is still quicker than writing directly and then having
2779                  * to discard out of the ring buffer on a failed match.
2780                  */
2781                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                 /*
2786                  * Preemption is disabled, but interrupts and NMIs
2787                  * can still come in now. If that happens after
2788                  * the above increment, then it will have to go
2789                  * back to the old method of allocating the event
2790                  * on the ring buffer, and if the filter fails, it
2791                  * will have to call ring_buffer_discard_commit()
2792                  * to remove it.
2793                  *
2794                  * Need to also check the unlikely case that the
2795                  * length is bigger than the temp buffer size.
2796                  * If that happens, then the reserve is pretty much
2797                  * guaranteed to fail, as the ring buffer currently
2798                  * only allows events less than a page. But that may
2799                  * change in the future, so let the ring buffer reserve
2800                  * handle the failure in that case.
2801                  */
2802                 if (val == 1 && likely(len <= max_len)) {
2803                         trace_event_setup(entry, type, trace_ctx);
2804                         entry->array[0] = len;
2805                         return entry;
2806                 }
2807                 this_cpu_dec(trace_buffered_event_cnt);
2808         }
2809
2810         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                             trace_ctx);
2812         /*
2813          * If tracing is off, but we have triggers enabled,
2814          * we still need to look at the event data. Use the temp_buffer
2815          * to store the trace event for the trigger to use. It's recursion
2816          * safe and will not be recorded anywhere.
2817          */
2818         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                 *current_rb = temp_buffer;
2820                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                     trace_ctx);
2822         }
2823         return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832         struct trace_event_call *event_call;
2833         struct trace_event_file *file;
2834         struct trace_event *event;
2835         unsigned long flags;
2836         struct trace_iterator *iter = tracepoint_print_iter;
2837
2838         /* We should never get here if iter is NULL */
2839         if (WARN_ON_ONCE(!iter))
2840                 return;
2841
2842         event_call = fbuffer->trace_file->event_call;
2843         if (!event_call || !event_call->event.funcs ||
2844             !event_call->event.funcs->trace)
2845                 return;
2846
2847         file = fbuffer->trace_file;
2848         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850              !filter_match_preds(file->filter, fbuffer->entry)))
2851                 return;
2852
2853         event = &fbuffer->trace_file->event_call->event;
2854
2855         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856         trace_seq_init(&iter->seq);
2857         iter->ent = fbuffer->entry;
2858         event_call->event.funcs->trace(iter, 0, event);
2859         trace_seq_putc(&iter->seq, 0);
2860         printk("%s", iter->seq.buffer);
2861
2862         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                              void *buffer, size_t *lenp,
2867                              loff_t *ppos)
2868 {
2869         int save_tracepoint_printk;
2870         int ret;
2871
2872         mutex_lock(&tracepoint_printk_mutex);
2873         save_tracepoint_printk = tracepoint_printk;
2874
2875         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877         /*
2878          * This will force exiting early, as tracepoint_printk
2879          * is always zero when tracepoint_print_iter is not allocated.
2880          */
2881         if (!tracepoint_print_iter)
2882                 tracepoint_printk = 0;
2883
2884         if (save_tracepoint_printk == tracepoint_printk)
2885                 goto out;
2886
2887         if (tracepoint_printk)
2888                 static_key_enable(&tracepoint_printk_key.key);
2889         else
2890                 static_key_disable(&tracepoint_printk_key.key);
2891
2892  out:
2893         mutex_unlock(&tracepoint_printk_mutex);
2894
2895         return ret;
2896 }
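/*
 * Note: this handler is wired up as the kernel.tracepoint_printk sysctl
 * (/proc/sys/kernel/tracepoint_printk), so writing 1 or 0 there toggles
 * the tracepoint_printk_key static key checked in
 * trace_event_buffer_commit() below.
 */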
2897
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900         enum event_trigger_type tt = ETT_NONE;
2901         struct trace_event_file *file = fbuffer->trace_file;
2902
2903         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904                         fbuffer->entry, &tt))
2905                 goto discard;
2906
2907         if (static_key_false(&tracepoint_printk_key.key))
2908                 output_printk(fbuffer);
2909
2910         if (static_branch_unlikely(&trace_event_exports_enabled))
2911                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912
2913         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915
2916 discard:
2917         if (tt)
2918                 event_triggers_post_call(file, tt);
2919
2920 }
2921 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922
2923 /*
2924  * Skip 3:
2925  *
2926  *   trace_buffer_unlock_commit_regs()
2927  *   trace_event_buffer_commit()
2928  *   trace_event_raw_event_xxx()
2929  */
2930 # define STACK_SKIP 3
2931
2932 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933                                      struct trace_buffer *buffer,
2934                                      struct ring_buffer_event *event,
2935                                      unsigned int trace_ctx,
2936                                      struct pt_regs *regs)
2937 {
2938         __buffer_unlock_commit(buffer, event);
2939
2940         /*
2941          * If regs is not set, then skip the necessary functions.
2942          * Note, we can still get here via blktrace, wakeup tracer
2943          * and mmiotrace, but that's ok if they lose a function or
2944          * two. They are not that meaningful.
2945          */
2946         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947         ftrace_trace_userstack(tr, buffer, trace_ctx);
2948 }
2949
2950 /*
2951  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952  */
2953 void
2954 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955                                    struct ring_buffer_event *event)
2956 {
2957         __buffer_unlock_commit(buffer, event);
2958 }
2959
2960 void
2961 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962                parent_ip, unsigned int trace_ctx)
2963 {
2964         struct trace_event_call *call = &event_function;
2965         struct trace_buffer *buffer = tr->array_buffer.buffer;
2966         struct ring_buffer_event *event;
2967         struct ftrace_entry *entry;
2968
2969         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970                                             trace_ctx);
2971         if (!event)
2972                 return;
2973         entry   = ring_buffer_event_data(event);
2974         entry->ip                       = ip;
2975         entry->parent_ip                = parent_ip;
2976
2977         if (!call_filter_check_discard(call, entry, buffer, event)) {
2978                 if (static_branch_unlikely(&trace_function_exports_enabled))
2979                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980                 __buffer_unlock_commit(buffer, event);
2981         }
2982 }
2983
2984 #ifdef CONFIG_STACKTRACE
2985
2986 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987 #define FTRACE_KSTACK_NESTING   4
2988
2989 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990
2991 struct ftrace_stack {
2992         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2993 };
2994
2995
2996 struct ftrace_stacks {
2997         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2998 };
2999
3000 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002
3003 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004                                  unsigned int trace_ctx,
3005                                  int skip, struct pt_regs *regs)
3006 {
3007         struct trace_event_call *call = &event_kernel_stack;
3008         struct ring_buffer_event *event;
3009         unsigned int size, nr_entries;
3010         struct ftrace_stack *fstack;
3011         struct stack_entry *entry;
3012         int stackidx;
3013
3014         /*
3015          * Add one, for this function and the call to stack_trace_save().
3016          * If regs is set, then these functions will not be in the way.
3017          */
3018 #ifndef CONFIG_UNWINDER_ORC
3019         if (!regs)
3020                 skip++;
3021 #endif
3022
3023         preempt_disable_notrace();
3024
3025         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026
3027         /* This should never happen. If it does, yell once and skip */
3028         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029                 goto out;
3030
3031         /*
3032          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033          * interrupt will either see the value pre-increment or
3034          * post-increment. If the interrupt happens pre-increment it will
3035          * have restored the counter when it returns. We just need a
3036          * barrier to keep gcc from moving things around.
3037          */
3038         barrier();
3039
3040         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041         size = ARRAY_SIZE(fstack->calls);
3042
3043         if (regs) {
3044                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045                                                    size, skip);
3046         } else {
3047                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3048         }
3049
3050         size = nr_entries * sizeof(unsigned long);
3051         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3053                                     trace_ctx);
3054         if (!event)
3055                 goto out;
3056         entry = ring_buffer_event_data(event);
3057
3058         memcpy(&entry->caller, fstack->calls, size);
3059         entry->size = nr_entries;
3060
3061         if (!call_filter_check_discard(call, entry, buffer, event))
3062                 __buffer_unlock_commit(buffer, event);
3063
3064  out:
3065         /* Again, don't let gcc optimize things here */
3066         barrier();
3067         __this_cpu_dec(ftrace_stack_reserve);
3068         preempt_enable_notrace();
3069
3070 }
3071
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073                                       struct trace_buffer *buffer,
3074                                       unsigned int trace_ctx,
3075                                       int skip, struct pt_regs *regs)
3076 {
3077         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078                 return;
3079
3080         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081 }
3082
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084                    int skip)
3085 {
3086         struct trace_buffer *buffer = tr->array_buffer.buffer;
3087
3088         if (rcu_is_watching()) {
3089                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090                 return;
3091         }
3092
3093         /*
3094          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095          * but if the above rcu_is_watching() failed, then the NMI
3096          * triggered someplace critical, and rcu_irq_enter() should
3097          * not be called from NMI.
3098          */
3099         if (unlikely(in_nmi()))
3100                 return;
3101
3102         rcu_irq_enter_irqson();
3103         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104         rcu_irq_exit_irqson();
3105 }
3106
3107 /**
3108  * trace_dump_stack - record a stack back trace in the trace buffer
3109  * @skip: Number of functions to skip (helper handlers)
3110  */
3111 void trace_dump_stack(int skip)
3112 {
3113         if (tracing_disabled || tracing_selftest_running)
3114                 return;
3115
3116 #ifndef CONFIG_UNWINDER_ORC
3117         /* Skip 1 to skip this function. */
3118         skip++;
3119 #endif
3120         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3121                              tracing_gen_ctx(), skip, NULL);
3122 }
3123 EXPORT_SYMBOL_GPL(trace_dump_stack);
3124
3125 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126 static DEFINE_PER_CPU(int, user_stack_count);
3127
3128 static void
3129 ftrace_trace_userstack(struct trace_array *tr,
3130                        struct trace_buffer *buffer, unsigned int trace_ctx)
3131 {
3132         struct trace_event_call *call = &event_user_stack;
3133         struct ring_buffer_event *event;
3134         struct userstack_entry *entry;
3135
3136         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137                 return;
3138
3139         /*
3140          * NMIs cannot handle page faults, even with fixups.
3141          * Saving the user stack can (and often does) fault.
3142          */
3143         if (unlikely(in_nmi()))
3144                 return;
3145
3146         /*
3147          * prevent recursion, since the user stack tracing may
3148          * trigger other kernel events.
3149          */
3150         preempt_disable();
3151         if (__this_cpu_read(user_stack_count))
3152                 goto out;
3153
3154         __this_cpu_inc(user_stack_count);
3155
3156         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157                                             sizeof(*entry), trace_ctx);
3158         if (!event)
3159                 goto out_drop_count;
3160         entry   = ring_buffer_event_data(event);
3161
3162         entry->tgid             = current->tgid;
3163         memset(&entry->caller, 0, sizeof(entry->caller));
3164
3165         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166         if (!call_filter_check_discard(call, entry, buffer, event))
3167                 __buffer_unlock_commit(buffer, event);
3168
3169  out_drop_count:
3170         __this_cpu_dec(user_stack_count);
3171  out:
3172         preempt_enable();
3173 }
3174 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175 static void ftrace_trace_userstack(struct trace_array *tr,
3176                                    struct trace_buffer *buffer,
3177                                    unsigned int trace_ctx)
3178 {
3179 }
3180 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181
3182 #endif /* CONFIG_STACKTRACE */
3183
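/*
 * The timestamp delta does not fit in a single field of the packed
 * func_repeats_entry, so store the low 32 bits and the remaining high
 * bits separately; the output code recombines them when printing.
 */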
3184 static inline void
3185 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186                           unsigned long long delta)
3187 {
3188         entry->bottom_delta_ts = delta & U32_MAX;
3189         entry->top_delta_ts = (delta >> 32);
3190 }
3191
3192 void trace_last_func_repeats(struct trace_array *tr,
3193                              struct trace_func_repeats *last_info,
3194                              unsigned int trace_ctx)
3195 {
3196         struct trace_buffer *buffer = tr->array_buffer.buffer;
3197         struct func_repeats_entry *entry;
3198         struct ring_buffer_event *event;
3199         u64 delta;
3200
3201         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202                                             sizeof(*entry), trace_ctx);
3203         if (!event)
3204                 return;
3205
3206         delta = ring_buffer_event_time_stamp(buffer, event) -
3207                 last_info->ts_last_call;
3208
3209         entry = ring_buffer_event_data(event);
3210         entry->ip = last_info->ip;
3211         entry->parent_ip = last_info->parent_ip;
3212         entry->count = last_info->count;
3213         func_repeats_set_delta_ts(entry, delta);
3214
3215         __buffer_unlock_commit(buffer, event);
3216 }
3217
3218 /* created for use with alloc_percpu */
3219 struct trace_buffer_struct {
3220         int nesting;
3221         char buffer[4][TRACE_BUF_SIZE];
3222 };
3223
3224 static struct trace_buffer_struct *trace_percpu_buffer;
3225
3226 /*
3227  * This allows for lockless recording. If we're nested more deeply than
3228  * the four per-CPU nesting levels allow, this returns NULL.
3229  */
3230 static char *get_trace_buf(void)
3231 {
3232         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233
3234         if (!buffer || buffer->nesting >= 4)
3235                 return NULL;
3236
3237         buffer->nesting++;
3238
3239         /* Interrupts must see nesting incremented before we use the buffer */
3240         barrier();
3241         return &buffer->buffer[buffer->nesting - 1][0];
3242 }
3243
3244 static void put_trace_buf(void)
3245 {
3246         /* Don't let the decrement of nesting leak before this */
3247         barrier();
3248         this_cpu_dec(trace_percpu_buffer->nesting);
3249 }
3250
3251 static int alloc_percpu_trace_buffer(void)
3252 {
3253         struct trace_buffer_struct *buffers;
3254
3255         if (trace_percpu_buffer)
3256                 return 0;
3257
3258         buffers = alloc_percpu(struct trace_buffer_struct);
3259         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260                 return -ENOMEM;
3261
3262         trace_percpu_buffer = buffers;
3263         return 0;
3264 }
3265
3266 static int buffers_allocated;
3267
3268 void trace_printk_init_buffers(void)
3269 {
3270         if (buffers_allocated)
3271                 return;
3272
3273         if (alloc_percpu_trace_buffer())
3274                 return;
3275
3276         /* trace_printk() is for debug use only. Don't use it in production. */
3277
3278         pr_warn("\n");
3279         pr_warn("**********************************************************\n");
3280         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281         pr_warn("**                                                      **\n");
3282         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283         pr_warn("**                                                      **\n");
3284         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285         pr_warn("** unsafe for production use.                           **\n");
3286         pr_warn("**                                                      **\n");
3287         pr_warn("** If you see this message and you are not debugging    **\n");
3288         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289         pr_warn("**                                                      **\n");
3290         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291         pr_warn("**********************************************************\n");
3292
3293         /* Expand the buffers to set size */
3294         tracing_update_buffers();
3295
3296         buffers_allocated = 1;
3297
3298         /*
3299          * trace_printk_init_buffers() can be called by modules.
3300          * If that happens, then we need to start cmdline recording
3301          * directly here. If global_trace.array_buffer.buffer is already
3302          * allocated, then this was called by module code.
3303          */
3304         if (global_trace.array_buffer.buffer)
3305                 tracing_start_cmdline_record();
3306 }
3307 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3308
3309 void trace_printk_start_comm(void)
3310 {
3311         /* Start tracing comms if trace printk is set */
3312         if (!buffers_allocated)
3313                 return;
3314         tracing_start_cmdline_record();
3315 }
3316
3317 static void trace_printk_start_stop_comm(int enabled)
3318 {
3319         if (!buffers_allocated)
3320                 return;
3321
3322         if (enabled)
3323                 tracing_start_cmdline_record();
3324         else
3325                 tracing_stop_cmdline_record();
3326 }
3327
3328 /**
3329  * trace_vbprintk - write binary msg to tracing buffer
3330  * @ip:    The address of the caller
3331  * @fmt:   The string format to write to the buffer
3332  * @args:  Arguments for @fmt
3333  */
3334 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335 {
3336         struct trace_event_call *call = &event_bprint;
3337         struct ring_buffer_event *event;
3338         struct trace_buffer *buffer;
3339         struct trace_array *tr = &global_trace;
3340         struct bprint_entry *entry;
3341         unsigned int trace_ctx;
3342         char *tbuffer;
3343         int len = 0, size;
3344
3345         if (unlikely(tracing_selftest_running || tracing_disabled))
3346                 return 0;
3347
3348         /* Don't pollute graph traces with trace_vprintk internals */
3349         pause_graph_tracing();
3350
3351         trace_ctx = tracing_gen_ctx();
3352         preempt_disable_notrace();
3353
3354         tbuffer = get_trace_buf();
3355         if (!tbuffer) {
3356                 len = 0;
3357                 goto out_nobuffer;
3358         }
3359
3360         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361
3362         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363                 goto out_put;
3364
3365         size = sizeof(*entry) + sizeof(u32) * len;
3366         buffer = tr->array_buffer.buffer;
3367         ring_buffer_nest_start(buffer);
3368         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369                                             trace_ctx);
3370         if (!event)
3371                 goto out;
3372         entry = ring_buffer_event_data(event);
3373         entry->ip                       = ip;
3374         entry->fmt                      = fmt;
3375
3376         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377         if (!call_filter_check_discard(call, entry, buffer, event)) {
3378                 __buffer_unlock_commit(buffer, event);
3379                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380         }
3381
3382 out:
3383         ring_buffer_nest_end(buffer);
3384 out_put:
3385         put_trace_buf();
3386
3387 out_nobuffer:
3388         preempt_enable_notrace();
3389         unpause_graph_tracing();
3390
3391         return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
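/*
 * Note that trace_vbprintk() stores the format pointer and the binary
 * arguments produced by vbin_printf() rather than a rendered string;
 * formatting is deferred until the buffer is read.
 */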
3394
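/*
 * Common backend for trace_array_vprintk() and trace_array_printk_buf():
 * render the message with vscnprintf() into a per-CPU scratch buffer and
 * copy it into a TRACE_PRINT event on @buffer.
 */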
3395 __printf(3, 0)
3396 static int
3397 __trace_array_vprintk(struct trace_buffer *buffer,
3398                       unsigned long ip, const char *fmt, va_list args)
3399 {
3400         struct trace_event_call *call = &event_print;
3401         struct ring_buffer_event *event;
3402         int len = 0, size;
3403         struct print_entry *entry;
3404         unsigned int trace_ctx;
3405         char *tbuffer;
3406
3407         if (tracing_disabled || tracing_selftest_running)
3408                 return 0;
3409
3410         /* Don't pollute graph traces with trace_vprintk internals */
3411         pause_graph_tracing();
3412
3413         trace_ctx = tracing_gen_ctx();
3414         preempt_disable_notrace();
3415
3417         tbuffer = get_trace_buf();
3418         if (!tbuffer) {
3419                 len = 0;
3420                 goto out_nobuffer;
3421         }
3422
3423         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424
3425         size = sizeof(*entry) + len + 1;
3426         ring_buffer_nest_start(buffer);
3427         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428                                             trace_ctx);
3429         if (!event)
3430                 goto out;
3431         entry = ring_buffer_event_data(event);
3432         entry->ip = ip;
3433
3434         memcpy(&entry->buf, tbuffer, len + 1);
3435         if (!call_filter_check_discard(call, entry, buffer, event)) {
3436                 __buffer_unlock_commit(buffer, event);
3437                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438         }
3439
3440 out:
3441         ring_buffer_nest_end(buffer);
3442         put_trace_buf();
3443
3444 out_nobuffer:
3445         preempt_enable_notrace();
3446         unpause_graph_tracing();
3447
3448         return len;
3449 }
3450
3451 __printf(3, 0)
3452 int trace_array_vprintk(struct trace_array *tr,
3453                         unsigned long ip, const char *fmt, va_list args)
3454 {
3455         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456 }
3457
3458 /**
3459  * trace_array_printk - Print a message to a specific instance
3460  * @tr: The instance trace_array descriptor
3461  * @ip: The instruction pointer that this is called from.
3462  * @fmt: The format to print (printf format)
3463  *
3464  * If a subsystem sets up its own instance, it may printk strings
3465  * into its tracing instance buffer using this function. Note, this
3466  * function will not write into the top level buffer (use
3467  * trace_printk() for that), as writing into the top level buffer
3468  * should only be done with events that can be individually disabled.
3469  * trace_printk() is only used for debugging a kernel, and should
3470  * never be incorporated into normal use.
3471  *
3472  * trace_array_printk() can be used, as it will not add noise to the
3473  * top level tracing buffer.
3474  *
3475  * Note, trace_array_init_printk() must be called on @tr before this
3476  * can be used.
3477  */
3478 __printf(3, 0)
3479 int trace_array_printk(struct trace_array *tr,
3480                        unsigned long ip, const char *fmt, ...)
3481 {
3482         int ret;
3483         va_list ap;
3484
3485         if (!tr)
3486                 return -ENOENT;
3487
3488         /* This is only allowed for created instances */
3489         if (tr == &global_trace)
3490                 return 0;
3491
3492         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493                 return 0;
3494
3495         va_start(ap, fmt);
3496         ret = trace_array_vprintk(tr, ip, fmt, ap);
3497         va_end(ap);
3498         return ret;
3499 }
3500 EXPORT_SYMBOL_GPL(trace_array_printk);
3501
3502 /**
3503  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504  * @tr: The trace array to initialize the buffers for
3505  *
3506  * As trace_array_printk() only writes into instances, such calls are
3507  * OK to leave in the kernel (unlike trace_printk()). This needs to be
3508  * called before trace_array_printk() can be used on a trace_array.
3509  */
3510 int trace_array_init_printk(struct trace_array *tr)
3511 {
3512         if (!tr)
3513                 return -ENOENT;
3514
3515         /* This is only allowed for created instances */
3516         if (tr == &global_trace)
3517                 return -EINVAL;
3518
3519         return alloc_percpu_trace_buffer();
3520 }
3521 EXPORT_SYMBOL_GPL(trace_array_init_printk);
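/*
 * Illustrative sketch only; the instance name and message are made up:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "reset took %d ms\n", ms);
 */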
3522
3523 __printf(3, 4)
3524 int trace_array_printk_buf(struct trace_buffer *buffer,
3525                            unsigned long ip, const char *fmt, ...)
3526 {
3527         int ret;
3528         va_list ap;
3529
3530         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531                 return 0;
3532
3533         va_start(ap, fmt);
3534         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535         va_end(ap);
3536         return ret;
3537 }
3538
3539 __printf(2, 0)
3540 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541 {
3542         return trace_array_vprintk(&global_trace, ip, fmt, args);
3543 }
3544 EXPORT_SYMBOL_GPL(trace_vprintk);
3545
3546 static void trace_iterator_increment(struct trace_iterator *iter)
3547 {
3548         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549
3550         iter->idx++;
3551         if (buf_iter)
3552                 ring_buffer_iter_advance(buf_iter);
3553 }
3554
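/*
 * Peek at the next entry on @cpu without consuming it, filling in *ts and,
 * when requested, *lost_events.  iter->ent_size is updated as a side effect.
 */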
3555 static struct trace_entry *
3556 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557                 unsigned long *lost_events)
3558 {
3559         struct ring_buffer_event *event;
3560         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561
3562         if (buf_iter) {
3563                 event = ring_buffer_iter_peek(buf_iter, ts);
3564                 if (lost_events)
3565                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566                                 (unsigned long)-1 : 0;
3567         } else {
3568                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569                                          lost_events);
3570         }
3571
3572         if (event) {
3573                 iter->ent_size = ring_buffer_event_length(event);
3574                 return ring_buffer_event_data(event);
3575         }
3576         iter->ent_size = 0;
3577         return NULL;
3578 }
3579
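/*
 * Scan the tracing CPUs (or just iter->cpu_file when one is selected) and
 * return the pending entry with the earliest timestamp, reporting its CPU,
 * timestamp and lost-event count to the caller.
 */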
3580 static struct trace_entry *
3581 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582                   unsigned long *missing_events, u64 *ent_ts)
3583 {
3584         struct trace_buffer *buffer = iter->array_buffer->buffer;
3585         struct trace_entry *ent, *next = NULL;
3586         unsigned long lost_events = 0, next_lost = 0;
3587         int cpu_file = iter->cpu_file;
3588         u64 next_ts = 0, ts;
3589         int next_cpu = -1;
3590         int next_size = 0;
3591         int cpu;
3592
3593         /*
3594          * If we are in a per_cpu trace file, don't bother iterating
3595          * over all CPUs; peek directly at that CPU.
3596          */
3597         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3599                         return NULL;
3600                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601                 if (ent_cpu)
3602                         *ent_cpu = cpu_file;
3603
3604                 return ent;
3605         }
3606
3607         for_each_tracing_cpu(cpu) {
3608
3609                 if (ring_buffer_empty_cpu(buffer, cpu))
3610                         continue;
3611
3612                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613
3614                 /*
3615                  * Pick the entry with the smallest timestamp:
3616                  */
3617                 if (ent && (!next || ts < next_ts)) {
3618                         next = ent;
3619                         next_cpu = cpu;
3620                         next_ts = ts;
3621                         next_lost = lost_events;
3622                         next_size = iter->ent_size;
3623                 }
3624         }
3625
3626         iter->ent_size = next_size;
3627
3628         if (ent_cpu)
3629                 *ent_cpu = next_cpu;
3630
3631         if (ent_ts)
3632                 *ent_ts = next_ts;
3633
3634         if (missing_events)
3635                 *missing_events = next_lost;
3636
3637         return next;
3638 }
3639
3640 #define STATIC_FMT_BUF_SIZE     128
3641 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642
3643 static char *trace_iter_expand_format(struct trace_iterator *iter)
3644 {
3645         char *tmp;
3646
3647         /*
3648          * iter->tr is NULL when used with tp_printk, in which case this
3649          * can be called from a context where krealloc() is not safe.
3650          */
3651         if (!iter->tr || iter->fmt == static_fmt_buf)
3652                 return NULL;
3653
3654         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655                        GFP_KERNEL);
3656         if (tmp) {
3657                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658                 iter->fmt = tmp;
3659         }
3660
3661         return tmp;
3662 }
3663
3664 /* Returns true if the string is safe to dereference from an event */
3665 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666 {
3667         unsigned long addr = (unsigned long)str;
3668         struct trace_event *trace_event;
3669         struct trace_event_call *event;
3670
3671         /* OK if part of the event data */
3672         if ((addr >= (unsigned long)iter->ent) &&
3673             (addr < (unsigned long)iter->ent + iter->ent_size))
3674                 return true;
3675
3676         /* OK if part of the temp seq buffer */
3677         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679                 return true;
3680
3681         /* Core rodata can not be freed */
3682         if (is_kernel_rodata(addr))
3683                 return true;
3684
3685         if (trace_is_tracepoint_string(str))
3686                 return true;
3687
3688         /*
3689          * Now this could be a module event, referencing core module
3690          * data, which is OK.
3691          */
3692         if (!iter->ent)
3693                 return false;
3694
3695         trace_event = ftrace_find_event(iter->ent->type);
3696         if (!trace_event)
3697                 return false;
3698
3699         event = container_of(trace_event, struct trace_event_call, event);
3700         if (!event->mod)
3701                 return false;
3702
3703         /* Would rather have rodata, but this will suffice */
3704         if (within_module_core(addr, event->mod))
3705                 return true;
3706
3707         return false;
3708 }
3709
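/* NUL-terminate the seq buffer and return its raw contents, for diagnostics. */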
3710 static const char *show_buffer(struct trace_seq *s)
3711 {
3712         struct seq_buf *seq = &s->seq;
3713
3714         seq_buf_terminate(seq);
3715
3716         return seq->buffer;
3717 }
3718
3719 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720
3721 static int test_can_verify_check(const char *fmt, ...)
3722 {
3723         char buf[16];
3724         va_list ap;
3725         int ret;
3726
3727         /*
3728          * The verifier depends on vsnprintf() modifying the va_list that is
3729          * passed to it, which happens when the va_list is passed by
3730          * reference. Some architectures (like x86_32) pass it by value,
3731          * which means that vsnprintf() does not modify the caller's va_list,
3732          * and the verifier would then need to understand all the values that
3733          * vsnprintf() can consume. If the va_list is passed by value, the
3734          * verifier is disabled.
3735          */
3736         va_start(ap, fmt);
3737         vsnprintf(buf, 16, "%d", ap);
3738         ret = va_arg(ap, int);
3739         va_end(ap);
3740
3741         return ret;
3742 }
3743
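/*
 * With test_can_verify_check("%d %d", 0, 1): if vsnprintf() advanced the
 * caller's va_list while consuming the first "%d", the following va_arg()
 * returns the second argument (1) and the verifier stays enabled; if the
 * va_list was passed by value, it returns the first argument (0) and the
 * verifier is disabled below.
 */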
3744 static void test_can_verify(void)
3745 {
3746         if (!test_can_verify_check("%d %d", 0, 1)) {
3747                 pr_info("trace event string verifier disabled\n");
3748                 static_branch_inc(&trace_no_verify);
3749         }
3750 }
3751
3752 /**
3753  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754  * @iter: The iterator that holds the seq buffer and the event being printed
3755  * @fmt: The format used to print the event
3756  * @ap: The va_list holding the data to print from @fmt.
3757  *
3758  * This writes the data into the @iter->seq buffer using the data from
3759  * @fmt and @ap. If the format has a %s, then the source of the string
3760  * is examined to make sure it is safe to print, otherwise it will
3761  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3762  * pointer.
3763  */
3764 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765                          va_list ap)
3766 {
3767         const char *p = fmt;
3768         const char *str;
3769         int i, j;
3770
3771         if (WARN_ON_ONCE(!fmt))
3772                 return;
3773
3774         if (static_branch_unlikely(&trace_no_verify))
3775                 goto print;
3776
3777         /* Don't bother checking when doing a ftrace_dump() */
3778         if (iter->fmt == static_fmt_buf)
3779                 goto print;
3780
3781         while (*p) {
3782                 bool star = false;
3783                 int len = 0;
3784
3785                 j = 0;
3786
3787                 /* We only care about %s and variants */
3788                 for (i = 0; p[i]; i++) {
3789                         if (i + 1 >= iter->fmt_size) {
3790                                 /*
3791                                  * If we can't expand the copy buffer,
3792                                  * just print it.
3793                                  */
3794                                 if (!trace_iter_expand_format(iter))
3795                                         goto print;
3796                         }
3797
3798                         if (p[i] == '\\' && p[i+1]) {
3799                                 i++;
3800                                 continue;
3801                         }
3802                         if (p[i] == '%') {
3803                                 /* Need to test cases like %08.*s */
3804                                 for (j = 1; p[i+j]; j++) {
3805                                         if (isdigit(p[i+j]) ||
3806                                             p[i+j] == '.')
3807                                                 continue;
3808                                         if (p[i+j] == '*') {
3809                                                 star = true;
3810                                                 continue;
3811                                         }
3812                                         break;
3813                                 }
3814                                 if (p[i+j] == 's')
3815                                         break;
3816                                 star = false;
3817                         }
3818                         j = 0;
3819                 }
3820                 /* If no %s found then just print normally */
3821                 if (!p[i])
3822                         break;
3823
3824                 /* Copy up to the %s, and print that */
3825                 strncpy(iter->fmt, p, i);
3826                 iter->fmt[i] = '\0';
3827                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828
3829                 if (star)
3830                         len = va_arg(ap, int);
3831
3832                 /* The ap now points to the string data of the %s */
3833                 str = va_arg(ap, const char *);
3834
3835                 /*
3836                  * If you hit this warning, it is likely that the
3837                  * trace event in question used %s on a string that
3838                  * was saved at the time of the event, but may not be
3839                  * around when the trace is read. Use __string(),
3840                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841                  * instead. See samples/trace_events/trace-events-sample.h
3842                  * for reference.
3843                  */
3844                 if (WARN_ONCE(!trace_safe_str(iter, str),
3845                               "fmt: '%s' current_buffer: '%s'",
3846                               fmt, show_buffer(&iter->seq))) {
3847                         int ret;
3848
3849                         /* Try to safely read the string */
3850                         if (star) {
3851                                 if (len + 1 > iter->fmt_size)
3852                                         len = iter->fmt_size - 1;
3853                                 if (len < 0)
3854                                         len = 0;
3855                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856                                 iter->fmt[len] = 0;
3857                                 star = false;
3858                         } else {
3859                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860                                                                   iter->fmt_size);
3861                         }
3862                         if (ret < 0)
3863                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3864                         else
3865                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866                                                  str, iter->fmt);
3867                         str = "[UNSAFE-MEMORY]";
3868                         strcpy(iter->fmt, "%s");
3869                 } else {
3870                         strncpy(iter->fmt, p + i, j + 1);
3871                         iter->fmt[j+1] = '\0';
3872                 }
3873                 if (star)
3874                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875                 else
3876                         trace_seq_printf(&iter->seq, iter->fmt, str);
3877
3878                 p += i + j + 1;
3879         }
3880  print:
3881         if (*p)
3882                 trace_seq_vprintf(&iter->seq, p, ap);
3883 }
3884
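/*
 * Rewrite the event format so that "%p" becomes "%px" when pointer hashing
 * (TRACE_ITER_HASH_PTR) is not requested, so real addresses are printed
 * instead of hashed values.  The rewritten format is kept in iter->fmt.
 */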
3885 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886 {
3887         const char *p, *new_fmt;
3888         char *q;
3889
3890         if (WARN_ON_ONCE(!fmt))
3891                 return fmt;
3892
3893         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894                 return fmt;
3895
3896         p = fmt;
3897         new_fmt = q = iter->fmt;
3898         while (*p) {
3899                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900                         if (!trace_iter_expand_format(iter))
3901                                 return fmt;
3902
3903                         q += iter->fmt - new_fmt;
3904                         new_fmt = iter->fmt;
3905                 }
3906
3907                 *q++ = *p++;
3908
3909                 /* Replace %p with %px */
3910                 if (p[-1] == '%') {
3911                         if (p[0] == '%') {
3912                                 *q++ = *p++;
3913                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3914                                 *q++ = *p++;
3915                                 *q++ = 'x';
3916                         }
3917                 }
3918         }
3919         *q = '\0';
3920
3921         return new_fmt;
3922 }
3923
3924 #define STATIC_TEMP_BUF_SIZE    128
3925 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926
3927 /* Find the next real entry, without updating the iterator itself */
3928 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929                                           int *ent_cpu, u64 *ent_ts)
3930 {
3931         /* __find_next_entry will reset ent_size */
3932         int ent_size = iter->ent_size;
3933         struct trace_entry *entry;
3934
3935         /*
3936          * If called from ftrace_dump(), then the iter->temp buffer
3937          * will be the static_temp_buf and not one created by kmalloc().
3938          * If the entry size is greater than that buffer, we cannot
3939          * save it. Just return NULL in that case. This is only
3940          * used to add markers when two consecutive events' time
3941          * stamps have a large delta. See trace_print_lat_context().
3942          */
3943         if (iter->temp == static_temp_buf &&
3944             STATIC_TEMP_BUF_SIZE < ent_size)
3945                 return NULL;
3946
3947         /*
3948          * The __find_next_entry() may call peek_next_entry(), which may
3949          * call ring_buffer_peek() that may make the contents of iter->ent
3950          * undefined. Need to copy iter->ent now.
3951          */
3952         if (iter->ent && iter->ent != iter->temp) {
3953                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955                         void *temp;
3956                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957                         if (!temp)
3958                                 return NULL;
3959                         kfree(iter->temp);
3960                         iter->temp = temp;
3961                         iter->temp_size = iter->ent_size;
3962                 }
3963                 memcpy(iter->temp, iter->ent, iter->ent_size);
3964                 iter->ent = iter->temp;
3965         }
3966         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967         /* Put back the original ent_size */
3968         iter->ent_size = ent_size;
3969
3970         return entry;
3971 }
3972
3973 /* Find the next real entry, and increment the iterator to the next entry */
3974 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975 {
3976         iter->ent = __find_next_entry(iter, &iter->cpu,
3977                                       &iter->lost_events, &iter->ts);
3978
3979         if (iter->ent)
3980                 trace_iterator_increment(iter);
3981
3982         return iter->ent ? iter : NULL;
3983 }
3984
3985 static void trace_consume(struct trace_iterator *iter)
3986 {
3987         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988                             &iter->lost_events);
3989 }
3990
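/* seq_file ->next() callback: advance the iterator until it reaches position *pos. */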
3991 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992 {
3993         struct trace_iterator *iter = m->private;
3994         int i = (int)*pos;
3995         void *ent;
3996
3997         WARN_ON_ONCE(iter->leftover);
3998
3999         (*pos)++;
4000
4001         /* can't go backwards */
4002         if (iter->idx > i)
4003                 return NULL;
4004
4005         if (iter->idx < 0)
4006                 ent = trace_find_next_entry_inc(iter);
4007         else
4008                 ent = iter;
4009
4010         while (ent && iter->idx < i)
4011                 ent = trace_find_next_entry_inc(iter);
4012
4013         iter->pos = *pos;
4014
4015         return ent;
4016 }
4017
4018 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019 {
4020         struct ring_buffer_iter *buf_iter;
4021         unsigned long entries = 0;
4022         u64 ts;
4023
4024         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025
4026         buf_iter = trace_buffer_iter(iter, cpu);
4027         if (!buf_iter)
4028                 return;
4029
4030         ring_buffer_iter_reset(buf_iter);
4031
4032         /*
4033          * With the max latency tracers, it is possible that a reset
4034          * never took place on a cpu. This is evident from the
4035          * timestamp being before the start of the buffer.
4036          */
4037         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038                 if (ts >= iter->array_buffer->time_start)
4039                         break;
4040                 entries++;
4041                 ring_buffer_iter_advance(buf_iter);
4042         }
4043
4044         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045 }
4046
4047 /*
4048  * The current tracer is copied to avoid global locking
4049  * all around.
4050  */
4051 static void *s_start(struct seq_file *m, loff_t *pos)
4052 {
4053         struct trace_iterator *iter = m->private;
4054         struct trace_array *tr = iter->tr;
4055         int cpu_file = iter->cpu_file;
4056         void *p = NULL;
4057         loff_t l = 0;
4058         int cpu;
4059
4060         /*
4061          * Copy the tracer to avoid using a global lock all around.
4062          * iter->trace is a copy of current_trace; the name pointer can
4063          * be compared instead of using strcmp(), as iter->trace->name
4064          * will point to the same string as current_trace->name.
4065          */
4066         mutex_lock(&trace_types_lock);
4067         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068                 *iter->trace = *tr->current_trace;
4069         mutex_unlock(&trace_types_lock);
4070
4071 #ifdef CONFIG_TRACER_MAX_TRACE
4072         if (iter->snapshot && iter->trace->use_max_tr)
4073                 return ERR_PTR(-EBUSY);
4074 #endif
4075
4076         if (*pos != iter->pos) {
4077                 iter->ent = NULL;
4078                 iter->cpu = 0;
4079                 iter->idx = -1;
4080
4081                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082                         for_each_tracing_cpu(cpu)
4083                                 tracing_iter_reset(iter, cpu);
4084                 } else
4085                         tracing_iter_reset(iter, cpu_file);
4086
4087                 iter->leftover = 0;
4088                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089                         ;
4090
4091         } else {
4092                 /*
4093                  * If we overflowed the seq_file before, then we want
4094                  * to just reuse the trace_seq buffer again.
4095                  */
4096                 if (iter->leftover)
4097                         p = iter;
4098                 else {
4099                         l = *pos - 1;
4100                         p = s_next(m, p, &l);
4101                 }
4102         }
4103
4104         trace_event_read_lock();
4105         trace_access_lock(cpu_file);
4106         return p;
4107 }
4108
4109 static void s_stop(struct seq_file *m, void *p)
4110 {
4111         struct trace_iterator *iter = m->private;
4112
4113 #ifdef CONFIG_TRACER_MAX_TRACE
4114         if (iter->snapshot && iter->trace->use_max_tr)
4115                 return;
4116 #endif
4117
4118         trace_access_unlock(iter->cpu_file);
4119         trace_event_read_unlock();
4120 }
4121
4122 static void
4123 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124                       unsigned long *entries, int cpu)
4125 {
4126         unsigned long count;
4127
4128         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129         /*
4130          * If this buffer has skipped entries, then we hold all
4131          * entries for the trace and we need to ignore the
4132          * ones before the buffer's start time stamp.
4133          */
4134         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136                 /* total is the same as the entries */
4137                 *total = count;
4138         } else
4139                 *total = count +
4140                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4141         *entries = count;
4142 }
4143
4144 static void
4145 get_total_entries(struct array_buffer *buf,
4146                   unsigned long *total, unsigned long *entries)
4147 {
4148         unsigned long t, e;
4149         int cpu;
4150
4151         *total = 0;
4152         *entries = 0;
4153
4154         for_each_tracing_cpu(cpu) {
4155                 get_total_entries_cpu(buf, &t, &e, cpu);
4156                 *total += t;
4157                 *entries += e;
4158         }
4159 }
4160
4161 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162 {
4163         unsigned long total, entries;
4164
4165         if (!tr)
4166                 tr = &global_trace;
4167
4168         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169
4170         return entries;
4171 }
4172
4173 unsigned long trace_total_entries(struct trace_array *tr)
4174 {
4175         unsigned long total, entries;
4176
4177         if (!tr)
4178                 tr = &global_trace;
4179
4180         get_total_entries(&tr->array_buffer, &total, &entries);
4181
4182         return entries;
4183 }
4184
4185 static void print_lat_help_header(struct seq_file *m)
4186 {
4187         seq_puts(m, "#                    _------=> CPU#            \n"
4188                     "#                   / _-----=> irqs-off        \n"
4189                     "#                  | / _----=> need-resched    \n"
4190                     "#                  || / _---=> hardirq/softirq \n"
4191                     "#                  ||| / _--=> preempt-depth   \n"
4192                     "#                  |||| /     delay            \n"
4193                     "#  cmd     pid     ||||| time  |   caller      \n"
4194                     "#     \\   /        |||||  \\    |   /         \n");
4195 }
4196
4197 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198 {
4199         unsigned long total;
4200         unsigned long entries;
4201
4202         get_total_entries(buf, &total, &entries);
4203         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204                    entries, total, num_online_cpus());
4205         seq_puts(m, "#\n");
4206 }
4207
4208 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209                                    unsigned int flags)
4210 {
4211         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212
4213         print_event_info(buf, m);
4214
4215         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217 }
4218
4219 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220                                        unsigned int flags)
4221 {
4222         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223         const char *space = "            ";
4224         int prec = tgid ? 12 : 2;
4225
4226         print_event_info(buf, m);
4227
4228         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235 }
4236
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241         struct array_buffer *buf = iter->array_buffer;
4242         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243         struct tracer *type = iter->trace;
4244         unsigned long entries;
4245         unsigned long total;
4246         const char *name = "preemption";
4247
4248         name = type->name;
4249
4250         get_total_entries(buf, &total, &entries);
4251
4252         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253                    name, UTS_RELEASE);
4254         seq_puts(m, "# -----------------------------------"
4255                  "---------------------------------\n");
4256         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258                    nsecs_to_usecs(data->saved_latency),
4259                    entries,
4260                    total,
4261                    buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263                    "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265                    "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267                    "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269                    "preempt_rt",
4270 #else
4271                    "unknown",
4272 #endif
4273                    /* These are reserved for later use */
4274                    0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276         seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278         seq_puts(m, ")\n");
4279 #endif
4280         seq_puts(m, "#    -----------------\n");
4281         seq_printf(m, "#    | task: %.16s-%d "
4282                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283                    data->comm, data->pid,
4284                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285                    data->policy, data->rt_priority);
4286         seq_puts(m, "#    -----------------\n");
4287
4288         if (data->critical_start) {
4289                 seq_puts(m, "#  => started at: ");
4290                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291                 trace_print_seq(m, &iter->seq);
4292                 seq_puts(m, "\n#  => ended at:   ");
4293                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294                 trace_print_seq(m, &iter->seq);
4295                 seq_puts(m, "\n#\n");
4296         }
4297
4298         seq_puts(m, "#\n");
4299 }
4300
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303         struct trace_seq *s = &iter->seq;
4304         struct trace_array *tr = iter->tr;
4305
4306         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307                 return;
4308
4309         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310                 return;
4311
4312         if (cpumask_available(iter->started) &&
4313             cpumask_test_cpu(iter->cpu, iter->started))
4314                 return;
4315
4316         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317                 return;
4318
4319         if (cpumask_available(iter->started))
4320                 cpumask_set_cpu(iter->cpu, iter->started);
4321
4322         /* Don't print started cpu buffer for the first entry of the trace */
4323         if (iter->idx > 1)
4324                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325                                 iter->cpu);
4326 }
4327
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330         struct trace_array *tr = iter->tr;
4331         struct trace_seq *s = &iter->seq;
4332         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333         struct trace_entry *entry;
4334         struct trace_event *event;
4335
4336         entry = iter->ent;
4337
4338         test_cpu_buff_start(iter);
4339
4340         event = ftrace_find_event(entry->type);
4341
4342         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344                         trace_print_lat_context(iter);
4345                 else
4346                         trace_print_context(iter);
4347         }
4348
4349         if (trace_seq_has_overflowed(s))
4350                 return TRACE_TYPE_PARTIAL_LINE;
4351
4352         if (event)
4353                 return event->funcs->trace(iter, sym_flags, event);
4354
4355         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356
4357         return trace_handle_return(s);
4358 }
4359
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362         struct trace_array *tr = iter->tr;
4363         struct trace_seq *s = &iter->seq;
4364         struct trace_entry *entry;
4365         struct trace_event *event;
4366
4367         entry = iter->ent;
4368
4369         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370                 trace_seq_printf(s, "%d %d %llu ",
4371                                  entry->pid, iter->cpu, iter->ts);
4372
4373         if (trace_seq_has_overflowed(s))
4374                 return TRACE_TYPE_PARTIAL_LINE;
4375
4376         event = ftrace_find_event(entry->type);
4377         if (event)
4378                 return event->funcs->raw(iter, 0, event);
4379
4380         trace_seq_printf(s, "%d ?\n", entry->type);
4381
4382         return trace_handle_return(s);
4383 }
4384
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         unsigned char newline = '\n';
4390         struct trace_entry *entry;
4391         struct trace_event *event;
4392
4393         entry = iter->ent;
4394
4395         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4397                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4399                 if (trace_seq_has_overflowed(s))
4400                         return TRACE_TYPE_PARTIAL_LINE;
4401         }
4402
4403         event = ftrace_find_event(entry->type);
4404         if (event) {
4405                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406                 if (ret != TRACE_TYPE_HANDLED)
4407                         return ret;
4408         }
4409
4410         SEQ_PUT_FIELD(s, newline);
4411
4412         return trace_handle_return(s);
4413 }
4414
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417         struct trace_array *tr = iter->tr;
4418         struct trace_seq *s = &iter->seq;
4419         struct trace_entry *entry;
4420         struct trace_event *event;
4421
4422         entry = iter->ent;
4423
4424         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425                 SEQ_PUT_FIELD(s, entry->pid);
4426                 SEQ_PUT_FIELD(s, iter->cpu);
4427                 SEQ_PUT_FIELD(s, iter->ts);
4428                 if (trace_seq_has_overflowed(s))
4429                         return TRACE_TYPE_PARTIAL_LINE;
4430         }
4431
4432         event = ftrace_find_event(entry->type);
4433         return event ? event->funcs->binary(iter, 0, event) :
4434                 TRACE_TYPE_HANDLED;
4435 }
4436
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439         struct ring_buffer_iter *buf_iter;
4440         int cpu;
4441
4442         /* If we are looking at one CPU buffer, only check that one */
4443         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444                 cpu = iter->cpu_file;
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453                 return 1;
4454         }
4455
4456         for_each_tracing_cpu(cpu) {
4457                 buf_iter = trace_buffer_iter(iter, cpu);
4458                 if (buf_iter) {
4459                         if (!ring_buffer_iter_empty(buf_iter))
4460                                 return 0;
4461                 } else {
4462                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                 return 0;
4464                 }
4465         }
4466
4467         return 1;
4468 }
4469
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473         struct trace_array *tr = iter->tr;
4474         unsigned long trace_flags = tr->trace_flags;
4475         enum print_line_t ret;
4476
4477         if (iter->lost_events) {
4478                 if (iter->lost_events == (unsigned long)-1)
4479                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480                                          iter->cpu);
4481                 else
4482                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483                                          iter->cpu, iter->lost_events);
4484                 if (trace_seq_has_overflowed(&iter->seq))
4485                         return TRACE_TYPE_PARTIAL_LINE;
4486         }
4487
4488         if (iter->trace && iter->trace->print_line) {
4489                 ret = iter->trace->print_line(iter);
4490                 if (ret != TRACE_TYPE_UNHANDLED)
4491                         return ret;
4492         }
4493
4494         if (iter->ent->type == TRACE_BPUTS &&
4495                         trace_flags & TRACE_ITER_PRINTK &&
4496                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497                 return trace_print_bputs_msg_only(iter);
4498
4499         if (iter->ent->type == TRACE_BPRINT &&
4500                         trace_flags & TRACE_ITER_PRINTK &&
4501                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502                 return trace_print_bprintk_msg_only(iter);
4503
4504         if (iter->ent->type == TRACE_PRINT &&
4505                         trace_flags & TRACE_ITER_PRINTK &&
4506                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507                 return trace_print_printk_msg_only(iter);
4508
4509         if (trace_flags & TRACE_ITER_BIN)
4510                 return print_bin_fmt(iter);
4511
4512         if (trace_flags & TRACE_ITER_HEX)
4513                 return print_hex_fmt(iter);
4514
4515         if (trace_flags & TRACE_ITER_RAW)
4516                 return print_raw_fmt(iter);
4517
4518         return print_trace_fmt(iter);
4519 }
4520
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523         struct trace_iterator *iter = m->private;
4524         struct trace_array *tr = iter->tr;
4525
4526         /* print nothing if the buffers are empty */
4527         if (trace_empty(iter))
4528                 return;
4529
4530         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531                 print_trace_header(m, iter);
4532
4533         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534                 print_lat_help_header(m);
4535 }
4536
4537 void trace_default_header(struct seq_file *m)
4538 {
4539         struct trace_iterator *iter = m->private;
4540         struct trace_array *tr = iter->tr;
4541         unsigned long trace_flags = tr->trace_flags;
4542
4543         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544                 return;
4545
4546         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547                 /* print nothing if the buffers are empty */
4548                 if (trace_empty(iter))
4549                         return;
4550                 print_trace_header(m, iter);
4551                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4552                         print_lat_help_header(m);
4553         } else {
4554                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4556                                 print_func_help_header_irq(iter->array_buffer,
4557                                                            m, trace_flags);
4558                         else
4559                                 print_func_help_header(iter->array_buffer, m,
4560                                                        trace_flags);
4561                 }
4562         }
4563 }
4564
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567         if (!ftrace_is_dead())
4568                 return;
4569         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578                     "#                      Takes a snapshot of the main buffer.\n"
4579                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580                     "#                      (Doesn't have to be '2'; works with any number that\n"
4581                     "#                       is not a '0' or '1')\n");
4582 }
4583
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592                     "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595                     "#                      (Doesn't have to be '2'; works with any number that\n"
4596                     "#                       is not a '0' or '1')\n");
4597 }
4598
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601         if (iter->tr->allocated_snapshot)
4602                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603         else
4604                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605
4606         seq_puts(m, "# Snapshot commands:\n");
4607         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608                 show_snapshot_main_help(m);
4609         else
4610                 show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619         struct trace_iterator *iter = v;
4620         int ret;
4621
4622         if (iter->ent == NULL) {
4623                 if (iter->tr) {
4624                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625                         seq_puts(m, "#\n");
4626                         test_ftrace_alive(m);
4627                 }
4628                 if (iter->snapshot && trace_empty(iter))
4629                         print_snapshot_help(m, iter);
4630                 else if (iter->trace && iter->trace->print_header)
4631                         iter->trace->print_header(m);
4632                 else
4633                         trace_default_header(m);
4634
4635         } else if (iter->leftover) {
4636                 /*
4637                  * If we filled the seq_file buffer earlier, we
4638                  * want to just show it now.
4639                  */
4640                 ret = trace_print_seq(m, &iter->seq);
4641
4642                 /* ret should this time be zero, but you never know */
4643                 iter->leftover = ret;
4644
4645         } else {
4646                 print_trace_line(iter);
4647                 ret = trace_print_seq(m, &iter->seq);
4648                 /*
4649                  * If we overflow the seq_file buffer, then it will
4650                  * ask us for this data again at start up.
4651                  * Use that instead.
4652                  *  ret is 0 if seq_file write succeeded.
4653                  *        -1 otherwise.
4654                  */
4655                 iter->leftover = ret;
4656         }
4657
4658         return 0;
4659 }
4660
4661 /*
4662  * Should be used after trace_array_get(), trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667         if (inode->i_cdev) /* See trace_create_cpu_file() */
4668                 return (long)inode->i_cdev - 1;
4669         return RING_BUFFER_ALL_CPUS;
4670 }
4671
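/*
 * seq_file callbacks used when reading the trace files; __tracing_open()
 * below wires them up via __seq_open_private().
 */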
4672 static const struct seq_operations tracer_seq_ops = {
4673         .start          = s_start,
4674         .next           = s_next,
4675         .stop           = s_stop,
4676         .show           = s_show,
4677 };
4678
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682         struct trace_array *tr = inode->i_private;
4683         struct trace_iterator *iter;
4684         int cpu;
4685
4686         if (tracing_disabled)
4687                 return ERR_PTR(-ENODEV);
4688
4689         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690         if (!iter)
4691                 return ERR_PTR(-ENOMEM);
4692
4693         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694                                     GFP_KERNEL);
4695         if (!iter->buffer_iter)
4696                 goto release;
4697
4698         /*
4699          * trace_find_next_entry() may need to save off iter->ent.
4700          * It will place it into the iter->temp buffer. As most
4701          * events are less than 128 bytes, allocate a buffer of that size.
4702          * If one is greater, then trace_find_next_entry() will
4703          * allocate a new buffer to adjust for the bigger iter->ent.
4704          * It's not critical if it fails to get allocated here.
4705          */
4706         iter->temp = kmalloc(128, GFP_KERNEL);
4707         if (iter->temp)
4708                 iter->temp_size = 128;
4709
4710         /*
4711          * trace_event_printf() may need to modify the given format
4712          * string to replace %p with %px so that it shows the real address
4713          * instead of a hashed value. However, that is only for event
4714          * tracing; other tracers may not need it. Defer the allocation
4715          * until it is needed.
4716          */
4717         iter->fmt = NULL;
4718         iter->fmt_size = 0;
4719
4720         /*
4721          * We make a copy of the current tracer to avoid concurrent
4722          * changes on it while we are reading.
4723          */
4724         mutex_lock(&trace_types_lock);
4725         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726         if (!iter->trace)
4727                 goto fail;
4728
4729         *iter->trace = *tr->current_trace;
4730
4731         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732                 goto fail;
4733
4734         iter->tr = tr;
4735
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737         /* Currently only the top directory has a snapshot */
4738         if (tr->current_trace->print_max || snapshot)
4739                 iter->array_buffer = &tr->max_buffer;
4740         else
4741 #endif
4742                 iter->array_buffer = &tr->array_buffer;
4743         iter->snapshot = snapshot;
4744         iter->pos = -1;
4745         iter->cpu_file = tracing_get_cpu(inode);
4746         mutex_init(&iter->mutex);
4747
4748         /* Notify the tracer early; before we stop tracing. */
4749         if (iter->trace->open)
4750                 iter->trace->open(iter);
4751
4752         /* Annotate start of buffers if we had overruns */
4753         if (ring_buffer_overruns(iter->array_buffer->buffer))
4754                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755
4756         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757         if (trace_clocks[tr->clock_id].in_ns)
4758                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759
4760         /*
4761          * If pause-on-trace is enabled, then stop the trace while
4762          * dumping, unless this is the "snapshot" file
4763          */
4764         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765                 tracing_stop_tr(tr);
4766
4767         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768                 for_each_tracing_cpu(cpu) {
4769                         iter->buffer_iter[cpu] =
4770                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4771                                                          cpu, GFP_KERNEL);
4772                 }
4773                 ring_buffer_read_prepare_sync();
4774                 for_each_tracing_cpu(cpu) {
4775                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4776                         tracing_iter_reset(iter, cpu);
4777                 }
4778         } else {
4779                 cpu = iter->cpu_file;
4780                 iter->buffer_iter[cpu] =
4781                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                  cpu, GFP_KERNEL);
4783                 ring_buffer_read_prepare_sync();
4784                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4785                 tracing_iter_reset(iter, cpu);
4786         }
4787
4788         mutex_unlock(&trace_types_lock);
4789
4790         return iter;
4791
4792  fail:
4793         mutex_unlock(&trace_types_lock);
4794         kfree(iter->trace);
4795         kfree(iter->temp);
4796         kfree(iter->buffer_iter);
4797 release:
4798         seq_release_private(inode, file);
4799         return ERR_PTR(-ENOMEM);
4800 }
4801
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804         int ret;
4805
4806         ret = tracing_check_open_get_tr(NULL);
4807         if (ret)
4808                 return ret;
4809
4810         filp->private_data = inode->i_private;
4811         return 0;
4812 }
4813
4814 bool tracing_is_disabled(void)
4815 {
4816         return tracing_disabled ? true : false;
4817 }
4818
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825         struct trace_array *tr = inode->i_private;
4826         int ret;
4827
4828         ret = tracing_check_open_get_tr(tr);
4829         if (ret)
4830                 return ret;
4831
4832         filp->private_data = inode->i_private;
4833
4834         return 0;
4835 }
4836
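/*
 * Tear down an iterator created by __tracing_open(): finish the per-cpu
 * ring buffer reads, restart tracing if this reader had paused it, and
 * drop the trace_array reference taken at open time.
 */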
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839         struct trace_array *tr = inode->i_private;
4840         struct seq_file *m = file->private_data;
4841         struct trace_iterator *iter;
4842         int cpu;
4843
4844         if (!(file->f_mode & FMODE_READ)) {
4845                 trace_array_put(tr);
4846                 return 0;
4847         }
4848
4849         /* Writes do not use seq_file */
4850         iter = m->private;
4851         mutex_lock(&trace_types_lock);
4852
4853         for_each_tracing_cpu(cpu) {
4854                 if (iter->buffer_iter[cpu])
4855                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856         }
4857
4858         if (iter->trace && iter->trace->close)
4859                 iter->trace->close(iter);
4860
4861         if (!iter->snapshot && tr->stop_count)
4862                 /* reenable tracing if it was previously enabled */
4863                 tracing_start_tr(tr);
4864
4865         __trace_array_put(tr);
4866
4867         mutex_unlock(&trace_types_lock);
4868
4869         mutex_destroy(&iter->mutex);
4870         free_cpumask_var(iter->started);
4871         kfree(iter->fmt);
4872         kfree(iter->temp);
4873         kfree(iter->trace);
4874         kfree(iter->buffer_iter);
4875         seq_release_private(inode, file);
4876
4877         return 0;
4878 }
4879
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882         struct trace_array *tr = inode->i_private;
4883
4884         trace_array_put(tr);
4885         return 0;
4886 }
4887
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890         struct trace_array *tr = inode->i_private;
4891
4892         trace_array_put(tr);
4893
4894         return single_release(inode, file);
4895 }
4896
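/*
 * Open handler for the "trace" file and its per-CPU variants.  An open
 * for write with O_TRUNC clears the buffer (one CPU or all of them,
 * depending on tracing_get_cpu()); an open for read builds a full
 * iterator via __tracing_open().
 */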
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899         struct trace_array *tr = inode->i_private;
4900         struct trace_iterator *iter;
4901         int ret;
4902
4903         ret = tracing_check_open_get_tr(tr);
4904         if (ret)
4905                 return ret;
4906
4907         /* If this file was open for write, then erase contents */
4908         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909                 int cpu = tracing_get_cpu(inode);
4910                 struct array_buffer *trace_buf = &tr->array_buffer;
4911
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913                 if (tr->current_trace->print_max)
4914                         trace_buf = &tr->max_buffer;
4915 #endif
4916
4917                 if (cpu == RING_BUFFER_ALL_CPUS)
4918                         tracing_reset_online_cpus(trace_buf);
4919                 else
4920                         tracing_reset_cpu(trace_buf, cpu);
4921         }
4922
4923         if (file->f_mode & FMODE_READ) {
4924                 iter = __tracing_open(inode, file, false);
4925                 if (IS_ERR(iter))
4926                         ret = PTR_ERR(iter);
4927                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929         }
4930
4931         if (ret < 0)
4932                 trace_array_put(tr);
4933
4934         return ret;
4935 }
4936
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952         while (t && !trace_ok_for_array(t, tr))
4953                 t = t->next;
4954
4955         return t;
4956 }
4957
4958 static void *
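/*
 * seq_file iteration over the registered tracers, backing the
 * "available_tracers" file.  Only tracers that are usable by this
 * trace_array (see trace_ok_for_array()) are listed.
 */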
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961         struct trace_array *tr = m->private;
4962         struct tracer *t = v;
4963
4964         (*pos)++;
4965
4966         if (t)
4967                 t = get_tracer_for_array(tr, t->next);
4968
4969         return t;
4970 }
4971
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974         struct trace_array *tr = m->private;
4975         struct tracer *t;
4976         loff_t l = 0;
4977
4978         mutex_lock(&trace_types_lock);
4979
4980         t = get_tracer_for_array(tr, trace_types);
4981         for (; t && l < *pos; t = t_next(m, t, &l))
4982                         ;
4983
4984         return t;
4985 }
4986
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989         mutex_unlock(&trace_types_lock);
4990 }
4991
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994         struct tracer *t = v;
4995
4996         if (!t)
4997                 return 0;
4998
4999         seq_puts(m, t->name);
5000         if (t->next)
5001                 seq_putc(m, ' ');
5002         else
5003                 seq_putc(m, '\n');
5004
5005         return 0;
5006 }
5007
5008 static const struct seq_operations show_traces_seq_ops = {
5009         .start          = t_start,
5010         .next           = t_next,
5011         .stop           = t_stop,
5012         .show           = t_show,
5013 };
5014
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017         struct trace_array *tr = inode->i_private;
5018         struct seq_file *m;
5019         int ret;
5020
5021         ret = tracing_check_open_get_tr(tr);
5022         if (ret)
5023                 return ret;
5024
5025         ret = seq_open(file, &show_traces_seq_ops);
5026         if (ret) {
5027                 trace_array_put(tr);
5028                 return ret;
5029         }
5030
5031         m = file->private_data;
5032         m->private = tr;
5033
5034         return 0;
5035 }
5036
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039         struct trace_array *tr = inode->i_private;
5040
5041         trace_array_put(tr);
5042         return seq_release(inode, file);
5043 }
5044
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047                    size_t count, loff_t *ppos)
5048 {
5049         return count;
5050 }
5051
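/*
 * Common llseek for tracing files: read opens go through seq_lseek(),
 * while write-only opens simply have their file position reset to zero.
 */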
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054         int ret;
5055
5056         if (file->f_mode & FMODE_READ)
5057                 ret = seq_lseek(file, offset, whence);
5058         else
5059                 file->f_pos = ret = 0;
5060
5061         return ret;
5062 }
5063
5064 static const struct file_operations tracing_fops = {
5065         .open           = tracing_open,
5066         .read           = seq_read,
5067         .write          = tracing_write_stub,
5068         .llseek         = tracing_lseek,
5069         .release        = tracing_release,
5070 };
5071
5072 static const struct file_operations show_traces_fops = {
5073         .open           = show_traces_open,
5074         .read           = seq_read,
5075         .llseek         = seq_lseek,
5076         .release        = show_traces_release,
5077 };
5078
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081                      size_t count, loff_t *ppos)
5082 {
5083         struct trace_array *tr = file_inode(filp)->i_private;
5084         char *mask_str;
5085         int len;
5086
5087         len = snprintf(NULL, 0, "%*pb\n",
5088                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089         mask_str = kmalloc(len, GFP_KERNEL);
5090         if (!mask_str)
5091                 return -ENOMEM;
5092
5093         len = snprintf(mask_str, len, "%*pb\n",
5094                        cpumask_pr_args(tr->tracing_cpumask));
5095         if (len >= count) {
5096                 count = -EINVAL;
5097                 goto out_err;
5098         }
5099         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100
5101 out_err:
5102         kfree(mask_str);
5103
5104         return count;
5105 }
5106
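/*
 * Apply a new tracing CPU mask to @tr.  CPUs being removed from the mask
 * get their per-cpu recording disabled, CPUs being added get it enabled
 * again, and the new mask is then copied into tr->tracing_cpumask.
 * Userspace reaches this through the "tracing_cpumask" file, e.g.
 * "echo 3 > tracing_cpumask" restricts tracing to CPUs 0 and 1.
 */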
5107 int tracing_set_cpumask(struct trace_array *tr,
5108                         cpumask_var_t tracing_cpumask_new)
5109 {
5110         int cpu;
5111
5112         if (!tr)
5113                 return -EINVAL;
5114
5115         local_irq_disable();
5116         arch_spin_lock(&tr->max_lock);
5117         for_each_tracing_cpu(cpu) {
5118                 /*
5119                  * Increase/decrease the disabled counter if we are
5120                  * about to flip a bit in the cpumask:
5121                  */
5122                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126                 }
5127                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131                 }
5132         }
5133         arch_spin_unlock(&tr->max_lock);
5134         local_irq_enable();
5135
5136         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137
5138         return 0;
5139 }
5140
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143                       size_t count, loff_t *ppos)
5144 {
5145         struct trace_array *tr = file_inode(filp)->i_private;
5146         cpumask_var_t tracing_cpumask_new;
5147         int err;
5148
5149         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150                 return -ENOMEM;
5151
5152         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153         if (err)
5154                 goto err_free;
5155
5156         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157         if (err)
5158                 goto err_free;
5159
5160         free_cpumask_var(tracing_cpumask_new);
5161
5162         return count;
5163
5164 err_free:
5165         free_cpumask_var(tracing_cpumask_new);
5166
5167         return err;
5168 }
5169
5170 static const struct file_operations tracing_cpumask_fops = {
5171         .open           = tracing_open_generic_tr,
5172         .read           = tracing_cpumask_read,
5173         .write          = tracing_cpumask_write,
5174         .release        = tracing_release_generic_tr,
5175         .llseek         = generic_file_llseek,
5176 };
5177
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180         struct tracer_opt *trace_opts;
5181         struct trace_array *tr = m->private;
5182         u32 tracer_flags;
5183         int i;
5184
5185         mutex_lock(&trace_types_lock);
5186         tracer_flags = tr->current_trace->flags->val;
5187         trace_opts = tr->current_trace->flags->opts;
5188
5189         for (i = 0; trace_options[i]; i++) {
5190                 if (tr->trace_flags & (1 << i))
5191                         seq_printf(m, "%s\n", trace_options[i]);
5192                 else
5193                         seq_printf(m, "no%s\n", trace_options[i]);
5194         }
5195
5196         for (i = 0; trace_opts[i].name; i++) {
5197                 if (tracer_flags & trace_opts[i].bit)
5198                         seq_printf(m, "%s\n", trace_opts[i].name);
5199                 else
5200                         seq_printf(m, "no%s\n", trace_opts[i].name);
5201         }
5202         mutex_unlock(&trace_types_lock);
5203
5204         return 0;
5205 }
5206
5207 static int __set_tracer_option(struct trace_array *tr,
5208                                struct tracer_flags *tracer_flags,
5209                                struct tracer_opt *opts, int neg)
5210 {
5211         struct tracer *trace = tracer_flags->trace;
5212         int ret;
5213
5214         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215         if (ret)
5216                 return ret;
5217
5218         if (neg)
5219                 tracer_flags->val &= ~opts->bit;
5220         else
5221                 tracer_flags->val |= opts->bit;
5222         return 0;
5223 }
5224
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228         struct tracer *trace = tr->current_trace;
5229         struct tracer_flags *tracer_flags = trace->flags;
5230         struct tracer_opt *opts = NULL;
5231         int i;
5232
5233         for (i = 0; tracer_flags->opts[i].name; i++) {
5234                 opts = &tracer_flags->opts[i];
5235
5236                 if (strcmp(cmp, opts->name) == 0)
5237                         return __set_tracer_option(tr, trace->flags, opts, neg);
5238         }
5239
5240         return -EINVAL;
5241 }
5242
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247                 return -1;
5248
5249         return 0;
5250 }
5251
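/*
 * Set or clear one bit of tr->trace_flags.  The current tracer may veto
 * the change via its flag_changed() callback, and a few flags
 * (RECORD_CMD, RECORD_TGID, EVENT_FORK, FUNC_FORK, OVERWRITE, PRINTK)
 * need extra work that is handled here as well.
 */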
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254         int *map;
5255
5256         if ((mask == TRACE_ITER_RECORD_TGID) ||
5257             (mask == TRACE_ITER_RECORD_CMD))
5258                 lockdep_assert_held(&event_mutex);
5259
5260         /* do nothing if flag is already set */
5261         if (!!(tr->trace_flags & mask) == !!enabled)
5262                 return 0;
5263
5264         /* Give the tracer a chance to approve the change */
5265         if (tr->current_trace->flag_changed)
5266                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267                         return -EINVAL;
5268
5269         if (enabled)
5270                 tr->trace_flags |= mask;
5271         else
5272                 tr->trace_flags &= ~mask;
5273
5274         if (mask == TRACE_ITER_RECORD_CMD)
5275                 trace_event_enable_cmd_record(enabled);
5276
5277         if (mask == TRACE_ITER_RECORD_TGID) {
5278                 if (!tgid_map) {
5279                         tgid_map_max = pid_max;
5280                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281                                        GFP_KERNEL);
5282
5283                         /*
5284                          * Pairs with smp_load_acquire() in
5285                          * trace_find_tgid_ptr() to ensure that if it observes
5286                          * the tgid_map we just allocated then it also observes
5287                          * the corresponding tgid_map_max value.
5288                          */
5289                         smp_store_release(&tgid_map, map);
5290                 }
5291                 if (!tgid_map) {
5292                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293                         return -ENOMEM;
5294                 }
5295
5296                 trace_event_enable_tgid_record(enabled);
5297         }
5298
5299         if (mask == TRACE_ITER_EVENT_FORK)
5300                 trace_event_follow_fork(tr, enabled);
5301
5302         if (mask == TRACE_ITER_FUNC_FORK)
5303                 ftrace_pid_follow_fork(tr, enabled);
5304
5305         if (mask == TRACE_ITER_OVERWRITE) {
5306                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310         }
5311
5312         if (mask == TRACE_ITER_PRINTK) {
5313                 trace_printk_start_stop_comm(enabled);
5314                 trace_printk_control(enabled);
5315         }
5316
5317         return 0;
5318 }
5319
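/*
 * Apply a single option name to @tr, with an optional "no" prefix to
 * clear it.  Core options are matched against trace_options[]; anything
 * else is tried as a tracer-specific option.
 */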
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322         char *cmp;
5323         int neg = 0;
5324         int ret;
5325         size_t orig_len = strlen(option);
5326         int len;
5327
5328         cmp = strstrip(option);
5329
5330         len = str_has_prefix(cmp, "no");
5331         if (len)
5332                 neg = 1;
5333
5334         cmp += len;
5335
5336         mutex_lock(&event_mutex);
5337         mutex_lock(&trace_types_lock);
5338
5339         ret = match_string(trace_options, -1, cmp);
5340         /* If the option is not a core trace flag, try the tracer-specific options */
5341         if (ret < 0)
5342                 ret = set_tracer_option(tr, cmp, neg);
5343         else
5344                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5345
5346         mutex_unlock(&trace_types_lock);
5347         mutex_unlock(&event_mutex);
5348
5349         /*
5350          * If the first trailing whitespace is replaced with '\0' by strstrip,
5351          * turn it back into a space.
5352          */
5353         if (orig_len > strlen(option))
5354                 option[strlen(option)] = ' ';
5355
5356         return ret;
5357 }
5358
5359 static void __init apply_trace_boot_options(void)
5360 {
5361         char *buf = trace_boot_options_buf;
5362         char *option;
5363
5364         while (true) {
5365                 option = strsep(&buf, ",");
5366
5367                 if (!option)
5368                         break;
5369
5370                 if (*option)
5371                         trace_set_options(&global_trace, option);
5372
5373                 /* Put back the comma to allow this to be called again */
5374                 if (buf)
5375                         *(buf - 1) = ',';
5376         }
5377 }
5378
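/*
 * Write handler for the "trace_options" file.  Each write carries one
 * option string, e.g. "echo no<option-name> > trace_options" to clear
 * an option.
 */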
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381                         size_t cnt, loff_t *ppos)
5382 {
5383         struct seq_file *m = filp->private_data;
5384         struct trace_array *tr = m->private;
5385         char buf[64];
5386         int ret;
5387
5388         if (cnt >= sizeof(buf))
5389                 return -EINVAL;
5390
5391         if (copy_from_user(buf, ubuf, cnt))
5392                 return -EFAULT;
5393
5394         buf[cnt] = 0;
5395
5396         ret = trace_set_options(tr, buf);
5397         if (ret < 0)
5398                 return ret;
5399
5400         *ppos += cnt;
5401
5402         return cnt;
5403 }
5404
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407         struct trace_array *tr = inode->i_private;
5408         int ret;
5409
5410         ret = tracing_check_open_get_tr(tr);
5411         if (ret)
5412                 return ret;
5413
5414         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415         if (ret < 0)
5416                 trace_array_put(tr);
5417
5418         return ret;
5419 }
5420
5421 static const struct file_operations tracing_iter_fops = {
5422         .open           = tracing_trace_options_open,
5423         .read           = seq_read,
5424         .llseek         = seq_lseek,
5425         .release        = tracing_single_release_tr,
5426         .write          = tracing_trace_options_write,
5427 };
5428
5429 static const char readme_msg[] =
5430         "tracing mini-HOWTO:\n\n"
5431         "# echo 0 > tracing_on : quick way to disable tracing\n"
5432         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433         " Important files:\n"
5434         "  trace\t\t\t- The static contents of the buffer\n"
5435         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437         "  current_tracer\t- function and latency tracers\n"
5438         "  available_tracers\t- list of configured tracers for current_tracer\n"
5439         "  error_log\t- error log for failed commands (that support it)\n"
5440         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442         "  trace_clock\t\t- change the clock used to order events\n"
5443         "       local:   Per cpu clock but may not be synced across CPUs\n"
5444         "      global:   Synced across CPUs but slows tracing down.\n"
5445         "     counter:   Not a clock, but just an increment\n"
5446         "      uptime:   Jiffy counter from time of boot\n"
5447         "        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449         "     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5452         "       delta:   Delta difference against a buffer-wide timestamp\n"
5453         "    absolute:   Absolute (standalone) timestamp\n"
5454         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5455         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5456         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5457         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458         "\t\t\t  Remove sub-buffer with rmdir\n"
5459         "  trace_options\t\t- Set format or modify how tracing happens\n"
5460         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5461         "\t\t\t  option name\n"
5462         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464         "\n  available_filter_functions - list of functions that can be filtered on\n"
5465         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466         "\t\t\t  functions\n"
5467         "\t     accepts: func_full_name or glob-matching-pattern\n"
5468         "\t     modules: Can select a group via module\n"
5469         "\t      Format: :mod:<module-name>\n"
5470         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471         "\t    triggers: a command to perform when function is hit\n"
5472         "\t      Format: <function>:<trigger>[:count]\n"
5473         "\t     trigger: traceon, traceoff\n"
5474         "\t\t      enable_event:<system>:<event>\n"
5475         "\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477         "\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480         "\t\t      snapshot\n"
5481 #endif
5482         "\t\t      dump\n"
5483         "\t\t      cpudump\n"
5484         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486         "\t     The first one will disable tracing every time do_fault is hit\n"
5487         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488         "\t       The first time do_trap is hit and it disables tracing, the\n"
5489         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5490         "\t       the counter will not decrement. It only decrements when the\n"
5491         "\t       trigger did work\n"
5492         "\t     To remove trigger without count:\n"
5493         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494         "\t     To remove trigger with a count:\n"
5495         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498         "\t    modules: Can select a group via module command :mod:\n"
5499         "\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503         "\t\t    (function)\n"
5504         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505         "\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514         "\t\t\t  snapshot buffer. Read the contents for more\n"
5515         "\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518         "  stack_trace\t\t- Shows the max stack trace when active\n"
5519         "  stack_max_size\t- Shows current max stack size that was traced\n"
5520         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5521         "\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524         "\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540         "\t  accepts: event-definitions (one definition per line)\n"
5541         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546         "\t           -:[<group>/]<event>\n"
5547 #ifdef CONFIG_KPROBE_EVENTS
5548         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5549   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5550 #endif
5551 #ifdef CONFIG_UPROBE_EVENTS
5552   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5553 #endif
5554         "\t     args: <name>=fetcharg[:type]\n"
5555         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5556 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5557         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5558 #else
5559         "\t           $stack<index>, $stack, $retval, $comm,\n"
5560 #endif
5561         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5562         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5563         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5564         "\t           <type>\\[<array-size>\\]\n"
5565 #ifdef CONFIG_HIST_TRIGGERS
5566         "\t    field: <stype> <name>;\n"
5567         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5568         "\t           [unsigned] char/int/long\n"
5569 #endif
5570 #endif
5571         "  events/\t\t- Directory containing all trace event subsystems:\n"
5572         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5573         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5574         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5575         "\t\t\t  events\n"
5576         "      filter\t\t- If set, only events passing filter are traced\n"
5577         "  events/<system>/<event>/\t- Directory containing control files for\n"
5578         "\t\t\t  <event>:\n"
5579         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5580         "      filter\t\t- If set, only events passing filter are traced\n"
5581         "      trigger\t\t- If set, a command to perform when event is hit\n"
5582         "\t    Format: <trigger>[:count][if <filter>]\n"
5583         "\t   trigger: traceon, traceoff\n"
5584         "\t            enable_event:<system>:<event>\n"
5585         "\t            disable_event:<system>:<event>\n"
5586 #ifdef CONFIG_HIST_TRIGGERS
5587         "\t            enable_hist:<system>:<event>\n"
5588         "\t            disable_hist:<system>:<event>\n"
5589 #endif
5590 #ifdef CONFIG_STACKTRACE
5591         "\t\t    stacktrace\n"
5592 #endif
5593 #ifdef CONFIG_TRACER_SNAPSHOT
5594         "\t\t    snapshot\n"
5595 #endif
5596 #ifdef CONFIG_HIST_TRIGGERS
5597         "\t\t    hist (see below)\n"
5598 #endif
5599         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5600         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5601         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5602         "\t                  events/block/block_unplug/trigger\n"
5603         "\t   The first disables tracing every time block_unplug is hit.\n"
5604         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5605         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5606         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5607         "\t   Like function triggers, the counter is only decremented if it\n"
5608         "\t    enabled or disabled tracing.\n"
5609         "\t   To remove a trigger without a count:\n"
5610         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5611         "\t   To remove a trigger with a count:\n"
5612         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5613         "\t   Filters can be ignored when removing a trigger.\n"
5614 #ifdef CONFIG_HIST_TRIGGERS
5615         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5616         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5617         "\t            [:values=<field1[,field2,...]>]\n"
5618         "\t            [:sort=<field1[,field2,...]>]\n"
5619         "\t            [:size=#entries]\n"
5620         "\t            [:pause][:continue][:clear]\n"
5621         "\t            [:name=histname1]\n"
5622         "\t            [:<handler>.<action>]\n"
5623         "\t            [if <filter>]\n\n"
5624         "\t    Note, special fields can be used as well:\n"
5625         "\t            common_timestamp - to record current timestamp\n"
5626         "\t            common_cpu - to record the CPU the event happened on\n"
5627         "\n"
5628         "\t    When a matching event is hit, an entry is added to a hash\n"
5629         "\t    table using the key(s) and value(s) named, and the value of a\n"
5630         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5631         "\t    correspond to fields in the event's format description.  Keys\n"
5632         "\t    can be any field, or the special string 'stacktrace'.\n"
5633         "\t    Compound keys consisting of up to two fields can be specified\n"
5634         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5635         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5636         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5637         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5638         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5639         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5640         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5641         "\t    its histogram data will be shared with other triggers of the\n"
5642         "\t    same name, and trigger hits will update this common data.\n\n"
5643         "\t    Reading the 'hist' file for the event will dump the hash\n"
5644         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5645         "\t    triggers attached to an event, there will be a table for each\n"
5646         "\t    trigger in the output.  The table displayed for a named\n"
5647         "\t    trigger will be the same as any other instance having the\n"
5648         "\t    same name.  The default format used to display a given field\n"
5649         "\t    can be modified by appending any of the following modifiers\n"
5650         "\t    to the field name, as applicable:\n\n"
5651         "\t            .hex        display a number as a hex value\n"
5652         "\t            .sym        display an address as a symbol\n"
5653         "\t            .sym-offset display an address as a symbol and offset\n"
5654         "\t            .execname   display a common_pid as a program name\n"
5655         "\t            .syscall    display a syscall id as a syscall name\n"
5656         "\t            .log2       display log2 value rather than raw number\n"
5657         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5658         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5659         "\t    trigger or to start a hist trigger but not log any events\n"
5660         "\t    until told to do so.  'continue' can be used to start or\n"
5661         "\t    restart a paused hist trigger.\n\n"
5662         "\t    The 'clear' parameter will clear the contents of a running\n"
5663         "\t    hist trigger and leave its current paused/active state\n"
5664         "\t    unchanged.\n\n"
5665         "\t    The enable_hist and disable_hist triggers can be used to\n"
5666         "\t    have one event conditionally start and stop another event's\n"
5667         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5668         "\t    the enable_event and disable_event triggers.\n\n"
5669         "\t    Hist trigger handlers and actions are executed whenever a\n"
5670         "\t    histogram entry is added or updated.  They take the form:\n\n"
5671         "\t        <handler>.<action>\n\n"
5672         "\t    The available handlers are:\n\n"
5673         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5674         "\t        onmax(var)               - invoke if var exceeds current max\n"
5675         "\t        onchange(var)            - invoke action if var changes\n\n"
5676         "\t    The available actions are:\n\n"
5677         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5678         "\t        save(field,...)                      - save current event fields\n"
5679 #ifdef CONFIG_TRACER_SNAPSHOT
5680         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5681 #endif
5682 #ifdef CONFIG_SYNTH_EVENTS
5683         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5684         "\t  Write into this file to define/undefine new synthetic events.\n"
5685         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5686 #endif
5687 #endif
5688 ;
5689
5690 static ssize_t
5691 tracing_readme_read(struct file *filp, char __user *ubuf,
5692                        size_t cnt, loff_t *ppos)
5693 {
5694         return simple_read_from_buffer(ubuf, cnt, ppos,
5695                                         readme_msg, strlen(readme_msg));
5696 }
5697
5698 static const struct file_operations tracing_readme_fops = {
5699         .open           = tracing_open_generic,
5700         .read           = tracing_readme_read,
5701         .llseek         = generic_file_llseek,
5702 };
5703
5704 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5705 {
5706         int pid = ++(*pos);
5707
5708         return trace_find_tgid_ptr(pid);
5709 }
5710
5711 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5712 {
5713         int pid = *pos;
5714
5715         return trace_find_tgid_ptr(pid);
5716 }
5717
5718 static void saved_tgids_stop(struct seq_file *m, void *v)
5719 {
5720 }
5721
5722 static int saved_tgids_show(struct seq_file *m, void *v)
5723 {
5724         int *entry = (int *)v;
5725         int pid = entry - tgid_map;
5726         int tgid = *entry;
5727
5728         if (tgid == 0)
5729                 return SEQ_SKIP;
5730
5731         seq_printf(m, "%d %d\n", pid, tgid);
5732         return 0;
5733 }
5734
5735 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5736         .start          = saved_tgids_start,
5737         .stop           = saved_tgids_stop,
5738         .next           = saved_tgids_next,
5739         .show           = saved_tgids_show,
5740 };
5741
5742 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5743 {
5744         int ret;
5745
5746         ret = tracing_check_open_get_tr(NULL);
5747         if (ret)
5748                 return ret;
5749
5750         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5751 }
5752
5753
5754 static const struct file_operations tracing_saved_tgids_fops = {
5755         .open           = tracing_saved_tgids_open,
5756         .read           = seq_read,
5757         .llseek         = seq_lseek,
5758         .release        = seq_release,
5759 };
5760
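/*
 * seq_file interface for the "saved_cmdlines" file: walk the cached
 * pid -> comm map under trace_cmdline_lock and print one "pid comm"
 * pair per line.
 */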
5761 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5762 {
5763         unsigned int *ptr = v;
5764
5765         if (*pos || m->count)
5766                 ptr++;
5767
5768         (*pos)++;
5769
5770         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5771              ptr++) {
5772                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5773                         continue;
5774
5775                 return ptr;
5776         }
5777
5778         return NULL;
5779 }
5780
5781 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5782 {
5783         void *v;
5784         loff_t l = 0;
5785
5786         preempt_disable();
5787         arch_spin_lock(&trace_cmdline_lock);
5788
5789         v = &savedcmd->map_cmdline_to_pid[0];
5790         while (l <= *pos) {
5791                 v = saved_cmdlines_next(m, v, &l);
5792                 if (!v)
5793                         return NULL;
5794         }
5795
5796         return v;
5797 }
5798
5799 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5800 {
5801         arch_spin_unlock(&trace_cmdline_lock);
5802         preempt_enable();
5803 }
5804
5805 static int saved_cmdlines_show(struct seq_file *m, void *v)
5806 {
5807         char buf[TASK_COMM_LEN];
5808         unsigned int *pid = v;
5809
5810         __trace_find_cmdline(*pid, buf);
5811         seq_printf(m, "%d %s\n", *pid, buf);
5812         return 0;
5813 }
5814
5815 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5816         .start          = saved_cmdlines_start,
5817         .next           = saved_cmdlines_next,
5818         .stop           = saved_cmdlines_stop,
5819         .show           = saved_cmdlines_show,
5820 };
5821
5822 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5823 {
5824         int ret;
5825
5826         ret = tracing_check_open_get_tr(NULL);
5827         if (ret)
5828                 return ret;
5829
5830         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5831 }
5832
5833 static const struct file_operations tracing_saved_cmdlines_fops = {
5834         .open           = tracing_saved_cmdlines_open,
5835         .read           = seq_read,
5836         .llseek         = seq_lseek,
5837         .release        = seq_release,
5838 };
5839
5840 static ssize_t
5841 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5842                                  size_t cnt, loff_t *ppos)
5843 {
5844         char buf[64];
5845         int r;
5846
5847         arch_spin_lock(&trace_cmdline_lock);
5848         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5849         arch_spin_unlock(&trace_cmdline_lock);
5850
5851         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5852 }
5853
5854 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5855 {
5856         kfree(s->saved_cmdlines);
5857         kfree(s->map_cmdline_to_pid);
5858         kfree(s);
5859 }
5860
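/*
 * Replace the saved_cmdlines buffer with a new one holding @val entries.
 * The pointer swap is done under trace_cmdline_lock and the old buffer
 * is freed once the lock is dropped.
 */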
5861 static int tracing_resize_saved_cmdlines(unsigned int val)
5862 {
5863         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5864
5865         s = kmalloc(sizeof(*s), GFP_KERNEL);
5866         if (!s)
5867                 return -ENOMEM;
5868
5869         if (allocate_cmdlines_buffer(val, s) < 0) {
5870                 kfree(s);
5871                 return -ENOMEM;
5872         }
5873
5874         arch_spin_lock(&trace_cmdline_lock);
5875         savedcmd_temp = savedcmd;
5876         savedcmd = s;
5877         arch_spin_unlock(&trace_cmdline_lock);
5878         free_saved_cmdlines_buffer(savedcmd_temp);
5879
5880         return 0;
5881 }
5882
5883 static ssize_t
5884 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5885                                   size_t cnt, loff_t *ppos)
5886 {
5887         unsigned long val;
5888         int ret;
5889
5890         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5891         if (ret)
5892                 return ret;
5893
5894         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5895         if (!val || val > PID_MAX_DEFAULT)
5896                 return -EINVAL;
5897
5898         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5899         if (ret < 0)
5900                 return ret;
5901
5902         *ppos += cnt;
5903
5904         return cnt;
5905 }
5906
5907 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5908         .open           = tracing_open_generic,
5909         .read           = tracing_saved_cmdlines_size_read,
5910         .write          = tracing_saved_cmdlines_size_write,
5911 };
5912
5913 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
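/*
 * The "eval_map" file walks the chained trace_eval_map_item arrays.
 * Each array starts with a head item (module and length) and ends with
 * a tail item pointing to the next array; update_eval_map() skips over
 * these bookkeeping entries while iterating.
 */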
5914 static union trace_eval_map_item *
5915 update_eval_map(union trace_eval_map_item *ptr)
5916 {
5917         if (!ptr->map.eval_string) {
5918                 if (ptr->tail.next) {
5919                         ptr = ptr->tail.next;
5920                         /* Set ptr to the next real item (skip head) */
5921                         ptr++;
5922                 } else
5923                         return NULL;
5924         }
5925         return ptr;
5926 }
5927
5928 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5929 {
5930         union trace_eval_map_item *ptr = v;
5931
5932         /*
5933          * Paranoid! If ptr points to end, we don't want to increment past it.
5934          * This really should never happen.
5935          */
5936         (*pos)++;
5937         ptr = update_eval_map(ptr);
5938         if (WARN_ON_ONCE(!ptr))
5939                 return NULL;
5940
5941         ptr++;
5942         ptr = update_eval_map(ptr);
5943
5944         return ptr;
5945 }
5946
5947 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5948 {
5949         union trace_eval_map_item *v;
5950         loff_t l = 0;
5951
5952         mutex_lock(&trace_eval_mutex);
5953
5954         v = trace_eval_maps;
5955         if (v)
5956                 v++;
5957
5958         while (v && l < *pos) {
5959                 v = eval_map_next(m, v, &l);
5960         }
5961
5962         return v;
5963 }
5964
5965 static void eval_map_stop(struct seq_file *m, void *v)
5966 {
5967         mutex_unlock(&trace_eval_mutex);
5968 }
5969
5970 static int eval_map_show(struct seq_file *m, void *v)
5971 {
5972         union trace_eval_map_item *ptr = v;
5973
5974         seq_printf(m, "%s %ld (%s)\n",
5975                    ptr->map.eval_string, ptr->map.eval_value,
5976                    ptr->map.system);
5977
5978         return 0;
5979 }
5980
5981 static const struct seq_operations tracing_eval_map_seq_ops = {
5982         .start          = eval_map_start,
5983         .next           = eval_map_next,
5984         .stop           = eval_map_stop,
5985         .show           = eval_map_show,
5986 };
5987
5988 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5989 {
5990         int ret;
5991
5992         ret = tracing_check_open_get_tr(NULL);
5993         if (ret)
5994                 return ret;
5995
5996         return seq_open(filp, &tracing_eval_map_seq_ops);
5997 }
5998
5999 static const struct file_operations tracing_eval_map_fops = {
6000         .open           = tracing_eval_map_open,
6001         .read           = seq_read,
6002         .llseek         = seq_lseek,
6003         .release        = seq_release,
6004 };
6005
6006 static inline union trace_eval_map_item *
6007 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6008 {
6009         /* Return tail of array given the head */
6010         return ptr + ptr->head.length + 1;
6011 }
6012
6013 static void
6014 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6015                            int len)
6016 {
6017         struct trace_eval_map **stop;
6018         struct trace_eval_map **map;
6019         union trace_eval_map_item *map_array;
6020         union trace_eval_map_item *ptr;
6021
6022         stop = start + len;
6023
6024         /*
6025          * The trace_eval_maps contains the map plus a head and tail item,
6026          * where the head holds the module and length of array, and the
6027          * tail holds a pointer to the next list.
6028          */
6029         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6030         if (!map_array) {
6031                 pr_warn("Unable to allocate trace eval mapping\n");
6032                 return;
6033         }
6034
6035         mutex_lock(&trace_eval_mutex);
6036
6037         if (!trace_eval_maps)
6038                 trace_eval_maps = map_array;
6039         else {
6040                 ptr = trace_eval_maps;
6041                 for (;;) {
6042                         ptr = trace_eval_jmp_to_tail(ptr);
6043                         if (!ptr->tail.next)
6044                                 break;
6045                         ptr = ptr->tail.next;
6046
6047                 }
6048                 ptr->tail.next = map_array;
6049         }
6050         map_array->head.mod = mod;
6051         map_array->head.length = len;
6052         map_array++;
6053
6054         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6055                 map_array->map = **map;
6056                 map_array++;
6057         }
6058         memset(map_array, 0, sizeof(*map_array));
6059
6060         mutex_unlock(&trace_eval_mutex);
6061 }
6062
6063 static void trace_create_eval_file(struct dentry *d_tracer)
6064 {
6065         trace_create_file("eval_map", 0444, d_tracer,
6066                           NULL, &tracing_eval_map_fops);
6067 }
6068
6069 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6070 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6071 static inline void trace_insert_eval_map_file(struct module *mod,
6072                               struct trace_eval_map **start, int len) { }
6073 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6074
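/*
 * Register an array of eval maps (for instance when a module is loaded):
 * let the event code update any fields that use them, and, when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled, also expose them through the
 * "eval_map" file.
 */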
6075 static void trace_insert_eval_map(struct module *mod,
6076                                   struct trace_eval_map **start, int len)
6077 {
6078         struct trace_eval_map **map;
6079
6080         if (len <= 0)
6081                 return;
6082
6083         map = start;
6084
6085         trace_event_eval_update(map, len);
6086
6087         trace_insert_eval_map_file(mod, start, len);
6088 }
6089
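/*
 * Read side of the "current_tracer" file: report the name of the tracer
 * currently active on this trace_array.
 */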
6090 static ssize_t
6091 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6092                        size_t cnt, loff_t *ppos)
6093 {
6094         struct trace_array *tr = filp->private_data;
6095         char buf[MAX_TRACER_SIZE+2];
6096         int r;
6097
6098         mutex_lock(&trace_types_lock);
6099         r = sprintf(buf, "%s\n", tr->current_trace->name);
6100         mutex_unlock(&trace_types_lock);
6101
6102         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6103 }
6104
6105 int tracer_init(struct tracer *t, struct trace_array *tr)
6106 {
6107         tracing_reset_online_cpus(&tr->array_buffer);
6108         return t->init(tr);
6109 }
6110
6111 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6112 {
6113         int cpu;
6114
6115         for_each_tracing_cpu(cpu)
6116                 per_cpu_ptr(buf->data, cpu)->entries = val;
6117 }
6118
6119 #ifdef CONFIG_TRACER_MAX_TRACE
6120 /* resize @trace_buf to match the per-cpu entry counts of @size_buf */
6121 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6122                                         struct array_buffer *size_buf, int cpu_id)
6123 {
6124         int cpu, ret = 0;
6125
6126         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6127                 for_each_tracing_cpu(cpu) {
6128                         ret = ring_buffer_resize(trace_buf->buffer,
6129                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6130                         if (ret < 0)
6131                                 break;
6132                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6133                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6134                 }
6135         } else {
6136                 ret = ring_buffer_resize(trace_buf->buffer,
6137                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6138                 if (ret == 0)
6139                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6140                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6141         }
6142
6143         return ret;
6144 }
6145 #endif /* CONFIG_TRACER_MAX_TRACE */
6146
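/*
 * Resize the main ring buffer to @size entries for @cpu (or for every
 * CPU when @cpu is RING_BUFFER_ALL_CPUS).  For the top-level instance,
 * when the current tracer uses the max buffer, that buffer is resized
 * as well, and a failure there attempts to roll the main buffer back.
 */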
6147 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6148                                         unsigned long size, int cpu)
6149 {
6150         int ret;
6151
6152         /*
6153          * If kernel or user changes the size of the ring buffer
6154          * we use the size that was given, and we can forget about
6155          * expanding it later.
6156          */
6157         ring_buffer_expanded = true;
6158
6159         /* May be called before buffers are initialized */
6160         if (!tr->array_buffer.buffer)
6161                 return 0;
6162
6163         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6164         if (ret < 0)
6165                 return ret;
6166
6167 #ifdef CONFIG_TRACER_MAX_TRACE
6168         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6169             !tr->current_trace->use_max_tr)
6170                 goto out;
6171
6172         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6173         if (ret < 0) {
6174                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6175                                                      &tr->array_buffer, cpu);
6176                 if (r < 0) {
6177                         /*
6178                          * AARGH! We are left with a max buffer of a
6179                          * different size!
6180                          * The max buffer is our "snapshot" buffer.
6181                          * When a tracer needs a snapshot (one of the
6182                          * latency tracers), it swaps the max buffer
6183                          * with the saved snapshot. We succeeded in
6184                          * updating the size of the main buffer, but failed
6185                          * to update the size of the max buffer. Then, when we
6186                          * tried to reset the main buffer to its original size,
6187                          * we failed there too. This is very unlikely to
6188                          * happen, but if it does, warn and kill all
6189                          * tracing.
6190                          */
6191                         WARN_ON(1);
6192                         tracing_disabled = 1;
6193                 }
6194                 return ret;
6195         }
6196
6197         if (cpu == RING_BUFFER_ALL_CPUS)
6198                 set_buffer_entries(&tr->max_buffer, size);
6199         else
6200                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6201
6202  out:
6203 #endif /* CONFIG_TRACER_MAX_TRACE */
6204
6205         if (cpu == RING_BUFFER_ALL_CPUS)
6206                 set_buffer_entries(&tr->array_buffer, size);
6207         else
6208                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6209
6210         return ret;
6211 }
6212
6213 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6214                                   unsigned long size, int cpu_id)
6215 {
6216         int ret;
6217
6218         mutex_lock(&trace_types_lock);
6219
6220         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6221                 /* make sure this cpu is enabled in the mask */
6222                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6223                         ret = -EINVAL;
6224                         goto out;
6225                 }
6226         }
6227
6228         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6229         if (ret < 0)
6230                 ret = -ENOMEM;
6231
6232 out:
6233         mutex_unlock(&trace_types_lock);
6234
6235         return ret;
6236 }
6237
6238
6239 /**
6240  * tracing_update_buffers - used by tracing facility to expand ring buffers
6241  *
6242  * To save memory on systems where tracing is compiled in but never
6243  * used, the ring buffers are allocated at a minimum size. Once a
6244  * user starts to use the tracing facility, the buffers need to grow
6245  * to their default size.
6246  *
6247  * This function is to be called when a tracer is about to be used.
6248  */
6249 int tracing_update_buffers(void)
6250 {
6251         int ret = 0;
6252
6253         mutex_lock(&trace_types_lock);
6254         if (!ring_buffer_expanded)
6255                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6256                                                 RING_BUFFER_ALL_CPUS);
6257         mutex_unlock(&trace_types_lock);
6258
6259         return ret;
6260 }
6261
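/*
 * Example (editor's sketch, not from the original source): callers that
 * are about to enable tracing typically expand the ring buffers first
 * and bail out on failure.  The surrounding function name below is
 * hypothetical; only tracing_update_buffers() is real.
 *
 *	static int example_enable_path(void)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		return 0;
 *	}
 */
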
6262 struct trace_option_dentry;
6263
6264 static void
6265 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6266
6267 /*
6268  * Used to clear out the tracer before deletion of an instance.
6269  * Must have trace_types_lock held.
6270  */
6271 static void tracing_set_nop(struct trace_array *tr)
6272 {
6273         if (tr->current_trace == &nop_trace)
6274                 return;
6275
6276         tr->current_trace->enabled--;
6277
6278         if (tr->current_trace->reset)
6279                 tr->current_trace->reset(tr);
6280
6281         tr->current_trace = &nop_trace;
6282 }
6283
6284 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6285 {
6286         /* Only enable if the directory has been created already. */
6287         if (!tr->dir)
6288                 return;
6289
6290         create_trace_option_files(tr, t);
6291 }
6292
6293 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6294 {
6295         struct tracer *t;
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297         bool had_max_tr;
6298 #endif
6299         int ret = 0;
6300
6301         mutex_lock(&trace_types_lock);
6302
6303         if (!ring_buffer_expanded) {
6304                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6305                                                 RING_BUFFER_ALL_CPUS);
6306                 if (ret < 0)
6307                         goto out;
6308                 ret = 0;
6309         }
6310
6311         for (t = trace_types; t; t = t->next) {
6312                 if (strcmp(t->name, buf) == 0)
6313                         break;
6314         }
6315         if (!t) {
6316                 ret = -EINVAL;
6317                 goto out;
6318         }
6319         if (t == tr->current_trace)
6320                 goto out;
6321
6322 #ifdef CONFIG_TRACER_SNAPSHOT
6323         if (t->use_max_tr) {
6324                 arch_spin_lock(&tr->max_lock);
6325                 if (tr->cond_snapshot)
6326                         ret = -EBUSY;
6327                 arch_spin_unlock(&tr->max_lock);
6328                 if (ret)
6329                         goto out;
6330         }
6331 #endif
6332         /* Some tracers won't work on kernel command line */
6333         if (system_state < SYSTEM_RUNNING && t->noboot) {
6334                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6335                         t->name);
6336                 goto out;
6337         }
6338
6339         /* Some tracers are only allowed for the top level buffer */
6340         if (!trace_ok_for_array(t, tr)) {
6341                 ret = -EINVAL;
6342                 goto out;
6343         }
6344
6345         /* If trace pipe files are being read, we can't change the tracer */
6346         if (tr->trace_ref) {
6347                 ret = -EBUSY;
6348                 goto out;
6349         }
6350
6351         trace_branch_disable();
6352
6353         tr->current_trace->enabled--;
6354
6355         if (tr->current_trace->reset)
6356                 tr->current_trace->reset(tr);
6357
6358         /* Current trace needs to be nop_trace before synchronize_rcu */
6359         tr->current_trace = &nop_trace;
6360
6361 #ifdef CONFIG_TRACER_MAX_TRACE
6362         had_max_tr = tr->allocated_snapshot;
6363
6364         if (had_max_tr && !t->use_max_tr) {
6365                 /*
6366                  * We need to make sure that the update_max_tr sees that
6367                  * current_trace changed to nop_trace to keep it from
6368                  * swapping the buffers after we resize it.
6369                  * update_max_tr() is called with interrupts disabled,
6370                  * so synchronize_rcu() is sufficient.
6371                  */
6372                 synchronize_rcu();
6373                 free_snapshot(tr);
6374         }
6375 #endif
6376
6377 #ifdef CONFIG_TRACER_MAX_TRACE
6378         if (t->use_max_tr && !had_max_tr) {
6379                 ret = tracing_alloc_snapshot_instance(tr);
6380                 if (ret < 0)
6381                         goto out;
6382         }
6383 #endif
6384
6385         if (t->init) {
6386                 ret = tracer_init(t, tr);
6387                 if (ret)
6388                         goto out;
6389         }
6390
6391         tr->current_trace = t;
6392         tr->current_trace->enabled++;
6393         trace_branch_enable(tr);
6394  out:
6395         mutex_unlock(&trace_types_lock);
6396
6397         return ret;
6398 }
6399
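/*
 * Example (editor's sketch): tracing_set_tracer() is normally reached
 * from user space by writing a tracer name to the current_tracer file.
 * The tracefs path below assumes the default mount point.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int example_set_tracer(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, name, strlen(name)) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */
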
6400 static ssize_t
6401 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6402                         size_t cnt, loff_t *ppos)
6403 {
6404         struct trace_array *tr = filp->private_data;
6405         char buf[MAX_TRACER_SIZE+1];
6406         int i;
6407         size_t ret;
6408         int err;
6409
6410         ret = cnt;
6411
6412         if (cnt > MAX_TRACER_SIZE)
6413                 cnt = MAX_TRACER_SIZE;
6414
6415         if (copy_from_user(buf, ubuf, cnt))
6416                 return -EFAULT;
6417
6418         buf[cnt] = 0;
6419
6420         /* strip trailing whitespace. */
6421         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6422                 buf[i] = 0;
6423
6424         err = tracing_set_tracer(tr, buf);
6425         if (err)
6426                 return err;
6427
6428         *ppos += ret;
6429
6430         return ret;
6431 }
6432
6433 static ssize_t
6434 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6435                    size_t cnt, loff_t *ppos)
6436 {
6437         char buf[64];
6438         int r;
6439
6440         r = snprintf(buf, sizeof(buf), "%ld\n",
6441                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6442         if (r > sizeof(buf))
6443                 r = sizeof(buf);
6444         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6445 }
6446
6447 static ssize_t
6448 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6449                     size_t cnt, loff_t *ppos)
6450 {
6451         unsigned long val;
6452         int ret;
6453
6454         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6455         if (ret)
6456                 return ret;
6457
6458         *ptr = val * 1000;
6459
6460         return cnt;
6461 }
6462
6463 static ssize_t
6464 tracing_thresh_read(struct file *filp, char __user *ubuf,
6465                     size_t cnt, loff_t *ppos)
6466 {
6467         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6468 }
6469
6470 static ssize_t
6471 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6472                      size_t cnt, loff_t *ppos)
6473 {
6474         struct trace_array *tr = filp->private_data;
6475         int ret;
6476
6477         mutex_lock(&trace_types_lock);
6478         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6479         if (ret < 0)
6480                 goto out;
6481
6482         if (tr->current_trace->update_thresh) {
6483                 ret = tr->current_trace->update_thresh(tr);
6484                 if (ret < 0)
6485                         goto out;
6486         }
6487
6488         ret = cnt;
6489 out:
6490         mutex_unlock(&trace_types_lock);
6491
6492         return ret;
6493 }
6494
6495 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6496
6497 static ssize_t
6498 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6499                      size_t cnt, loff_t *ppos)
6500 {
6501         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6502 }
6503
6504 static ssize_t
6505 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6506                       size_t cnt, loff_t *ppos)
6507 {
6508         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6509 }
6510
6511 #endif
6512
6513 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6514 {
6515         struct trace_array *tr = inode->i_private;
6516         struct trace_iterator *iter;
6517         int ret;
6518
6519         ret = tracing_check_open_get_tr(tr);
6520         if (ret)
6521                 return ret;
6522
6523         mutex_lock(&trace_types_lock);
6524
6525         /* create a buffer to store the information to pass to userspace */
6526         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6527         if (!iter) {
6528                 ret = -ENOMEM;
6529                 __trace_array_put(tr);
6530                 goto out;
6531         }
6532
6533         trace_seq_init(&iter->seq);
6534         iter->trace = tr->current_trace;
6535
6536         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6537                 ret = -ENOMEM;
6538                 goto fail;
6539         }
6540
6541         /* trace pipe does not show start of buffer */
6542         cpumask_setall(iter->started);
6543
6544         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6545                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6546
6547         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6548         if (trace_clocks[tr->clock_id].in_ns)
6549                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6550
6551         iter->tr = tr;
6552         iter->array_buffer = &tr->array_buffer;
6553         iter->cpu_file = tracing_get_cpu(inode);
6554         mutex_init(&iter->mutex);
6555         filp->private_data = iter;
6556
6557         if (iter->trace->pipe_open)
6558                 iter->trace->pipe_open(iter);
6559
6560         nonseekable_open(inode, filp);
6561
6562         tr->trace_ref++;
6563 out:
6564         mutex_unlock(&trace_types_lock);
6565         return ret;
6566
6567 fail:
6568         kfree(iter);
6569         __trace_array_put(tr);
6570         mutex_unlock(&trace_types_lock);
6571         return ret;
6572 }
6573
6574 static int tracing_release_pipe(struct inode *inode, struct file *file)
6575 {
6576         struct trace_iterator *iter = file->private_data;
6577         struct trace_array *tr = inode->i_private;
6578
6579         mutex_lock(&trace_types_lock);
6580
6581         tr->trace_ref--;
6582
6583         if (iter->trace->pipe_close)
6584                 iter->trace->pipe_close(iter);
6585
6586         mutex_unlock(&trace_types_lock);
6587
6588         free_cpumask_var(iter->started);
6589         mutex_destroy(&iter->mutex);
6590         kfree(iter);
6591
6592         trace_array_put(tr);
6593
6594         return 0;
6595 }
6596
6597 static __poll_t
6598 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6599 {
6600         struct trace_array *tr = iter->tr;
6601
6602         /* Iterators are static, they should be filled or empty */
6603         if (trace_buffer_iter(iter, iter->cpu_file))
6604                 return EPOLLIN | EPOLLRDNORM;
6605
6606         if (tr->trace_flags & TRACE_ITER_BLOCK)
6607                 /*
6608                  * Always select as readable when in blocking mode
6609                  */
6610                 return EPOLLIN | EPOLLRDNORM;
6611         else
6612                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6613                                              filp, poll_table);
6614 }
6615
6616 static __poll_t
6617 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6618 {
6619         struct trace_iterator *iter = filp->private_data;
6620
6621         return trace_poll(iter, filp, poll_table);
6622 }
6623
6624 /* Must be called with iter->mutex held. */
6625 static int tracing_wait_pipe(struct file *filp)
6626 {
6627         struct trace_iterator *iter = filp->private_data;
6628         int ret;
6629
6630         while (trace_empty(iter)) {
6631
6632                 if ((filp->f_flags & O_NONBLOCK)) {
6633                         return -EAGAIN;
6634                 }
6635
6636                 /*
6637                  * We block until we have read something and tracing is
6638                  * disabled. If tracing is disabled before anything has
6639                  * been read, we keep blocking. This allows a user to cat
6640                  * this file, and then enable tracing. But after we have
6641                  * read something, we give an EOF when tracing is disabled again.
6642                  *
6643                  * iter->pos will be 0 if we haven't read anything.
6644                  */
6645                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6646                         break;
6647
6648                 mutex_unlock(&iter->mutex);
6649
6650                 ret = wait_on_pipe(iter, 0);
6651
6652                 mutex_lock(&iter->mutex);
6653
6654                 if (ret)
6655                         return ret;
6656         }
6657
6658         return 1;
6659 }
6660
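/*
 * Example (editor's sketch): the blocking behaviour above is what a
 * user space reader of trace_pipe sees; opening with O_NONBLOCK makes
 * an empty buffer return -EAGAIN instead of sleeping.  The path
 * assumes the default tracefs mount.
 *
 *	char buf[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		fwrite(buf, 1, n, stdout);	// each read consumes entries
 */
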
6661 /*
6662  * Consumer reader.
6663  */
6664 static ssize_t
6665 tracing_read_pipe(struct file *filp, char __user *ubuf,
6666                   size_t cnt, loff_t *ppos)
6667 {
6668         struct trace_iterator *iter = filp->private_data;
6669         ssize_t sret;
6670
6671         /*
6672          * Avoid more than one consumer on a single file descriptor.
6673          * This is just a matter of trace coherency; the ring buffer itself
6674          * is protected.
6675          */
6676         mutex_lock(&iter->mutex);
6677
6678         /* return any leftover data */
6679         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6680         if (sret != -EBUSY)
6681                 goto out;
6682
6683         trace_seq_init(&iter->seq);
6684
6685         if (iter->trace->read) {
6686                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6687                 if (sret)
6688                         goto out;
6689         }
6690
6691 waitagain:
6692         sret = tracing_wait_pipe(filp);
6693         if (sret <= 0)
6694                 goto out;
6695
6696         /* stop when tracing is finished */
6697         if (trace_empty(iter)) {
6698                 sret = 0;
6699                 goto out;
6700         }
6701
6702         if (cnt >= PAGE_SIZE)
6703                 cnt = PAGE_SIZE - 1;
6704
6705         /* reset all but tr, trace, and overruns */
6706         memset(&iter->seq, 0,
6707                sizeof(struct trace_iterator) -
6708                offsetof(struct trace_iterator, seq));
6709         cpumask_clear(iter->started);
6710         trace_seq_init(&iter->seq);
6711         iter->pos = -1;
6712
6713         trace_event_read_lock();
6714         trace_access_lock(iter->cpu_file);
6715         while (trace_find_next_entry_inc(iter) != NULL) {
6716                 enum print_line_t ret;
6717                 int save_len = iter->seq.seq.len;
6718
6719                 ret = print_trace_line(iter);
6720                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721                         /* don't print partial lines */
6722                         iter->seq.seq.len = save_len;
6723                         break;
6724                 }
6725                 if (ret != TRACE_TYPE_NO_CONSUME)
6726                         trace_consume(iter);
6727
6728                 if (trace_seq_used(&iter->seq) >= cnt)
6729                         break;
6730
6731                 /*
6732                  * Setting the full flag means we reached the trace_seq buffer
6733                  * size and should have left via the partial output condition
6734                  * above. One of the trace_seq_*() functions is not used properly.
6735                  */
6736                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6737                           iter->ent->type);
6738         }
6739         trace_access_unlock(iter->cpu_file);
6740         trace_event_read_unlock();
6741
6742         /* Now copy what we have to the user */
6743         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6744         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6745                 trace_seq_init(&iter->seq);
6746
6747         /*
6748          * If there was nothing to send to user, in spite of consuming trace
6749          * entries, go back to wait for more entries.
6750          */
6751         if (sret == -EBUSY)
6752                 goto waitagain;
6753
6754 out:
6755         mutex_unlock(&iter->mutex);
6756
6757         return sret;
6758 }
6759
6760 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6761                                      unsigned int idx)
6762 {
6763         __free_page(spd->pages[idx]);
6764 }
6765
6766 static size_t
6767 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6768 {
6769         size_t count;
6770         int save_len;
6771         int ret;
6772
6773         /* Seq buffer is page-sized, exactly what we need. */
6774         for (;;) {
6775                 save_len = iter->seq.seq.len;
6776                 ret = print_trace_line(iter);
6777
6778                 if (trace_seq_has_overflowed(&iter->seq)) {
6779                         iter->seq.seq.len = save_len;
6780                         break;
6781                 }
6782
6783                 /*
6784                  * This should not be hit, because it should only
6785                  * be set if the iter->seq overflowed. But check it
6786                  * anyway to be safe.
6787                  */
6788                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789                         iter->seq.seq.len = save_len;
6790                         break;
6791                 }
6792
6793                 count = trace_seq_used(&iter->seq) - save_len;
6794                 if (rem < count) {
6795                         rem = 0;
6796                         iter->seq.seq.len = save_len;
6797                         break;
6798                 }
6799
6800                 if (ret != TRACE_TYPE_NO_CONSUME)
6801                         trace_consume(iter);
6802                 rem -= count;
6803                 if (!trace_find_next_entry_inc(iter))   {
6804                         rem = 0;
6805                         iter->ent = NULL;
6806                         break;
6807                 }
6808         }
6809
6810         return rem;
6811 }
6812
6813 static ssize_t tracing_splice_read_pipe(struct file *filp,
6814                                         loff_t *ppos,
6815                                         struct pipe_inode_info *pipe,
6816                                         size_t len,
6817                                         unsigned int flags)
6818 {
6819         struct page *pages_def[PIPE_DEF_BUFFERS];
6820         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6821         struct trace_iterator *iter = filp->private_data;
6822         struct splice_pipe_desc spd = {
6823                 .pages          = pages_def,
6824                 .partial        = partial_def,
6825                 .nr_pages       = 0, /* This gets updated below. */
6826                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6827                 .ops            = &default_pipe_buf_ops,
6828                 .spd_release    = tracing_spd_release_pipe,
6829         };
6830         ssize_t ret;
6831         size_t rem;
6832         unsigned int i;
6833
6834         if (splice_grow_spd(pipe, &spd))
6835                 return -ENOMEM;
6836
6837         mutex_lock(&iter->mutex);
6838
6839         if (iter->trace->splice_read) {
6840                 ret = iter->trace->splice_read(iter, filp,
6841                                                ppos, pipe, len, flags);
6842                 if (ret)
6843                         goto out_err;
6844         }
6845
6846         ret = tracing_wait_pipe(filp);
6847         if (ret <= 0)
6848                 goto out_err;
6849
6850         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6851                 ret = -EFAULT;
6852                 goto out_err;
6853         }
6854
6855         trace_event_read_lock();
6856         trace_access_lock(iter->cpu_file);
6857
6858         /* Fill as many pages as possible. */
6859         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6860                 spd.pages[i] = alloc_page(GFP_KERNEL);
6861                 if (!spd.pages[i])
6862                         break;
6863
6864                 rem = tracing_fill_pipe_page(rem, iter);
6865
6866                 /* Copy the data into the page, so we can start over. */
6867                 ret = trace_seq_to_buffer(&iter->seq,
6868                                           page_address(spd.pages[i]),
6869                                           trace_seq_used(&iter->seq));
6870                 if (ret < 0) {
6871                         __free_page(spd.pages[i]);
6872                         break;
6873                 }
6874                 spd.partial[i].offset = 0;
6875                 spd.partial[i].len = trace_seq_used(&iter->seq);
6876
6877                 trace_seq_init(&iter->seq);
6878         }
6879
6880         trace_access_unlock(iter->cpu_file);
6881         trace_event_read_unlock();
6882         mutex_unlock(&iter->mutex);
6883
6884         spd.nr_pages = i;
6885
6886         if (i)
6887                 ret = splice_to_pipe(pipe, &spd);
6888         else
6889                 ret = 0;
6890 out:
6891         splice_shrink_spd(&spd);
6892         return ret;
6893
6894 out_err:
6895         mutex_unlock(&iter->mutex);
6896         goto out;
6897 }
6898
6899 static ssize_t
6900 tracing_entries_read(struct file *filp, char __user *ubuf,
6901                      size_t cnt, loff_t *ppos)
6902 {
6903         struct inode *inode = file_inode(filp);
6904         struct trace_array *tr = inode->i_private;
6905         int cpu = tracing_get_cpu(inode);
6906         char buf[64];
6907         int r = 0;
6908         ssize_t ret;
6909
6910         mutex_lock(&trace_types_lock);
6911
6912         if (cpu == RING_BUFFER_ALL_CPUS) {
6913                 int cpu, buf_size_same;
6914                 unsigned long size;
6915
6916                 size = 0;
6917                 buf_size_same = 1;
6918                 /* check if all cpu sizes are the same */
6919                 for_each_tracing_cpu(cpu) {
6920                         /* fill in the size from the first enabled cpu */
6921                         if (size == 0)
6922                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6923                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6924                                 buf_size_same = 0;
6925                                 break;
6926                         }
6927                 }
6928
6929                 if (buf_size_same) {
6930                         if (!ring_buffer_expanded)
6931                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6932                                             size >> 10,
6933                                             trace_buf_size >> 10);
6934                         else
6935                                 r = sprintf(buf, "%lu\n", size >> 10);
6936                 } else
6937                         r = sprintf(buf, "X\n");
6938         } else
6939                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6940
6941         mutex_unlock(&trace_types_lock);
6942
6943         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6944         return ret;
6945 }
6946
6947 static ssize_t
6948 tracing_entries_write(struct file *filp, const char __user *ubuf,
6949                       size_t cnt, loff_t *ppos)
6950 {
6951         struct inode *inode = file_inode(filp);
6952         struct trace_array *tr = inode->i_private;
6953         unsigned long val;
6954         int ret;
6955
6956         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6957         if (ret)
6958                 return ret;
6959
6960         /* must have at least 1 entry */
6961         if (!val)
6962                 return -EINVAL;
6963
6964         /* value is in KB */
6965         val <<= 10;
6966         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6967         if (ret < 0)
6968                 return ret;
6969
6970         *ppos += cnt;
6971
6972         return cnt;
6973 }
6974
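/*
 * Example (editor's sketch): the value written here is in KB and is
 * shifted to bytes above (val <<= 10).  Through the tracefs
 * buffer_size_kb file (the usual consumer of these fops) a write of
 * 1408 requests 1408 << 10 = 1441792 bytes for each CPU's buffer,
 * while a per_cpu/cpuN/ instance resizes only that CPU, e.g.:
 *
 *	echo 1408 > /sys/kernel/tracing/buffer_size_kb
 */
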
6975 static ssize_t
6976 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6977                                 size_t cnt, loff_t *ppos)
6978 {
6979         struct trace_array *tr = filp->private_data;
6980         char buf[64];
6981         int r, cpu;
6982         unsigned long size = 0, expanded_size = 0;
6983
6984         mutex_lock(&trace_types_lock);
6985         for_each_tracing_cpu(cpu) {
6986                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6987                 if (!ring_buffer_expanded)
6988                         expanded_size += trace_buf_size >> 10;
6989         }
6990         if (ring_buffer_expanded)
6991                 r = sprintf(buf, "%lu\n", size);
6992         else
6993                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6994         mutex_unlock(&trace_types_lock);
6995
6996         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6997 }
6998
6999 static ssize_t
7000 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7001                           size_t cnt, loff_t *ppos)
7002 {
7003         /*
7004          * There is no need to read what the user has written; this function
7005          * exists just so that "echo" to this file does not return an error.
7006          */
7007
7008         *ppos += cnt;
7009
7010         return cnt;
7011 }
7012
7013 static int
7014 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7015 {
7016         struct trace_array *tr = inode->i_private;
7017
7018         /* Disable tracing if TRACE_ITER_STOP_ON_FREE is set */
7019         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7020                 tracer_tracing_off(tr);
7021         /* resize the ring buffer to 0 */
7022         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7023
7024         trace_array_put(tr);
7025
7026         return 0;
7027 }
7028
7029 static ssize_t
7030 tracing_mark_write(struct file *filp, const char __user *ubuf,
7031                                         size_t cnt, loff_t *fpos)
7032 {
7033         struct trace_array *tr = filp->private_data;
7034         struct ring_buffer_event *event;
7035         enum event_trigger_type tt = ETT_NONE;
7036         struct trace_buffer *buffer;
7037         struct print_entry *entry;
7038         ssize_t written;
7039         int size;
7040         int len;
7041
7042 /* Used in tracing_mark_raw_write() as well */
7043 #define FAULTED_STR "<faulted>"
7044 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7045
7046         if (tracing_disabled)
7047                 return -EINVAL;
7048
7049         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7050                 return -EINVAL;
7051
7052         if (cnt > TRACE_BUF_SIZE)
7053                 cnt = TRACE_BUF_SIZE;
7054
7055         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7056
7057         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7058
7059         /* If less than "<faulted>", then make sure we can still add that */
7060         if (cnt < FAULTED_SIZE)
7061                 size += FAULTED_SIZE - cnt;
7062
7063         buffer = tr->array_buffer.buffer;
7064         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7065                                             tracing_gen_ctx());
7066         if (unlikely(!event))
7067                 /* Ring buffer disabled, return as if not open for write */
7068                 return -EBADF;
7069
7070         entry = ring_buffer_event_data(event);
7071         entry->ip = _THIS_IP_;
7072
7073         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7074         if (len) {
7075                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7076                 cnt = FAULTED_SIZE;
7077                 written = -EFAULT;
7078         } else
7079                 written = cnt;
7080
7081         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7082                 /* do not add \n before testing triggers, but add \0 */
7083                 entry->buf[cnt] = '\0';
7084                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7085         }
7086
7087         if (entry->buf[cnt - 1] != '\n') {
7088                 entry->buf[cnt] = '\n';
7089                 entry->buf[cnt + 1] = '\0';
7090         } else
7091                 entry->buf[cnt] = '\0';
7092
7093         if (static_branch_unlikely(&trace_marker_exports_enabled))
7094                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7095         __buffer_unlock_commit(buffer, event);
7096
7097         if (tt)
7098                 event_triggers_post_call(tr->trace_marker_file, tt);
7099
7100         if (written > 0)
7101                 *fpos += written;
7102
7103         return written;
7104 }
7105
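/*
 * Example (editor's sketch): user space annotates the trace by writing
 * plain text to the trace_marker file; each write becomes one
 * TRACE_PRINT event handled above.  The path assumes the default
 * tracefs mount.
 *
 *	static const char msg[] = "hello from user space";
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *	}
 */
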
7106 /* Limit it for now to 3K (including tag) */
7107 #define RAW_DATA_MAX_SIZE (1024*3)
7108
7109 static ssize_t
7110 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7111                                         size_t cnt, loff_t *fpos)
7112 {
7113         struct trace_array *tr = filp->private_data;
7114         struct ring_buffer_event *event;
7115         struct trace_buffer *buffer;
7116         struct raw_data_entry *entry;
7117         ssize_t written;
7118         int size;
7119         int len;
7120
7121 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7122
7123         if (tracing_disabled)
7124                 return -EINVAL;
7125
7126         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7127                 return -EINVAL;
7128
7129         /* The marker must at least have a tag id */
7130         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7131                 return -EINVAL;
7132
7133         if (cnt > TRACE_BUF_SIZE)
7134                 cnt = TRACE_BUF_SIZE;
7135
7136         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7137
7138         size = sizeof(*entry) + cnt;
7139         if (cnt < FAULT_SIZE_ID)
7140                 size += FAULT_SIZE_ID - cnt;
7141
7142         buffer = tr->array_buffer.buffer;
7143         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7144                                             tracing_gen_ctx());
7145         if (!event)
7146                 /* Ring buffer disabled, return as if not open for write */
7147                 return -EBADF;
7148
7149         entry = ring_buffer_event_data(event);
7150
7151         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7152         if (len) {
7153                 entry->id = -1;
7154                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7155                 written = -EFAULT;
7156         } else
7157                 written = cnt;
7158
7159         __buffer_unlock_commit(buffer, event);
7160
7161         if (written > 0)
7162                 *fpos += written;
7163
7164         return written;
7165 }
7166
7167 static int tracing_clock_show(struct seq_file *m, void *v)
7168 {
7169         struct trace_array *tr = m->private;
7170         int i;
7171
7172         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7173                 seq_printf(m,
7174                         "%s%s%s%s", i ? " " : "",
7175                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7176                         i == tr->clock_id ? "]" : "");
7177         seq_putc(m, '\n');
7178
7179         return 0;
7180 }
7181
7182 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7183 {
7184         int i;
7185
7186         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7187                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7188                         break;
7189         }
7190         if (i == ARRAY_SIZE(trace_clocks))
7191                 return -EINVAL;
7192
7193         mutex_lock(&trace_types_lock);
7194
7195         tr->clock_id = i;
7196
7197         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7198
7199         /*
7200          * New clock may not be consistent with the previous clock.
7201          * Reset the buffer so that it doesn't have incomparable timestamps.
7202          */
7203         tracing_reset_online_cpus(&tr->array_buffer);
7204
7205 #ifdef CONFIG_TRACER_MAX_TRACE
7206         if (tr->max_buffer.buffer)
7207                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7208         tracing_reset_online_cpus(&tr->max_buffer);
7209 #endif
7210
7211         mutex_unlock(&trace_types_lock);
7212
7213         return 0;
7214 }
7215
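/*
 * Example (editor's sketch): from user space the clock is chosen by
 * writing one of the names listed in the trace_clock file back to it.
 * Note the reset above: switching clocks discards buffered events.
 * The path assumes the default tracefs mount.
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	echo mono > /sys/kernel/tracing/trace_clock
 */
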
7216 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7217                                    size_t cnt, loff_t *fpos)
7218 {
7219         struct seq_file *m = filp->private_data;
7220         struct trace_array *tr = m->private;
7221         char buf[64];
7222         const char *clockstr;
7223         int ret;
7224
7225         if (cnt >= sizeof(buf))
7226                 return -EINVAL;
7227
7228         if (copy_from_user(buf, ubuf, cnt))
7229                 return -EFAULT;
7230
7231         buf[cnt] = 0;
7232
7233         clockstr = strstrip(buf);
7234
7235         ret = tracing_set_clock(tr, clockstr);
7236         if (ret)
7237                 return ret;
7238
7239         *fpos += cnt;
7240
7241         return cnt;
7242 }
7243
7244 static int tracing_clock_open(struct inode *inode, struct file *file)
7245 {
7246         struct trace_array *tr = inode->i_private;
7247         int ret;
7248
7249         ret = tracing_check_open_get_tr(tr);
7250         if (ret)
7251                 return ret;
7252
7253         ret = single_open(file, tracing_clock_show, inode->i_private);
7254         if (ret < 0)
7255                 trace_array_put(tr);
7256
7257         return ret;
7258 }
7259
7260 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7261 {
7262         struct trace_array *tr = m->private;
7263
7264         mutex_lock(&trace_types_lock);
7265
7266         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7267                 seq_puts(m, "delta [absolute]\n");
7268         else
7269                 seq_puts(m, "[delta] absolute\n");
7270
7271         mutex_unlock(&trace_types_lock);
7272
7273         return 0;
7274 }
7275
7276 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7277 {
7278         struct trace_array *tr = inode->i_private;
7279         int ret;
7280
7281         ret = tracing_check_open_get_tr(tr);
7282         if (ret)
7283                 return ret;
7284
7285         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7286         if (ret < 0)
7287                 trace_array_put(tr);
7288
7289         return ret;
7290 }
7291
7292 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7293 {
7294         if (rbe == this_cpu_read(trace_buffered_event))
7295                 return ring_buffer_time_stamp(buffer);
7296
7297         return ring_buffer_event_time_stamp(buffer, rbe);
7298 }
7299
7300 /*
7301  * Set or disable using the per CPU trace_buffered_event when possible.
7302  */
7303 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7304 {
7305         int ret = 0;
7306
7307         mutex_lock(&trace_types_lock);
7308
7309         if (set && tr->no_filter_buffering_ref++)
7310                 goto out;
7311
7312         if (!set) {
7313                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7314                         ret = -EINVAL;
7315                         goto out;
7316                 }
7317
7318                 --tr->no_filter_buffering_ref;
7319         }
7320  out:
7321         mutex_unlock(&trace_types_lock);
7322
7323         return ret;
7324 }
7325
7326 struct ftrace_buffer_info {
7327         struct trace_iterator   iter;
7328         void                    *spare;
7329         unsigned int            spare_cpu;
7330         unsigned int            read;
7331 };
7332
7333 #ifdef CONFIG_TRACER_SNAPSHOT
7334 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7335 {
7336         struct trace_array *tr = inode->i_private;
7337         struct trace_iterator *iter;
7338         struct seq_file *m;
7339         int ret;
7340
7341         ret = tracing_check_open_get_tr(tr);
7342         if (ret)
7343                 return ret;
7344
7345         if (file->f_mode & FMODE_READ) {
7346                 iter = __tracing_open(inode, file, true);
7347                 if (IS_ERR(iter))
7348                         ret = PTR_ERR(iter);
7349         } else {
7350                 /* Writes still need the seq_file to hold the private data */
7351                 ret = -ENOMEM;
7352                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7353                 if (!m)
7354                         goto out;
7355                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7356                 if (!iter) {
7357                         kfree(m);
7358                         goto out;
7359                 }
7360                 ret = 0;
7361
7362                 iter->tr = tr;
7363                 iter->array_buffer = &tr->max_buffer;
7364                 iter->cpu_file = tracing_get_cpu(inode);
7365                 m->private = iter;
7366                 file->private_data = m;
7367         }
7368 out:
7369         if (ret < 0)
7370                 trace_array_put(tr);
7371
7372         return ret;
7373 }
7374
7375 static ssize_t
7376 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7377                        loff_t *ppos)
7378 {
7379         struct seq_file *m = filp->private_data;
7380         struct trace_iterator *iter = m->private;
7381         struct trace_array *tr = iter->tr;
7382         unsigned long val;
7383         int ret;
7384
7385         ret = tracing_update_buffers();
7386         if (ret < 0)
7387                 return ret;
7388
7389         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7390         if (ret)
7391                 return ret;
7392
7393         mutex_lock(&trace_types_lock);
7394
7395         if (tr->current_trace->use_max_tr) {
7396                 ret = -EBUSY;
7397                 goto out;
7398         }
7399
7400         arch_spin_lock(&tr->max_lock);
7401         if (tr->cond_snapshot)
7402                 ret = -EBUSY;
7403         arch_spin_unlock(&tr->max_lock);
7404         if (ret)
7405                 goto out;
7406
7407         switch (val) {
7408         case 0:
7409                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7410                         ret = -EINVAL;
7411                         break;
7412                 }
7413                 if (tr->allocated_snapshot)
7414                         free_snapshot(tr);
7415                 break;
7416         case 1:
7417 /* Only allow per-cpu swap if the ring buffer supports it */
7418 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7419                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7420                         ret = -EINVAL;
7421                         break;
7422                 }
7423 #endif
7424                 if (tr->allocated_snapshot)
7425                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7426                                         &tr->array_buffer, iter->cpu_file);
7427                 else
7428                         ret = tracing_alloc_snapshot_instance(tr);
7429                 if (ret < 0)
7430                         break;
7431                 local_irq_disable();
7432                 /* Now, we're going to swap */
7433                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7434                         update_max_tr(tr, current, smp_processor_id(), NULL);
7435                 else
7436                         update_max_tr_single(tr, current, iter->cpu_file);
7437                 local_irq_enable();
7438                 break;
7439         default:
7440                 if (tr->allocated_snapshot) {
7441                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7442                                 tracing_reset_online_cpus(&tr->max_buffer);
7443                         else
7444                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7445                 }
7446                 break;
7447         }
7448
7449         if (ret >= 0) {
7450                 *ppos += cnt;
7451                 ret = cnt;
7452         }
7453 out:
7454         mutex_unlock(&trace_types_lock);
7455         return ret;
7456 }
7457
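/*
 * Example (editor's sketch): the value written to the snapshot file
 * selects the action taken by the switch statement above: 0 frees the
 * snapshot buffer, 1 allocates it if needed and takes a snapshot, and
 * any other value clears the snapshot contents.  The path assumes the
 * default tracefs mount.
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	(take a snapshot)
 *	echo 2 > /sys/kernel/tracing/snapshot	(clear the snapshot)
 *	echo 0 > /sys/kernel/tracing/snapshot	(free the snapshot buffer)
 */
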
7458 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7459 {
7460         struct seq_file *m = file->private_data;
7461         int ret;
7462
7463         ret = tracing_release(inode, file);
7464
7465         if (file->f_mode & FMODE_READ)
7466                 return ret;
7467
7468         /* If write only, the seq_file is just a stub */
7469         if (m)
7470                 kfree(m->private);
7471         kfree(m);
7472
7473         return 0;
7474 }
7475
7476 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7477 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7478                                     size_t count, loff_t *ppos);
7479 static int tracing_buffers_release(struct inode *inode, struct file *file);
7480 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7481                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7482
7483 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7484 {
7485         struct ftrace_buffer_info *info;
7486         int ret;
7487
7488         /* The following checks for tracefs lockdown */
7489         ret = tracing_buffers_open(inode, filp);
7490         if (ret < 0)
7491                 return ret;
7492
7493         info = filp->private_data;
7494
7495         if (info->iter.trace->use_max_tr) {
7496                 tracing_buffers_release(inode, filp);
7497                 return -EBUSY;
7498         }
7499
7500         info->iter.snapshot = true;
7501         info->iter.array_buffer = &info->iter.tr->max_buffer;
7502
7503         return ret;
7504 }
7505
7506 #endif /* CONFIG_TRACER_SNAPSHOT */
7507
7508
7509 static const struct file_operations tracing_thresh_fops = {
7510         .open           = tracing_open_generic,
7511         .read           = tracing_thresh_read,
7512         .write          = tracing_thresh_write,
7513         .llseek         = generic_file_llseek,
7514 };
7515
7516 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7517 static const struct file_operations tracing_max_lat_fops = {
7518         .open           = tracing_open_generic,
7519         .read           = tracing_max_lat_read,
7520         .write          = tracing_max_lat_write,
7521         .llseek         = generic_file_llseek,
7522 };
7523 #endif
7524
7525 static const struct file_operations set_tracer_fops = {
7526         .open           = tracing_open_generic,
7527         .read           = tracing_set_trace_read,
7528         .write          = tracing_set_trace_write,
7529         .llseek         = generic_file_llseek,
7530 };
7531
7532 static const struct file_operations tracing_pipe_fops = {
7533         .open           = tracing_open_pipe,
7534         .poll           = tracing_poll_pipe,
7535         .read           = tracing_read_pipe,
7536         .splice_read    = tracing_splice_read_pipe,
7537         .release        = tracing_release_pipe,
7538         .llseek         = no_llseek,
7539 };
7540
7541 static const struct file_operations tracing_entries_fops = {
7542         .open           = tracing_open_generic_tr,
7543         .read           = tracing_entries_read,
7544         .write          = tracing_entries_write,
7545         .llseek         = generic_file_llseek,
7546         .release        = tracing_release_generic_tr,
7547 };
7548
7549 static const struct file_operations tracing_total_entries_fops = {
7550         .open           = tracing_open_generic_tr,
7551         .read           = tracing_total_entries_read,
7552         .llseek         = generic_file_llseek,
7553         .release        = tracing_release_generic_tr,
7554 };
7555
7556 static const struct file_operations tracing_free_buffer_fops = {
7557         .open           = tracing_open_generic_tr,
7558         .write          = tracing_free_buffer_write,
7559         .release        = tracing_free_buffer_release,
7560 };
7561
7562 static const struct file_operations tracing_mark_fops = {
7563         .open           = tracing_open_generic_tr,
7564         .write          = tracing_mark_write,
7565         .llseek         = generic_file_llseek,
7566         .release        = tracing_release_generic_tr,
7567 };
7568
7569 static const struct file_operations tracing_mark_raw_fops = {
7570         .open           = tracing_open_generic_tr,
7571         .write          = tracing_mark_raw_write,
7572         .llseek         = generic_file_llseek,
7573         .release        = tracing_release_generic_tr,
7574 };
7575
7576 static const struct file_operations trace_clock_fops = {
7577         .open           = tracing_clock_open,
7578         .read           = seq_read,
7579         .llseek         = seq_lseek,
7580         .release        = tracing_single_release_tr,
7581         .write          = tracing_clock_write,
7582 };
7583
7584 static const struct file_operations trace_time_stamp_mode_fops = {
7585         .open           = tracing_time_stamp_mode_open,
7586         .read           = seq_read,
7587         .llseek         = seq_lseek,
7588         .release        = tracing_single_release_tr,
7589 };
7590
7591 #ifdef CONFIG_TRACER_SNAPSHOT
7592 static const struct file_operations snapshot_fops = {
7593         .open           = tracing_snapshot_open,
7594         .read           = seq_read,
7595         .write          = tracing_snapshot_write,
7596         .llseek         = tracing_lseek,
7597         .release        = tracing_snapshot_release,
7598 };
7599
7600 static const struct file_operations snapshot_raw_fops = {
7601         .open           = snapshot_raw_open,
7602         .read           = tracing_buffers_read,
7603         .release        = tracing_buffers_release,
7604         .splice_read    = tracing_buffers_splice_read,
7605         .llseek         = no_llseek,
7606 };
7607
7608 #endif /* CONFIG_TRACER_SNAPSHOT */
7609
7610 /*
7611  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7612  * @filp: The active open file structure
7613  * @ubuf: The userspace provided buffer containing the value to write
7614  * @cnt: The maximum number of bytes to read
7615  * @ppos: The current "file" position
7616  *
7617  * This function implements the write interface for a struct trace_min_max_param.
7618  * The filp->private_data must point to a trace_min_max_param structure that
7619  * defines where to write the value, the min and the max acceptable values,
7620  * and a lock to protect the write.
7621  */
7622 static ssize_t
7623 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7624 {
7625         struct trace_min_max_param *param = filp->private_data;
7626         u64 val;
7627         int err;
7628
7629         if (!param)
7630                 return -EFAULT;
7631
7632         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7633         if (err)
7634                 return err;
7635
7636         if (param->lock)
7637                 mutex_lock(param->lock);
7638
7639         if (param->min && val < *param->min)
7640                 err = -EINVAL;
7641
7642         if (param->max && val > *param->max)
7643                 err = -EINVAL;
7644
7645         if (!err)
7646                 *param->val = val;
7647
7648         if (param->lock)
7649                 mutex_unlock(param->lock);
7650
7651         if (err)
7652                 return err;
7653
7654         return cnt;
7655 }
7656
7657 /*
7658  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7659  * @filp: The active open file structure
7660  * @ubuf: The userspace provided buffer to read value into
7661  * @cnt: The maximum number of bytes to read
7662  * @ppos: The current "file" position
7663  *
7664  * This function implements the read interface for a struct trace_min_max_param.
7665  * The filp->private_data must point to a trace_min_max_param struct with valid
7666  * data.
7667  */
7668 static ssize_t
7669 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7670 {
7671         struct trace_min_max_param *param = filp->private_data;
7672         char buf[U64_STR_SIZE];
7673         int len;
7674         u64 val;
7675
7676         if (!param)
7677                 return -EFAULT;
7678
7679         val = *param->val;
7680
7681         if (cnt > sizeof(buf))
7682                 cnt = sizeof(buf);
7683
7684         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7685
7686         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7687 }
7688
7689 const struct file_operations trace_min_max_fops = {
7690         .open           = tracing_open_generic,
7691         .read           = trace_min_max_read,
7692         .write          = trace_min_max_write,
7693 };
7694
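/*
 * Example (editor's sketch): these fops expect filp->private_data to
 * point at a trace_min_max_param.  Everything named example_* below is
 * hypothetical, and "parent" stands for the caller's tracefs directory
 * dentry; trace_min_max_fops and trace_create_file() are the real
 * pieces.
 *
 *	static DEFINE_MUTEX(example_lock);
 *	static u64 example_val = 50, example_min = 1, example_max = 100;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_knob", 0644, parent,
 *			  &example_param, &trace_min_max_fops);
 */
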
7695 #define TRACING_LOG_ERRS_MAX    8
7696 #define TRACING_LOG_LOC_MAX     128
7697
7698 #define CMD_PREFIX "  Command: "
7699
7700 struct err_info {
7701         const char      **errs; /* ptr to loc-specific array of err strings */
7702         u8              type;   /* index into errs -> specific err string */
7703         u8              pos;    /* caret position in cmd (cmd len <= MAX_FILTER_STR_VAL = 256) */
7704         u64             ts;
7705 };
7706
7707 struct tracing_log_err {
7708         struct list_head        list;
7709         struct err_info         info;
7710         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7711         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7712 };
7713
7714 static DEFINE_MUTEX(tracing_err_log_lock);
7715
7716 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7717 {
7718         struct tracing_log_err *err;
7719
7720         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7721                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7722                 if (!err)
7723                         err = ERR_PTR(-ENOMEM);
7724                 tr->n_err_log_entries++;
7725
7726                 return err;
7727         }
7728
7729         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7730         list_del(&err->list);
7731
7732         return err;
7733 }
7734
7735 /**
7736  * err_pos - find the position of a string within a command for error careting
7737  * @cmd: The tracing command that caused the error
7738  * @str: The string to position the caret at within @cmd
7739  *
7740  * Finds the position of the first occurrence of @str within @cmd.  The
7741  * return value can be passed to tracing_log_err() for caret placement
7742  * within @cmd.
7743  *
7744  * Returns the index within @cmd of the first occurrence of @str or 0
7745  * if @str was not found.
7746  */
7747 unsigned int err_pos(char *cmd, const char *str)
7748 {
7749         char *found;
7750
7751         if (WARN_ON(!strlen(cmd)))
7752                 return 0;
7753
7754         found = strstr(cmd, str);
7755         if (found)
7756                 return found - cmd;
7757
7758         return 0;
7759 }
7760
7761 /**
7762  * tracing_log_err - write an error to the tracing error log
7763  * @tr: The associated trace array for the error (NULL for top level array)
7764  * @loc: A string describing where the error occurred
7765  * @cmd: The tracing command that caused the error
7766  * @errs: The array of loc-specific static error strings
7767  * @type: The index into errs[], which produces the specific static err string
7768  * @pos: The position the caret should be placed in the cmd
7769  *
7770  * Writes an error into tracing/error_log of the form:
7771  *
7772  * <loc>: error: <text>
7773  *   Command: <cmd>
7774  *              ^
7775  *
7776  * tracing/error_log is a small log file containing the last
7777  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7778  * unless there has been a tracing error, and the error log can be
7779  * cleared and have its memory freed by writing the empty string in
7780  * truncation mode to it, i.e. echo > tracing/error_log.
7781  *
7782  * NOTE: the @errs array along with the @type param are used to
7783  * produce a static error string - this string is not copied and saved
7784  * when the error is logged - only a pointer to it is saved.  See
7785  * existing callers for examples of how static strings are typically
7786  * defined for use with tracing_log_err().
7787  */
7788 void tracing_log_err(struct trace_array *tr,
7789                      const char *loc, const char *cmd,
7790                      const char **errs, u8 type, u8 pos)
7791 {
7792         struct tracing_log_err *err;
7793
7794         if (!tr)
7795                 tr = &global_trace;
7796
7797         mutex_lock(&tracing_err_log_lock);
7798         err = get_tracing_log_err(tr);
7799         if (PTR_ERR(err) == -ENOMEM) {
7800                 mutex_unlock(&tracing_err_log_lock);
7801                 return;
7802         }
7803
7804         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7805         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7806
7807         err->info.errs = errs;
7808         err->info.type = type;
7809         err->info.pos = pos;
7810         err->info.ts = local_clock();
7811
7812         list_add_tail(&err->list, &tr->err_log);
7813         mutex_unlock(&tracing_err_log_lock);
7814 }
7815
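/*
 * Example (editor's sketch): a typical caller keeps a static array of
 * error strings indexed by an enum and positions the caret with
 * err_pos().  All example_* names below are hypothetical;
 * tracing_log_err() and err_pos() are the real interfaces.
 *
 *	static const char *example_errs[] = {
 *		"field not found",
 *		"duplicate field",
 *	};
 *
 *	enum { EXAMPLE_ERR_FIELD_NOT_FOUND, EXAMPLE_ERR_DUP_FIELD };
 *
 *	tracing_log_err(tr, "example: parse", cmd, example_errs,
 *			EXAMPLE_ERR_FIELD_NOT_FOUND, err_pos(cmd, "badfield"));
 */
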
7816 static void clear_tracing_err_log(struct trace_array *tr)
7817 {
7818         struct tracing_log_err *err, *next;
7819
7820         mutex_lock(&tracing_err_log_lock);
7821         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7822                 list_del(&err->list);
7823                 kfree(err);
7824         }
7825
7826         tr->n_err_log_entries = 0;
7827         mutex_unlock(&tracing_err_log_lock);
7828 }
7829
7830 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7831 {
7832         struct trace_array *tr = m->private;
7833
7834         mutex_lock(&tracing_err_log_lock);
7835
7836         return seq_list_start(&tr->err_log, *pos);
7837 }
7838
7839 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7840 {
7841         struct trace_array *tr = m->private;
7842
7843         return seq_list_next(v, &tr->err_log, pos);
7844 }
7845
7846 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7847 {
7848         mutex_unlock(&tracing_err_log_lock);
7849 }
7850
7851 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7852 {
7853         u8 i;
7854
7855         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7856                 seq_putc(m, ' ');
7857         for (i = 0; i < pos; i++)
7858                 seq_putc(m, ' ');
7859         seq_puts(m, "^\n");
7860 }
7861
7862 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7863 {
7864         struct tracing_log_err *err = v;
7865
7866         if (err) {
7867                 const char *err_text = err->info.errs[err->info.type];
7868                 u64 sec = err->info.ts;
7869                 u32 nsec;
7870
7871                 nsec = do_div(sec, NSEC_PER_SEC);
7872                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7873                            err->loc, err_text);
7874                 seq_printf(m, "%s", err->cmd);
7875                 tracing_err_log_show_pos(m, err->info.pos);
7876         }
7877
7878         return 0;
7879 }
7880
7881 static const struct seq_operations tracing_err_log_seq_ops = {
7882         .start  = tracing_err_log_seq_start,
7883         .next   = tracing_err_log_seq_next,
7884         .stop   = tracing_err_log_seq_stop,
7885         .show   = tracing_err_log_seq_show
7886 };
7887
7888 static int tracing_err_log_open(struct inode *inode, struct file *file)
7889 {
7890         struct trace_array *tr = inode->i_private;
7891         int ret = 0;
7892
7893         ret = tracing_check_open_get_tr(tr);
7894         if (ret)
7895                 return ret;
7896
7897         /* If this file was opened for write, then erase contents */
7898         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7899                 clear_tracing_err_log(tr);
7900
7901         if (file->f_mode & FMODE_READ) {
7902                 ret = seq_open(file, &tracing_err_log_seq_ops);
7903                 if (!ret) {
7904                         struct seq_file *m = file->private_data;
7905                         m->private = tr;
7906                 } else {
7907                         trace_array_put(tr);
7908                 }
7909         }
7910         return ret;
7911 }
7912
7913 static ssize_t tracing_err_log_write(struct file *file,
7914                                      const char __user *buffer,
7915                                      size_t count, loff_t *ppos)
7916 {
7917         return count;
7918 }
7919
7920 static int tracing_err_log_release(struct inode *inode, struct file *file)
7921 {
7922         struct trace_array *tr = inode->i_private;
7923
7924         trace_array_put(tr);
7925
7926         if (file->f_mode & FMODE_READ)
7927                 seq_release(inode, file);
7928
7929         return 0;
7930 }
7931
7932 static const struct file_operations tracing_err_log_fops = {
7933         .open           = tracing_err_log_open,
7934         .write          = tracing_err_log_write,
7935         .read           = seq_read,
7936         .llseek         = seq_lseek,
7937         .release        = tracing_err_log_release,
7938 };
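
/*
 * Illustrative shape of the error_log output produced by the seq handlers
 * above; the timestamp and error text are examples only:
 *
 *	# cat /sys/kernel/tracing/error_log
 *	[  117.404795] hist:sched:sched_switch: error: Duplicate field name
 *	  Command: keys=next_pid:vals=next_pid
 *	                              ^
 *	# echo > /sys/kernel/tracing/error_log		// clear and free the log
 */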
7939
7940 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7941 {
7942         struct trace_array *tr = inode->i_private;
7943         struct ftrace_buffer_info *info;
7944         int ret;
7945
7946         ret = tracing_check_open_get_tr(tr);
7947         if (ret)
7948                 return ret;
7949
7950         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7951         if (!info) {
7952                 trace_array_put(tr);
7953                 return -ENOMEM;
7954         }
7955
7956         mutex_lock(&trace_types_lock);
7957
7958         info->iter.tr           = tr;
7959         info->iter.cpu_file     = tracing_get_cpu(inode);
7960         info->iter.trace        = tr->current_trace;
7961         info->iter.array_buffer = &tr->array_buffer;
7962         info->spare             = NULL;
7963         /* Force reading ring buffer for first read */
7964         info->read              = (unsigned int)-1;
7965
7966         filp->private_data = info;
7967
7968         tr->trace_ref++;
7969
7970         mutex_unlock(&trace_types_lock);
7971
7972         ret = nonseekable_open(inode, filp);
7973         if (ret < 0)
7974                 trace_array_put(tr);
7975
7976         return ret;
7977 }
7978
7979 static __poll_t
7980 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7981 {
7982         struct ftrace_buffer_info *info = filp->private_data;
7983         struct trace_iterator *iter = &info->iter;
7984
7985         return trace_poll(iter, filp, poll_table);
7986 }
7987
7988 static ssize_t
7989 tracing_buffers_read(struct file *filp, char __user *ubuf,
7990                      size_t count, loff_t *ppos)
7991 {
7992         struct ftrace_buffer_info *info = filp->private_data;
7993         struct trace_iterator *iter = &info->iter;
7994         ssize_t ret = 0;
7995         ssize_t size;
7996
7997         if (!count)
7998                 return 0;
7999
8000 #ifdef CONFIG_TRACER_MAX_TRACE
8001         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8002                 return -EBUSY;
8003 #endif
8004
8005         if (!info->spare) {
8006                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8007                                                           iter->cpu_file);
8008                 if (IS_ERR(info->spare)) {
8009                         ret = PTR_ERR(info->spare);
8010                         info->spare = NULL;
8011                 } else {
8012                         info->spare_cpu = iter->cpu_file;
8013                 }
8014         }
8015         if (!info->spare)
8016                 return ret;
8017
8018         /* Do we have previous read data to read? */
8019         if (info->read < PAGE_SIZE)
8020                 goto read;
8021
8022  again:
8023         trace_access_lock(iter->cpu_file);
8024         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8025                                     &info->spare,
8026                                     count,
8027                                     iter->cpu_file, 0);
8028         trace_access_unlock(iter->cpu_file);
8029
8030         if (ret < 0) {
8031                 if (trace_empty(iter)) {
8032                         if ((filp->f_flags & O_NONBLOCK))
8033                                 return -EAGAIN;
8034
8035                         ret = wait_on_pipe(iter, 0);
8036                         if (ret)
8037                                 return ret;
8038
8039                         goto again;
8040                 }
8041                 return 0;
8042         }
8043
8044         info->read = 0;
8045  read:
8046         size = PAGE_SIZE - info->read;
8047         if (size > count)
8048                 size = count;
8049
8050         ret = copy_to_user(ubuf, info->spare + info->read, size);
8051         if (ret == size)
8052                 return -EFAULT;
8053
8054         size -= ret;
8055
8056         *ppos += size;
8057         info->read += size;
8058
8059         return size;
8060 }
8061
8062 static int tracing_buffers_release(struct inode *inode, struct file *file)
8063 {
8064         struct ftrace_buffer_info *info = file->private_data;
8065         struct trace_iterator *iter = &info->iter;
8066
8067         mutex_lock(&trace_types_lock);
8068
8069         iter->tr->trace_ref--;
8070
8071         __trace_array_put(iter->tr);
8072
8073         if (info->spare)
8074                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8075                                            info->spare_cpu, info->spare);
8076         kvfree(info);
8077
8078         mutex_unlock(&trace_types_lock);
8079
8080         return 0;
8081 }
8082
8083 struct buffer_ref {
8084         struct trace_buffer     *buffer;
8085         void                    *page;
8086         int                     cpu;
8087         refcount_t              refcount;
8088 };
8089
8090 static void buffer_ref_release(struct buffer_ref *ref)
8091 {
8092         if (!refcount_dec_and_test(&ref->refcount))
8093                 return;
8094         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8095         kfree(ref);
8096 }
8097
8098 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8099                                     struct pipe_buffer *buf)
8100 {
8101         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8102
8103         buffer_ref_release(ref);
8104         buf->private = 0;
8105 }
8106
8107 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8108                                 struct pipe_buffer *buf)
8109 {
8110         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8111
8112         if (refcount_read(&ref->refcount) > INT_MAX/2)
8113                 return false;
8114
8115         refcount_inc(&ref->refcount);
8116         return true;
8117 }
8118
8119 /* Pipe buffer operations for a buffer. */
8120 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8121         .release                = buffer_pipe_buf_release,
8122         .get                    = buffer_pipe_buf_get,
8123 };
8124
8125 /*
8126  * Callback from splice_to_pipe(): release any pages left in the spd
8127  * if we errored out while filling the pipe.
8128  */
8129 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8130 {
8131         struct buffer_ref *ref =
8132                 (struct buffer_ref *)spd->partial[i].private;
8133
8134         buffer_ref_release(ref);
8135         spd->partial[i].private = 0;
8136 }
8137
8138 static ssize_t
8139 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8140                             struct pipe_inode_info *pipe, size_t len,
8141                             unsigned int flags)
8142 {
8143         struct ftrace_buffer_info *info = file->private_data;
8144         struct trace_iterator *iter = &info->iter;
8145         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8146         struct page *pages_def[PIPE_DEF_BUFFERS];
8147         struct splice_pipe_desc spd = {
8148                 .pages          = pages_def,
8149                 .partial        = partial_def,
8150                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8151                 .ops            = &buffer_pipe_buf_ops,
8152                 .spd_release    = buffer_spd_release,
8153         };
8154         struct buffer_ref *ref;
8155         int entries, i;
8156         ssize_t ret = 0;
8157
8158 #ifdef CONFIG_TRACER_MAX_TRACE
8159         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8160                 return -EBUSY;
8161 #endif
8162
8163         if (*ppos & (PAGE_SIZE - 1))
8164                 return -EINVAL;
8165
8166         if (len & (PAGE_SIZE - 1)) {
8167                 if (len < PAGE_SIZE)
8168                         return -EINVAL;
8169                 len &= PAGE_MASK;
8170         }
8171
8172         if (splice_grow_spd(pipe, &spd))
8173                 return -ENOMEM;
8174
8175  again:
8176         trace_access_lock(iter->cpu_file);
8177         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8178
8179         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8180                 struct page *page;
8181                 int r;
8182
8183                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8184                 if (!ref) {
8185                         ret = -ENOMEM;
8186                         break;
8187                 }
8188
8189                 refcount_set(&ref->refcount, 1);
8190                 ref->buffer = iter->array_buffer->buffer;
8191                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8192                 if (IS_ERR(ref->page)) {
8193                         ret = PTR_ERR(ref->page);
8194                         ref->page = NULL;
8195                         kfree(ref);
8196                         break;
8197                 }
8198                 ref->cpu = iter->cpu_file;
8199
8200                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8201                                           len, iter->cpu_file, 1);
8202                 if (r < 0) {
8203                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8204                                                    ref->page);
8205                         kfree(ref);
8206                         break;
8207                 }
8208
8209                 page = virt_to_page(ref->page);
8210
8211                 spd.pages[i] = page;
8212                 spd.partial[i].len = PAGE_SIZE;
8213                 spd.partial[i].offset = 0;
8214                 spd.partial[i].private = (unsigned long)ref;
8215                 spd.nr_pages++;
8216                 *ppos += PAGE_SIZE;
8217
8218                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8219         }
8220
8221         trace_access_unlock(iter->cpu_file);
8222         spd.nr_pages = i;
8223
8224         /* did we read anything? */
8225         if (!spd.nr_pages) {
8226                 if (ret)
8227                         goto out;
8228
8229                 ret = -EAGAIN;
8230                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8231                         goto out;
8232
8233                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8234                 if (ret)
8235                         goto out;
8236
8237                 goto again;
8238         }
8239
8240         ret = splice_to_pipe(pipe, &spd);
8241 out:
8242         splice_shrink_spd(&spd);
8243
8244         return ret;
8245 }
8246
8247 static const struct file_operations tracing_buffers_fops = {
8248         .open           = tracing_buffers_open,
8249         .read           = tracing_buffers_read,
8250         .poll           = tracing_buffers_poll,
8251         .release        = tracing_buffers_release,
8252         .splice_read    = tracing_buffers_splice_read,
8253         .llseek         = no_llseek,
8254 };
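
/*
 * Illustrative usage (paths assume the default tracefs mount point): the
 * per_cpu/cpuN/trace_pipe_raw files backed by these fops hand out whole
 * ring-buffer sub-buffer pages in binary form.  Tools such as trace-cmd
 * splice() them straight to disk; a minimal reader could also just:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	char page[4096];
 *	ssize_t n = read(fd, page, sizeof(page));	// one binary sub-buffer
 */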
8255
8256 static ssize_t
8257 tracing_stats_read(struct file *filp, char __user *ubuf,
8258                    size_t count, loff_t *ppos)
8259 {
8260         struct inode *inode = file_inode(filp);
8261         struct trace_array *tr = inode->i_private;
8262         struct array_buffer *trace_buf = &tr->array_buffer;
8263         int cpu = tracing_get_cpu(inode);
8264         struct trace_seq *s;
8265         unsigned long cnt;
8266         unsigned long long t;
8267         unsigned long usec_rem;
8268
8269         s = kmalloc(sizeof(*s), GFP_KERNEL);
8270         if (!s)
8271                 return -ENOMEM;
8272
8273         trace_seq_init(s);
8274
8275         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8276         trace_seq_printf(s, "entries: %ld\n", cnt);
8277
8278         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8279         trace_seq_printf(s, "overrun: %ld\n", cnt);
8280
8281         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8282         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8283
8284         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8285         trace_seq_printf(s, "bytes: %ld\n", cnt);
8286
8287         if (trace_clocks[tr->clock_id].in_ns) {
8288                 /* local or global for trace_clock */
8289                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8290                 usec_rem = do_div(t, USEC_PER_SEC);
8291                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8292                                                                 t, usec_rem);
8293
8294                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8295                 usec_rem = do_div(t, USEC_PER_SEC);
8296                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8297         } else {
8298                 /* counter or tsc mode for trace_clock */
8299                 trace_seq_printf(s, "oldest event ts: %llu\n",
8300                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8301
8302                 trace_seq_printf(s, "now ts: %llu\n",
8303                                 ring_buffer_time_stamp(trace_buf->buffer));
8304         }
8305
8306         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8307         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8308
8309         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8310         trace_seq_printf(s, "read events: %ld\n", cnt);
8311
8312         count = simple_read_from_buffer(ubuf, count, ppos,
8313                                         s->buffer, trace_seq_used(s));
8314
8315         kfree(s);
8316
8317         return count;
8318 }
8319
8320 static const struct file_operations tracing_stats_fops = {
8321         .open           = tracing_open_generic_tr,
8322         .read           = tracing_stats_read,
8323         .llseek         = generic_file_llseek,
8324         .release        = tracing_release_generic_tr,
8325 };
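
/*
 * Illustrative output of per_cpu/cpuN/stats as assembled above (values
 * are examples only):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53280
 *	oldest event ts:  5268.330462
 *	now ts:  5281.055103
 *	dropped events: 0
 *	read events: 42
 */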
8326
8327 #ifdef CONFIG_DYNAMIC_FTRACE
8328
8329 static ssize_t
8330 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8331                   size_t cnt, loff_t *ppos)
8332 {
8333         ssize_t ret;
8334         char *buf;
8335         int r;
8336
8337         /* 256 should be plenty to hold the amount needed */
8338         buf = kmalloc(256, GFP_KERNEL);
8339         if (!buf)
8340                 return -ENOMEM;
8341
8342         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8343                       ftrace_update_tot_cnt,
8344                       ftrace_number_of_pages,
8345                       ftrace_number_of_groups);
8346
8347         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8348         kfree(buf);
8349         return ret;
8350 }
8351
8352 static const struct file_operations tracing_dyn_info_fops = {
8353         .open           = tracing_open_generic,
8354         .read           = tracing_read_dyn_info,
8355         .llseek         = generic_file_llseek,
8356 };
8357 #endif /* CONFIG_DYNAMIC_FTRACE */
8358
8359 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8360 static void
8361 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8362                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8363                 void *data)
8364 {
8365         tracing_snapshot_instance(tr);
8366 }
8367
8368 static void
8369 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8370                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8371                       void *data)
8372 {
8373         struct ftrace_func_mapper *mapper = data;
8374         long *count = NULL;
8375
8376         if (mapper)
8377                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8378
8379         if (count) {
8380
8381                 if (*count <= 0)
8382                         return;
8383
8384                 (*count)--;
8385         }
8386
8387         tracing_snapshot_instance(tr);
8388 }
8389
8390 static int
8391 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8392                       struct ftrace_probe_ops *ops, void *data)
8393 {
8394         struct ftrace_func_mapper *mapper = data;
8395         long *count = NULL;
8396
8397         seq_printf(m, "%ps:", (void *)ip);
8398
8399         seq_puts(m, "snapshot");
8400
8401         if (mapper)
8402                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8403
8404         if (count)
8405                 seq_printf(m, ":count=%ld\n", *count);
8406         else
8407                 seq_puts(m, ":unlimited\n");
8408
8409         return 0;
8410 }
8411
8412 static int
8413 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8414                      unsigned long ip, void *init_data, void **data)
8415 {
8416         struct ftrace_func_mapper *mapper = *data;
8417
8418         if (!mapper) {
8419                 mapper = allocate_ftrace_func_mapper();
8420                 if (!mapper)
8421                         return -ENOMEM;
8422                 *data = mapper;
8423         }
8424
8425         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8426 }
8427
8428 static void
8429 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8430                      unsigned long ip, void *data)
8431 {
8432         struct ftrace_func_mapper *mapper = data;
8433
8434         if (!ip) {
8435                 if (!mapper)
8436                         return;
8437                 free_ftrace_func_mapper(mapper, NULL);
8438                 return;
8439         }
8440
8441         ftrace_func_mapper_remove_ip(mapper, ip);
8442 }
8443
8444 static struct ftrace_probe_ops snapshot_probe_ops = {
8445         .func                   = ftrace_snapshot,
8446         .print                  = ftrace_snapshot_print,
8447 };
8448
8449 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8450         .func                   = ftrace_count_snapshot,
8451         .print                  = ftrace_snapshot_print,
8452         .init                   = ftrace_snapshot_init,
8453         .free                   = ftrace_snapshot_free,
8454 };
8455
8456 static int
8457 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8458                                char *glob, char *cmd, char *param, int enable)
8459 {
8460         struct ftrace_probe_ops *ops;
8461         void *count = (void *)-1;
8462         char *number;
8463         int ret;
8464
8465         if (!tr)
8466                 return -ENODEV;
8467
8468         /* hash funcs only work with set_ftrace_filter */
8469         if (!enable)
8470                 return -EINVAL;
8471
8472         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8473
8474         if (glob[0] == '!')
8475                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8476
8477         if (!param)
8478                 goto out_reg;
8479
8480         number = strsep(&param, ":");
8481
8482         if (!strlen(number))
8483                 goto out_reg;
8484
8485         /*
8486          * We use the callback data field (which is a pointer)
8487          * as our counter.
8488          */
8489         ret = kstrtoul(number, 0, (unsigned long *)&count);
8490         if (ret)
8491                 return ret;
8492
8493  out_reg:
8494         ret = tracing_alloc_snapshot_instance(tr);
8495         if (ret < 0)
8496                 goto out;
8497
8498         ret = register_ftrace_function_probe(glob, tr, ops, count);
8499
8500  out:
8501         return ret < 0 ? ret : 0;
8502 }
8503
8504 static struct ftrace_func_command ftrace_snapshot_cmd = {
8505         .name                   = "snapshot",
8506         .func                   = ftrace_trace_snapshot_callback,
8507 };
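
/*
 * Illustrative usage of the "snapshot" command registered below (requires
 * CONFIG_TRACER_SNAPSHOT and CONFIG_DYNAMIC_FTRACE, and only works through
 * set_ftrace_filter, as checked in the callback above):
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter		// every hit
 *	# echo 'schedule:snapshot:5' > set_ftrace_filter	// first 5 hits
 *	# echo '!schedule:snapshot' > set_ftrace_filter		// remove probe
 */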
8508
8509 static __init int register_snapshot_cmd(void)
8510 {
8511         return register_ftrace_command(&ftrace_snapshot_cmd);
8512 }
8513 #else
8514 static inline __init int register_snapshot_cmd(void) { return 0; }
8515 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8516
8517 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8518 {
8519         if (WARN_ON(!tr->dir))
8520                 return ERR_PTR(-ENODEV);
8521
8522         /* Top directory uses NULL as the parent */
8523         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8524                 return NULL;
8525
8526         /* All sub buffers have a descriptor */
8527         return tr->dir;
8528 }
8529
8530 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8531 {
8532         struct dentry *d_tracer;
8533
8534         if (tr->percpu_dir)
8535                 return tr->percpu_dir;
8536
8537         d_tracer = tracing_get_dentry(tr);
8538         if (IS_ERR(d_tracer))
8539                 return NULL;
8540
8541         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8542
8543         MEM_FAIL(!tr->percpu_dir,
8544                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8545
8546         return tr->percpu_dir;
8547 }
8548
8549 static struct dentry *
8550 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8551                       void *data, long cpu, const struct file_operations *fops)
8552 {
8553         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8554
8555         if (ret) /* See tracing_get_cpu() */
8556                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8557         return ret;
8558 }
8559
8560 static void
8561 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8562 {
8563         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8564         struct dentry *d_cpu;
8565         char cpu_dir[30]; /* 30 characters should be more than enough */
8566
8567         if (!d_percpu)
8568                 return;
8569
8570         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8571         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8572         if (!d_cpu) {
8573                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8574                 return;
8575         }
8576
8577         /* per cpu trace_pipe */
8578         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8579                                 tr, cpu, &tracing_pipe_fops);
8580
8581         /* per cpu trace */
8582         trace_create_cpu_file("trace", 0644, d_cpu,
8583                                 tr, cpu, &tracing_fops);
8584
8585         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8586                                 tr, cpu, &tracing_buffers_fops);
8587
8588         trace_create_cpu_file("stats", 0444, d_cpu,
8589                                 tr, cpu, &tracing_stats_fops);
8590
8591         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8592                                 tr, cpu, &tracing_entries_fops);
8593
8594 #ifdef CONFIG_TRACER_SNAPSHOT
8595         trace_create_cpu_file("snapshot", 0644, d_cpu,
8596                                 tr, cpu, &snapshot_fops);
8597
8598         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8599                                 tr, cpu, &snapshot_raw_fops);
8600 #endif
8601 }
8602
8603 #ifdef CONFIG_FTRACE_SELFTEST
8604 /* Let selftest have access to static functions in this file */
8605 #include "trace_selftest.c"
8606 #endif
8607
8608 static ssize_t
8609 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8610                         loff_t *ppos)
8611 {
8612         struct trace_option_dentry *topt = filp->private_data;
8613         char *buf;
8614
8615         if (topt->flags->val & topt->opt->bit)
8616                 buf = "1\n";
8617         else
8618                 buf = "0\n";
8619
8620         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8621 }
8622
8623 static ssize_t
8624 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8625                          loff_t *ppos)
8626 {
8627         struct trace_option_dentry *topt = filp->private_data;
8628         unsigned long val;
8629         int ret;
8630
8631         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8632         if (ret)
8633                 return ret;
8634
8635         if (val != 0 && val != 1)
8636                 return -EINVAL;
8637
8638         if (!!(topt->flags->val & topt->opt->bit) != val) {
8639                 mutex_lock(&trace_types_lock);
8640                 ret = __set_tracer_option(topt->tr, topt->flags,
8641                                           topt->opt, !val);
8642                 mutex_unlock(&trace_types_lock);
8643                 if (ret)
8644                         return ret;
8645         }
8646
8647         *ppos += cnt;
8648
8649         return cnt;
8650 }
8651
8652
8653 static const struct file_operations trace_options_fops = {
8654         .open = tracing_open_generic,
8655         .read = trace_options_read,
8656         .write = trace_options_write,
8657         .llseek = generic_file_llseek,
8658 };
8659
8660 /*
8661  * In order to pass in both the trace_array descriptor as well as the index
8662  * to the flag that the trace option file represents, the trace_array
8663  * has a character array of trace_flags_index[], which holds the index
8664  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8665  * The address of this character array is passed to the flag option file
8666  * read/write callbacks.
8667  *
8668  * In order to extract both the index and the trace_array descriptor,
8669  * get_tr_index() uses the following algorithm.
8670  *
8671  *   idx = *ptr;
8672  *
8673  * Because the pointer passed in is the address of index[idx], and
8674  * index[idx] == idx, dereferencing it yields the flag's bit index.
8675  *
8676  * Then, to get the trace_array descriptor, subtract that index from
8677  * the pointer to reach the start of the index array itself:
8678  *
8679  *   ptr - idx == &index[0]
8680  *
8681  * Then a simple container_of() from that pointer gets us to the
8682  * trace_array descriptor.
8683  */
8684 static void get_tr_index(void *data, struct trace_array **ptr,
8685                          unsigned int *pindex)
8686 {
8687         *pindex = *(unsigned char *)data;
8688
8689         *ptr = container_of(data - *pindex, struct trace_array,
8690                             trace_flags_index);
8691 }
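
/*
 * Worked example (illustrative): if the option file for flag 5 was created
 * with data == &tr->trace_flags_index[5], then *pindex becomes 5 and
 * data - 5 == &tr->trace_flags_index[0], so the container_of() above
 * recovers @tr from that address.
 */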
8692
8693 static ssize_t
8694 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8695                         loff_t *ppos)
8696 {
8697         void *tr_index = filp->private_data;
8698         struct trace_array *tr;
8699         unsigned int index;
8700         char *buf;
8701
8702         get_tr_index(tr_index, &tr, &index);
8703
8704         if (tr->trace_flags & (1 << index))
8705                 buf = "1\n";
8706         else
8707                 buf = "0\n";
8708
8709         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8710 }
8711
8712 static ssize_t
8713 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8714                          loff_t *ppos)
8715 {
8716         void *tr_index = filp->private_data;
8717         struct trace_array *tr;
8718         unsigned int index;
8719         unsigned long val;
8720         int ret;
8721
8722         get_tr_index(tr_index, &tr, &index);
8723
8724         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8725         if (ret)
8726                 return ret;
8727
8728         if (val != 0 && val != 1)
8729                 return -EINVAL;
8730
8731         mutex_lock(&event_mutex);
8732         mutex_lock(&trace_types_lock);
8733         ret = set_tracer_flag(tr, 1 << index, val);
8734         mutex_unlock(&trace_types_lock);
8735         mutex_unlock(&event_mutex);
8736
8737         if (ret < 0)
8738                 return ret;
8739
8740         *ppos += cnt;
8741
8742         return cnt;
8743 }
8744
8745 static const struct file_operations trace_options_core_fops = {
8746         .open = tracing_open_generic,
8747         .read = trace_options_core_read,
8748         .write = trace_options_core_write,
8749         .llseek = generic_file_llseek,
8750 };
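
/*
 * Illustrative usage: each core trace flag gets a file under options/
 * (created by create_trace_options_dir() below) backed by these fops;
 * writing 0 or 1 toggles the flag for that instance:
 *
 *	# echo 1 > options/sym-offset
 *	# cat options/sym-offset
 *	1
 */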
8751
8752 struct dentry *trace_create_file(const char *name,
8753                                  umode_t mode,
8754                                  struct dentry *parent,
8755                                  void *data,
8756                                  const struct file_operations *fops)
8757 {
8758         struct dentry *ret;
8759
8760         ret = tracefs_create_file(name, mode, parent, data, fops);
8761         if (!ret)
8762                 pr_warn("Could not create tracefs '%s' entry\n", name);
8763
8764         return ret;
8765 }
8766
8767
8768 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8769 {
8770         struct dentry *d_tracer;
8771
8772         if (tr->options)
8773                 return tr->options;
8774
8775         d_tracer = tracing_get_dentry(tr);
8776         if (IS_ERR(d_tracer))
8777                 return NULL;
8778
8779         tr->options = tracefs_create_dir("options", d_tracer);
8780         if (!tr->options) {
8781                 pr_warn("Could not create tracefs directory 'options'\n");
8782                 return NULL;
8783         }
8784
8785         return tr->options;
8786 }
8787
8788 static void
8789 create_trace_option_file(struct trace_array *tr,
8790                          struct trace_option_dentry *topt,
8791                          struct tracer_flags *flags,
8792                          struct tracer_opt *opt)
8793 {
8794         struct dentry *t_options;
8795
8796         t_options = trace_options_init_dentry(tr);
8797         if (!t_options)
8798                 return;
8799
8800         topt->flags = flags;
8801         topt->opt = opt;
8802         topt->tr = tr;
8803
8804         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8805                                     &trace_options_fops);
8806
8807 }
8808
8809 static void
8810 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8811 {
8812         struct trace_option_dentry *topts;
8813         struct trace_options *tr_topts;
8814         struct tracer_flags *flags;
8815         struct tracer_opt *opts;
8816         int cnt;
8817         int i;
8818
8819         if (!tracer)
8820                 return;
8821
8822         flags = tracer->flags;
8823
8824         if (!flags || !flags->opts)
8825                 return;
8826
8827         /*
8828          * If this is an instance, only create flags for tracers
8829          * the instance may have.
8830          */
8831         if (!trace_ok_for_array(tracer, tr))
8832                 return;
8833
8834         for (i = 0; i < tr->nr_topts; i++) {
8835                 /* Make sure there are no duplicate flags. */
8836                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8837                         return;
8838         }
8839
8840         opts = flags->opts;
8841
8842         for (cnt = 0; opts[cnt].name; cnt++)
8843                 ;
8844
8845         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8846         if (!topts)
8847                 return;
8848
8849         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8850                             GFP_KERNEL);
8851         if (!tr_topts) {
8852                 kfree(topts);
8853                 return;
8854         }
8855
8856         tr->topts = tr_topts;
8857         tr->topts[tr->nr_topts].tracer = tracer;
8858         tr->topts[tr->nr_topts].topts = topts;
8859         tr->nr_topts++;
8860
8861         for (cnt = 0; opts[cnt].name; cnt++) {
8862                 create_trace_option_file(tr, &topts[cnt], flags,
8863                                          &opts[cnt]);
8864                 MEM_FAIL(topts[cnt].entry == NULL,
8865                           "Failed to create trace option: %s",
8866                           opts[cnt].name);
8867         }
8868 }
8869
8870 static struct dentry *
8871 create_trace_option_core_file(struct trace_array *tr,
8872                               const char *option, long index)
8873 {
8874         struct dentry *t_options;
8875
8876         t_options = trace_options_init_dentry(tr);
8877         if (!t_options)
8878                 return NULL;
8879
8880         return trace_create_file(option, 0644, t_options,
8881                                  (void *)&tr->trace_flags_index[index],
8882                                  &trace_options_core_fops);
8883 }
8884
8885 static void create_trace_options_dir(struct trace_array *tr)
8886 {
8887         struct dentry *t_options;
8888         bool top_level = tr == &global_trace;
8889         int i;
8890
8891         t_options = trace_options_init_dentry(tr);
8892         if (!t_options)
8893                 return;
8894
8895         for (i = 0; trace_options[i]; i++) {
8896                 if (top_level ||
8897                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8898                         create_trace_option_core_file(tr, trace_options[i], i);
8899         }
8900 }
8901
8902 static ssize_t
8903 rb_simple_read(struct file *filp, char __user *ubuf,
8904                size_t cnt, loff_t *ppos)
8905 {
8906         struct trace_array *tr = filp->private_data;
8907         char buf[64];
8908         int r;
8909
8910         r = tracer_tracing_is_on(tr);
8911         r = sprintf(buf, "%d\n", r);
8912
8913         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8914 }
8915
8916 static ssize_t
8917 rb_simple_write(struct file *filp, const char __user *ubuf,
8918                 size_t cnt, loff_t *ppos)
8919 {
8920         struct trace_array *tr = filp->private_data;
8921         struct trace_buffer *buffer = tr->array_buffer.buffer;
8922         unsigned long val;
8923         int ret;
8924
8925         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8926         if (ret)
8927                 return ret;
8928
8929         if (buffer) {
8930                 mutex_lock(&trace_types_lock);
8931                 if (!!val == tracer_tracing_is_on(tr)) {
8932                         val = 0; /* do nothing */
8933                 } else if (val) {
8934                         tracer_tracing_on(tr);
8935                         if (tr->current_trace->start)
8936                                 tr->current_trace->start(tr);
8937                 } else {
8938                         tracer_tracing_off(tr);
8939                         if (tr->current_trace->stop)
8940                                 tr->current_trace->stop(tr);
8941                 }
8942                 mutex_unlock(&trace_types_lock);
8943         }
8944
8945         (*ppos)++;
8946
8947         return cnt;
8948 }
8949
8950 static const struct file_operations rb_simple_fops = {
8951         .open           = tracing_open_generic_tr,
8952         .read           = rb_simple_read,
8953         .write          = rb_simple_write,
8954         .release        = tracing_release_generic_tr,
8955         .llseek         = default_llseek,
8956 };
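
/*
 * Illustrative usage: the tracing_on file created from these fops in
 * init_tracer_tracefs() below toggles recording into this instance's
 * ring buffer without discarding its current contents:
 *
 *	# echo 0 > tracing_on		// stop recording
 *	# echo 1 > tracing_on		// resume recording
 */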
8957
8958 static ssize_t
8959 buffer_percent_read(struct file *filp, char __user *ubuf,
8960                     size_t cnt, loff_t *ppos)
8961 {
8962         struct trace_array *tr = filp->private_data;
8963         char buf[64];
8964         int r;
8965
8966         r = tr->buffer_percent;
8967         r = sprintf(buf, "%d\n", r);
8968
8969         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8970 }
8971
8972 static ssize_t
8973 buffer_percent_write(struct file *filp, const char __user *ubuf,
8974                      size_t cnt, loff_t *ppos)
8975 {
8976         struct trace_array *tr = filp->private_data;
8977         unsigned long val;
8978         int ret;
8979
8980         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8981         if (ret)
8982                 return ret;
8983
8984         if (val > 100)
8985                 return -EINVAL;
8986
8987         if (!val)
8988                 val = 1;
8989
8990         tr->buffer_percent = val;
8991
8992         (*ppos)++;
8993
8994         return cnt;
8995 }
8996
8997 static const struct file_operations buffer_percent_fops = {
8998         .open           = tracing_open_generic_tr,
8999         .read           = buffer_percent_read,
9000         .write          = buffer_percent_write,
9001         .release        = tracing_release_generic_tr,
9002         .llseek         = default_llseek,
9003 };
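
/*
 * Illustrative usage: buffer_percent accepts 0-100 (a write of 0 is stored
 * as 1 by the handler above) and defaults to 50, set in init_tracer_tracefs()
 * below.  It controls how full the ring buffer must be before blocked
 * readers are woken, see the wait_on_pipe() call in
 * tracing_buffers_splice_read() above.
 *
 *	# echo 100 > buffer_percent	// wake readers only when buffer is full
 */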
9004
9005 static struct dentry *trace_instance_dir;
9006
9007 static void
9008 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9009
9010 static int
9011 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9012 {
9013         enum ring_buffer_flags rb_flags;
9014
9015         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9016
9017         buf->tr = tr;
9018
9019         buf->buffer = ring_buffer_alloc(size, rb_flags);
9020         if (!buf->buffer)
9021                 return -ENOMEM;
9022
9023         buf->data = alloc_percpu(struct trace_array_cpu);
9024         if (!buf->data) {
9025                 ring_buffer_free(buf->buffer);
9026                 buf->buffer = NULL;
9027                 return -ENOMEM;
9028         }
9029
9030         /* Allocate the first page for all buffers */
9031         set_buffer_entries(&tr->array_buffer,
9032                            ring_buffer_size(tr->array_buffer.buffer, 0));
9033
9034         return 0;
9035 }
9036
9037 static int allocate_trace_buffers(struct trace_array *tr, int size)
9038 {
9039         int ret;
9040
9041         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9042         if (ret)
9043                 return ret;
9044
9045 #ifdef CONFIG_TRACER_MAX_TRACE
9046         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9047                                     allocate_snapshot ? size : 1);
9048         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9049                 ring_buffer_free(tr->array_buffer.buffer);
9050                 tr->array_buffer.buffer = NULL;
9051                 free_percpu(tr->array_buffer.data);
9052                 tr->array_buffer.data = NULL;
9053                 return -ENOMEM;
9054         }
9055         tr->allocated_snapshot = allocate_snapshot;
9056
9057         /*
9058          * Only the top level trace array gets its snapshot allocated
9059          * from the kernel command line.
9060          */
9061         allocate_snapshot = false;
9062 #endif
9063
9064         return 0;
9065 }
9066
9067 static void free_trace_buffer(struct array_buffer *buf)
9068 {
9069         if (buf->buffer) {
9070                 ring_buffer_free(buf->buffer);
9071                 buf->buffer = NULL;
9072                 free_percpu(buf->data);
9073                 buf->data = NULL;
9074         }
9075 }
9076
9077 static void free_trace_buffers(struct trace_array *tr)
9078 {
9079         if (!tr)
9080                 return;
9081
9082         free_trace_buffer(&tr->array_buffer);
9083
9084 #ifdef CONFIG_TRACER_MAX_TRACE
9085         free_trace_buffer(&tr->max_buffer);
9086 #endif
9087 }
9088
9089 static void init_trace_flags_index(struct trace_array *tr)
9090 {
9091         int i;
9092
9093         /* Used by the trace options files */
9094         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9095                 tr->trace_flags_index[i] = i;
9096 }
9097
9098 static void __update_tracer_options(struct trace_array *tr)
9099 {
9100         struct tracer *t;
9101
9102         for (t = trace_types; t; t = t->next)
9103                 add_tracer_options(tr, t);
9104 }
9105
9106 static void update_tracer_options(struct trace_array *tr)
9107 {
9108         mutex_lock(&trace_types_lock);
9109         __update_tracer_options(tr);
9110         mutex_unlock(&trace_types_lock);
9111 }
9112
9113 /* Must have trace_types_lock held */
9114 struct trace_array *trace_array_find(const char *instance)
9115 {
9116         struct trace_array *tr, *found = NULL;
9117
9118         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9119                 if (tr->name && strcmp(tr->name, instance) == 0) {
9120                         found = tr;
9121                         break;
9122                 }
9123         }
9124
9125         return found;
9126 }
9127
9128 struct trace_array *trace_array_find_get(const char *instance)
9129 {
9130         struct trace_array *tr;
9131
9132         mutex_lock(&trace_types_lock);
9133         tr = trace_array_find(instance);
9134         if (tr)
9135                 tr->ref++;
9136         mutex_unlock(&trace_types_lock);
9137
9138         return tr;
9139 }
9140
9141 static int trace_array_create_dir(struct trace_array *tr)
9142 {
9143         int ret;
9144
9145         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9146         if (!tr->dir)
9147                 return -EINVAL;
9148
9149         ret = event_trace_add_tracer(tr->dir, tr);
9150         if (ret) {
9151                 tracefs_remove(tr->dir);
9152                 return ret;
9153         }
9154
9155         init_tracer_tracefs(tr, tr->dir);
9156         __update_tracer_options(tr);
9157
9158         return ret;
9159 }
9160
9161 static struct trace_array *trace_array_create(const char *name)
9162 {
9163         struct trace_array *tr;
9164         int ret;
9165
9166         ret = -ENOMEM;
9167         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9168         if (!tr)
9169                 return ERR_PTR(ret);
9170
9171         tr->name = kstrdup(name, GFP_KERNEL);
9172         if (!tr->name)
9173                 goto out_free_tr;
9174
9175         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9176                 goto out_free_tr;
9177
9178         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9179
9180         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9181
9182         raw_spin_lock_init(&tr->start_lock);
9183
9184         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9185
9186         tr->current_trace = &nop_trace;
9187
9188         INIT_LIST_HEAD(&tr->systems);
9189         INIT_LIST_HEAD(&tr->events);
9190         INIT_LIST_HEAD(&tr->hist_vars);
9191         INIT_LIST_HEAD(&tr->err_log);
9192
9193         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9194                 goto out_free_tr;
9195
9196         if (ftrace_allocate_ftrace_ops(tr) < 0)
9197                 goto out_free_tr;
9198
9199         ftrace_init_trace_array(tr);
9200
9201         init_trace_flags_index(tr);
9202
9203         if (trace_instance_dir) {
9204                 ret = trace_array_create_dir(tr);
9205                 if (ret)
9206                         goto out_free_tr;
9207         } else
9208                 __trace_early_add_events(tr);
9209
9210         list_add(&tr->list, &ftrace_trace_arrays);
9211
9212         tr->ref++;
9213
9214         return tr;
9215
9216  out_free_tr:
9217         ftrace_free_ftrace_ops(tr);
9218         free_trace_buffers(tr);
9219         free_cpumask_var(tr->tracing_cpumask);
9220         kfree(tr->name);
9221         kfree(tr);
9222
9223         return ERR_PTR(ret);
9224 }
9225
9226 static int instance_mkdir(const char *name)
9227 {
9228         struct trace_array *tr;
9229         int ret;
9230
9231         mutex_lock(&event_mutex);
9232         mutex_lock(&trace_types_lock);
9233
9234         ret = -EEXIST;
9235         if (trace_array_find(name))
9236                 goto out_unlock;
9237
9238         tr = trace_array_create(name);
9239
9240         ret = PTR_ERR_OR_ZERO(tr);
9241
9242 out_unlock:
9243         mutex_unlock(&trace_types_lock);
9244         mutex_unlock(&event_mutex);
9245         return ret;
9246 }
9247
9248 /**
9249  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9250  * @name: The name of the trace array to be looked up/created.
9251  *
9252  * Returns a pointer to the trace array with the given name, or
9253  * NULL if it cannot be created.
9254  *
9255  * NOTE: This function increments the reference counter associated with the
9256  * trace array returned. This makes sure it cannot be freed while in use.
9257  * Use trace_array_put() once the trace array is no longer needed.
9258  * If the trace_array is to be freed, trace_array_destroy() needs to
9259  * be called after the trace_array_put(), or simply let user space delete
9260  * it from the tracefs instances directory. But until the
9261  * trace_array_put() is called, user space cannot delete it.
9262  *
9263  */
9264 struct trace_array *trace_array_get_by_name(const char *name)
9265 {
9266         struct trace_array *tr;
9267
9268         mutex_lock(&event_mutex);
9269         mutex_lock(&trace_types_lock);
9270
9271         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9272                 if (tr->name && strcmp(tr->name, name) == 0)
9273                         goto out_unlock;
9274         }
9275
9276         tr = trace_array_create(name);
9277
9278         if (IS_ERR(tr))
9279                 tr = NULL;
9280 out_unlock:
9281         if (tr)
9282                 tr->ref++;
9283
9284         mutex_unlock(&trace_types_lock);
9285         mutex_unlock(&event_mutex);
9286         return tr;
9287 }
9288 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
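
/*
 * Illustrative sketch (the instance name is hypothetical): a module-side
 * user of this API would typically do something like the following,
 * dropping its reference with trace_array_put() and only calling
 * trace_array_destroy() if the instance itself should go away:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-instance");
 *	if (!tr)
 *		return -ENODEV;
 *	// ... use @tr, e.g. enable events on it ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// optional, see the note above
 */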
9289
9290 static int __remove_instance(struct trace_array *tr)
9291 {
9292         int i;
9293
9294         /* Reference counter for a newly created trace array = 1. */
9295         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9296                 return -EBUSY;
9297
9298         list_del(&tr->list);
9299
9300         /* Disable all the flags that were enabled coming in */
9301         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9302                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9303                         set_tracer_flag(tr, 1 << i, 0);
9304         }
9305
9306         tracing_set_nop(tr);
9307         clear_ftrace_function_probes(tr);
9308         event_trace_del_tracer(tr);
9309         ftrace_clear_pids(tr);
9310         ftrace_destroy_function_files(tr);
9311         tracefs_remove(tr->dir);
9312         free_percpu(tr->last_func_repeats);
9313         free_trace_buffers(tr);
9314
9315         for (i = 0; i < tr->nr_topts; i++) {
9316                 kfree(tr->topts[i].topts);
9317         }
9318         kfree(tr->topts);
9319
9320         free_cpumask_var(tr->tracing_cpumask);
9321         kfree(tr->name);
9322         kfree(tr);
9323
9324         return 0;
9325 }
9326
9327 int trace_array_destroy(struct trace_array *this_tr)
9328 {
9329         struct trace_array *tr;
9330         int ret;
9331
9332         if (!this_tr)
9333                 return -EINVAL;
9334
9335         mutex_lock(&event_mutex);
9336         mutex_lock(&trace_types_lock);
9337
9338         ret = -ENODEV;
9339
9340         /* Make sure the trace array exists before destroying it. */
9341         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9342                 if (tr == this_tr) {
9343                         ret = __remove_instance(tr);
9344                         break;
9345                 }
9346         }
9347
9348         mutex_unlock(&trace_types_lock);
9349         mutex_unlock(&event_mutex);
9350
9351         return ret;
9352 }
9353 EXPORT_SYMBOL_GPL(trace_array_destroy);
9354
9355 static int instance_rmdir(const char *name)
9356 {
9357         struct trace_array *tr;
9358         int ret;
9359
9360         mutex_lock(&event_mutex);
9361         mutex_lock(&trace_types_lock);
9362
9363         ret = -ENODEV;
9364         tr = trace_array_find(name);
9365         if (tr)
9366                 ret = __remove_instance(tr);
9367
9368         mutex_unlock(&trace_types_lock);
9369         mutex_unlock(&event_mutex);
9370
9371         return ret;
9372 }
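
/*
 * Illustrative usage: instances are created and removed from user space by
 * making or removing directories under tracefs, which ends up in
 * instance_mkdir() and instance_rmdir() above:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# rmdir /sys/kernel/tracing/instances/foo
 */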
9373
9374 static __init void create_trace_instances(struct dentry *d_tracer)
9375 {
9376         struct trace_array *tr;
9377
9378         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9379                                                          instance_mkdir,
9380                                                          instance_rmdir);
9381         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9382                 return;
9383
9384         mutex_lock(&event_mutex);
9385         mutex_lock(&trace_types_lock);
9386
9387         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9388                 if (!tr->name)
9389                         continue;
9390                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9391                              "Failed to create instance directory\n"))
9392                         break;
9393         }
9394
9395         mutex_unlock(&trace_types_lock);
9396         mutex_unlock(&event_mutex);
9397 }
9398
9399 static void
9400 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9401 {
9402         struct trace_event_file *file;
9403         int cpu;
9404
9405         trace_create_file("available_tracers", 0444, d_tracer,
9406                         tr, &show_traces_fops);
9407
9408         trace_create_file("current_tracer", 0644, d_tracer,
9409                         tr, &set_tracer_fops);
9410
9411         trace_create_file("tracing_cpumask", 0644, d_tracer,
9412                           tr, &tracing_cpumask_fops);
9413
9414         trace_create_file("trace_options", 0644, d_tracer,
9415                           tr, &tracing_iter_fops);
9416
9417         trace_create_file("trace", 0644, d_tracer,
9418                           tr, &tracing_fops);
9419
9420         trace_create_file("trace_pipe", 0444, d_tracer,
9421                           tr, &tracing_pipe_fops);
9422
9423         trace_create_file("buffer_size_kb", 0644, d_tracer,
9424                           tr, &tracing_entries_fops);
9425
9426         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9427                           tr, &tracing_total_entries_fops);
9428
9429         trace_create_file("free_buffer", 0200, d_tracer,
9430                           tr, &tracing_free_buffer_fops);
9431
9432         trace_create_file("trace_marker", 0220, d_tracer,
9433                           tr, &tracing_mark_fops);
9434
9435         file = __find_event_file(tr, "ftrace", "print");
9436         if (file && file->dir)
9437                 trace_create_file("trigger", 0644, file->dir, file,
9438                                   &event_trigger_fops);
9439         tr->trace_marker_file = file;
9440
9441         trace_create_file("trace_marker_raw", 0220, d_tracer,
9442                           tr, &tracing_mark_raw_fops);
9443
9444         trace_create_file("trace_clock", 0644, d_tracer, tr,
9445                           &trace_clock_fops);
9446
9447         trace_create_file("tracing_on", 0644, d_tracer,
9448                           tr, &rb_simple_fops);
9449
9450         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9451                           &trace_time_stamp_mode_fops);
9452
9453         tr->buffer_percent = 50;
9454
9455         trace_create_file("buffer_percent", 0444, d_tracer,
9456                         tr, &buffer_percent_fops);
9457
9458         create_trace_options_dir(tr);
9459
9460 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9461         trace_create_maxlat_file(tr, d_tracer);
9462 #endif
9463
9464         if (ftrace_create_function_files(tr, d_tracer))
9465                 MEM_FAIL(1, "Could not allocate function filter files");
9466
9467 #ifdef CONFIG_TRACER_SNAPSHOT
9468         trace_create_file("snapshot", 0644, d_tracer,
9469                           tr, &snapshot_fops);
9470 #endif
9471
9472         trace_create_file("error_log", 0644, d_tracer,
9473                           tr, &tracing_err_log_fops);
9474
9475         for_each_tracing_cpu(cpu)
9476                 tracing_init_tracefs_percpu(tr, cpu);
9477
9478         ftrace_init_tracefs(tr, d_tracer);
9479 }
9480
9481 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9482 {
9483         struct vfsmount *mnt;
9484         struct file_system_type *type;
9485
9486         /*
9487          * To maintain backward compatibility for tools that mount
9488          * debugfs to get to the tracing facility, tracefs is automatically
9489          * mounted to the debugfs/tracing directory.
9490          */
9491         type = get_fs_type("tracefs");
9492         if (!type)
9493                 return NULL;
9494         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9495         put_filesystem(type);
9496         if (IS_ERR(mnt))
9497                 return NULL;
9498         mntget(mnt);
9499
9500         return mnt;
9501 }
9502
9503 /**
9504  * tracing_init_dentry - initialize top level trace array
9505  *
9506  * This is called when creating files or directories in the tracing
9507  * directory. It is called via fs_initcall() by any of the boot up code
9508  * and expects to return the dentry of the top level tracing directory.
9509  */
9510 int tracing_init_dentry(void)
9511 {
9512         struct trace_array *tr = &global_trace;
9513
9514         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9515                 pr_warn("Tracing disabled due to lockdown\n");
9516                 return -EPERM;
9517         }
9518
9519         /* The top level trace array uses NULL as parent */
9520         if (tr->dir)
9521                 return 0;
9522
9523         if (WARN_ON(!tracefs_initialized()))
9524                 return -ENODEV;
9525
9526         /*
9527          * As there may still be users that expect the tracing
9528          * files to exist in debugfs/tracing, we must automount
9529          * the tracefs file system there, so older tools still
9530          * work with the newer kernel.
9531          */
9532         tr->dir = debugfs_create_automount("tracing", NULL,
9533                                            trace_automount, NULL);
9534
9535         return 0;
9536 }
9537
9538 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9539 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9540
9541 static struct workqueue_struct *eval_map_wq __initdata;
9542 static struct work_struct eval_map_work __initdata;
9543
9544 static void __init eval_map_work_func(struct work_struct *work)
9545 {
9546         int len;
9547
9548         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9549         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9550 }
9551
9552 static int __init trace_eval_init(void)
9553 {
9554         INIT_WORK(&eval_map_work, eval_map_work_func);
9555
9556         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9557         if (!eval_map_wq) {
9558                 pr_err("Unable to allocate eval_map_wq\n");
9559                 /* Fall back to doing the work synchronously */
9560                 eval_map_work_func(&eval_map_work);
9561                 return -ENOMEM;
9562         }
9563
9564         queue_work(eval_map_wq, &eval_map_work);
9565         return 0;
9566 }
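
/*
 * Deferring the insertion of the core eval maps to an unbound workqueue
 * lets a potentially large map list be processed in parallel with the
 * rest of boot; trace_eval_sync() below, a late_initcall_sync, waits for
 * the work by destroying the workqueue. The maps themselves come from
 * TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF() users, e.g. (illustrative):
 *
 *	TRACE_DEFINE_ENUM(MY_ENUM_VALUE);
 *
 * which lets "MY_ENUM_VALUE" in an event's print format be resolved to
 * its numeric value (and, with CONFIG_TRACE_EVAL_MAP_FILE, be listed in
 * the "eval_map" tracefs file).
 */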
9567
9568 static int __init trace_eval_sync(void)
9569 {
9570         /* Make sure the eval map updates are finished */
9571         if (eval_map_wq)
9572                 destroy_workqueue(eval_map_wq);
9573         return 0;
9574 }
9575
9576 late_initcall_sync(trace_eval_sync);
9577
9579 #ifdef CONFIG_MODULES
9580 static void trace_module_add_evals(struct module *mod)
9581 {
9582         if (!mod->num_trace_evals)
9583                 return;
9584
9585         /*
9586          * Modules with bad taint do not have events created, do
9587          * not bother with enums either.
9588          */
9589         if (trace_module_has_bad_taint(mod))
9590                 return;
9591
9592         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9593 }
9594
9595 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9596 static void trace_module_remove_evals(struct module *mod)
9597 {
9598         union trace_eval_map_item *map;
9599         union trace_eval_map_item **last = &trace_eval_maps;
9600
9601         if (!mod->num_trace_evals)
9602                 return;
9603
9604         mutex_lock(&trace_eval_mutex);
9605
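	/*
	 * trace_eval_maps is a singly linked list of blocks (see union
	 * trace_eval_map_item): each block starts with a "head" item whose
	 * ->mod identifies the owner and ends with a "tail" item whose
	 * ->next points to the following block. Walk the blocks looking
	 * for the one owned by this module so it can be unlinked and freed.
	 */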
9606         map = trace_eval_maps;
9607
9608         while (map) {
9609                 if (map->head.mod == mod)
9610                         break;
9611                 map = trace_eval_jmp_to_tail(map);
9612                 last = &map->tail.next;
9613                 map = map->tail.next;
9614         }
9615         if (!map)
9616                 goto out;
9617
9618         *last = trace_eval_jmp_to_tail(map)->tail.next;
9619         kfree(map);
9620  out:
9621         mutex_unlock(&trace_eval_mutex);
9622 }
9623 #else
9624 static inline void trace_module_remove_evals(struct module *mod) { }
9625 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9626
9627 static int trace_module_notify(struct notifier_block *self,
9628                                unsigned long val, void *data)
9629 {
9630         struct module *mod = data;
9631
9632         switch (val) {
9633         case MODULE_STATE_COMING:
9634                 trace_module_add_evals(mod);
9635                 break;
9636         case MODULE_STATE_GOING:
9637                 trace_module_remove_evals(mod);
9638                 break;
9639         }
9640
9641         return NOTIFY_OK;
9642 }
9643
9644 static struct notifier_block trace_module_nb = {
9645         .notifier_call = trace_module_notify,
9646         .priority = 0,
9647 };
9648 #endif /* CONFIG_MODULES */
9649
9650 static __init int tracer_init_tracefs(void)
9651 {
9652         int ret;
9653
9654         trace_access_lock_init();
9655
9656         ret = tracing_init_dentry();
9657         if (ret)
9658                 return 0;
9659
9660         event_trace_init();
9661
9662         init_tracer_tracefs(&global_trace, NULL);
9663         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9664
9665         trace_create_file("tracing_thresh", 0644, NULL,
9666                         &global_trace, &tracing_thresh_fops);
9667
9668         trace_create_file("README", 0444, NULL,
9669                         NULL, &tracing_readme_fops);
9670
9671         trace_create_file("saved_cmdlines", 0444, NULL,
9672                         NULL, &tracing_saved_cmdlines_fops);
9673
9674         trace_create_file("saved_cmdlines_size", 0644, NULL,
9675                           NULL, &tracing_saved_cmdlines_size_fops);
9676
9677         trace_create_file("saved_tgids", 0444, NULL,
9678                         NULL, &tracing_saved_tgids_fops);
9679
9680         trace_eval_init();
9681
9682         trace_create_eval_file(NULL);
9683
9684 #ifdef CONFIG_MODULES
9685         register_module_notifier(&trace_module_nb);
9686 #endif
9687
9688 #ifdef CONFIG_DYNAMIC_FTRACE
9689         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9690                         NULL, &tracing_dyn_info_fops);
9691 #endif
9692
9693         create_trace_instances(NULL);
9694
9695         update_tracer_options(&global_trace);
9696
9697         return 0;
9698 }
9699
9700 fs_initcall(tracer_init_tracefs);
9701
9702 static int trace_panic_handler(struct notifier_block *this,
9703                                unsigned long event, void *unused)
9704 {
9705         if (ftrace_dump_on_oops)
9706                 ftrace_dump(ftrace_dump_on_oops);
9707         return NOTIFY_OK;
9708 }
9709
9710 static struct notifier_block trace_panic_notifier = {
9711         .notifier_call  = trace_panic_handler,
9712         .next           = NULL,
9713         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9714 };
9715
9716 static int trace_die_handler(struct notifier_block *self,
9717                              unsigned long val,
9718                              void *data)
9719 {
9720         switch (val) {
9721         case DIE_OOPS:
9722                 if (ftrace_dump_on_oops)
9723                         ftrace_dump(ftrace_dump_on_oops);
9724                 break;
9725         default:
9726                 break;
9727         }
9728         return NOTIFY_OK;
9729 }
9730
9731 static struct notifier_block trace_die_notifier = {
9732         .notifier_call = trace_die_handler,
9733         .priority = 200
9734 };
9735
9736 /*
9737  * printk() output is capped at 1024 characters; we really don't need it that big.
9738  * Nothing should be printing 1000 characters anyway.
9739  */
9740 #define TRACE_MAX_PRINT         1000
9741
9742 /*
9743  * Define here KERN_TRACE so that we have one place to modify
9744  * it if we decide to change what log level the ftrace dump
9745  * should be at.
9746  */
9747 #define KERN_TRACE              KERN_EMERG
9748
9749 void
9750 trace_printk_seq(struct trace_seq *s)
9751 {
9752         /* Probably should print a warning here. */
9753         if (s->seq.len >= TRACE_MAX_PRINT)
9754                 s->seq.len = TRACE_MAX_PRINT;
9755
9756         /*
9757          * More paranoid code. Although the buffer size is set to
9758          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9759          * an extra layer of protection.
9760          */
9761         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9762                 s->seq.len = s->seq.size - 1;
9763
9764         /* Should already be NUL terminated, but we are paranoid. */
9765         s->buffer[s->seq.len] = 0;
9766
9767         printk(KERN_TRACE "%s", s->buffer);
9768
9769         trace_seq_init(s);
9770 }
9771
9772 void trace_init_global_iter(struct trace_iterator *iter)
9773 {
9774         iter->tr = &global_trace;
9775         iter->trace = iter->tr->current_trace;
9776         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9777         iter->array_buffer = &global_trace.array_buffer;
9778
9779         if (iter->trace && iter->trace->open)
9780                 iter->trace->open(iter);
9781
9782         /* Annotate start of buffers if we had overruns */
9783         if (ring_buffer_overruns(iter->array_buffer->buffer))
9784                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9785
9786         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9787         if (trace_clocks[iter->tr->clock_id].in_ns)
9788                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9789 }
9790
9791 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9792 {
9793         /* use static because iter can be a bit big for the stack */
9794         static struct trace_iterator iter;
9795         static atomic_t dump_running;
9796         struct trace_array *tr = &global_trace;
9797         unsigned int old_userobj;
9798         unsigned long flags;
9799         int cnt = 0, cpu;
9800
9801         /* Only allow one dump user at a time. */
9802         if (atomic_inc_return(&dump_running) != 1) {
9803                 atomic_dec(&dump_running);
9804                 return;
9805         }
9806
9807         /*
9808          * Always turn off tracing when we dump.
9809          * We don't need to show trace output of what happens
9810          * between multiple crashes.
9811          *
9812          * If the user does a sysrq-z, then they can re-enable
9813          * tracing with echo 1 > tracing_on.
9814          */
9815         tracing_off();
9816
9817         local_irq_save(flags);
9818
9819         /* Simulate the iterator */
9820         trace_init_global_iter(&iter);
9821         /* Cannot use kmalloc() for iter.temp and iter.fmt (may be in NMI/panic context) */
9822         iter.temp = static_temp_buf;
9823         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9824         iter.fmt = static_fmt_buf;
9825         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9826
9827         for_each_tracing_cpu(cpu) {
9828                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9829         }
9830
9831         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9832
9833         /* don't look at user memory in panic mode */
9834         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9835
9836         switch (oops_dump_mode) {
9837         case DUMP_ALL:
9838                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9839                 break;
9840         case DUMP_ORIG:
9841                 iter.cpu_file = raw_smp_processor_id();
9842                 break;
9843         case DUMP_NONE:
9844                 goto out_enable;
9845         default:
9846                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9847                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9848         }
9849
9850         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9851
9852         /* Did function tracer already get disabled? */
9853         if (ftrace_is_dead()) {
9854                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9855                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9856         }
9857
9858         /*
9859          * We need to stop all tracing on all CPUs to read
9860          * the next buffer. This is a bit expensive, but is
9861          * not done often. We print everything we can read,
9862          * and then release the locks again.
9863          */
9864
9865         while (!trace_empty(&iter)) {
9866
9867                 if (!cnt)
9868                         printk(KERN_TRACE "---------------------------------\n");
9869
9870                 cnt++;
9871
9872                 trace_iterator_reset(&iter);
9873                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9874
9875                 if (trace_find_next_entry_inc(&iter) != NULL) {
9876                         int ret;
9877
9878                         ret = print_trace_line(&iter);
9879                         if (ret != TRACE_TYPE_NO_CONSUME)
9880                                 trace_consume(&iter);
9881                 }
9882                 touch_nmi_watchdog();
9883
9884                 trace_printk_seq(&iter.seq);
9885         }
9886
9887         if (!cnt)
9888                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9889         else
9890                 printk(KERN_TRACE "---------------------------------\n");
9891
9892  out_enable:
9893         tr->trace_flags |= old_userobj;
9894
9895         for_each_tracing_cpu(cpu) {
9896                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9897         }
9898         atomic_dec(&dump_running);
9899         local_irq_restore(flags);
9900 }
9901 EXPORT_SYMBOL_GPL(ftrace_dump);
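
/*
 * A sketch of how the export above tends to be used outside this file
 * (hypothetical condition name): code that detects an unrecoverable
 * state can dump whatever the ring buffer currently holds before the
 * system goes down:
 *
 *	if (WARN_ON(unexpected_state))
 *		ftrace_dump(DUMP_ALL);
 *
 * (DUMP_ORIG restricts the dump to the CPU that triggered it.) Note
 * that, as described in ftrace_dump() above, tracing is switched off as
 * a side effect and must be re-enabled via tracing_on.
 */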
9902
9903 #define WRITE_BUFSIZE  4096
9904
9905 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9906                                 size_t count, loff_t *ppos,
9907                                 int (*createfn)(const char *))
9908 {
9909         char *kbuf, *buf, *tmp;
9910         int ret = 0;
9911         size_t done = 0;
9912         size_t size;
9913
9914         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9915         if (!kbuf)
9916                 return -ENOMEM;
9917
9918         while (done < count) {
9919                 size = count - done;
9920
9921                 if (size >= WRITE_BUFSIZE)
9922                         size = WRITE_BUFSIZE - 1;
9923
9924                 if (copy_from_user(kbuf, buffer + done, size)) {
9925                         ret = -EFAULT;
9926                         goto out;
9927                 }
9928                 kbuf[size] = '\0';
9929                 buf = kbuf;
9930                 do {
9931                         tmp = strchr(buf, '\n');
9932                         if (tmp) {
9933                                 *tmp = '\0';
9934                                 size = tmp - buf + 1;
9935                         } else {
9936                                 size = strlen(buf);
9937                                 if (done + size < count) {
9938                                         if (buf != kbuf)
9939                                                 break;
9940                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9941                                         pr_warn("Line length is too long: Should be less than %d\n",
9942                                                 WRITE_BUFSIZE - 2);
9943                                         ret = -EINVAL;
9944                                         goto out;
9945                                 }
9946                         }
9947                         done += size;
9948
9949                         /* Remove comments */
9950                         tmp = strchr(buf, '#');
9951
9952                         if (tmp)
9953                                 *tmp = '\0';
9954
9955                         ret = createfn(buf);
9956                         if (ret)
9957                                 goto out;
9958                         buf += size;
9959
9960                 } while (done < count);
9961         }
9962         ret = done;
9963
9964 out:
9965         kfree(kbuf);
9966
9967         return ret;
9968 }
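
/*
 * Sketch of a typical caller (hypothetical names; the real users are the
 * dynamic event interfaces such as kprobe_events): a file's write()
 * handler passes its per-command callback, and trace_parse_run_command()
 * splits the user buffer on newlines, strips '#' comments and invokes
 * the callback once per command:
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		pr_info("got command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */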
9969
9970 __init static int tracer_alloc_buffers(void)
9971 {
9972         int ring_buf_size;
9973         int ret = -ENOMEM;
9974
9976         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9977                 pr_warn("Tracing disabled due to lockdown\n");
9978                 return -EPERM;
9979         }
9980
9981         /*
9982          * Make sure we don't accidentally add more trace options
9983          * than we have bits for.
9984          */
9985         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9986
9987         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9988                 goto out;
9989
9990         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9991                 goto out_free_buffer_mask;
9992
9993         /* Only allocate trace_printk buffers if a trace_printk exists */
9994         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9995                 /* Must be called before global_trace's ring buffers are allocated */
9996                 trace_printk_init_buffers();
9997
9998         /* To save memory, keep the ring buffer size to its minimum */
9999         if (ring_buffer_expanded)
10000                 ring_buf_size = trace_buf_size;
10001         else
10002                 ring_buf_size = 1;
10003
10004         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10005         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10006
10007         raw_spin_lock_init(&global_trace.start_lock);
10008
10009         /*
10010          * The prepare callback allocates some memory for the ring buffer. We
10011          * don't free the buffer if the CPU goes down. If we were to free
10012          * the buffer, then the user would lose any trace that was in the
10013          * buffer. The memory will be removed once the "instance" is removed.
10014          */
10015         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10016                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10017                                       NULL);
10018         if (ret < 0)
10019                 goto out_free_cpumask;
10020         /* Used for event triggers */
10021         ret = -ENOMEM;
10022         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10023         if (!temp_buffer)
10024                 goto out_rm_hp_state;
10025
10026         if (trace_create_savedcmd() < 0)
10027                 goto out_free_temp_buffer;
10028
10029         /* TODO: make the number of buffers hot pluggable with CPUs */
10030         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10031                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10032                 goto out_free_savedcmd;
10033         }
10034
10035         if (global_trace.buffer_disabled)
10036                 tracing_off();
10037
10038         if (trace_boot_clock) {
10039                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10040                 if (ret < 0)
10041                         pr_warn("Trace clock %s not defined, going back to default\n",
10042                                 trace_boot_clock);
10043         }
10044
10045         /*
10046          * register_tracer() might reference current_trace, so it
10047          * needs to be set before we register anything. This is
10048          * just a bootstrap of current_trace anyway.
10049          */
10050         global_trace.current_trace = &nop_trace;
10051
10052         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10053
10054         ftrace_init_global_array_ops(&global_trace);
10055
10056         init_trace_flags_index(&global_trace);
10057
10058         register_tracer(&nop_trace);
10059
10060         /* Function tracing may start here (via kernel command line) */
10061         init_function_trace();
10062
10063         /* All seems OK, enable tracing */
10064         tracing_disabled = 0;
10065
10066         atomic_notifier_chain_register(&panic_notifier_list,
10067                                        &trace_panic_notifier);
10068
10069         register_die_notifier(&trace_die_notifier);
10070
10071         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10072
10073         INIT_LIST_HEAD(&global_trace.systems);
10074         INIT_LIST_HEAD(&global_trace.events);
10075         INIT_LIST_HEAD(&global_trace.hist_vars);
10076         INIT_LIST_HEAD(&global_trace.err_log);
10077         list_add(&global_trace.list, &ftrace_trace_arrays);
10078
10079         apply_trace_boot_options();
10080
10081         register_snapshot_cmd();
10082
10083         test_can_verify();
10084
10085         return 0;
10086
10087 out_free_savedcmd:
10088         free_saved_cmdlines_buffer(savedcmd);
10089 out_free_temp_buffer:
10090         ring_buffer_free(temp_buffer);
10091 out_rm_hp_state:
10092         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10093 out_free_cpumask:
10094         free_cpumask_var(global_trace.tracing_cpumask);
10095 out_free_buffer_mask:
10096         free_cpumask_var(tracing_buffer_mask);
10097 out:
10098         return ret;
10099 }
10100
10101 void __init early_trace_init(void)
10102 {
10103         if (tracepoint_printk) {
10104                 tracepoint_print_iter =
10105                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10106                 if (MEM_FAIL(!tracepoint_print_iter,
10107                              "Failed to allocate trace iterator\n"))
10108                         tracepoint_printk = 0;
10109                 else
10110                         static_key_enable(&tracepoint_printk_key.key);
10111         }
10112         tracer_alloc_buffers();
10113 }
10114
10115 void __init trace_init(void)
10116 {
10117         trace_event_init();
10118 }
10119
10120 __init static void clear_boot_tracer(void)
10121 {
10122         /*
10123          * The default bootup tracer name points into an init-section
10124          * buffer (the copy of the boot command line). This runs at
10125          * late initcall time: if the requested boot tracer was never
10126          * registered, clear the pointer so that a later registration
10127          * does not access init memory that is about to be freed.
10128          */
10129         if (!default_bootup_tracer)
10130                 return;
10131
10132         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10133                default_bootup_tracer);
10134         default_bootup_tracer = NULL;
10135 }
10136
10137 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10138 __init static void tracing_set_default_clock(void)
10139 {
10140         /* sched_clock_stable() is determined in late_initcall */
10141         if (!trace_boot_clock && !sched_clock_stable()) {
10142                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10143                         pr_warn("Can not set tracing clock due to lockdown\n");
10144                         return;
10145                 }
10146
10147                 printk(KERN_WARNING
10148                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10149                        "If you want to keep using the local clock, then add:\n"
10150                        "  \"trace_clock=local\"\n"
10151                        "on the kernel command line\n");
10152                 tracing_set_clock(&global_trace, "global");
10153         }
10154 }
10155 #else
10156 static inline void tracing_set_default_clock(void) { }
10157 #endif
10158
10159 __init static int late_trace_init(void)
10160 {
10161         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10162                 static_key_disable(&tracepoint_printk_key.key);
10163                 tracepoint_printk = 0;
10164         }
10165
10166         tracing_set_default_clock();
10167         clear_boot_tracer();
10168         return 0;
10169 }
10170
10171 late_initcall_sync(late_trace_init);