1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
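
/*
 * Worked example of the rounding above (illustrative): ns2usecs(1500) adds
 * 500 to get 2000, then divides by 1000 and returns 2, i.e. it rounds to the
 * nearest microsecond instead of truncating.
 */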
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are adding export to the list but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included in the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
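
/*
 * Illustrative sketch of a minimal user of the export API above. The
 * my_export_write/my_export names are hypothetical; the ->write() signature
 * and the TRACE_EXPORT_* flags are the ones from <linux/trace.h>.
 */
#if 0
static void my_export_write(struct trace_export *export,
			    const void *entry, unsigned int size)
{
	/* Forward the raw binary trace entry to some out-of-band channel. */
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
};

/* Then: register_ftrace_export(&my_export); later unregister_ftrace_export(&my_export); */
#endif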
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr: pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
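
/*
 * Illustrative sketch of how the helper above is typically paired with
 * trace_array_put() in a tracefs ->open() callback. The my_open() name is
 * hypothetical; "tr" comes from inode->i_private in real callers.
 */
#if 0
static int my_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/*
	 * ... set up the file; drop the reference with trace_array_put(tr)
	 * in the matching ->release() callback ...
	 */
	return 0;
}
#endif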
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
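
/*
 * Illustrative sketch: wiring the three helpers above into seq_file
 * operations. The p_start/p_next/p_stop names and my_pid_list are
 * hypothetical.
 */
#if 0
static struct trace_pid_list *my_pid_list;	/* hypothetical; filled elsewhere */

static void *p_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(my_pid_list, pos);
}

static void *p_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(my_pid_list, v, pos);
}

static void p_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations pid_seq_ops = {
	.start	= p_start,
	.next	= p_next,
	.stop	= p_stop,
	.show	= trace_pid_show,	/* prints (pid + 1) - 1, so pid 0 displays correctly */
};
#endif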
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all or nothing
698          * operation. Always create a new array when adding new pids by
699          * the user. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
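
/*
 * Illustrative only: trace_pid_write() is what ultimately services writes to
 * the pid filter files in tracefs (e.g. set_event_pid), where an appending
 * write passes the existing list in as @filtered_pids:
 *
 *   echo 123 512 > set_event_pid
 *   echo 777 >> set_event_pid
 */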
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If a dump on oops happens, it is much appreciated
821  * not to have to wait for all that output. Anyway, this is
822  * configurable at both boot time and run time.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * Serialize access to the ring buffer.
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (returned by ring_buffer_peek(), etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be
847  *      rewritten by the event producer.
848  *   B) the page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multiple processes to access different cpu ring
852  * buffers concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
912 static inline void trace_access_lock_init(void)
913 {
914 }
915
916 #endif
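
/*
 * Illustrative sketch of the reader-side pattern used later in this file
 * when consuming events; the my_consume() name is hypothetical.
 */
#if 0
static void my_consume(struct trace_iterator *iter)
{
	/* iter->cpu_file is either a specific CPU or RING_BUFFER_ALL_CPUS */
	trace_access_lock(iter->cpu_file);
	/* ... peek at or consume events from the chosen ring buffer(s) ... */
	trace_access_unlock(iter->cpu_file);
}
#endif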
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929                                         unsigned int trace_ctx,
930                                         int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934                                       struct trace_buffer *buffer,
935                                       unsigned long trace_ctx,
936                                       int skip, struct pt_regs *regs)
937 {
938 }
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff), that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races where it gets disabled but we still do a record.
975          * As the check is in the fast path of the tracers, it is more
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
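
/*
 * Illustrative only: callers normally do not use __trace_puts() or
 * __trace_bputs() directly but go through the trace_puts() macro, which
 * picks __trace_bputs() for compile-time constant strings (recording only
 * the pointer) and __trace_puts() otherwise:
 *
 *	trace_puts("reached the slow path\n");
 */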
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot either with
1151  * tracing_snapshot_alloc(), or manually with:
1152  *   echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, this will stop tracing,
1155  * basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:         The tracing instance to snapshot
1168  * @cond_data:  The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180         tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already taken.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                         struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer; instead, we resize it because
1239          * the max_tr ring buffer has some state (e.g. ring->clock) and
1240          * we want to preserve it.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283         int ret;
1284
1285         ret = tracing_alloc_snapshot();
1286         if (ret < 0)
1287                 return;
1288
1289         tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
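
/*
 * Illustrative sketch of a conditional-snapshot user. The my_update/my_setup
 * names and my_event_is_interesting() are hypothetical; the update callback
 * matches cond_update_fn_t and returns true when the snapshot should be taken.
 */
#if 0
static bool my_update(struct trace_array *tr, void *cond_data)
{
	/* @cond_data is whatever the caller passed to tracing_snapshot_cond() */
	return my_event_is_interesting(cond_data);	/* hypothetical predicate */
}

static int my_setup(struct trace_array *tr, void *my_private_data)
{
	/* The second argument is retrievable later via tracing_cond_snapshot_data(tr) */
	return tracing_snapshot_cond_enable(tr, my_private_data, my_update);
}

/* Hot path:   tracing_snapshot_cond(tr, per_event_data);	*/
/* Teardown:   tracing_snapshot_cond_disable(tr);		*/
#endif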
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show the real state of the ring buffer
1468  * @tr: the trace array whose ring buffer state to report
1469  *
1470  * Shows the real state of the ring buffer: whether it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
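
/*
 * Illustrative sketch: other kernel code can bracket a region of interest
 * with these helpers and read the frozen buffer from tracefs afterwards.
 * The my_capture_window() and suspicious_code_path() names are hypothetical.
 */
#if 0
static void my_capture_window(void)
{
	tracing_on();
	suspicious_code_path();		/* hypothetical */
	tracing_off();
	pr_info("recording stopped, tracing_is_on()=%d\n", tracing_is_on());
}
#endif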
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520         return nsecs / 1000;
1521 }
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534         TRACE_FLAGS
1535         NULL
1536 };
1537
1538 static struct {
1539         u64 (*func)(void);
1540         const char *name;
1541         int in_ns;              /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543         { trace_clock_local,            "local",        1 },
1544         { trace_clock_global,           "global",       1 },
1545         { trace_clock_counter,          "counter",      0 },
1546         { trace_clock_jiffies,          "uptime",       0 },
1547         { trace_clock,                  "perf",         1 },
1548         { ktime_get_mono_fast_ns,       "mono",         1 },
1549         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550         { ktime_get_boot_fast_ns,       "boot",         1 },
1551         ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598         size_t cnt, loff_t *ppos)
1599 {
1600         char ch;
1601         size_t read = 0;
1602         ssize_t ret;
1603
1604         if (!*ppos)
1605                 trace_parser_clear(parser);
1606
1607         ret = get_user(ch, ubuf++);
1608         if (ret)
1609                 goto out;
1610
1611         read++;
1612         cnt--;
1613
1614         /*
1615          * The parser is not finished with the last write,
1616          * continue reading the user input without skipping spaces.
1617          */
1618         if (!parser->cont) {
1619                 /* skip white space */
1620                 while (cnt && isspace(ch)) {
1621                         ret = get_user(ch, ubuf++);
1622                         if (ret)
1623                                 goto out;
1624                         read++;
1625                         cnt--;
1626                 }
1627
1628                 parser->idx = 0;
1629
1630                 /* only spaces were written */
1631                 if (isspace(ch) || !ch) {
1632                         *ppos += read;
1633                         ret = read;
1634                         goto out;
1635                 }
1636         }
1637
1638         /* read the non-space input */
1639         while (cnt && !isspace(ch) && ch) {
1640                 if (parser->idx < parser->size - 1)
1641                         parser->buffer[parser->idx++] = ch;
1642                 else {
1643                         ret = -EINVAL;
1644                         goto out;
1645                 }
1646                 ret = get_user(ch, ubuf++);
1647                 if (ret)
1648                         goto out;
1649                 read++;
1650                 cnt--;
1651         }
1652
1653         /* We either got finished input or we have to wait for another call. */
1654         if (isspace(ch) || !ch) {
1655                 parser->buffer[parser->idx] = 0;
1656                 parser->cont = false;
1657         } else if (parser->idx < parser->size - 1) {
1658                 parser->cont = true;
1659                 parser->buffer[parser->idx++] = ch;
1660                 /* Make sure the parsed string always terminates with '\0'. */
1661                 parser->buffer[parser->idx] = 0;
1662         } else {
1663                 ret = -EINVAL;
1664                 goto out;
1665         }
1666
1667         *ppos += read;
1668         ret = read;
1669
1670 out:
1671         return ret;
1672 }
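
/*
 * A minimal usage sketch for the parser helpers above (hypothetical
 * caller: my_write() and my_handle_token() are illustrative names, and
 * trace_parser_loaded() is assumed to be the helper from trace.h):
 *
 *	static ssize_t my_write(struct file *filp, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read > 0 && trace_parser_loaded(&parser))
 *			my_handle_token(parser.buffer);	// one space-separated token
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */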
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
1691 unsigned long __read_mostly     tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
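
/*
 * A sketch of how the notification above is deferred out of
 * scheduler/irq context into a context that may take VFS locks:
 *
 *	latency_fsnotify(tr)		// atomic context, cannot sleep
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	latency_fsnotify_workfn_irq()	// irq_work callback, still atomic
 *	  -> queue_work(fsnotify_wq, &tr->fsnotify_work)
 *	latency_fsnotify_workfn()	// workqueue context, may sleep
 *	  -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
 */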
1746
1747 /*
1748  * LATENCY_FS_NOTIFY is defined only when:
1749  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752
1753 #define trace_create_maxlat_file(tr, d_tracer)                          \
1754         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                           &tr->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure, so that the maximum trace is permanently saved
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this task's comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: trace array to snapshot
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: trace array to snapshot
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static; they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902
1903 struct trace_selftests {
1904         struct list_head                list;
1905         struct tracer                   *type;
1906 };
1907
1908 static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled,
2014                  * so let's make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045         return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051 static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /* Allocate a dummy tracer_flags */
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* Disable other selftests, since this will break them. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
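
/*
 * A minimal sketch of what a plugin tracer registration might look like
 * ("my_tracer" is illustrative; the init/reset callbacks are the usual
 * struct tracer hooks declared in trace.h):
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	// register_tracer() is __init, so this must run at boot time:
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */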
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209         unsigned *map_cmdline_to_pid;
2210         unsigned cmdline_num;
2211         int cmdline_idx;
2212         char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227                                     struct saved_cmdlines_buffer *s)
2228 {
2229         s->map_cmdline_to_pid = kmalloc_array(val,
2230                                               sizeof(*s->map_cmdline_to_pid),
2231                                               GFP_KERNEL);
2232         if (!s->map_cmdline_to_pid)
2233                 return -ENOMEM;
2234
2235         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236         if (!s->saved_cmdlines) {
2237                 kfree(s->map_cmdline_to_pid);
2238                 return -ENOMEM;
2239         }
2240
2241         s->cmdline_idx = 0;
2242         s->cmdline_num = val;
2243         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244                sizeof(s->map_pid_to_cmdline));
2245         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246                val * sizeof(*s->map_cmdline_to_pid));
2247
2248         return 0;
2249 }
2250
2251 static int trace_create_savedcmd(void)
2252 {
2253         int ret;
2254
2255         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256         if (!savedcmd)
2257                 return -ENOMEM;
2258
2259         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260         if (ret < 0) {
2261                 kfree(savedcmd);
2262                 savedcmd = NULL;
2263                 return -ENOMEM;
2264         }
2265
2266         return 0;
2267 }
2268
2269 int is_tracing_stopped(void)
2270 {
2271         return global_trace.stop_count;
2272 }
2273
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282         struct trace_buffer *buffer;
2283         unsigned long flags;
2284
2285         if (tracing_disabled)
2286                 return;
2287
2288         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289         if (--global_trace.stop_count) {
2290                 if (global_trace.stop_count < 0) {
2291                         /* Someone screwed up their debugging */
2292                         WARN_ON_ONCE(1);
2293                         global_trace.stop_count = 0;
2294                 }
2295                 goto out;
2296         }
2297
2298         /* Prevent the buffers from switching */
2299         arch_spin_lock(&global_trace.max_lock);
2300
2301         buffer = global_trace.array_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306         buffer = global_trace.max_buffer.buffer;
2307         if (buffer)
2308                 ring_buffer_record_enable(buffer);
2309 #endif
2310
2311         arch_spin_unlock(&global_trace.max_lock);
2312
2313  out:
2314         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319         struct trace_buffer *buffer;
2320         unsigned long flags;
2321
2322         if (tracing_disabled)
2323                 return;
2324
2325         /* If global, we need to also start the max tracer */
2326         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                 return tracing_start();
2328
2329         raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331         if (--tr->stop_count) {
2332                 if (tr->stop_count < 0) {
2333                         /* Someone screwed up their debugging */
2334                         WARN_ON_ONCE(1);
2335                         tr->stop_count = 0;
2336                 }
2337                 goto out;
2338         }
2339
2340         buffer = tr->array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344  out:
2345         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356         struct trace_buffer *buffer;
2357         unsigned long flags;
2358
2359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360         if (global_trace.stop_count++)
2361                 goto out;
2362
2363         /* Prevent the buffers from switching */
2364         arch_spin_lock(&global_trace.max_lock);
2365
2366         buffer = global_trace.array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371         buffer = global_trace.max_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374 #endif
2375
2376         arch_spin_unlock(&global_trace.max_lock);
2377
2378  out:
2379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384         struct trace_buffer *buffer;
2385         unsigned long flags;
2386
2387         /* If global, we need to also stop the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_stop();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392         if (tr->stop_count++)
2393                 goto out;
2394
2395         buffer = tr->array_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_disable(buffer);
2398
2399  out:
2400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405         unsigned tpid, idx;
2406
2407         /* treat recording of idle task as a success */
2408         if (!tsk->pid)
2409                 return 1;
2410
2411         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413         /*
2414          * It's not the end of the world if we don't get
2415          * the lock, but we also don't want to spin
2416          * nor do we want to disable interrupts,
2417          * so if we miss here, then better luck next time.
2418          */
2419         if (!arch_spin_trylock(&trace_cmdline_lock))
2420                 return 0;
2421
2422         idx = savedcmd->map_pid_to_cmdline[tpid];
2423         if (idx == NO_CMDLINE_MAP) {
2424                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                 savedcmd->cmdline_idx = idx;
2428         }
2429
2430         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441         int tpid;
2442
2443         if (!pid) {
2444                 strcpy(comm, "<idle>");
2445                 return;
2446         }
2447
2448         if (WARN_ON_ONCE(pid < 0)) {
2449                 strcpy(comm, "<XXX>");
2450                 return;
2451         }
2452
2453         tpid = pid & (PID_MAX_DEFAULT - 1);
2454         map = savedcmd->map_pid_to_cmdline[tpid];
2455         if (map != NO_CMDLINE_MAP) {
2456                 tpid = savedcmd->map_cmdline_to_pid[map];
2457                 if (tpid == pid) {
2458                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                         return;
2460                 }
2461         }
2462         strcpy(comm, "<...>");
2463 }
2464
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467         preempt_disable();
2468         arch_spin_lock(&trace_cmdline_lock);
2469
2470         __trace_find_cmdline(pid, comm);
2471
2472         arch_spin_unlock(&trace_cmdline_lock);
2473         preempt_enable();
2474 }
2475
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478         /*
2479          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480          * if we observe a non-NULL tgid_map then we also observe the correct
2481          * tgid_map_max.
2482          */
2483         int *map = smp_load_acquire(&tgid_map);
2484
2485         if (unlikely(!map || pid > tgid_map_max))
2486                 return NULL;
2487
2488         return &map[pid];
2489 }
2490
2491 int trace_find_tgid(int pid)
2492 {
2493         int *ptr = trace_find_tgid_ptr(pid);
2494
2495         return ptr ? *ptr : 0;
2496 }
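
/*
 * Usage sketch for the cached pid lookups above (illustrative only).
 * Both are best effort: trace_find_cmdline() falls back to "<...>" and
 * trace_find_tgid() returns 0 when nothing was recorded for the pid:
 *
 *	char comm[TASK_COMM_LEN];
 *	int tgid;
 *
 *	trace_find_cmdline(pid, comm);
 *	tgid = trace_find_tgid(pid);
 */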
2497
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500         int *ptr;
2501
2502         /* treat recording of idle task as a success */
2503         if (!tsk->pid)
2504                 return 1;
2505
2506         ptr = trace_find_tgid_ptr(tsk->pid);
2507         if (!ptr)
2508                 return 0;
2509
2510         *ptr = tsk->tgid;
2511         return 1;
2512 }
2513
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                 return true;
2518         if (!__this_cpu_read(trace_taskinfo_save))
2519                 return true;
2520         return false;
2521 }
2522
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532         bool done;
2533
2534         if (tracing_record_taskinfo_skip(flags))
2535                 return;
2536
2537         /*
2538          * Record as much task information as possible. If some fail, continue
2539          * to try to record the others.
2540          */
2541         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544         /* If recording any information failed, retry soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                           struct task_struct *next, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576         /* If recording any information failed, retry soon. */
2577         if (!done)
2578                 return;
2579
2580         __this_cpu_write(trace_taskinfo_save, false);
2581 }
2582
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601         return trace_seq_has_overflowed(s) ?
2602                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
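
/*
 * Typical (illustrative) use in an event's print callback, where the
 * helper turns a possible trace_seq overflow into the right return value:
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */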
2605
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608         unsigned int trace_flags = irqs_status;
2609         unsigned int pc;
2610
2611         pc = preempt_count();
2612
2613         if (pc & NMI_MASK)
2614                 trace_flags |= TRACE_FLAG_NMI;
2615         if (pc & HARDIRQ_MASK)
2616                 trace_flags |= TRACE_FLAG_HARDIRQ;
2617         if (in_serving_softirq())
2618                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620         if (tif_need_resched())
2621                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622         if (test_preempt_need_resched())
2623                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624         return (trace_flags << 16) | (pc & 0xff);
2625 }
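
/*
 * The returned trace_ctx packs the preempt count into bits 0-7 and the
 * TRACE_FLAG_* bits into bits 16 and up, so a consumer could unpack it
 * as (illustrative):
 *
 *	unsigned int pc    = trace_ctx & 0xff;
 *	unsigned int flags = trace_ctx >> 16;
 */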
2626
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629                           int type,
2630                           unsigned long len,
2631                           unsigned int trace_ctx)
2632 {
2633         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656         struct ring_buffer_event *event;
2657         struct page *page;
2658         int cpu;
2659
2660         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662         if (trace_buffered_event_ref++)
2663                 return;
2664
2665         for_each_tracing_cpu(cpu) {
2666                 page = alloc_pages_node(cpu_to_node(cpu),
2667                                         GFP_KERNEL | __GFP_NORETRY, 0);
2668                 if (!page)
2669                         goto failed;
2670
2671                 event = page_address(page);
2672                 memset(event, 0, sizeof(*event));
2673
2674                 per_cpu(trace_buffered_event, cpu) = event;
2675
2676                 preempt_disable();
2677                 if (cpu == smp_processor_id() &&
2678                     __this_cpu_read(trace_buffered_event) !=
2679                     per_cpu(trace_buffered_event, cpu))
2680                         WARN_ON_ONCE(1);
2681                 preempt_enable();
2682         }
2683
2684         return;
2685  failed:
2686         trace_buffered_event_disable();
2687 }
2688
2689 static void enable_trace_buffered_event(void *data)
2690 {
2691         /* Probably not needed, but do it anyway */
2692         smp_rmb();
2693         this_cpu_dec(trace_buffered_event_cnt);
2694 }
2695
2696 static void disable_trace_buffered_event(void *data)
2697 {
2698         this_cpu_inc(trace_buffered_event_cnt);
2699 }
2700
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711         int cpu;
2712
2713         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                 return;
2717
2718         if (--trace_buffered_event_ref)
2719                 return;
2720
2721         preempt_disable();
2722         /* For each CPU, set the buffer as used. */
2723         smp_call_function_many(tracing_buffer_mask,
2724                                disable_trace_buffered_event, NULL, 1);
2725         preempt_enable();
2726
2727         /* Wait for all current users to finish */
2728         synchronize_rcu();
2729
2730         for_each_tracing_cpu(cpu) {
2731                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                 per_cpu(trace_buffered_event, cpu) = NULL;
2733         }
2734         /*
2735          * Make sure trace_buffered_event is NULL before clearing
2736          * trace_buffered_event_cnt.
2737          */
2738         smp_wmb();
2739
2740         preempt_disable();
2741         /* Do the work on each cpu */
2742         smp_call_function_many(tracing_buffer_mask,
2743                                enable_trace_buffered_event, NULL, 1);
2744         preempt_enable();
2745 }
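
/*
 * The enable/disable pair above is reference counted and must be called
 * under event_mutex. A caller installing and later removing an event
 * filter would (illustratively) do:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();	// frees the per-CPU pages on last user
 *	mutex_unlock(&event_mutex);
 */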
2746
2747 static struct trace_buffer *temp_buffer;
2748
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                           struct trace_event_file *trace_file,
2752                           int type, unsigned long len,
2753                           unsigned int trace_ctx)
2754 {
2755         struct ring_buffer_event *entry;
2756         struct trace_array *tr = trace_file->tr;
2757         int val;
2758
2759         *current_rb = tr->array_buffer.buffer;
2760
2761         if (!tr->no_filter_buffering_ref &&
2762             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763             (entry = this_cpu_read(trace_buffered_event))) {
2764                 /*
2765                  * Filtering is on, so try to use the per cpu buffer first.
2766                  * This buffer will simulate a ring_buffer_event,
2767                  * where the type_len is zero and the array[0] will
2768                  * hold the full length.
2769                  * (see include/linux/ring_buffer.h for details on
2770                  *  how the ring_buffer_event is structured).
2771                  *
2772                  * Using a temp buffer during filtering and copying it
2773                  * on a matched filter is quicker than writing directly
2774                  * into the ring buffer and then discarding it when
2775                  * it doesn't match. That is because the discard
2776                  * requires several atomic operations to get right.
2777                  * Copying on match and doing nothing on a failed match
2778                  * is still quicker than no copy on match, but having
2779                  * to discard out of the ring buffer on a failed match.
2780                  */
2781                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                 /*
2786                  * Preemption is disabled, but interrupts and NMIs
2787                  * can still come in now. If that happens after
2788                  * the above increment, then it will have to go
2789                  * back to the old method of allocating the event
2790                  * on the ring buffer, and if the filter fails, it
2791                  * will have to call ring_buffer_discard_commit()
2792                  * to remove it.
2793                  *
2794                  * Need to also check the unlikely case that the
2795                  * length is bigger than the temp buffer size.
2796                  * If that happens, then the reserve is pretty much
2797                  * guaranteed to fail, as the ring buffer currently
2798                  * only allows events less than a page. But that may
2799                  * change in the future, so let the ring buffer reserve
2800                  * handle the failure in that case.
2801                  */
2802                 if (val == 1 && likely(len <= max_len)) {
2803                         trace_event_setup(entry, type, trace_ctx);
2804                         entry->array[0] = len;
2805                         return entry;
2806                 }
2807                 this_cpu_dec(trace_buffered_event_cnt);
2808         }
2809
2810         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                             trace_ctx);
2812         /*
2813          * If tracing is off, but we have triggers enabled
2814          * we still need to look at the event data. Use the temp_buffer
2815          * to store the trace event for the trigger to use. It's recursive
2816          * safe and will not be recorded anywhere.
2817          */
2818         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                 *current_rb = temp_buffer;
2820                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                     trace_ctx);
2822         }
2823         return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832         struct trace_event_call *event_call;
2833         struct trace_event_file *file;
2834         struct trace_event *event;
2835         unsigned long flags;
2836         struct trace_iterator *iter = tracepoint_print_iter;
2837
2838         /* We should never get here if iter is NULL */
2839         if (WARN_ON_ONCE(!iter))
2840                 return;
2841
2842         event_call = fbuffer->trace_file->event_call;
2843         if (!event_call || !event_call->event.funcs ||
2844             !event_call->event.funcs->trace)
2845                 return;
2846
2847         file = fbuffer->trace_file;
2848         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850              !filter_match_preds(file->filter, fbuffer->entry)))
2851                 return;
2852
2853         event = &fbuffer->trace_file->event_call->event;
2854
2855         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856         trace_seq_init(&iter->seq);
2857         iter->ent = fbuffer->entry;
2858         event_call->event.funcs->trace(iter, 0, event);
2859         trace_seq_putc(&iter->seq, 0);
2860         printk("%s", iter->seq.buffer);
2861
2862         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                              void *buffer, size_t *lenp,
2867                              loff_t *ppos)
2868 {
2869         int save_tracepoint_printk;
2870         int ret;
2871
2872         mutex_lock(&tracepoint_printk_mutex);
2873         save_tracepoint_printk = tracepoint_printk;
2874
2875         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877         /*
2878          * This will force exiting early, as tracepoint_printk
2879          * is always zero when tracepoint_print_iter is not allocated.
2880          */
2881         if (!tracepoint_print_iter)
2882                 tracepoint_printk = 0;
2883
2884         if (save_tracepoint_printk == tracepoint_printk)
2885                 goto out;
2886
2887         if (tracepoint_printk)
2888                 static_key_enable(&tracepoint_printk_key.key);
2889         else
2890                 static_key_disable(&tracepoint_printk_key.key);
2891
2892  out:
2893         mutex_unlock(&tracepoint_printk_mutex);
2894
2895         return ret;
2896 }
2897
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900         enum event_trigger_type tt = ETT_NONE;
2901         struct trace_event_file *file = fbuffer->trace_file;
2902
2903         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2904                         fbuffer->entry, &tt))
2905                 goto discard;
2906
2907         if (static_key_false(&tracepoint_printk_key.key))
2908                 output_printk(fbuffer);
2909
2910         if (static_branch_unlikely(&trace_event_exports_enabled))
2911                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2912
2913         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2914                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2915
2916 discard:
2917         if (tt)
2918                 event_triggers_post_call(file, tt);
2919
2920 }
2921 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2922
2923 /*
2924  * Skip 3:
2925  *
2926  *   trace_buffer_unlock_commit_regs()
2927  *   trace_event_buffer_commit()
2928  *   trace_event_raw_event_xxx()
2929  */
2930 # define STACK_SKIP 3
2931
2932 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2933                                      struct trace_buffer *buffer,
2934                                      struct ring_buffer_event *event,
2935                                      unsigned int trace_ctx,
2936                                      struct pt_regs *regs)
2937 {
2938         __buffer_unlock_commit(buffer, event);
2939
2940         /*
2941          * If regs is not set, then skip the necessary functions.
2942          * Note, we can still get here via blktrace, wakeup tracer
2943          * and mmiotrace, but that's ok if they lose a function or
2944          * two. They are not that meaningful.
2945          */
2946         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2947         ftrace_trace_userstack(tr, buffer, trace_ctx);
2948 }
2949
2950 /*
2951  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2952  */
2953 void
2954 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2955                                    struct ring_buffer_event *event)
2956 {
2957         __buffer_unlock_commit(buffer, event);
2958 }
2959
2960 void
2961 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2962                parent_ip, unsigned int trace_ctx)
2963 {
2964         struct trace_event_call *call = &event_function;
2965         struct trace_buffer *buffer = tr->array_buffer.buffer;
2966         struct ring_buffer_event *event;
2967         struct ftrace_entry *entry;
2968
2969         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2970                                             trace_ctx);
2971         if (!event)
2972                 return;
2973         entry   = ring_buffer_event_data(event);
2974         entry->ip                       = ip;
2975         entry->parent_ip                = parent_ip;
2976
2977         if (!call_filter_check_discard(call, entry, buffer, event)) {
2978                 if (static_branch_unlikely(&trace_function_exports_enabled))
2979                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2980                 __buffer_unlock_commit(buffer, event);
2981         }
2982 }
2983
2984 #ifdef CONFIG_STACKTRACE
2985
2986 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2987 #define FTRACE_KSTACK_NESTING   4
2988
2989 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2990
2991 struct ftrace_stack {
2992         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2993 };
2994
2995
2996 struct ftrace_stacks {
2997         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2998 };
2999
3000 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3001 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3002
3003 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3004                                  unsigned int trace_ctx,
3005                                  int skip, struct pt_regs *regs)
3006 {
3007         struct trace_event_call *call = &event_kernel_stack;
3008         struct ring_buffer_event *event;
3009         unsigned int size, nr_entries;
3010         struct ftrace_stack *fstack;
3011         struct stack_entry *entry;
3012         int stackidx;
3013
3014         /*
3015          * Add one, for this function and the call to stack_trace_save().
3016          * If regs is set, then these functions will not be in the way.
3017          */
3018 #ifndef CONFIG_UNWINDER_ORC
3019         if (!regs)
3020                 skip++;
3021 #endif
3022
3023         preempt_disable_notrace();
3024
3025         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3026
3027         /* This should never happen. If it does, yell once and skip */
3028         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3029                 goto out;
3030
3031         /*
3032          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3033          * interrupt will either see the value pre increment or post
3034          * increment. If the interrupt happens pre increment it will have
3035          * restored the counter when it returns.  We just need a barrier to
3036          * keep gcc from moving things around.
3037          */
3038         barrier();
3039
3040         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3041         size = ARRAY_SIZE(fstack->calls);
3042
3043         if (regs) {
3044                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3045                                                    size, skip);
3046         } else {
3047                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3048         }
3049
3050         size = nr_entries * sizeof(unsigned long);
3051         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3052                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3053                                     trace_ctx);
3054         if (!event)
3055                 goto out;
3056         entry = ring_buffer_event_data(event);
3057
3058         memcpy(&entry->caller, fstack->calls, size);
3059         entry->size = nr_entries;
3060
3061         if (!call_filter_check_discard(call, entry, buffer, event))
3062                 __buffer_unlock_commit(buffer, event);
3063
3064  out:
3065         /* Again, don't let gcc optimize things here */
3066         barrier();
3067         __this_cpu_dec(ftrace_stack_reserve);
3068         preempt_enable_notrace();
3069
3070 }
3071
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073                                       struct trace_buffer *buffer,
3074                                       unsigned int trace_ctx,
3075                                       int skip, struct pt_regs *regs)
3076 {
3077         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078                 return;
3079
3080         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3081 }
3082
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084                    int skip)
3085 {
3086         struct trace_buffer *buffer = tr->array_buffer.buffer;
3087
3088         if (rcu_is_watching()) {
3089                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3090                 return;
3091         }
3092
3093         /*
3094          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3095          * but if the above rcu_is_watching() failed, then the NMI
3096          * triggered someplace critical, and rcu_irq_enter() should
3097          * not be called from NMI.
3098          */
3099         if (unlikely(in_nmi()))
3100                 return;
3101
3102         rcu_irq_enter_irqson();
3103         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104         rcu_irq_exit_irqson();
3105 }
3106
3107 /**
3108  * trace_dump_stack - record a stack back trace in the trace buffer
3109  * @skip: Number of functions to skip (helper handlers)
3110  */
3111 void trace_dump_stack(int skip)
3112 {
3113         if (tracing_disabled || tracing_selftest_running)
3114                 return;
3115
3116 #ifndef CONFIG_UNWINDER_ORC
3117         /* Skip 1 to skip this function. */
3118         skip++;
3119 #endif
3120         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3121                              tracing_gen_ctx(), skip, NULL);
3122 }
3123 EXPORT_SYMBOL_GPL(trace_dump_stack);
3124
3125 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3126 static DEFINE_PER_CPU(int, user_stack_count);
3127
3128 static void
3129 ftrace_trace_userstack(struct trace_array *tr,
3130                        struct trace_buffer *buffer, unsigned int trace_ctx)
3131 {
3132         struct trace_event_call *call = &event_user_stack;
3133         struct ring_buffer_event *event;
3134         struct userstack_entry *entry;
3135
3136         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3137                 return;
3138
3139         /*
3140          * NMIs can not handle page faults, even with fix ups.
3141          * The save user stack can (and often does) fault.
3142          */
3143         if (unlikely(in_nmi()))
3144                 return;
3145
3146         /*
3147          * Prevent recursion, since the user stack tracing may
3148          * trigger other kernel events.
3149          */
3150         preempt_disable();
3151         if (__this_cpu_read(user_stack_count))
3152                 goto out;
3153
3154         __this_cpu_inc(user_stack_count);
3155
3156         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3157                                             sizeof(*entry), trace_ctx);
3158         if (!event)
3159                 goto out_drop_count;
3160         entry   = ring_buffer_event_data(event);
3161
3162         entry->tgid             = current->tgid;
3163         memset(&entry->caller, 0, sizeof(entry->caller));
3164
3165         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3166         if (!call_filter_check_discard(call, entry, buffer, event))
3167                 __buffer_unlock_commit(buffer, event);
3168
3169  out_drop_count:
3170         __this_cpu_dec(user_stack_count);
3171  out:
3172         preempt_enable();
3173 }
3174 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3175 static void ftrace_trace_userstack(struct trace_array *tr,
3176                                    struct trace_buffer *buffer,
3177                                    unsigned int trace_ctx)
3178 {
3179 }
3180 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3181
3182 #endif /* CONFIG_STACKTRACE */
3183
3184 static inline void
3185 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3186                           unsigned long long delta)
3187 {
3188         entry->bottom_delta_ts = delta & U32_MAX;
3189         entry->top_delta_ts = (delta >> 32);
3190 }
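
/*
 * The 64-bit delta is split across two 32-bit fields above; a reader of
 * the event would (illustratively) reconstruct it as:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */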
3191
3192 void trace_last_func_repeats(struct trace_array *tr,
3193                              struct trace_func_repeats *last_info,
3194                              unsigned int trace_ctx)
3195 {
3196         struct trace_buffer *buffer = tr->array_buffer.buffer;
3197         struct func_repeats_entry *entry;
3198         struct ring_buffer_event *event;
3199         u64 delta;
3200
3201         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3202                                             sizeof(*entry), trace_ctx);
3203         if (!event)
3204                 return;
3205
3206         delta = ring_buffer_event_time_stamp(buffer, event) -
3207                 last_info->ts_last_call;
3208
3209         entry = ring_buffer_event_data(event);
3210         entry->ip = last_info->ip;
3211         entry->parent_ip = last_info->parent_ip;
3212         entry->count = last_info->count;
3213         func_repeats_set_delta_ts(entry, delta);
3214
3215         __buffer_unlock_commit(buffer, event);
3216 }
3217
3218 /* created for use with alloc_percpu */
3219 struct trace_buffer_struct {
3220         int nesting;
3221         char buffer[4][TRACE_BUF_SIZE];
3222 };
3223
3224 static struct trace_buffer_struct *trace_percpu_buffer;
3225
3226 /*
3227  * This allows for lockless recording.  If we're nested too deeply, then
3228  * this returns NULL.
3229  */
3230 static char *get_trace_buf(void)
3231 {
3232         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3233
3234         if (!buffer || buffer->nesting >= 4)
3235                 return NULL;
3236
3237         buffer->nesting++;
3238
3239         /* Interrupts must see nesting incremented before we use the buffer */
3240         barrier();
3241         return &buffer->buffer[buffer->nesting - 1][0];
3242 }
3243
3244 static void put_trace_buf(void)
3245 {
3246         /* Don't let the decrement of nesting leak before this */
3247         barrier();
3248         this_cpu_dec(trace_percpu_buffer->nesting);
3249 }
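
/*
 * Illustrative usage sketch (not compiled): callers bracket their use of
 * the per-CPU buffer with the two helpers above, with preemption disabled
 * so the task cannot migrate off the CPU whose buffer is in use:
 *
 *      preempt_disable_notrace();
 *      buf = get_trace_buf();
 *      if (buf) {
 *              vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *              put_trace_buf();
 *      }
 *      preempt_enable_notrace();
 *
 * The four nesting levels cover normal, softirq, irq and NMI context,
 * which is why nesting any deeper simply returns NULL.
 */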
3250
3251 static int alloc_percpu_trace_buffer(void)
3252 {
3253         struct trace_buffer_struct *buffers;
3254
3255         if (trace_percpu_buffer)
3256                 return 0;
3257
3258         buffers = alloc_percpu(struct trace_buffer_struct);
3259         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3260                 return -ENOMEM;
3261
3262         trace_percpu_buffer = buffers;
3263         return 0;
3264 }
3265
3266 static int buffers_allocated;
3267
3268 void trace_printk_init_buffers(void)
3269 {
3270         if (buffers_allocated)
3271                 return;
3272
3273         if (alloc_percpu_trace_buffer())
3274                 return;
3275
3276         /* trace_printk() is for debug use only. Don't use it in production. */
3277
3278         pr_warn("\n");
3279         pr_warn("**********************************************************\n");
3280         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3281         pr_warn("**                                                      **\n");
3282         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3283         pr_warn("**                                                      **\n");
3284         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3285         pr_warn("** unsafe for production use.                           **\n");
3286         pr_warn("**                                                      **\n");
3287         pr_warn("** If you see this message and you are not debugging    **\n");
3288         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3289         pr_warn("**                                                      **\n");
3290         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291         pr_warn("**********************************************************\n");
3292
3293         /* Expand the buffers to their set size */
3294         tracing_update_buffers();
3295
3296         buffers_allocated = 1;
3297
3298         /*
3299          * trace_printk_init_buffers() can be called by modules.
3300          * If that happens, we need to start cmdline recording
3301          * directly here. If global_trace.array_buffer.buffer is
3302          * already allocated, then this was called from module code.
3303          */
3304         if (global_trace.array_buffer.buffer)
3305                 tracing_start_cmdline_record();
3306 }
3307 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3308
3309 void trace_printk_start_comm(void)
3310 {
3311         /* Start tracing comms if trace_printk is set */
3312         if (!buffers_allocated)
3313                 return;
3314         tracing_start_cmdline_record();
3315 }
3316
3317 static void trace_printk_start_stop_comm(int enabled)
3318 {
3319         if (!buffers_allocated)
3320                 return;
3321
3322         if (enabled)
3323                 tracing_start_cmdline_record();
3324         else
3325                 tracing_stop_cmdline_record();
3326 }
3327
3328 /**
3329  * trace_vbprintk - write binary msg to tracing buffer
3330  * @ip:    The address of the caller
3331  * @fmt:   The string format to write to the buffer
3332  * @args:  Arguments for @fmt
3333  */
3334 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3335 {
3336         struct trace_event_call *call = &event_bprint;
3337         struct ring_buffer_event *event;
3338         struct trace_buffer *buffer;
3339         struct trace_array *tr = &global_trace;
3340         struct bprint_entry *entry;
3341         unsigned int trace_ctx;
3342         char *tbuffer;
3343         int len = 0, size;
3344
3345         if (unlikely(tracing_selftest_running || tracing_disabled))
3346                 return 0;
3347
3348         /* Don't pollute graph traces with trace_vprintk internals */
3349         pause_graph_tracing();
3350
3351         trace_ctx = tracing_gen_ctx();
3352         preempt_disable_notrace();
3353
3354         tbuffer = get_trace_buf();
3355         if (!tbuffer) {
3356                 len = 0;
3357                 goto out_nobuffer;
3358         }
3359
3360         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3361
3362         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3363                 goto out_put;
3364
3365         size = sizeof(*entry) + sizeof(u32) * len;
3366         buffer = tr->array_buffer.buffer;
3367         ring_buffer_nest_start(buffer);
3368         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3369                                             trace_ctx);
3370         if (!event)
3371                 goto out;
3372         entry = ring_buffer_event_data(event);
3373         entry->ip                       = ip;
3374         entry->fmt                      = fmt;
3375
3376         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3377         if (!call_filter_check_discard(call, entry, buffer, event)) {
3378                 __buffer_unlock_commit(buffer, event);
3379                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380         }
3381
3382 out:
3383         ring_buffer_nest_end(buffer);
3384 out_put:
3385         put_trace_buf();
3386
3387 out_nobuffer:
3388         preempt_enable_notrace();
3389         unpause_graph_tracing();
3390
3391         return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
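
/*
 * Illustrative note: a TRACE_BPRINT entry records only @ip, the @fmt
 * pointer and the raw argument words produced by vbin_printf(); the
 * human-readable string is only built at read time (e.g. via
 * trace_print_bprintk_msg_only()), which keeps this write path to little
 * more than a memcpy() of the binary arguments.
 */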
3394
3395 __printf(3, 0)
3396 static int
3397 __trace_array_vprintk(struct trace_buffer *buffer,
3398                       unsigned long ip, const char *fmt, va_list args)
3399 {
3400         struct trace_event_call *call = &event_print;
3401         struct ring_buffer_event *event;
3402         int len = 0, size;
3403         struct print_entry *entry;
3404         unsigned int trace_ctx;
3405         char *tbuffer;
3406
3407         if (tracing_disabled || tracing_selftest_running)
3408                 return 0;
3409
3410         /* Don't pollute graph traces with trace_vprintk internals */
3411         pause_graph_tracing();
3412
3413         trace_ctx = tracing_gen_ctx();
3414         preempt_disable_notrace();
3415
3416
3417         tbuffer = get_trace_buf();
3418         if (!tbuffer) {
3419                 len = 0;
3420                 goto out_nobuffer;
3421         }
3422
3423         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3424
3425         size = sizeof(*entry) + len + 1;
3426         ring_buffer_nest_start(buffer);
3427         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3428                                             trace_ctx);
3429         if (!event)
3430                 goto out;
3431         entry = ring_buffer_event_data(event);
3432         entry->ip = ip;
3433
3434         memcpy(&entry->buf, tbuffer, len + 1);
3435         if (!call_filter_check_discard(call, entry, buffer, event)) {
3436                 __buffer_unlock_commit(buffer, event);
3437                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3438         }
3439
3440 out:
3441         ring_buffer_nest_end(buffer);
3442         put_trace_buf();
3443
3444 out_nobuffer:
3445         preempt_enable_notrace();
3446         unpause_graph_tracing();
3447
3448         return len;
3449 }
3450
3451 __printf(3, 0)
3452 int trace_array_vprintk(struct trace_array *tr,
3453                         unsigned long ip, const char *fmt, va_list args)
3454 {
3455         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3456 }
3457
3458 /**
3459  * trace_array_printk - Print a message to a specific instance
3460  * @tr: The instance trace_array descriptor
3461  * @ip: The instruction pointer that this is called from.
3462  * @fmt: The format to print (printf format)
3463  *
3464  * If a subsystem sets up its own instance, it may printk strings
3465  * into its tracing instance buffer using this function. Note, this
3466  * function will not write into the top level buffer (use
3467  * trace_printk() for that), as the top level buffer should only
3468  * contain events that can be individually disabled.
3469  * trace_printk() is only for debugging a kernel and should never
3470  * be incorporated into normal use.
3471  *
3472  * trace_array_printk() can be used, as it will not add noise to the
3473  * top level tracing buffer.
3474  *
3475  * Note, trace_array_init_printk() must be called on @tr before this
3476  * can be used.
3477  */
3478 __printf(3, 0)
3479 int trace_array_printk(struct trace_array *tr,
3480                        unsigned long ip, const char *fmt, ...)
3481 {
3482         int ret;
3483         va_list ap;
3484
3485         if (!tr)
3486                 return -ENOENT;
3487
3488         /* This is only allowed for created instances */
3489         if (tr == &global_trace)
3490                 return 0;
3491
3492         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3493                 return 0;
3494
3495         va_start(ap, fmt);
3496         ret = trace_array_vprintk(tr, ip, fmt, ap);
3497         va_end(ap);
3498         return ret;
3499 }
3500 EXPORT_SYMBOL_GPL(trace_array_printk);
3501
3502 /**
3503  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3504  * @tr: The trace array to initialize the buffers for
3505  *
3506  * As trace_array_printk() only writes into instances, calls to it are
3507  * OK to have in the kernel (unlike trace_printk()). This needs to be called
3508  * before trace_array_printk() can be used on a trace_array.
3509  */
3510 int trace_array_init_printk(struct trace_array *tr)
3511 {
3512         if (!tr)
3513                 return -ENOENT;
3514
3515         /* This is only allowed for created instances */
3516         if (tr == &global_trace)
3517                 return -EINVAL;
3518
3519         return alloc_percpu_trace_buffer();
3520 }
3521 EXPORT_SYMBOL_GPL(trace_array_init_printk);
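
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a module that owns a trace instance would typically do
 *
 *      struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *      if (tr && !trace_array_init_printk(tr))
 *              trace_array_printk(tr, _THIS_IP_, "processed %d items\n", nr);
 *
 * where "my_instance" and nr are made up for the example, and
 * trace_array_put(tr) drops the reference when done.  The write is
 * silently dropped (returns 0) unless the instance has the "printk"
 * trace option (TRACE_ITER_PRINTK) set.
 */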
3522
3523 __printf(3, 4)
3524 int trace_array_printk_buf(struct trace_buffer *buffer,
3525                            unsigned long ip, const char *fmt, ...)
3526 {
3527         int ret;
3528         va_list ap;
3529
3530         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3531                 return 0;
3532
3533         va_start(ap, fmt);
3534         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3535         va_end(ap);
3536         return ret;
3537 }
3538
3539 __printf(2, 0)
3540 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3541 {
3542         return trace_array_vprintk(&global_trace, ip, fmt, args);
3543 }
3544 EXPORT_SYMBOL_GPL(trace_vprintk);
3545
3546 static void trace_iterator_increment(struct trace_iterator *iter)
3547 {
3548         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3549
3550         iter->idx++;
3551         if (buf_iter)
3552                 ring_buffer_iter_advance(buf_iter);
3553 }
3554
3555 static struct trace_entry *
3556 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3557                 unsigned long *lost_events)
3558 {
3559         struct ring_buffer_event *event;
3560         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3561
3562         if (buf_iter) {
3563                 event = ring_buffer_iter_peek(buf_iter, ts);
3564                 if (lost_events)
3565                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3566                                 (unsigned long)-1 : 0;
3567         } else {
3568                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3569                                          lost_events);
3570         }
3571
3572         if (event) {
3573                 iter->ent_size = ring_buffer_event_length(event);
3574                 return ring_buffer_event_data(event);
3575         }
3576         iter->ent_size = 0;
3577         return NULL;
3578 }
3579
3580 static struct trace_entry *
3581 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3582                   unsigned long *missing_events, u64 *ent_ts)
3583 {
3584         struct trace_buffer *buffer = iter->array_buffer->buffer;
3585         struct trace_entry *ent, *next = NULL;
3586         unsigned long lost_events = 0, next_lost = 0;
3587         int cpu_file = iter->cpu_file;
3588         u64 next_ts = 0, ts;
3589         int next_cpu = -1;
3590         int next_size = 0;
3591         int cpu;
3592
3593         /*
3594          * If we are in a per_cpu trace file, don't bother iterating over
3595          * all CPUs; peek at that CPU directly.
3596          */
3597         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3598                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3599                         return NULL;
3600                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3601                 if (ent_cpu)
3602                         *ent_cpu = cpu_file;
3603
3604                 return ent;
3605         }
3606
3607         for_each_tracing_cpu(cpu) {
3608
3609                 if (ring_buffer_empty_cpu(buffer, cpu))
3610                         continue;
3611
3612                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3613
3614                 /*
3615                  * Pick the entry with the smallest timestamp:
3616                  */
3617                 if (ent && (!next || ts < next_ts)) {
3618                         next = ent;
3619                         next_cpu = cpu;
3620                         next_ts = ts;
3621                         next_lost = lost_events;
3622                         next_size = iter->ent_size;
3623                 }
3624         }
3625
3626         iter->ent_size = next_size;
3627
3628         if (ent_cpu)
3629                 *ent_cpu = next_cpu;
3630
3631         if (ent_ts)
3632                 *ent_ts = next_ts;
3633
3634         if (missing_events)
3635                 *missing_events = next_lost;
3636
3637         return next;
3638 }
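
/*
 * Illustrative sketch: if the next entries in three per-CPU buffers carry
 * timestamps ts(cpu0)=305, ts(cpu1)=290 and ts(cpu2)=301, the loop above
 * returns the cpu1 entry (ts=290) and reports next_cpu=1, so repeated
 * calls merge the per-CPU streams into one time-ordered stream.
 */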
3639
3640 #define STATIC_FMT_BUF_SIZE     128
3641 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3642
3643 static char *trace_iter_expand_format(struct trace_iterator *iter)
3644 {
3645         char *tmp;
3646
3647         /*
3648          * iter->tr is NULL when used with tp_printk, which means
3649          * this can be called where it is not safe to call krealloc().
3650          */
3651         if (!iter->tr || iter->fmt == static_fmt_buf)
3652                 return NULL;
3653
3654         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3655                        GFP_KERNEL);
3656         if (tmp) {
3657                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3658                 iter->fmt = tmp;
3659         }
3660
3661         return tmp;
3662 }
3663
3664 /* Returns true if the string is safe to dereference from an event */
3665 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3666 {
3667         unsigned long addr = (unsigned long)str;
3668         struct trace_event *trace_event;
3669         struct trace_event_call *event;
3670
3671         /* OK if part of the event data */
3672         if ((addr >= (unsigned long)iter->ent) &&
3673             (addr < (unsigned long)iter->ent + iter->ent_size))
3674                 return true;
3675
3676         /* OK if part of the temp seq buffer */
3677         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3678             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3679                 return true;
3680
3681         /* Core rodata can not be freed */
3682         if (is_kernel_rodata(addr))
3683                 return true;
3684
3685         if (trace_is_tracepoint_string(str))
3686                 return true;
3687
3688         /*
3689          * Now this could be a module event, referencing core module
3690          * data, which is OK.
3691          */
3692         if (!iter->ent)
3693                 return false;
3694
3695         trace_event = ftrace_find_event(iter->ent->type);
3696         if (!trace_event)
3697                 return false;
3698
3699         event = container_of(trace_event, struct trace_event_call, event);
3700         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3701                 return false;
3702
3703         /* Would rather have rodata, but this will suffice */
3704         if (within_module_core(addr, event->module))
3705                 return true;
3706
3707         return false;
3708 }
3709
3710 static const char *show_buffer(struct trace_seq *s)
3711 {
3712         struct seq_buf *seq = &s->seq;
3713
3714         seq_buf_terminate(seq);
3715
3716         return seq->buffer;
3717 }
3718
3719 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3720
3721 static int test_can_verify_check(const char *fmt, ...)
3722 {
3723         char buf[16];
3724         va_list ap;
3725         int ret;
3726
3727         /*
3728          * The verifier depends on vsnprintf() modifying the va_list
3729          * passed to it, which it does when the va_list is passed by
3730          * reference. Some architectures (like x86_32) pass it by value,
3731          * which means that vsnprintf() does not modify the va_list passed
3732          * to it, and the verifier would then need to be able to understand
3733          * all the values that vsnprintf can use. If it is passed by value,
3734          * the verifier is disabled.
3735          */
3736         va_start(ap, fmt);
3737         vsnprintf(buf, 16, "%d", ap);
3738         ret = va_arg(ap, int);
3739         va_end(ap);
3740
3741         return ret;
3742 }
3743
3744 static void test_can_verify(void)
3745 {
3746         if (!test_can_verify_check("%d %d", 0, 1)) {
3747                 pr_info("trace event string verifier disabled\n");
3748                 static_branch_inc(&trace_no_verify);
3749         }
3750 }
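
/*
 * Illustrative walk-through: test_can_verify_check("%d %d", 0, 1) lets
 * vsnprintf() consume the first integer (0) from the va_list.  On
 * architectures that pass the va_list by reference, the following
 * va_arg() then yields the second argument (1) and the verifier stays
 * enabled; if it is passed by value, va_arg() sees 0 again and the
 * verifier is switched off.
 */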
3751
3752 /**
3753  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3754  * @iter: The iterator that holds the seq buffer and the event being printed
3755  * @fmt: The format used to print the event
3756  * @ap: The va_list holding the data to print from @fmt.
3757  *
3758  * This writes the data into the @iter->seq buffer using the data from
3759  * @fmt and @ap. If the format has a %s, then the source of the string
3760  * is examined to make sure it is safe to print; otherwise it will
3761  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3762  * pointer.
3763  */
3764 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3765                          va_list ap)
3766 {
3767         const char *p = fmt;
3768         const char *str;
3769         int i, j;
3770
3771         if (WARN_ON_ONCE(!fmt))
3772                 return;
3773
3774         if (static_branch_unlikely(&trace_no_verify))
3775                 goto print;
3776
3777         /* Don't bother checking when doing a ftrace_dump() */
3778         if (iter->fmt == static_fmt_buf)
3779                 goto print;
3780
3781         while (*p) {
3782                 bool star = false;
3783                 int len = 0;
3784
3785                 j = 0;
3786
3787                 /* We only care about %s and variants */
3788                 for (i = 0; p[i]; i++) {
3789                         if (i + 1 >= iter->fmt_size) {
3790                                 /*
3791                                  * If we can't expand the copy buffer,
3792                                  * just print it.
3793                                  */
3794                                 if (!trace_iter_expand_format(iter))
3795                                         goto print;
3796                         }
3797
3798                         if (p[i] == '\\' && p[i+1]) {
3799                                 i++;
3800                                 continue;
3801                         }
3802                         if (p[i] == '%') {
3803                                 /* Need to test cases like %08.*s */
3804                                 for (j = 1; p[i+j]; j++) {
3805                                         if (isdigit(p[i+j]) ||
3806                                             p[i+j] == '.')
3807                                                 continue;
3808                                         if (p[i+j] == '*') {
3809                                                 star = true;
3810                                                 continue;
3811                                         }
3812                                         break;
3813                                 }
3814                                 if (p[i+j] == 's')
3815                                         break;
3816                                 star = false;
3817                         }
3818                         j = 0;
3819                 }
3820                 /* If no %s found then just print normally */
3821                 if (!p[i])
3822                         break;
3823
3824                 /* Copy up to the %s, and print that */
3825                 strncpy(iter->fmt, p, i);
3826                 iter->fmt[i] = '\0';
3827                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3828
3829                 if (star)
3830                         len = va_arg(ap, int);
3831
3832                 /* The ap now points to the string data of the %s */
3833                 str = va_arg(ap, const char *);
3834
3835                 /*
3836                  * If you hit this warning, it is likely that the
3837                  * trace event in question used %s on a string that
3838                  * was saved at the time of the event, but may not be
3839                  * around when the trace is read. Use __string(),
3840                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3841                  * instead. See samples/trace_events/trace-events-sample.h
3842                  * for reference.
3843                  */
3844                 if (WARN_ONCE(!trace_safe_str(iter, str),
3845                               "fmt: '%s' current_buffer: '%s'",
3846                               fmt, show_buffer(&iter->seq))) {
3847                         int ret;
3848
3849                         /* Try to safely read the string */
3850                         if (star) {
3851                                 if (len + 1 > iter->fmt_size)
3852                                         len = iter->fmt_size - 1;
3853                                 if (len < 0)
3854                                         len = 0;
3855                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3856                                 iter->fmt[len] = 0;
3857                                 star = false;
3858                         } else {
3859                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3860                                                                   iter->fmt_size);
3861                         }
3862                         if (ret < 0)
3863                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3864                         else
3865                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3866                                                  str, iter->fmt);
3867                         str = "[UNSAFE-MEMORY]";
3868                         strcpy(iter->fmt, "%s");
3869                 } else {
3870                         strncpy(iter->fmt, p + i, j + 1);
3871                         iter->fmt[j+1] = '\0';
3872                 }
3873                 if (star)
3874                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3875                 else
3876                         trace_seq_printf(&iter->seq, iter->fmt, str);
3877
3878                 p += i + j + 1;
3879         }
3880  print:
3881         if (*p)
3882                 trace_seq_vprintf(&iter->seq, p, ap);
3883 }
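
/*
 * Illustrative walk-through: for fmt = "comm=%s pid=%d", the loop above
 * prints "comm=" with trace_seq_vprintf(), pulls the string argument with
 * va_arg(), checks it with trace_safe_str(), and prints it with the saved
 * "%s" conversion (or as an [UNSAFE-MEMORY] marker).  The remaining
 * " pid=%d" is then handled by the final trace_seq_vprintf() at the
 * print: label using the rest of @ap.
 */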
3884
3885 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3886 {
3887         const char *p, *new_fmt;
3888         char *q;
3889
3890         if (WARN_ON_ONCE(!fmt))
3891                 return fmt;
3892
3893         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3894                 return fmt;
3895
3896         p = fmt;
3897         new_fmt = q = iter->fmt;
3898         while (*p) {
3899                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3900                         if (!trace_iter_expand_format(iter))
3901                                 return fmt;
3902
3903                         q += iter->fmt - new_fmt;
3904                         new_fmt = iter->fmt;
3905                 }
3906
3907                 *q++ = *p++;
3908
3909                 /* Replace %p with %px */
3910                 if (p[-1] == '%') {
3911                         if (p[0] == '%') {
3912                                 *q++ = *p++;
3913                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3914                                 *q++ = *p++;
3915                                 *q++ = 'x';
3916                         }
3917                 }
3918         }
3919         *q = '\0';
3920
3921         return new_fmt;
3922 }
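
/*
 * Illustrative sketch: with TRACE_ITER_HASH_PTR cleared, a format such as
 * "ptr=%p flags=%d\n" is rewritten in iter->fmt to "ptr=%px flags=%d\n",
 * so the raw pointer value is printed instead of the hashed one.
 * Extended specifiers such as "%pS" and the literal "%%p" are left
 * untouched by the isalnum() and '%' checks above.
 */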
3923
3924 #define STATIC_TEMP_BUF_SIZE    128
3925 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3926
3927 /* Find the next real entry, without updating the iterator itself */
3928 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3929                                           int *ent_cpu, u64 *ent_ts)
3930 {
3931         /* __find_next_entry will reset ent_size */
3932         int ent_size = iter->ent_size;
3933         struct trace_entry *entry;
3934
3935         /*
3936          * If called from ftrace_dump(), then the iter->temp buffer
3937          * will be the static_temp_buf and not created from kmalloc.
3938          * If the entry size is greater than the buffer, we cannot
3939          * save it. Just return NULL in that case. This is only
3940          * used to add markers when two consecutive events' time
3941          * stamps have a large delta. See trace_print_lat_context().
3942          */
3943         if (iter->temp == static_temp_buf &&
3944             STATIC_TEMP_BUF_SIZE < ent_size)
3945                 return NULL;
3946
3947         /*
3948          * __find_next_entry() may call peek_next_entry(), which may
3949          * call ring_buffer_peek(), which can make the contents of iter->ent
3950          * undefined. Need to copy iter->ent now.
3951          */
3952         if (iter->ent && iter->ent != iter->temp) {
3953                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3954                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3955                         void *temp;
3956                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3957                         if (!temp)
3958                                 return NULL;
3959                         kfree(iter->temp);
3960                         iter->temp = temp;
3961                         iter->temp_size = iter->ent_size;
3962                 }
3963                 memcpy(iter->temp, iter->ent, iter->ent_size);
3964                 iter->ent = iter->temp;
3965         }
3966         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3967         /* Put back the original ent_size */
3968         iter->ent_size = ent_size;
3969
3970         return entry;
3971 }
3972
3973 /* Find the next real entry, and increment the iterator to the next entry */
3974 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3975 {
3976         iter->ent = __find_next_entry(iter, &iter->cpu,
3977                                       &iter->lost_events, &iter->ts);
3978
3979         if (iter->ent)
3980                 trace_iterator_increment(iter);
3981
3982         return iter->ent ? iter : NULL;
3983 }
3984
3985 static void trace_consume(struct trace_iterator *iter)
3986 {
3987         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3988                             &iter->lost_events);
3989 }
3990
3991 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3992 {
3993         struct trace_iterator *iter = m->private;
3994         int i = (int)*pos;
3995         void *ent;
3996
3997         WARN_ON_ONCE(iter->leftover);
3998
3999         (*pos)++;
4000
4001         /* can't go backwards */
4002         if (iter->idx > i)
4003                 return NULL;
4004
4005         if (iter->idx < 0)
4006                 ent = trace_find_next_entry_inc(iter);
4007         else
4008                 ent = iter;
4009
4010         while (ent && iter->idx < i)
4011                 ent = trace_find_next_entry_inc(iter);
4012
4013         iter->pos = *pos;
4014
4015         return ent;
4016 }
4017
4018 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4019 {
4020         struct ring_buffer_iter *buf_iter;
4021         unsigned long entries = 0;
4022         u64 ts;
4023
4024         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4025
4026         buf_iter = trace_buffer_iter(iter, cpu);
4027         if (!buf_iter)
4028                 return;
4029
4030         ring_buffer_iter_reset(buf_iter);
4031
4032         /*
4033          * With the max latency tracers we could have the case that
4034          * a reset never took place on a CPU. This is evident when
4035          * the timestamp is before the start of the buffer.
4036          */
4037         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4038                 if (ts >= iter->array_buffer->time_start)
4039                         break;
4040                 entries++;
4041                 ring_buffer_iter_advance(buf_iter);
4042         }
4043
4044         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4045 }
4046
4047 /*
4048  * The current tracer is copied to avoid taking a global lock
4049  * all around.
4050  */
4051 static void *s_start(struct seq_file *m, loff_t *pos)
4052 {
4053         struct trace_iterator *iter = m->private;
4054         struct trace_array *tr = iter->tr;
4055         int cpu_file = iter->cpu_file;
4056         void *p = NULL;
4057         loff_t l = 0;
4058         int cpu;
4059
4060         /*
4061          * Copy the tracer to avoid using a global lock all around.
4062          * iter->trace is a copy of current_trace; the pointer to the
4063          * name may be used instead of a strcmp(), as iter->trace->name
4064          * will point to the same string as current_trace->name.
4065          */
4066         mutex_lock(&trace_types_lock);
4067         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4068                 *iter->trace = *tr->current_trace;
4069         mutex_unlock(&trace_types_lock);
4070
4071 #ifdef CONFIG_TRACER_MAX_TRACE
4072         if (iter->snapshot && iter->trace->use_max_tr)
4073                 return ERR_PTR(-EBUSY);
4074 #endif
4075
4076         if (*pos != iter->pos) {
4077                 iter->ent = NULL;
4078                 iter->cpu = 0;
4079                 iter->idx = -1;
4080
4081                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4082                         for_each_tracing_cpu(cpu)
4083                                 tracing_iter_reset(iter, cpu);
4084                 } else
4085                         tracing_iter_reset(iter, cpu_file);
4086
4087                 iter->leftover = 0;
4088                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4089                         ;
4090
4091         } else {
4092                 /*
4093                  * If we overflowed the seq_file before, then we want
4094                  * to just reuse the trace_seq buffer.
4095                  */
4096                 if (iter->leftover)
4097                         p = iter;
4098                 else {
4099                         l = *pos - 1;
4100                         p = s_next(m, p, &l);
4101                 }
4102         }
4103
4104         trace_event_read_lock();
4105         trace_access_lock(cpu_file);
4106         return p;
4107 }
4108
4109 static void s_stop(struct seq_file *m, void *p)
4110 {
4111         struct trace_iterator *iter = m->private;
4112
4113 #ifdef CONFIG_TRACER_MAX_TRACE
4114         if (iter->snapshot && iter->trace->use_max_tr)
4115                 return;
4116 #endif
4117
4118         trace_access_unlock(iter->cpu_file);
4119         trace_event_read_unlock();
4120 }
4121
4122 static void
4123 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4124                       unsigned long *entries, int cpu)
4125 {
4126         unsigned long count;
4127
4128         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4129         /*
4130          * If this buffer has skipped entries, then we hold all
4131          * entries for the trace and we need to ignore the
4132          * ones before the time stamp.
4133          */
4134         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4135                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4136                 /* total is the same as the entries */
4137                 *total = count;
4138         } else
4139                 *total = count +
4140                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4141         *entries = count;
4142 }
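
/*
 * Illustrative sketch: if a CPU buffer currently holds 800 entries and
 * 200 older entries were overwritten, this reports *entries = 800 and
 * *total = 1000.  When skipped_entries is set (see tracing_iter_reset()),
 * the pre-reset entries are subtracted instead and *total == *entries.
 */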
4143
4144 static void
4145 get_total_entries(struct array_buffer *buf,
4146                   unsigned long *total, unsigned long *entries)
4147 {
4148         unsigned long t, e;
4149         int cpu;
4150
4151         *total = 0;
4152         *entries = 0;
4153
4154         for_each_tracing_cpu(cpu) {
4155                 get_total_entries_cpu(buf, &t, &e, cpu);
4156                 *total += t;
4157                 *entries += e;
4158         }
4159 }
4160
4161 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4162 {
4163         unsigned long total, entries;
4164
4165         if (!tr)
4166                 tr = &global_trace;
4167
4168         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4169
4170         return entries;
4171 }
4172
4173 unsigned long trace_total_entries(struct trace_array *tr)
4174 {
4175         unsigned long total, entries;
4176
4177         if (!tr)
4178                 tr = &global_trace;
4179
4180         get_total_entries(&tr->array_buffer, &total, &entries);
4181
4182         return entries;
4183 }
4184
4185 static void print_lat_help_header(struct seq_file *m)
4186 {
4187         seq_puts(m, "#                    _------=> CPU#            \n"
4188                     "#                   / _-----=> irqs-off        \n"
4189                     "#                  | / _----=> need-resched    \n"
4190                     "#                  || / _---=> hardirq/softirq \n"
4191                     "#                  ||| / _--=> preempt-depth   \n"
4192                     "#                  |||| /     delay            \n"
4193                     "#  cmd     pid     ||||| time  |   caller      \n"
4194                     "#     \\   /        |||||  \\    |   /         \n");
4195 }
4196
4197 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4198 {
4199         unsigned long total;
4200         unsigned long entries;
4201
4202         get_total_entries(buf, &total, &entries);
4203         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4204                    entries, total, num_online_cpus());
4205         seq_puts(m, "#\n");
4206 }
4207
4208 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4209                                    unsigned int flags)
4210 {
4211         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4212
4213         print_event_info(buf, m);
4214
4215         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4216         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4217 }
4218
4219 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4220                                        unsigned int flags)
4221 {
4222         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223         const char *space = "            ";
4224         int prec = tgid ? 12 : 2;
4225
4226         print_event_info(buf, m);
4227
4228         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4229         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4230         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4231         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4232         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4233         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4234         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4235 }
4236
4237 void
4238 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4239 {
4240         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4241         struct array_buffer *buf = iter->array_buffer;
4242         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4243         struct tracer *type = iter->trace;
4244         unsigned long entries;
4245         unsigned long total;
4246         const char *name = "preemption";
4247
4248         name = type->name;
4249
4250         get_total_entries(buf, &total, &entries);
4251
4252         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4253                    name, UTS_RELEASE);
4254         seq_puts(m, "# -----------------------------------"
4255                  "---------------------------------\n");
4256         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4257                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4258                    nsecs_to_usecs(data->saved_latency),
4259                    entries,
4260                    total,
4261                    buf->cpu,
4262 #if defined(CONFIG_PREEMPT_NONE)
4263                    "server",
4264 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4265                    "desktop",
4266 #elif defined(CONFIG_PREEMPT)
4267                    "preempt",
4268 #elif defined(CONFIG_PREEMPT_RT)
4269                    "preempt_rt",
4270 #else
4271                    "unknown",
4272 #endif
4273                    /* These are reserved for later use */
4274                    0, 0, 0, 0);
4275 #ifdef CONFIG_SMP
4276         seq_printf(m, " #P:%d)\n", num_online_cpus());
4277 #else
4278         seq_puts(m, ")\n");
4279 #endif
4280         seq_puts(m, "#    -----------------\n");
4281         seq_printf(m, "#    | task: %.16s-%d "
4282                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4283                    data->comm, data->pid,
4284                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4285                    data->policy, data->rt_priority);
4286         seq_puts(m, "#    -----------------\n");
4287
4288         if (data->critical_start) {
4289                 seq_puts(m, "#  => started at: ");
4290                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4291                 trace_print_seq(m, &iter->seq);
4292                 seq_puts(m, "\n#  => ended at:   ");
4293                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4294                 trace_print_seq(m, &iter->seq);
4295                 seq_puts(m, "\n#\n");
4296         }
4297
4298         seq_puts(m, "#\n");
4299 }
4300
4301 static void test_cpu_buff_start(struct trace_iterator *iter)
4302 {
4303         struct trace_seq *s = &iter->seq;
4304         struct trace_array *tr = iter->tr;
4305
4306         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4307                 return;
4308
4309         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4310                 return;
4311
4312         if (cpumask_available(iter->started) &&
4313             cpumask_test_cpu(iter->cpu, iter->started))
4314                 return;
4315
4316         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4317                 return;
4318
4319         if (cpumask_available(iter->started))
4320                 cpumask_set_cpu(iter->cpu, iter->started);
4321
4322         /* Don't print started cpu buffer for the first entry of the trace */
4323         if (iter->idx > 1)
4324                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4325                                 iter->cpu);
4326 }
4327
4328 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4329 {
4330         struct trace_array *tr = iter->tr;
4331         struct trace_seq *s = &iter->seq;
4332         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4333         struct trace_entry *entry;
4334         struct trace_event *event;
4335
4336         entry = iter->ent;
4337
4338         test_cpu_buff_start(iter);
4339
4340         event = ftrace_find_event(entry->type);
4341
4342         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4343                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4344                         trace_print_lat_context(iter);
4345                 else
4346                         trace_print_context(iter);
4347         }
4348
4349         if (trace_seq_has_overflowed(s))
4350                 return TRACE_TYPE_PARTIAL_LINE;
4351
4352         if (event)
4353                 return event->funcs->trace(iter, sym_flags, event);
4354
4355         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4356
4357         return trace_handle_return(s);
4358 }
4359
4360 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4361 {
4362         struct trace_array *tr = iter->tr;
4363         struct trace_seq *s = &iter->seq;
4364         struct trace_entry *entry;
4365         struct trace_event *event;
4366
4367         entry = iter->ent;
4368
4369         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4370                 trace_seq_printf(s, "%d %d %llu ",
4371                                  entry->pid, iter->cpu, iter->ts);
4372
4373         if (trace_seq_has_overflowed(s))
4374                 return TRACE_TYPE_PARTIAL_LINE;
4375
4376         event = ftrace_find_event(entry->type);
4377         if (event)
4378                 return event->funcs->raw(iter, 0, event);
4379
4380         trace_seq_printf(s, "%d ?\n", entry->type);
4381
4382         return trace_handle_return(s);
4383 }
4384
4385 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         unsigned char newline = '\n';
4390         struct trace_entry *entry;
4391         struct trace_event *event;
4392
4393         entry = iter->ent;
4394
4395         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4396                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4397                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4398                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4399                 if (trace_seq_has_overflowed(s))
4400                         return TRACE_TYPE_PARTIAL_LINE;
4401         }
4402
4403         event = ftrace_find_event(entry->type);
4404         if (event) {
4405                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4406                 if (ret != TRACE_TYPE_HANDLED)
4407                         return ret;
4408         }
4409
4410         SEQ_PUT_FIELD(s, newline);
4411
4412         return trace_handle_return(s);
4413 }
4414
4415 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4416 {
4417         struct trace_array *tr = iter->tr;
4418         struct trace_seq *s = &iter->seq;
4419         struct trace_entry *entry;
4420         struct trace_event *event;
4421
4422         entry = iter->ent;
4423
4424         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4425                 SEQ_PUT_FIELD(s, entry->pid);
4426                 SEQ_PUT_FIELD(s, iter->cpu);
4427                 SEQ_PUT_FIELD(s, iter->ts);
4428                 if (trace_seq_has_overflowed(s))
4429                         return TRACE_TYPE_PARTIAL_LINE;
4430         }
4431
4432         event = ftrace_find_event(entry->type);
4433         return event ? event->funcs->binary(iter, 0, event) :
4434                 TRACE_TYPE_HANDLED;
4435 }
4436
4437 int trace_empty(struct trace_iterator *iter)
4438 {
4439         struct ring_buffer_iter *buf_iter;
4440         int cpu;
4441
4442         /* If we are looking at one CPU buffer, only check that one */
4443         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4444                 cpu = iter->cpu_file;
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453                 return 1;
4454         }
4455
4456         for_each_tracing_cpu(cpu) {
4457                 buf_iter = trace_buffer_iter(iter, cpu);
4458                 if (buf_iter) {
4459                         if (!ring_buffer_iter_empty(buf_iter))
4460                                 return 0;
4461                 } else {
4462                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                 return 0;
4464                 }
4465         }
4466
4467         return 1;
4468 }
4469
4470 /*  Called with trace_event_read_lock() held. */
4471 enum print_line_t print_trace_line(struct trace_iterator *iter)
4472 {
4473         struct trace_array *tr = iter->tr;
4474         unsigned long trace_flags = tr->trace_flags;
4475         enum print_line_t ret;
4476
4477         if (iter->lost_events) {
4478                 if (iter->lost_events == (unsigned long)-1)
4479                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4480                                          iter->cpu);
4481                 else
4482                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4483                                          iter->cpu, iter->lost_events);
4484                 if (trace_seq_has_overflowed(&iter->seq))
4485                         return TRACE_TYPE_PARTIAL_LINE;
4486         }
4487
4488         if (iter->trace && iter->trace->print_line) {
4489                 ret = iter->trace->print_line(iter);
4490                 if (ret != TRACE_TYPE_UNHANDLED)
4491                         return ret;
4492         }
4493
4494         if (iter->ent->type == TRACE_BPUTS &&
4495                         trace_flags & TRACE_ITER_PRINTK &&
4496                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4497                 return trace_print_bputs_msg_only(iter);
4498
4499         if (iter->ent->type == TRACE_BPRINT &&
4500                         trace_flags & TRACE_ITER_PRINTK &&
4501                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4502                 return trace_print_bprintk_msg_only(iter);
4503
4504         if (iter->ent->type == TRACE_PRINT &&
4505                         trace_flags & TRACE_ITER_PRINTK &&
4506                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4507                 return trace_print_printk_msg_only(iter);
4508
4509         if (trace_flags & TRACE_ITER_BIN)
4510                 return print_bin_fmt(iter);
4511
4512         if (trace_flags & TRACE_ITER_HEX)
4513                 return print_hex_fmt(iter);
4514
4515         if (trace_flags & TRACE_ITER_RAW)
4516                 return print_raw_fmt(iter);
4517
4518         return print_trace_fmt(iter);
4519 }
4520
4521 void trace_latency_header(struct seq_file *m)
4522 {
4523         struct trace_iterator *iter = m->private;
4524         struct trace_array *tr = iter->tr;
4525
4526         /* print nothing if the buffers are empty */
4527         if (trace_empty(iter))
4528                 return;
4529
4530         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4531                 print_trace_header(m, iter);
4532
4533         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4534                 print_lat_help_header(m);
4535 }
4536
4537 void trace_default_header(struct seq_file *m)
4538 {
4539         struct trace_iterator *iter = m->private;
4540         struct trace_array *tr = iter->tr;
4541         unsigned long trace_flags = tr->trace_flags;
4542
4543         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4544                 return;
4545
4546         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4547                 /* print nothing if the buffers are empty */
4548                 if (trace_empty(iter))
4549                         return;
4550                 print_trace_header(m, iter);
4551                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4552                         print_lat_help_header(m);
4553         } else {
4554                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4555                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4556                                 print_func_help_header_irq(iter->array_buffer,
4557                                                            m, trace_flags);
4558                         else
4559                                 print_func_help_header(iter->array_buffer, m,
4560                                                        trace_flags);
4561                 }
4562         }
4563 }
4564
4565 static void test_ftrace_alive(struct seq_file *m)
4566 {
4567         if (!ftrace_is_dead())
4568                 return;
4569         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4570                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4571 }
4572
4573 #ifdef CONFIG_TRACER_MAX_TRACE
4574 static void show_snapshot_main_help(struct seq_file *m)
4575 {
4576         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4577                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4578                     "#                      Takes a snapshot of the main buffer.\n"
4579                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4580                     "#                      (Doesn't have to be '2'; works with any number that\n"
4581                     "#                       is not a '0' or '1')\n");
4582 }
4583
4584 static void show_snapshot_percpu_help(struct seq_file *m)
4585 {
4586         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4587 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4588         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4590 #else
4591         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4592                     "#                     Must use main snapshot file to allocate.\n");
4593 #endif
4594         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4595                     "#                      (Doesn't have to be '2'; works with any number that\n"
4596                     "#                       is not a '0' or '1')\n");
4597 }
4598
4599 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4600 {
4601         if (iter->tr->allocated_snapshot)
4602                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4603         else
4604                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4605
4606         seq_puts(m, "# Snapshot commands:\n");
4607         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4608                 show_snapshot_main_help(m);
4609         else
4610                 show_snapshot_percpu_help(m);
4611 }
4612 #else
4613 /* Should never be called */
4614 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4615 #endif
4616
4617 static int s_show(struct seq_file *m, void *v)
4618 {
4619         struct trace_iterator *iter = v;
4620         int ret;
4621
4622         if (iter->ent == NULL) {
4623                 if (iter->tr) {
4624                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4625                         seq_puts(m, "#\n");
4626                         test_ftrace_alive(m);
4627                 }
4628                 if (iter->snapshot && trace_empty(iter))
4629                         print_snapshot_help(m, iter);
4630                 else if (iter->trace && iter->trace->print_header)
4631                         iter->trace->print_header(m);
4632                 else
4633                         trace_default_header(m);
4634
4635         } else if (iter->leftover) {
4636                 /*
4637                  * If we filled the seq_file buffer earlier, we
4638                  * want to just show it now.
4639                  */
4640                 ret = trace_print_seq(m, &iter->seq);
4641
4642                 /* ret should this time be zero, but you never know */
4643                 iter->leftover = ret;
4644
4645         } else {
4646                 print_trace_line(iter);
4647                 ret = trace_print_seq(m, &iter->seq);
4648                 /*
4649                  * If we overflow the seq_file buffer, then it will
4650                  * ask us for this data again at start up.
4651                  * Use that instead.
4652                  *  ret is 0 if seq_file write succeeded.
4653                  *        -1 otherwise.
4654                  */
4655                 iter->leftover = ret;
4656         }
4657
4658         return 0;
4659 }
4660
4661 /*
4662  * Should be used after trace_array_get(); trace_types_lock
4663  * ensures that i_cdev was already initialized.
4664  */
4665 static inline int tracing_get_cpu(struct inode *inode)
4666 {
4667         if (inode->i_cdev) /* See trace_create_cpu_file() */
4668                 return (long)inode->i_cdev - 1;
4669         return RING_BUFFER_ALL_CPUS;
4670 }
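
/*
 * Illustrative sketch (assuming trace_create_cpu_file() stores cpu + 1 in
 * i_cdev, which the "- 1" above implies): a NULL i_cdev (0) decodes to
 * RING_BUFFER_ALL_CPUS, while e.g. i_cdev == (void *)3 decodes to CPU 2.
 */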
4671
4672 static const struct seq_operations tracer_seq_ops = {
4673         .start          = s_start,
4674         .next           = s_next,
4675         .stop           = s_stop,
4676         .show           = s_show,
4677 };
4678
4679 static struct trace_iterator *
4680 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4681 {
4682         struct trace_array *tr = inode->i_private;
4683         struct trace_iterator *iter;
4684         int cpu;
4685
4686         if (tracing_disabled)
4687                 return ERR_PTR(-ENODEV);
4688
4689         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4690         if (!iter)
4691                 return ERR_PTR(-ENOMEM);
4692
4693         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4694                                     GFP_KERNEL);
4695         if (!iter->buffer_iter)
4696                 goto release;
4697
4698         /*
4699          * trace_find_next_entry() may need to save off iter->ent.
4700          * It will place it into the iter->temp buffer. As most
4701          * events are less than 128 bytes, allocate a buffer of that size.
4702          * If one is greater, then trace_find_next_entry() will
4703          * allocate a new buffer to adjust for the bigger iter->ent.
4704          * It's not critical if it fails to get allocated here.
4705          */
4706         iter->temp = kmalloc(128, GFP_KERNEL);
4707         if (iter->temp)
4708                 iter->temp_size = 128;
4709
4710         /*
4711          * trace_event_printf() may need to modify the given format
4712          * string to replace %p with %px so that it shows the real address
4713          * instead of a hashed value. However, that is only needed for
4714          * event tracing; other tracers may not need it. Defer the
4715          * allocation until it is needed.
4716          */
4717         iter->fmt = NULL;
4718         iter->fmt_size = 0;
4719
4720         /*
4721          * We make a copy of the current tracer to avoid concurrent
4722          * changes on it while we are reading.
4723          */
4724         mutex_lock(&trace_types_lock);
4725         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4726         if (!iter->trace)
4727                 goto fail;
4728
4729         *iter->trace = *tr->current_trace;
4730
4731         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4732                 goto fail;
4733
4734         iter->tr = tr;
4735
4736 #ifdef CONFIG_TRACER_MAX_TRACE
4737         /* Currently only the top directory has a snapshot */
4738         if (tr->current_trace->print_max || snapshot)
4739                 iter->array_buffer = &tr->max_buffer;
4740         else
4741 #endif
4742                 iter->array_buffer = &tr->array_buffer;
4743         iter->snapshot = snapshot;
4744         iter->pos = -1;
4745         iter->cpu_file = tracing_get_cpu(inode);
4746         mutex_init(&iter->mutex);
4747
4748         /* Notify the tracer early; before we stop tracing. */
4749         if (iter->trace->open)
4750                 iter->trace->open(iter);
4751
4752         /* Annotate start of buffers if we had overruns */
4753         if (ring_buffer_overruns(iter->array_buffer->buffer))
4754                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4755
4756         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4757         if (trace_clocks[tr->clock_id].in_ns)
4758                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4759
4760         /*
4761          * If pause-on-trace is enabled, then stop the trace while
4762          * dumping, unless this is the "snapshot" file
4763          */
4764         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4765                 tracing_stop_tr(tr);
4766
4767         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4768                 for_each_tracing_cpu(cpu) {
4769                         iter->buffer_iter[cpu] =
4770                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4771                                                          cpu, GFP_KERNEL);
4772                 }
4773                 ring_buffer_read_prepare_sync();
4774                 for_each_tracing_cpu(cpu) {
4775                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4776                         tracing_iter_reset(iter, cpu);
4777                 }
4778         } else {
4779                 cpu = iter->cpu_file;
4780                 iter->buffer_iter[cpu] =
4781                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                  cpu, GFP_KERNEL);
4783                 ring_buffer_read_prepare_sync();
4784                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4785                 tracing_iter_reset(iter, cpu);
4786         }
4787
4788         mutex_unlock(&trace_types_lock);
4789
4790         return iter;
4791
4792  fail:
4793         mutex_unlock(&trace_types_lock);
4794         kfree(iter->trace);
4795         kfree(iter->temp);
4796         kfree(iter->buffer_iter);
4797 release:
4798         seq_release_private(inode, file);
4799         return ERR_PTR(-ENOMEM);
4800 }
4801
4802 int tracing_open_generic(struct inode *inode, struct file *filp)
4803 {
4804         int ret;
4805
4806         ret = tracing_check_open_get_tr(NULL);
4807         if (ret)
4808                 return ret;
4809
4810         filp->private_data = inode->i_private;
4811         return 0;
4812 }
4813
4814 bool tracing_is_disabled(void)
4815 {
4816         return tracing_disabled;
4817 }
4818
4819 /*
4820  * Open and update trace_array ref count.
4821  * Must have the current trace_array passed to it.
4822  */
4823 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4824 {
4825         struct trace_array *tr = inode->i_private;
4826         int ret;
4827
4828         ret = tracing_check_open_get_tr(tr);
4829         if (ret)
4830                 return ret;
4831
4832         filp->private_data = inode->i_private;
4833
4834         return 0;
4835 }
4836
4837 static int tracing_release(struct inode *inode, struct file *file)
4838 {
4839         struct trace_array *tr = inode->i_private;
4840         struct seq_file *m = file->private_data;
4841         struct trace_iterator *iter;
4842         int cpu;
4843
4844         if (!(file->f_mode & FMODE_READ)) {
4845                 trace_array_put(tr);
4846                 return 0;
4847         }
4848
4849         /* Writes do not use seq_file */
4850         iter = m->private;
4851         mutex_lock(&trace_types_lock);
4852
4853         for_each_tracing_cpu(cpu) {
4854                 if (iter->buffer_iter[cpu])
4855                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4856         }
4857
4858         if (iter->trace && iter->trace->close)
4859                 iter->trace->close(iter);
4860
4861         if (!iter->snapshot && tr->stop_count)
4862                 /* reenable tracing if it was previously enabled */
4863                 tracing_start_tr(tr);
4864
4865         __trace_array_put(tr);
4866
4867         mutex_unlock(&trace_types_lock);
4868
4869         mutex_destroy(&iter->mutex);
4870         free_cpumask_var(iter->started);
4871         kfree(iter->fmt);
4872         kfree(iter->temp);
4873         kfree(iter->trace);
4874         kfree(iter->buffer_iter);
4875         seq_release_private(inode, file);
4876
4877         return 0;
4878 }
4879
4880 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4881 {
4882         struct trace_array *tr = inode->i_private;
4883
4884         trace_array_put(tr);
4885         return 0;
4886 }
4887
4888 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4889 {
4890         struct trace_array *tr = inode->i_private;
4891
4892         trace_array_put(tr);
4893
4894         return single_release(inode, file);
4895 }
4896
4897 static int tracing_open(struct inode *inode, struct file *file)
4898 {
4899         struct trace_array *tr = inode->i_private;
4900         struct trace_iterator *iter;
4901         int ret;
4902
4903         ret = tracing_check_open_get_tr(tr);
4904         if (ret)
4905                 return ret;
4906
4907         /* If this file was open for write, then erase contents */
4908         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4909                 int cpu = tracing_get_cpu(inode);
4910                 struct array_buffer *trace_buf = &tr->array_buffer;
4911
4912 #ifdef CONFIG_TRACER_MAX_TRACE
4913                 if (tr->current_trace->print_max)
4914                         trace_buf = &tr->max_buffer;
4915 #endif
4916
4917                 if (cpu == RING_BUFFER_ALL_CPUS)
4918                         tracing_reset_online_cpus(trace_buf);
4919                 else
4920                         tracing_reset_cpu(trace_buf, cpu);
4921         }
4922
4923         if (file->f_mode & FMODE_READ) {
4924                 iter = __tracing_open(inode, file, false);
4925                 if (IS_ERR(iter))
4926                         ret = PTR_ERR(iter);
4927                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4928                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4929         }
4930
4931         if (ret < 0)
4932                 trace_array_put(tr);
4933
4934         return ret;
4935 }
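/*
 * Usage sketch: because an open of "trace" for write with O_TRUNC resets
 * the buffers above, the trace can be cleared from the shell without
 * reading it (assuming the usual tracefs mount point):
 *
 *   # echo > /sys/kernel/tracing/trace
 */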
4936
4937 /*
4938  * Some tracers are not suitable for instance buffers.
4939  * A tracer is always available for the global array (toplevel)
4940  * or if it explicitly states that it is.
4941  */
4942 static bool
4943 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4944 {
4945         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4946 }
4947
4948 /* Find the next tracer that this trace array may use */
4949 static struct tracer *
4950 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4951 {
4952         while (t && !trace_ok_for_array(t, tr))
4953                 t = t->next;
4954
4955         return t;
4956 }
4957
4958 static void *
4959 t_next(struct seq_file *m, void *v, loff_t *pos)
4960 {
4961         struct trace_array *tr = m->private;
4962         struct tracer *t = v;
4963
4964         (*pos)++;
4965
4966         if (t)
4967                 t = get_tracer_for_array(tr, t->next);
4968
4969         return t;
4970 }
4971
4972 static void *t_start(struct seq_file *m, loff_t *pos)
4973 {
4974         struct trace_array *tr = m->private;
4975         struct tracer *t;
4976         loff_t l = 0;
4977
4978         mutex_lock(&trace_types_lock);
4979
4980         t = get_tracer_for_array(tr, trace_types);
4981         for (; t && l < *pos; t = t_next(m, t, &l))
4982                         ;
4983
4984         return t;
4985 }
4986
4987 static void t_stop(struct seq_file *m, void *p)
4988 {
4989         mutex_unlock(&trace_types_lock);
4990 }
4991
4992 static int t_show(struct seq_file *m, void *v)
4993 {
4994         struct tracer *t = v;
4995
4996         if (!t)
4997                 return 0;
4998
4999         seq_puts(m, t->name);
5000         if (t->next)
5001                 seq_putc(m, ' ');
5002         else
5003                 seq_putc(m, '\n');
5004
5005         return 0;
5006 }
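/*
 * For illustration only: reading "available_tracers" walks the tracer
 * list with the seq operations below and prints the names separated by
 * spaces with a trailing newline, e.g. (the exact set depends on the
 * kernel configuration):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 */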
5007
5008 static const struct seq_operations show_traces_seq_ops = {
5009         .start          = t_start,
5010         .next           = t_next,
5011         .stop           = t_stop,
5012         .show           = t_show,
5013 };
5014
5015 static int show_traces_open(struct inode *inode, struct file *file)
5016 {
5017         struct trace_array *tr = inode->i_private;
5018         struct seq_file *m;
5019         int ret;
5020
5021         ret = tracing_check_open_get_tr(tr);
5022         if (ret)
5023                 return ret;
5024
5025         ret = seq_open(file, &show_traces_seq_ops);
5026         if (ret) {
5027                 trace_array_put(tr);
5028                 return ret;
5029         }
5030
5031         m = file->private_data;
5032         m->private = tr;
5033
5034         return 0;
5035 }
5036
5037 static int show_traces_release(struct inode *inode, struct file *file)
5038 {
5039         struct trace_array *tr = inode->i_private;
5040
5041         trace_array_put(tr);
5042         return seq_release(inode, file);
5043 }
5044
5045 static ssize_t
5046 tracing_write_stub(struct file *filp, const char __user *ubuf,
5047                    size_t count, loff_t *ppos)
5048 {
5049         return count;
5050 }
5051
5052 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5053 {
5054         int ret;
5055
5056         if (file->f_mode & FMODE_READ)
5057                 ret = seq_lseek(file, offset, whence);
5058         else
5059                 file->f_pos = ret = 0;
5060
5061         return ret;
5062 }
5063
5064 static const struct file_operations tracing_fops = {
5065         .open           = tracing_open,
5066         .read           = seq_read,
5067         .write          = tracing_write_stub,
5068         .llseek         = tracing_lseek,
5069         .release        = tracing_release,
5070 };
5071
5072 static const struct file_operations show_traces_fops = {
5073         .open           = show_traces_open,
5074         .read           = seq_read,
5075         .llseek         = seq_lseek,
5076         .release        = show_traces_release,
5077 };
5078
5079 static ssize_t
5080 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5081                      size_t count, loff_t *ppos)
5082 {
5083         struct trace_array *tr = file_inode(filp)->i_private;
5084         char *mask_str;
5085         int len;
5086
5087         len = snprintf(NULL, 0, "%*pb\n",
5088                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5089         mask_str = kmalloc(len, GFP_KERNEL);
5090         if (!mask_str)
5091                 return -ENOMEM;
5092
5093         len = snprintf(mask_str, len, "%*pb\n",
5094                        cpumask_pr_args(tr->tracing_cpumask));
5095         if (len >= count) {
5096                 count = -EINVAL;
5097                 goto out_err;
5098         }
5099         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5100
5101 out_err:
5102         kfree(mask_str);
5103
5104         return count;
5105 }
5106
5107 int tracing_set_cpumask(struct trace_array *tr,
5108                         cpumask_var_t tracing_cpumask_new)
5109 {
5110         int cpu;
5111
5112         if (!tr)
5113                 return -EINVAL;
5114
5115         local_irq_disable();
5116         arch_spin_lock(&tr->max_lock);
5117         for_each_tracing_cpu(cpu) {
5118                 /*
5119                  * Increase/decrease the disabled counter if we are
5120                  * about to flip a bit in the cpumask:
5121                  */
5122                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5123                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5124                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5125                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5126                 }
5127                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5128                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5129                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5130                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5131                 }
5132         }
5133         arch_spin_unlock(&tr->max_lock);
5134         local_irq_enable();
5135
5136         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5137
5138         return 0;
5139 }
5140
5141 static ssize_t
5142 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5143                       size_t count, loff_t *ppos)
5144 {
5145         struct trace_array *tr = file_inode(filp)->i_private;
5146         cpumask_var_t tracing_cpumask_new;
5147         int err;
5148
5149         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5150                 return -ENOMEM;
5151
5152         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5153         if (err)
5154                 goto err_free;
5155
5156         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5157         if (err)
5158                 goto err_free;
5159
5160         free_cpumask_var(tracing_cpumask_new);
5161
5162         return count;
5163
5164 err_free:
5165         free_cpumask_var(tracing_cpumask_new);
5166
5167         return err;
5168 }
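/*
 * Usage sketch (assuming the usual tracefs mount point): the mask is
 * written in hex, e.g. limiting tracing to CPUs 0 and 1:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * tracing_set_cpumask() then disables recording on the CPUs whose bits
 * were cleared and re-enables it on the CPUs that were newly set.
 */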
5169
5170 static const struct file_operations tracing_cpumask_fops = {
5171         .open           = tracing_open_generic_tr,
5172         .read           = tracing_cpumask_read,
5173         .write          = tracing_cpumask_write,
5174         .release        = tracing_release_generic_tr,
5175         .llseek         = generic_file_llseek,
5176 };
5177
5178 static int tracing_trace_options_show(struct seq_file *m, void *v)
5179 {
5180         struct tracer_opt *trace_opts;
5181         struct trace_array *tr = m->private;
5182         u32 tracer_flags;
5183         int i;
5184
5185         mutex_lock(&trace_types_lock);
5186         tracer_flags = tr->current_trace->flags->val;
5187         trace_opts = tr->current_trace->flags->opts;
5188
5189         for (i = 0; trace_options[i]; i++) {
5190                 if (tr->trace_flags & (1 << i))
5191                         seq_printf(m, "%s\n", trace_options[i]);
5192                 else
5193                         seq_printf(m, "no%s\n", trace_options[i]);
5194         }
5195
5196         for (i = 0; trace_opts[i].name; i++) {
5197                 if (tracer_flags & trace_opts[i].bit)
5198                         seq_printf(m, "%s\n", trace_opts[i].name);
5199                 else
5200                         seq_printf(m, "no%s\n", trace_opts[i].name);
5201         }
5202         mutex_unlock(&trace_types_lock);
5203
5204         return 0;
5205 }
5206
5207 static int __set_tracer_option(struct trace_array *tr,
5208                                struct tracer_flags *tracer_flags,
5209                                struct tracer_opt *opts, int neg)
5210 {
5211         struct tracer *trace = tracer_flags->trace;
5212         int ret;
5213
5214         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5215         if (ret)
5216                 return ret;
5217
5218         if (neg)
5219                 tracer_flags->val &= ~opts->bit;
5220         else
5221                 tracer_flags->val |= opts->bit;
5222         return 0;
5223 }
5224
5225 /* Try to assign a tracer specific option */
5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5227 {
5228         struct tracer *trace = tr->current_trace;
5229         struct tracer_flags *tracer_flags = trace->flags;
5230         struct tracer_opt *opts = NULL;
5231         int i;
5232
5233         for (i = 0; tracer_flags->opts[i].name; i++) {
5234                 opts = &tracer_flags->opts[i];
5235
5236                 if (strcmp(cmp, opts->name) == 0)
5237                         return __set_tracer_option(tr, trace->flags, opts, neg);
5238         }
5239
5240         return -EINVAL;
5241 }
5242
5243 /* Some tracers require overwrite to stay enabled */
5244 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5245 {
5246         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5247                 return -1;
5248
5249         return 0;
5250 }
5251
5252 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5253 {
5254         int *map;
5255
5256         if ((mask == TRACE_ITER_RECORD_TGID) ||
5257             (mask == TRACE_ITER_RECORD_CMD))
5258                 lockdep_assert_held(&event_mutex);
5259
5260         /* do nothing if the flag is already in the requested state */
5261         if (!!(tr->trace_flags & mask) == !!enabled)
5262                 return 0;
5263
5264         /* Give the tracer a chance to approve the change */
5265         if (tr->current_trace->flag_changed)
5266                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5267                         return -EINVAL;
5268
5269         if (enabled)
5270                 tr->trace_flags |= mask;
5271         else
5272                 tr->trace_flags &= ~mask;
5273
5274         if (mask == TRACE_ITER_RECORD_CMD)
5275                 trace_event_enable_cmd_record(enabled);
5276
5277         if (mask == TRACE_ITER_RECORD_TGID) {
5278                 if (!tgid_map) {
5279                         tgid_map_max = pid_max;
5280                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5281                                        GFP_KERNEL);
5282
5283                         /*
5284                          * Pairs with smp_load_acquire() in
5285                          * trace_find_tgid_ptr() to ensure that if it observes
5286                          * the tgid_map we just allocated then it also observes
5287                          * the corresponding tgid_map_max value.
5288                          */
5289                         smp_store_release(&tgid_map, map);
5290                 }
5291                 if (!tgid_map) {
5292                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5293                         return -ENOMEM;
5294                 }
5295
5296                 trace_event_enable_tgid_record(enabled);
5297         }
5298
5299         if (mask == TRACE_ITER_EVENT_FORK)
5300                 trace_event_follow_fork(tr, enabled);
5301
5302         if (mask == TRACE_ITER_FUNC_FORK)
5303                 ftrace_pid_follow_fork(tr, enabled);
5304
5305         if (mask == TRACE_ITER_OVERWRITE) {
5306                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5307 #ifdef CONFIG_TRACER_MAX_TRACE
5308                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5309 #endif
5310         }
5311
5312         if (mask == TRACE_ITER_PRINTK) {
5313                 trace_printk_start_stop_comm(enabled);
5314                 trace_printk_control(enabled);
5315         }
5316
5317         return 0;
5318 }
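/*
 * For reference, a minimal sketch of the reader side that the
 * smp_store_release() above pairs with (trace_find_tgid_ptr() is defined
 * earlier in this file; this is only an illustration of the ordering,
 * not a verbatim copy):
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 */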
5319
5320 int trace_set_options(struct trace_array *tr, char *option)
5321 {
5322         char *cmp;
5323         int neg = 0;
5324         int ret;
5325         size_t orig_len = strlen(option);
5326         int len;
5327
5328         cmp = strstrip(option);
5329
5330         len = str_has_prefix(cmp, "no");
5331         if (len)
5332                 neg = 1;
5333
5334         cmp += len;
5335
5336         mutex_lock(&event_mutex);
5337         mutex_lock(&trace_types_lock);
5338
5339         ret = match_string(trace_options, -1, cmp);
5340         /* If the option is not a core flag, test the tracer specific options */
5341         if (ret < 0)
5342                 ret = set_tracer_option(tr, cmp, neg);
5343         else
5344                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5345
5346         mutex_unlock(&trace_types_lock);
5347         mutex_unlock(&event_mutex);
5348
5349         /*
5350          * If the first trailing whitespace is replaced with '\0' by strstrip,
5351          * turn it back into a space.
5352          */
5353         if (orig_len > strlen(option))
5354                 option[strlen(option)] = ' ';
5355
5356         return ret;
5357 }
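/*
 * Usage sketch: the option string is stripped and an optional "no" prefix
 * negates it, so (assuming the usual tracefs mount point):
 *
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 *
 * reaches this point with cmp == "print-parent" and neg == 1, clearing
 * the corresponding bit via set_tracer_flag().
 */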
5358
5359 static void __init apply_trace_boot_options(void)
5360 {
5361         char *buf = trace_boot_options_buf;
5362         char *option;
5363
5364         while (true) {
5365                 option = strsep(&buf, ",");
5366
5367                 if (!option)
5368                         break;
5369
5370                 if (*option)
5371                         trace_set_options(&global_trace, option);
5372
5373                 /* Put back the comma to allow this to be called again */
5374                 if (buf)
5375                         *(buf - 1) = ',';
5376         }
5377 }
5378
5379 static ssize_t
5380 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5381                         size_t cnt, loff_t *ppos)
5382 {
5383         struct seq_file *m = filp->private_data;
5384         struct trace_array *tr = m->private;
5385         char buf[64];
5386         int ret;
5387
5388         if (cnt >= sizeof(buf))
5389                 return -EINVAL;
5390
5391         if (copy_from_user(buf, ubuf, cnt))
5392                 return -EFAULT;
5393
5394         buf[cnt] = 0;
5395
5396         ret = trace_set_options(tr, buf);
5397         if (ret < 0)
5398                 return ret;
5399
5400         *ppos += cnt;
5401
5402         return cnt;
5403 }
5404
5405 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5406 {
5407         struct trace_array *tr = inode->i_private;
5408         int ret;
5409
5410         ret = tracing_check_open_get_tr(tr);
5411         if (ret)
5412                 return ret;
5413
5414         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5415         if (ret < 0)
5416                 trace_array_put(tr);
5417
5418         return ret;
5419 }
5420
5421 static const struct file_operations tracing_iter_fops = {
5422         .open           = tracing_trace_options_open,
5423         .read           = seq_read,
5424         .llseek         = seq_lseek,
5425         .release        = tracing_single_release_tr,
5426         .write          = tracing_trace_options_write,
5427 };
5428
5429 static const char readme_msg[] =
5430         "tracing mini-HOWTO:\n\n"
5431         "# echo 0 > tracing_on : quick way to disable tracing\n"
5432         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5433         " Important files:\n"
5434         "  trace\t\t\t- The static contents of the buffer\n"
5435         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5436         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5437         "  current_tracer\t- function and latency tracers\n"
5438         "  available_tracers\t- list of configured tracers for current_tracer\n"
5439         "  error_log\t- error log for failed commands (that support it)\n"
5440         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5441         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5442         "  trace_clock\t\t-change the clock used to order events\n"
5443         "       local:   Per cpu clock but may not be synced across CPUs\n"
5444         "      global:   Synced across CPUs but slows tracing down.\n"
5445         "     counter:   Not a clock, but just an increment\n"
5446         "      uptime:   Jiffy counter from time of boot\n"
5447         "        perf:   Same clock that perf events use\n"
5448 #ifdef CONFIG_X86_64
5449         "     x86-tsc:   TSC cycle counter\n"
5450 #endif
5451         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5452         "       delta:   Delta difference against a buffer-wide timestamp\n"
5453         "    absolute:   Absolute (standalone) timestamp\n"
5454         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5455         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5456         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5457         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5458         "\t\t\t  Remove sub-buffer with rmdir\n"
5459         "  trace_options\t\t- Set format or modify how tracing happens\n"
5460         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5461         "\t\t\t  option name\n"
5462         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5463 #ifdef CONFIG_DYNAMIC_FTRACE
5464         "\n  available_filter_functions - list of functions that can be filtered on\n"
5465         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5466         "\t\t\t  functions\n"
5467         "\t     accepts: func_full_name or glob-matching-pattern\n"
5468         "\t     modules: Can select a group via module\n"
5469         "\t      Format: :mod:<module-name>\n"
5470         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5471         "\t    triggers: a command to perform when function is hit\n"
5472         "\t      Format: <function>:<trigger>[:count]\n"
5473         "\t     trigger: traceon, traceoff\n"
5474         "\t\t      enable_event:<system>:<event>\n"
5475         "\t\t      disable_event:<system>:<event>\n"
5476 #ifdef CONFIG_STACKTRACE
5477         "\t\t      stacktrace\n"
5478 #endif
5479 #ifdef CONFIG_TRACER_SNAPSHOT
5480         "\t\t      snapshot\n"
5481 #endif
5482         "\t\t      dump\n"
5483         "\t\t      cpudump\n"
5484         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5485         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5486         "\t     The first one will disable tracing every time do_fault is hit\n"
5487         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5488         "\t       The first time do trap is hit and it disables tracing, the\n"
5489         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5490         "\t       the counter will not decrement. It only decrements when the\n"
5491         "\t       trigger did work\n"
5492         "\t     To remove trigger without count:\n"
5493         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5494         "\t     To remove trigger with a count:\n"
5495         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5496         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5497         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5498         "\t    modules: Can select a group via module command :mod:\n"
5499         "\t    Does not accept triggers\n"
5500 #endif /* CONFIG_DYNAMIC_FTRACE */
5501 #ifdef CONFIG_FUNCTION_TRACER
5502         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5503         "\t\t    (function)\n"
5504         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5505         "\t\t    (function)\n"
5506 #endif
5507 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5508         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5509         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5510         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5511 #endif
5512 #ifdef CONFIG_TRACER_SNAPSHOT
5513         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5514         "\t\t\t  snapshot buffer. Read the contents for more\n"
5515         "\t\t\t  information\n"
5516 #endif
5517 #ifdef CONFIG_STACK_TRACER
5518         "  stack_trace\t\t- Shows the max stack trace when active\n"
5519         "  stack_max_size\t- Shows current max stack size that was traced\n"
5520         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5521         "\t\t\t  new trace)\n"
5522 #ifdef CONFIG_DYNAMIC_FTRACE
5523         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5524         "\t\t\t  traces\n"
5525 #endif
5526 #endif /* CONFIG_STACK_TRACER */
5527 #ifdef CONFIG_DYNAMIC_EVENTS
5528         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5529         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5530 #endif
5531 #ifdef CONFIG_KPROBE_EVENTS
5532         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5533         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5537         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5538 #endif
5539 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5540         "\t  accepts: event-definitions (one definition per line)\n"
5541         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5542         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5543 #ifdef CONFIG_HIST_TRIGGERS
5544         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5545 #endif
5546         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5547         "\t           -:[<group>/]<event>\n"
5548 #ifdef CONFIG_KPROBE_EVENTS
5549         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5550   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5551 #endif
5552 #ifdef CONFIG_UPROBE_EVENTS
5553   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5554 #endif
5555         "\t     args: <name>=fetcharg[:type]\n"
5556         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5557 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5558         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5559 #else
5560         "\t           $stack<index>, $stack, $retval, $comm,\n"
5561 #endif
5562         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5563         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5564         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5565         "\t           <type>\\[<array-size>\\]\n"
5566 #ifdef CONFIG_HIST_TRIGGERS
5567         "\t    field: <stype> <name>;\n"
5568         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5569         "\t           [unsigned] char/int/long\n"
5570 #endif
5571         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5572         "\t            of the <attached-group>/<attached-event>.\n"
5573 #endif
5574         "  events/\t\t- Directory containing all trace event subsystems:\n"
5575         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5576         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5577         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5578         "\t\t\t  events\n"
5579         "      filter\t\t- If set, only events passing filter are traced\n"
5580         "  events/<system>/<event>/\t- Directory containing control files for\n"
5581         "\t\t\t  <event>:\n"
5582         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5583         "      filter\t\t- If set, only events passing filter are traced\n"
5584         "      trigger\t\t- If set, a command to perform when event is hit\n"
5585         "\t    Format: <trigger>[:count][if <filter>]\n"
5586         "\t   trigger: traceon, traceoff\n"
5587         "\t            enable_event:<system>:<event>\n"
5588         "\t            disable_event:<system>:<event>\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590         "\t            enable_hist:<system>:<event>\n"
5591         "\t            disable_hist:<system>:<event>\n"
5592 #endif
5593 #ifdef CONFIG_STACKTRACE
5594         "\t\t    stacktrace\n"
5595 #endif
5596 #ifdef CONFIG_TRACER_SNAPSHOT
5597         "\t\t    snapshot\n"
5598 #endif
5599 #ifdef CONFIG_HIST_TRIGGERS
5600         "\t\t    hist (see below)\n"
5601 #endif
5602         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5603         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5604         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5605         "\t                  events/block/block_unplug/trigger\n"
5606         "\t   The first disables tracing every time block_unplug is hit.\n"
5607         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5608         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5609         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5610         "\t   Like function triggers, the counter is only decremented if it\n"
5611         "\t    enabled or disabled tracing.\n"
5612         "\t   To remove a trigger without a count:\n"
5613         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5614         "\t   To remove a trigger with a count:\n"
5615         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5616         "\t   Filters can be ignored when removing a trigger.\n"
5617 #ifdef CONFIG_HIST_TRIGGERS
5618         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5619         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5620         "\t            [:values=<field1[,field2,...]>]\n"
5621         "\t            [:sort=<field1[,field2,...]>]\n"
5622         "\t            [:size=#entries]\n"
5623         "\t            [:pause][:continue][:clear]\n"
5624         "\t            [:name=histname1]\n"
5625         "\t            [:<handler>.<action>]\n"
5626         "\t            [if <filter>]\n\n"
5627         "\t    Note, special fields can be used as well:\n"
5628         "\t            common_timestamp - to record current timestamp\n"
5629         "\t            common_cpu - to record the CPU the event happened on\n"
5630         "\n"
5631         "\t    When a matching event is hit, an entry is added to a hash\n"
5632         "\t    table using the key(s) and value(s) named, and the value of a\n"
5633         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5634         "\t    correspond to fields in the event's format description.  Keys\n"
5635         "\t    can be any field, or the special string 'stacktrace'.\n"
5636         "\t    Compound keys consisting of up to two fields can be specified\n"
5637         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5638         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5639         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5640         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5641         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5642         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5643         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5644         "\t    its histogram data will be shared with other triggers of the\n"
5645         "\t    same name, and trigger hits will update this common data.\n\n"
5646         "\t    Reading the 'hist' file for the event will dump the hash\n"
5647         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5648         "\t    triggers attached to an event, there will be a table for each\n"
5649         "\t    trigger in the output.  The table displayed for a named\n"
5650         "\t    trigger will be the same as any other instance having the\n"
5651         "\t    same name.  The default format used to display a given field\n"
5652         "\t    can be modified by appending any of the following modifiers\n"
5653         "\t    to the field name, as applicable:\n\n"
5654         "\t            .hex        display a number as a hex value\n"
5655         "\t            .sym        display an address as a symbol\n"
5656         "\t            .sym-offset display an address as a symbol and offset\n"
5657         "\t            .execname   display a common_pid as a program name\n"
5658         "\t            .syscall    display a syscall id as a syscall name\n"
5659         "\t            .log2       display log2 value rather than raw number\n"
5660         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5661         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5662         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5663         "\t    trigger or to start a hist trigger but not log any events\n"
5664         "\t    until told to do so.  'continue' can be used to start or\n"
5665         "\t    restart a paused hist trigger.\n\n"
5666         "\t    The 'clear' parameter will clear the contents of a running\n"
5667         "\t    hist trigger and leave its current paused/active state\n"
5668         "\t    unchanged.\n\n"
5669         "\t    The enable_hist and disable_hist triggers can be used to\n"
5670         "\t    have one event conditionally start and stop another event's\n"
5671         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5672         "\t    the enable_event and disable_event triggers.\n\n"
5673         "\t    Hist trigger handlers and actions are executed whenever a\n"
5674         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5675         "\t        <handler>.<action>\n\n"
5676         "\t    The available handlers are:\n\n"
5677         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5678         "\t        onmax(var)               - invoke if var exceeds current max\n"
5679         "\t        onchange(var)            - invoke action if var changes\n\n"
5680         "\t    The available actions are:\n\n"
5681         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5682         "\t        save(field,...)                      - save current event fields\n"
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5685 #endif
5686 #ifdef CONFIG_SYNTH_EVENTS
5687         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5688         "\t  Write into this file to define/undefine new synthetic events.\n"
5689         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5690 #endif
5691 #endif
5692 ;
5693
5694 static ssize_t
5695 tracing_readme_read(struct file *filp, char __user *ubuf,
5696                        size_t cnt, loff_t *ppos)
5697 {
5698         return simple_read_from_buffer(ubuf, cnt, ppos,
5699                                         readme_msg, strlen(readme_msg));
5700 }
5701
5702 static const struct file_operations tracing_readme_fops = {
5703         .open           = tracing_open_generic,
5704         .read           = tracing_readme_read,
5705         .llseek         = generic_file_llseek,
5706 };
5707
5708 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5709 {
5710         int pid = ++(*pos);
5711
5712         return trace_find_tgid_ptr(pid);
5713 }
5714
5715 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5716 {
5717         int pid = *pos;
5718
5719         return trace_find_tgid_ptr(pid);
5720 }
5721
5722 static void saved_tgids_stop(struct seq_file *m, void *v)
5723 {
5724 }
5725
5726 static int saved_tgids_show(struct seq_file *m, void *v)
5727 {
5728         int *entry = (int *)v;
5729         int pid = entry - tgid_map;
5730         int tgid = *entry;
5731
5732         if (tgid == 0)
5733                 return SEQ_SKIP;
5734
5735         seq_printf(m, "%d %d\n", pid, tgid);
5736         return 0;
5737 }
5738
5739 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5740         .start          = saved_tgids_start,
5741         .stop           = saved_tgids_stop,
5742         .next           = saved_tgids_next,
5743         .show           = saved_tgids_show,
5744 };
5745
5746 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5747 {
5748         int ret;
5749
5750         ret = tracing_check_open_get_tr(NULL);
5751         if (ret)
5752                 return ret;
5753
5754         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5755 }
5756
5757
5758 static const struct file_operations tracing_saved_tgids_fops = {
5759         .open           = tracing_saved_tgids_open,
5760         .read           = seq_read,
5761         .llseek         = seq_lseek,
5762         .release        = seq_release,
5763 };
5764
5765 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767         unsigned int *ptr = v;
5768
5769         if (*pos || m->count)
5770                 ptr++;
5771
5772         (*pos)++;
5773
5774         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5775              ptr++) {
5776                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5777                         continue;
5778
5779                 return ptr;
5780         }
5781
5782         return NULL;
5783 }
5784
5785 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5786 {
5787         void *v;
5788         loff_t l = 0;
5789
5790         preempt_disable();
5791         arch_spin_lock(&trace_cmdline_lock);
5792
5793         v = &savedcmd->map_cmdline_to_pid[0];
5794         while (l <= *pos) {
5795                 v = saved_cmdlines_next(m, v, &l);
5796                 if (!v)
5797                         return NULL;
5798         }
5799
5800         return v;
5801 }
5802
5803 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5804 {
5805         arch_spin_unlock(&trace_cmdline_lock);
5806         preempt_enable();
5807 }
5808
5809 static int saved_cmdlines_show(struct seq_file *m, void *v)
5810 {
5811         char buf[TASK_COMM_LEN];
5812         unsigned int *pid = v;
5813
5814         __trace_find_cmdline(*pid, buf);
5815         seq_printf(m, "%d %s\n", *pid, buf);
5816         return 0;
5817 }
5818
5819 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5820         .start          = saved_cmdlines_start,
5821         .next           = saved_cmdlines_next,
5822         .stop           = saved_cmdlines_stop,
5823         .show           = saved_cmdlines_show,
5824 };
5825
5826 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5827 {
5828         int ret;
5829
5830         ret = tracing_check_open_get_tr(NULL);
5831         if (ret)
5832                 return ret;
5833
5834         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5835 }
5836
5837 static const struct file_operations tracing_saved_cmdlines_fops = {
5838         .open           = tracing_saved_cmdlines_open,
5839         .read           = seq_read,
5840         .llseek         = seq_lseek,
5841         .release        = seq_release,
5842 };
5843
5844 static ssize_t
5845 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5846                                  size_t cnt, loff_t *ppos)
5847 {
5848         char buf[64];
5849         int r;
5850
5851         arch_spin_lock(&trace_cmdline_lock);
5852         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5853         arch_spin_unlock(&trace_cmdline_lock);
5854
5855         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5856 }
5857
5858 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5859 {
5860         kfree(s->saved_cmdlines);
5861         kfree(s->map_cmdline_to_pid);
5862         kfree(s);
5863 }
5864
5865 static int tracing_resize_saved_cmdlines(unsigned int val)
5866 {
5867         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5868
5869         s = kmalloc(sizeof(*s), GFP_KERNEL);
5870         if (!s)
5871                 return -ENOMEM;
5872
5873         if (allocate_cmdlines_buffer(val, s) < 0) {
5874                 kfree(s);
5875                 return -ENOMEM;
5876         }
5877
5878         arch_spin_lock(&trace_cmdline_lock);
5879         savedcmd_temp = savedcmd;
5880         savedcmd = s;
5881         arch_spin_unlock(&trace_cmdline_lock);
5882         free_saved_cmdlines_buffer(savedcmd_temp);
5883
5884         return 0;
5885 }
5886
5887 static ssize_t
5888 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5889                                   size_t cnt, loff_t *ppos)
5890 {
5891         unsigned long val;
5892         int ret;
5893
5894         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5895         if (ret)
5896                 return ret;
5897
5898         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5899         if (!val || val > PID_MAX_DEFAULT)
5900                 return -EINVAL;
5901
5902         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5903         if (ret < 0)
5904                 return ret;
5905
5906         *ppos += cnt;
5907
5908         return cnt;
5909 }
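/*
 * Usage sketch (the value is only an example): growing the comm cache to
 * 4096 entries and reading the size back:
 *
 *   # echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   4096
 */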
5910
5911 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5912         .open           = tracing_open_generic,
5913         .read           = tracing_saved_cmdlines_size_read,
5914         .write          = tracing_saved_cmdlines_size_write,
5915 };
5916
5917 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5918 static union trace_eval_map_item *
5919 update_eval_map(union trace_eval_map_item *ptr)
5920 {
5921         if (!ptr->map.eval_string) {
5922                 if (ptr->tail.next) {
5923                         ptr = ptr->tail.next;
5924                         /* Set ptr to the next real item (skip head) */
5925                         ptr++;
5926                 } else
5927                         return NULL;
5928         }
5929         return ptr;
5930 }
5931
5932 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5933 {
5934         union trace_eval_map_item *ptr = v;
5935
5936         /*
5937          * Paranoid! If ptr points to end, we don't want to increment past it.
5938          * This really should never happen.
5939          */
5940         (*pos)++;
5941         ptr = update_eval_map(ptr);
5942         if (WARN_ON_ONCE(!ptr))
5943                 return NULL;
5944
5945         ptr++;
5946         ptr = update_eval_map(ptr);
5947
5948         return ptr;
5949 }
5950
5951 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5952 {
5953         union trace_eval_map_item *v;
5954         loff_t l = 0;
5955
5956         mutex_lock(&trace_eval_mutex);
5957
5958         v = trace_eval_maps;
5959         if (v)
5960                 v++;
5961
5962         while (v && l < *pos) {
5963                 v = eval_map_next(m, v, &l);
5964         }
5965
5966         return v;
5967 }
5968
5969 static void eval_map_stop(struct seq_file *m, void *v)
5970 {
5971         mutex_unlock(&trace_eval_mutex);
5972 }
5973
5974 static int eval_map_show(struct seq_file *m, void *v)
5975 {
5976         union trace_eval_map_item *ptr = v;
5977
5978         seq_printf(m, "%s %ld (%s)\n",
5979                    ptr->map.eval_string, ptr->map.eval_value,
5980                    ptr->map.system);
5981
5982         return 0;
5983 }
5984
5985 static const struct seq_operations tracing_eval_map_seq_ops = {
5986         .start          = eval_map_start,
5987         .next           = eval_map_next,
5988         .stop           = eval_map_stop,
5989         .show           = eval_map_show,
5990 };
5991
5992 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5993 {
5994         int ret;
5995
5996         ret = tracing_check_open_get_tr(NULL);
5997         if (ret)
5998                 return ret;
5999
6000         return seq_open(filp, &tracing_eval_map_seq_ops);
6001 }
6002
6003 static const struct file_operations tracing_eval_map_fops = {
6004         .open           = tracing_eval_map_open,
6005         .read           = seq_read,
6006         .llseek         = seq_lseek,
6007         .release        = seq_release,
6008 };
6009
6010 static inline union trace_eval_map_item *
6011 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6012 {
6013         /* Return tail of array given the head */
6014         return ptr + ptr->head.length + 1;
6015 }
6016
6017 static void
6018 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6019                            int len)
6020 {
6021         struct trace_eval_map **stop;
6022         struct trace_eval_map **map;
6023         union trace_eval_map_item *map_array;
6024         union trace_eval_map_item *ptr;
6025
6026         stop = start + len;
6027
6028         /*
6029          * The trace_eval_maps contains the map plus a head and tail item,
6030          * where the head holds the module and length of array, and the
6031          * tail holds a pointer to the next list.
6032          */
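        /*
         * Roughly, the allocated array is laid out as follows (an
         * illustrative sketch; len is the number of maps copied below):
         *
         *   map_array[0]          head  { .mod, .length = len }
         *   map_array[1..len]     map   copies of *start[0] .. *start[len - 1]
         *   map_array[len + 1]    tail  { .next } (zeroed below; links to the
         *                               next array when another one is added)
         */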
6033         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6034         if (!map_array) {
6035                 pr_warn("Unable to allocate trace eval mapping\n");
6036                 return;
6037         }
6038
6039         mutex_lock(&trace_eval_mutex);
6040
6041         if (!trace_eval_maps)
6042                 trace_eval_maps = map_array;
6043         else {
6044                 ptr = trace_eval_maps;
6045                 for (;;) {
6046                         ptr = trace_eval_jmp_to_tail(ptr);
6047                         if (!ptr->tail.next)
6048                                 break;
6049                         ptr = ptr->tail.next;
6050
6051                 }
6052                 ptr->tail.next = map_array;
6053         }
6054         map_array->head.mod = mod;
6055         map_array->head.length = len;
6056         map_array++;
6057
6058         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6059                 map_array->map = **map;
6060                 map_array++;
6061         }
6062         memset(map_array, 0, sizeof(*map_array));
6063
6064         mutex_unlock(&trace_eval_mutex);
6065 }
6066
6067 static void trace_create_eval_file(struct dentry *d_tracer)
6068 {
6069         trace_create_file("eval_map", 0444, d_tracer,
6070                           NULL, &tracing_eval_map_fops);
6071 }
6072
6073 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6074 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6075 static inline void trace_insert_eval_map_file(struct module *mod,
6076                               struct trace_eval_map **start, int len) { }
6077 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6078
6079 static void trace_insert_eval_map(struct module *mod,
6080                                   struct trace_eval_map **start, int len)
6081 {
6082         struct trace_eval_map **map;
6083
6084         if (len <= 0)
6085                 return;
6086
6087         map = start;
6088
6089         trace_event_eval_update(map, len);
6090
6091         trace_insert_eval_map_file(mod, start, len);
6092 }
6093
6094 static ssize_t
6095 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6096                        size_t cnt, loff_t *ppos)
6097 {
6098         struct trace_array *tr = filp->private_data;
6099         char buf[MAX_TRACER_SIZE+2];
6100         int r;
6101
6102         mutex_lock(&trace_types_lock);
6103         r = sprintf(buf, "%s\n", tr->current_trace->name);
6104         mutex_unlock(&trace_types_lock);
6105
6106         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6107 }
6108
6109 int tracer_init(struct tracer *t, struct trace_array *tr)
6110 {
6111         tracing_reset_online_cpus(&tr->array_buffer);
6112         return t->init(tr);
6113 }
6114
6115 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6116 {
6117         int cpu;
6118
6119         for_each_tracing_cpu(cpu)
6120                 per_cpu_ptr(buf->data, cpu)->entries = val;
6121 }
6122
6123 #ifdef CONFIG_TRACER_MAX_TRACE
6124 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6125 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6126                                         struct array_buffer *size_buf, int cpu_id)
6127 {
6128         int cpu, ret = 0;
6129
6130         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6131                 for_each_tracing_cpu(cpu) {
6132                         ret = ring_buffer_resize(trace_buf->buffer,
6133                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6134                         if (ret < 0)
6135                                 break;
6136                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6137                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6138                 }
6139         } else {
6140                 ret = ring_buffer_resize(trace_buf->buffer,
6141                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6142                 if (ret == 0)
6143                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6144                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6145         }
6146
6147         return ret;
6148 }
6149 #endif /* CONFIG_TRACER_MAX_TRACE */
6150
6151 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6152                                         unsigned long size, int cpu)
6153 {
6154         int ret;
6155
6156         /*
6157          * If kernel or user changes the size of the ring buffer
6158          * we use the size that was given, and we can forget about
6159          * expanding it later.
6160          */
6161         ring_buffer_expanded = true;
6162
6163         /* May be called before buffers are initialized */
6164         if (!tr->array_buffer.buffer)
6165                 return 0;
6166
6167         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6168         if (ret < 0)
6169                 return ret;
6170
6171 #ifdef CONFIG_TRACER_MAX_TRACE
6172         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6173             !tr->current_trace->use_max_tr)
6174                 goto out;
6175
6176         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6177         if (ret < 0) {
6178                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6179                                                      &tr->array_buffer, cpu);
6180                 if (r < 0) {
6181                         /*
6182                          * AARGH! We are left with different
6183                          * size max buffer!!!!
6184                          * The max buffer is our "snapshot" buffer.
6185                          * When a tracer needs a snapshot (one of the
6186                          * latency tracers), it swaps the max buffer
6187                          * with the saved snapshot. We succeeded in
6188                          * updating the size of the main buffer, but failed to
6189                          * update the size of the max buffer. But when we tried
6190                          * to reset the main buffer to the original size, we
6191                          * failed there too. This is very unlikely to
6192                          * happen, but if it does, warn and kill all
6193                          * tracing.
6194                          */
6195                         WARN_ON(1);
6196                         tracing_disabled = 1;
6197                 }
6198                 return ret;
6199         }
6200
6201         if (cpu == RING_BUFFER_ALL_CPUS)
6202                 set_buffer_entries(&tr->max_buffer, size);
6203         else
6204                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6205
6206  out:
6207 #endif /* CONFIG_TRACER_MAX_TRACE */
6208
6209         if (cpu == RING_BUFFER_ALL_CPUS)
6210                 set_buffer_entries(&tr->array_buffer, size);
6211         else
6212                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6213
6214         return ret;
6215 }
6216
6217 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6218                                   unsigned long size, int cpu_id)
6219 {
6220         int ret;
6221
6222         mutex_lock(&trace_types_lock);
6223
6224         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6225                 /* make sure this CPU is enabled in the tracing cpumask */
6226                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6227                         ret = -EINVAL;
6228                         goto out;
6229                 }
6230         }
6231
6232         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6233         if (ret < 0)
6234                 ret = -ENOMEM;
6235
6236 out:
6237         mutex_unlock(&trace_types_lock);
6238
6239         return ret;
6240 }
6241
6242
6243 /**
6244  * tracing_update_buffers - used by tracing facility to expand ring buffers
6245  *
6246  * To save memory when tracing is never used on a system that has it
6247  * configured in, the ring buffers start out at a minimum size. Once a
6248  * user starts to use the tracing facility, the buffers need to grow to
6249  * their default size.
6250  *
6251  * This function is to be called when a tracer is about to be used.
6252  */
6253 int tracing_update_buffers(void)
6254 {
6255         int ret = 0;
6256
6257         mutex_lock(&trace_types_lock);
6258         if (!ring_buffer_expanded)
6259                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6260                                                 RING_BUFFER_ALL_CPUS);
6261         mutex_unlock(&trace_types_lock);
6262
6263         return ret;
6264 }
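
/*
 * Illustrative sketch, not part of the kernel source: code that is about
 * to turn tracing on is expected to expand the ring buffers first and
 * give up if that fails.  The surrounding function below is hypothetical;
 * only tracing_update_buffers() is real.
 *
 *	static int example_enable_path(void)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		return 0;	// now safe to enable the tracer or event
 *	}
 */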
6265
6266 struct trace_option_dentry;
6267
6268 static void
6269 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6270
6271 /*
6272  * Used to clear out the tracer before deletion of an instance.
6273  * Must have trace_types_lock held.
6274  */
6275 static void tracing_set_nop(struct trace_array *tr)
6276 {
6277         if (tr->current_trace == &nop_trace)
6278                 return;
6279
6280         tr->current_trace->enabled--;
6281
6282         if (tr->current_trace->reset)
6283                 tr->current_trace->reset(tr);
6284
6285         tr->current_trace = &nop_trace;
6286 }
6287
6288 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6289 {
6290         /* Only enable if the directory has been created already. */
6291         if (!tr->dir)
6292                 return;
6293
6294         create_trace_option_files(tr, t);
6295 }
6296
6297 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6298 {
6299         struct tracer *t;
6300 #ifdef CONFIG_TRACER_MAX_TRACE
6301         bool had_max_tr;
6302 #endif
6303         int ret = 0;
6304
6305         mutex_lock(&trace_types_lock);
6306
6307         if (!ring_buffer_expanded) {
6308                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6309                                                 RING_BUFFER_ALL_CPUS);
6310                 if (ret < 0)
6311                         goto out;
6312                 ret = 0;
6313         }
6314
6315         for (t = trace_types; t; t = t->next) {
6316                 if (strcmp(t->name, buf) == 0)
6317                         break;
6318         }
6319         if (!t) {
6320                 ret = -EINVAL;
6321                 goto out;
6322         }
6323         if (t == tr->current_trace)
6324                 goto out;
6325
6326 #ifdef CONFIG_TRACER_SNAPSHOT
6327         if (t->use_max_tr) {
6328                 arch_spin_lock(&tr->max_lock);
6329                 if (tr->cond_snapshot)
6330                         ret = -EBUSY;
6331                 arch_spin_unlock(&tr->max_lock);
6332                 if (ret)
6333                         goto out;
6334         }
6335 #endif
6336         /* Some tracers won't work when enabled from the kernel command line */
6337         if (system_state < SYSTEM_RUNNING && t->noboot) {
6338                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6339                         t->name);
6340                 goto out;
6341         }
6342
6343         /* Some tracers are only allowed for the top level buffer */
6344         if (!trace_ok_for_array(t, tr)) {
6345                 ret = -EINVAL;
6346                 goto out;
6347         }
6348
6349         /* If trace pipe files are being read, we can't change the tracer */
6350         if (tr->trace_ref) {
6351                 ret = -EBUSY;
6352                 goto out;
6353         }
6354
6355         trace_branch_disable();
6356
6357         tr->current_trace->enabled--;
6358
6359         if (tr->current_trace->reset)
6360                 tr->current_trace->reset(tr);
6361
6362         /* Current trace needs to be nop_trace before synchronize_rcu */
6363         tr->current_trace = &nop_trace;
6364
6365 #ifdef CONFIG_TRACER_MAX_TRACE
6366         had_max_tr = tr->allocated_snapshot;
6367
6368         if (had_max_tr && !t->use_max_tr) {
6369                 /*
6370                  * We need to make sure that the update_max_tr sees that
6371                  * current_trace changed to nop_trace to keep it from
6372                  * swapping the buffers after we resize it.
6373                  * update_max_tr() is called with interrupts disabled,
6374                  * so a synchronize_rcu() is sufficient.
6375                  */
6376                 synchronize_rcu();
6377                 free_snapshot(tr);
6378         }
6379 #endif
6380
6381 #ifdef CONFIG_TRACER_MAX_TRACE
6382         if (t->use_max_tr && !had_max_tr) {
6383                 ret = tracing_alloc_snapshot_instance(tr);
6384                 if (ret < 0)
6385                         goto out;
6386         }
6387 #endif
6388
6389         if (t->init) {
6390                 ret = tracer_init(t, tr);
6391                 if (ret)
6392                         goto out;
6393         }
6394
6395         tr->current_trace = t;
6396         tr->current_trace->enabled++;
6397         trace_branch_enable(tr);
6398  out:
6399         mutex_unlock(&trace_types_lock);
6400
6401         return ret;
6402 }
6403
6404 static ssize_t
6405 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6406                         size_t cnt, loff_t *ppos)
6407 {
6408         struct trace_array *tr = filp->private_data;
6409         char buf[MAX_TRACER_SIZE+1];
6410         int i;
6411         size_t ret;
6412         int err;
6413
6414         ret = cnt;
6415
6416         if (cnt > MAX_TRACER_SIZE)
6417                 cnt = MAX_TRACER_SIZE;
6418
6419         if (copy_from_user(buf, ubuf, cnt))
6420                 return -EFAULT;
6421
6422         buf[cnt] = 0;
6423
6424         /* strip ending whitespace. */
6425         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6426                 buf[i] = 0;
6427
6428         err = tracing_set_tracer(tr, buf);
6429         if (err)
6430                 return err;
6431
6432         *ppos += ret;
6433
6434         return ret;
6435 }
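
/*
 * Illustrative user-space sketch, not part of the kernel source: the write
 * handler above backs the "current_tracer" tracefs file, so selecting a
 * tracer from user space is a plain write of its name.  The mount point is
 * an assumption of this example; the "nop" tracer is always registered.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *name = "nop";
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		if (write(fd, name, strlen(name)) < 0)
 *			return 1;
 *		close(fd);
 *		return 0;
 *	}
 */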
6436
6437 static ssize_t
6438 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6439                    size_t cnt, loff_t *ppos)
6440 {
6441         char buf[64];
6442         int r;
6443
6444         r = snprintf(buf, sizeof(buf), "%ld\n",
6445                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6446         if (r > sizeof(buf))
6447                 r = sizeof(buf);
6448         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6449 }
6450
6451 static ssize_t
6452 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6453                     size_t cnt, loff_t *ppos)
6454 {
6455         unsigned long val;
6456         int ret;
6457
6458         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6459         if (ret)
6460                 return ret;
6461
6462         *ptr = val * 1000;
6463
6464         return cnt;
6465 }
6466
6467 static ssize_t
6468 tracing_thresh_read(struct file *filp, char __user *ubuf,
6469                     size_t cnt, loff_t *ppos)
6470 {
6471         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6472 }
6473
6474 static ssize_t
6475 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6476                      size_t cnt, loff_t *ppos)
6477 {
6478         struct trace_array *tr = filp->private_data;
6479         int ret;
6480
6481         mutex_lock(&trace_types_lock);
6482         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6483         if (ret < 0)
6484                 goto out;
6485
6486         if (tr->current_trace->update_thresh) {
6487                 ret = tr->current_trace->update_thresh(tr);
6488                 if (ret < 0)
6489                         goto out;
6490         }
6491
6492         ret = cnt;
6493 out:
6494         mutex_unlock(&trace_types_lock);
6495
6496         return ret;
6497 }
6498
6499 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6500
6501 static ssize_t
6502 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6503                      size_t cnt, loff_t *ppos)
6504 {
6505         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6506 }
6507
6508 static ssize_t
6509 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6510                       size_t cnt, loff_t *ppos)
6511 {
6512         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6513 }
6514
6515 #endif
6516
6517 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6518 {
6519         struct trace_array *tr = inode->i_private;
6520         struct trace_iterator *iter;
6521         int ret;
6522
6523         ret = tracing_check_open_get_tr(tr);
6524         if (ret)
6525                 return ret;
6526
6527         mutex_lock(&trace_types_lock);
6528
6529         /* create a buffer to store the information to pass to userspace */
6530         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6531         if (!iter) {
6532                 ret = -ENOMEM;
6533                 __trace_array_put(tr);
6534                 goto out;
6535         }
6536
6537         trace_seq_init(&iter->seq);
6538         iter->trace = tr->current_trace;
6539
6540         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6541                 ret = -ENOMEM;
6542                 goto fail;
6543         }
6544
6545         /* trace pipe does not show start of buffer */
6546         cpumask_setall(iter->started);
6547
6548         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6549                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6550
6551         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6552         if (trace_clocks[tr->clock_id].in_ns)
6553                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6554
6555         iter->tr = tr;
6556         iter->array_buffer = &tr->array_buffer;
6557         iter->cpu_file = tracing_get_cpu(inode);
6558         mutex_init(&iter->mutex);
6559         filp->private_data = iter;
6560
6561         if (iter->trace->pipe_open)
6562                 iter->trace->pipe_open(iter);
6563
6564         nonseekable_open(inode, filp);
6565
6566         tr->trace_ref++;
6567 out:
6568         mutex_unlock(&trace_types_lock);
6569         return ret;
6570
6571 fail:
6572         kfree(iter);
6573         __trace_array_put(tr);
6574         mutex_unlock(&trace_types_lock);
6575         return ret;
6576 }
6577
6578 static int tracing_release_pipe(struct inode *inode, struct file *file)
6579 {
6580         struct trace_iterator *iter = file->private_data;
6581         struct trace_array *tr = inode->i_private;
6582
6583         mutex_lock(&trace_types_lock);
6584
6585         tr->trace_ref--;
6586
6587         if (iter->trace->pipe_close)
6588                 iter->trace->pipe_close(iter);
6589
6590         mutex_unlock(&trace_types_lock);
6591
6592         free_cpumask_var(iter->started);
6593         mutex_destroy(&iter->mutex);
6594         kfree(iter);
6595
6596         trace_array_put(tr);
6597
6598         return 0;
6599 }
6600
6601 static __poll_t
6602 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6603 {
6604         struct trace_array *tr = iter->tr;
6605
6606         /* Iterators are static; they are either filled or empty */
6607         if (trace_buffer_iter(iter, iter->cpu_file))
6608                 return EPOLLIN | EPOLLRDNORM;
6609
6610         if (tr->trace_flags & TRACE_ITER_BLOCK)
6611                 /*
6612                  * Always select as readable when in blocking mode
6613                  */
6614                 return EPOLLIN | EPOLLRDNORM;
6615         else
6616                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6617                                              filp, poll_table);
6618 }
6619
6620 static __poll_t
6621 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6622 {
6623         struct trace_iterator *iter = filp->private_data;
6624
6625         return trace_poll(iter, filp, poll_table);
6626 }
6627
6628 /* Must be called with iter->mutex held. */
6629 static int tracing_wait_pipe(struct file *filp)
6630 {
6631         struct trace_iterator *iter = filp->private_data;
6632         int ret;
6633
6634         while (trace_empty(iter)) {
6635
6636                 if ((filp->f_flags & O_NONBLOCK)) {
6637                         return -EAGAIN;
6638                 }
6639
6640                 /*
6641                  * We only return EOF once tracing is disabled and we have
6642                  * already read something. If tracing is disabled but nothing
6643                  * has been read yet, we keep blocking. This allows a user to
6644                  * cat this file and then enable tracing; after something has
6645                  * been read, we give an EOF when tracing is again disabled.
6646                  *
6647                  * iter->pos will be 0 if we haven't read anything.
6648                  */
6649                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6650                         break;
6651
6652                 mutex_unlock(&iter->mutex);
6653
6654                 ret = wait_on_pipe(iter, 0);
6655
6656                 mutex_lock(&iter->mutex);
6657
6658                 if (ret)
6659                         return ret;
6660         }
6661
6662         return 1;
6663 }
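
/*
 * Illustrative user-space sketch, not part of the kernel source: the
 * blocking rules above are what a reader of the "trace_pipe" tracefs file
 * sees.  A blocking read waits for data; opening with O_NONBLOCK makes an
 * empty buffer return -EAGAIN instead.  The mount point is an assumption
 * of this example.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			fwrite(buf, 1, n, stdout);	// consuming read
 *		close(fd);
 *		return 0;
 *	}
 */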
6664
6665 /*
6666  * Consumer reader.
6667  */
6668 static ssize_t
6669 tracing_read_pipe(struct file *filp, char __user *ubuf,
6670                   size_t cnt, loff_t *ppos)
6671 {
6672         struct trace_iterator *iter = filp->private_data;
6673         ssize_t sret;
6674
6675         /*
6676          * Avoid more than one consumer on a single file descriptor.
6677          * This is just a matter of trace coherency; the ring buffer
6678          * itself is protected.
6679          */
6680         mutex_lock(&iter->mutex);
6681
6682         /* return any leftover data */
6683         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6684         if (sret != -EBUSY)
6685                 goto out;
6686
6687         trace_seq_init(&iter->seq);
6688
6689         if (iter->trace->read) {
6690                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6691                 if (sret)
6692                         goto out;
6693         }
6694
6695 waitagain:
6696         sret = tracing_wait_pipe(filp);
6697         if (sret <= 0)
6698                 goto out;
6699
6700         /* stop when tracing is finished */
6701         if (trace_empty(iter)) {
6702                 sret = 0;
6703                 goto out;
6704         }
6705
6706         if (cnt >= PAGE_SIZE)
6707                 cnt = PAGE_SIZE - 1;
6708
6709         /* reset all but tr, trace, and overruns */
6710         memset(&iter->seq, 0,
6711                sizeof(struct trace_iterator) -
6712                offsetof(struct trace_iterator, seq));
6713         cpumask_clear(iter->started);
6714         trace_seq_init(&iter->seq);
6715         iter->pos = -1;
6716
6717         trace_event_read_lock();
6718         trace_access_lock(iter->cpu_file);
6719         while (trace_find_next_entry_inc(iter) != NULL) {
6720                 enum print_line_t ret;
6721                 int save_len = iter->seq.seq.len;
6722
6723                 ret = print_trace_line(iter);
6724                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6725                         /* don't print partial lines */
6726                         iter->seq.seq.len = save_len;
6727                         break;
6728                 }
6729                 if (ret != TRACE_TYPE_NO_CONSUME)
6730                         trace_consume(iter);
6731
6732                 if (trace_seq_used(&iter->seq) >= cnt)
6733                         break;
6734
6735                 /*
6736                  * Setting the full flag means we reached the trace_seq buffer
6737                  * size and should have left via the partial-line condition
6738                  * above. One of the trace_seq_* functions is not used properly.
6739                  */
6740                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6741                           iter->ent->type);
6742         }
6743         trace_access_unlock(iter->cpu_file);
6744         trace_event_read_unlock();
6745
6746         /* Now copy what we have to the user */
6747         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6748         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6749                 trace_seq_init(&iter->seq);
6750
6751         /*
6752          * If there was nothing to send to user, in spite of consuming trace
6753          * entries, go back to wait for more entries.
6754          */
6755         if (sret == -EBUSY)
6756                 goto waitagain;
6757
6758 out:
6759         mutex_unlock(&iter->mutex);
6760
6761         return sret;
6762 }
6763
6764 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6765                                      unsigned int idx)
6766 {
6767         __free_page(spd->pages[idx]);
6768 }
6769
6770 static size_t
6771 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6772 {
6773         size_t count;
6774         int save_len;
6775         int ret;
6776
6777         /* Seq buffer is page-sized, exactly what we need. */
6778         for (;;) {
6779                 save_len = iter->seq.seq.len;
6780                 ret = print_trace_line(iter);
6781
6782                 if (trace_seq_has_overflowed(&iter->seq)) {
6783                         iter->seq.seq.len = save_len;
6784                         break;
6785                 }
6786
6787                 /*
6788                  * This should not be hit, because it should only
6789                  * be set if the iter->seq overflowed. But check it
6790                  * anyway to be safe.
6791                  */
6792                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6793                         iter->seq.seq.len = save_len;
6794                         break;
6795                 }
6796
6797                 count = trace_seq_used(&iter->seq) - save_len;
6798                 if (rem < count) {
6799                         rem = 0;
6800                         iter->seq.seq.len = save_len;
6801                         break;
6802                 }
6803
6804                 if (ret != TRACE_TYPE_NO_CONSUME)
6805                         trace_consume(iter);
6806                 rem -= count;
6807                 if (!trace_find_next_entry_inc(iter))   {
6808                         rem = 0;
6809                         iter->ent = NULL;
6810                         break;
6811                 }
6812         }
6813
6814         return rem;
6815 }
6816
6817 static ssize_t tracing_splice_read_pipe(struct file *filp,
6818                                         loff_t *ppos,
6819                                         struct pipe_inode_info *pipe,
6820                                         size_t len,
6821                                         unsigned int flags)
6822 {
6823         struct page *pages_def[PIPE_DEF_BUFFERS];
6824         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6825         struct trace_iterator *iter = filp->private_data;
6826         struct splice_pipe_desc spd = {
6827                 .pages          = pages_def,
6828                 .partial        = partial_def,
6829                 .nr_pages       = 0, /* This gets updated below. */
6830                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6831                 .ops            = &default_pipe_buf_ops,
6832                 .spd_release    = tracing_spd_release_pipe,
6833         };
6834         ssize_t ret;
6835         size_t rem;
6836         unsigned int i;
6837
6838         if (splice_grow_spd(pipe, &spd))
6839                 return -ENOMEM;
6840
6841         mutex_lock(&iter->mutex);
6842
6843         if (iter->trace->splice_read) {
6844                 ret = iter->trace->splice_read(iter, filp,
6845                                                ppos, pipe, len, flags);
6846                 if (ret)
6847                         goto out_err;
6848         }
6849
6850         ret = tracing_wait_pipe(filp);
6851         if (ret <= 0)
6852                 goto out_err;
6853
6854         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6855                 ret = -EFAULT;
6856                 goto out_err;
6857         }
6858
6859         trace_event_read_lock();
6860         trace_access_lock(iter->cpu_file);
6861
6862         /* Fill as many pages as possible. */
6863         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6864                 spd.pages[i] = alloc_page(GFP_KERNEL);
6865                 if (!spd.pages[i])
6866                         break;
6867
6868                 rem = tracing_fill_pipe_page(rem, iter);
6869
6870                 /* Copy the data into the page, so we can start over. */
6871                 ret = trace_seq_to_buffer(&iter->seq,
6872                                           page_address(spd.pages[i]),
6873                                           trace_seq_used(&iter->seq));
6874                 if (ret < 0) {
6875                         __free_page(spd.pages[i]);
6876                         break;
6877                 }
6878                 spd.partial[i].offset = 0;
6879                 spd.partial[i].len = trace_seq_used(&iter->seq);
6880
6881                 trace_seq_init(&iter->seq);
6882         }
6883
6884         trace_access_unlock(iter->cpu_file);
6885         trace_event_read_unlock();
6886         mutex_unlock(&iter->mutex);
6887
6888         spd.nr_pages = i;
6889
6890         if (i)
6891                 ret = splice_to_pipe(pipe, &spd);
6892         else
6893                 ret = 0;
6894 out:
6895         splice_shrink_spd(&spd);
6896         return ret;
6897
6898 out_err:
6899         mutex_unlock(&iter->mutex);
6900         goto out;
6901 }
6902
6903 static ssize_t
6904 tracing_entries_read(struct file *filp, char __user *ubuf,
6905                      size_t cnt, loff_t *ppos)
6906 {
6907         struct inode *inode = file_inode(filp);
6908         struct trace_array *tr = inode->i_private;
6909         int cpu = tracing_get_cpu(inode);
6910         char buf[64];
6911         int r = 0;
6912         ssize_t ret;
6913
6914         mutex_lock(&trace_types_lock);
6915
6916         if (cpu == RING_BUFFER_ALL_CPUS) {
6917                 int cpu, buf_size_same;
6918                 unsigned long size;
6919
6920                 size = 0;
6921                 buf_size_same = 1;
6922                 /* check if all per-CPU buffer sizes are the same */
6923                 for_each_tracing_cpu(cpu) {
6924                         /* fill in the size from first enabled cpu */
6925                         if (size == 0)
6926                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6927                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6928                                 buf_size_same = 0;
6929                                 break;
6930                         }
6931                 }
6932
6933                 if (buf_size_same) {
6934                         if (!ring_buffer_expanded)
6935                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6936                                             size >> 10,
6937                                             trace_buf_size >> 10);
6938                         else
6939                                 r = sprintf(buf, "%lu\n", size >> 10);
6940                 } else
6941                         r = sprintf(buf, "X\n");
6942         } else
6943                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6944
6945         mutex_unlock(&trace_types_lock);
6946
6947         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6948         return ret;
6949 }
6950
6951 static ssize_t
6952 tracing_entries_write(struct file *filp, const char __user *ubuf,
6953                       size_t cnt, loff_t *ppos)
6954 {
6955         struct inode *inode = file_inode(filp);
6956         struct trace_array *tr = inode->i_private;
6957         unsigned long val;
6958         int ret;
6959
6960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6961         if (ret)
6962                 return ret;
6963
6964         /* must have at least 1 entry */
6965         if (!val)
6966                 return -EINVAL;
6967
6968         /* value is in KB */
6969         val <<= 10;
6970         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6971         if (ret < 0)
6972                 return ret;
6973
6974         *ppos += cnt;
6975
6976         return cnt;
6977 }
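
/*
 * Illustrative user-space sketch, not part of the kernel source: the write
 * handler above backs "buffer_size_kb" (and its per_cpu/cpuN variants),
 * and the value written is interpreted in kilobytes.  The mount point and
 * the chosen size are assumptions of this example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *kb = "1408";	// resize every CPU buffer to 1408 KB
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, kb, strlen(kb));
 *		close(fd);
 *		return 0;
 *	}
 */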
6978
6979 static ssize_t
6980 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6981                                 size_t cnt, loff_t *ppos)
6982 {
6983         struct trace_array *tr = filp->private_data;
6984         char buf[64];
6985         int r, cpu;
6986         unsigned long size = 0, expanded_size = 0;
6987
6988         mutex_lock(&trace_types_lock);
6989         for_each_tracing_cpu(cpu) {
6990                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6991                 if (!ring_buffer_expanded)
6992                         expanded_size += trace_buf_size >> 10;
6993         }
6994         if (ring_buffer_expanded)
6995                 r = sprintf(buf, "%lu\n", size);
6996         else
6997                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6998         mutex_unlock(&trace_types_lock);
6999
7000         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7001 }
7002
7003 static ssize_t
7004 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7005                           size_t cnt, loff_t *ppos)
7006 {
7007         /*
7008          * There is no need to read what the user has written; this function
7009          * exists just to make sure that "echo" does not return an error.
7010          */
7011
7012         *ppos += cnt;
7013
7014         return cnt;
7015 }
7016
7017 static int
7018 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7019 {
7020         struct trace_array *tr = inode->i_private;
7021
7022         /* disable tracing? */
7023         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7024                 tracer_tracing_off(tr);
7025         /* resize the ring buffer to 0 */
7026         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7027
7028         trace_array_put(tr);
7029
7030         return 0;
7031 }
7032
7033 static ssize_t
7034 tracing_mark_write(struct file *filp, const char __user *ubuf,
7035                                         size_t cnt, loff_t *fpos)
7036 {
7037         struct trace_array *tr = filp->private_data;
7038         struct ring_buffer_event *event;
7039         enum event_trigger_type tt = ETT_NONE;
7040         struct trace_buffer *buffer;
7041         struct print_entry *entry;
7042         ssize_t written;
7043         int size;
7044         int len;
7045
7046 /* Used in tracing_mark_raw_write() as well */
7047 #define FAULTED_STR "<faulted>"
7048 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7049
7050         if (tracing_disabled)
7051                 return -EINVAL;
7052
7053         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7054                 return -EINVAL;
7055
7056         if (cnt > TRACE_BUF_SIZE)
7057                 cnt = TRACE_BUF_SIZE;
7058
7059         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7060
7061         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7062
7063         /* If less than "<faulted>", then make sure we can still add that */
7064         if (cnt < FAULTED_SIZE)
7065                 size += FAULTED_SIZE - cnt;
7066
7067         buffer = tr->array_buffer.buffer;
7068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7069                                             tracing_gen_ctx());
7070         if (unlikely(!event))
7071                 /* Ring buffer disabled, return as if not open for write */
7072                 return -EBADF;
7073
7074         entry = ring_buffer_event_data(event);
7075         entry->ip = _THIS_IP_;
7076
7077         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7078         if (len) {
7079                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7080                 cnt = FAULTED_SIZE;
7081                 written = -EFAULT;
7082         } else
7083                 written = cnt;
7084
7085         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7086                 /* do not add \n before testing triggers, but add \0 */
7087                 entry->buf[cnt] = '\0';
7088                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7089         }
7090
7091         if (entry->buf[cnt - 1] != '\n') {
7092                 entry->buf[cnt] = '\n';
7093                 entry->buf[cnt + 1] = '\0';
7094         } else
7095                 entry->buf[cnt] = '\0';
7096
7097         if (static_branch_unlikely(&trace_marker_exports_enabled))
7098                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7099         __buffer_unlock_commit(buffer, event);
7100
7101         if (tt)
7102                 event_triggers_post_call(tr->trace_marker_file, tt);
7103
7104         if (written > 0)
7105                 *fpos += written;
7106
7107         return written;
7108 }
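
/*
 * Illustrative user-space sketch, not part of the kernel source: the
 * handler above services writes to the "trace_marker" tracefs file, so
 * annotating the trace from user space is a single write().  The mount
 * point is an assumption of this example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *msg = "hello from user space\n";
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, msg, strlen(msg));	// shows up as a print event
 *		close(fd);
 *		return 0;
 *	}
 */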
7109
7110 /* Limit it for now to 3K (including tag) */
7111 #define RAW_DATA_MAX_SIZE (1024*3)
7112
7113 static ssize_t
7114 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7115                                         size_t cnt, loff_t *fpos)
7116 {
7117         struct trace_array *tr = filp->private_data;
7118         struct ring_buffer_event *event;
7119         struct trace_buffer *buffer;
7120         struct raw_data_entry *entry;
7121         ssize_t written;
7122         int size;
7123         int len;
7124
7125 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7126
7127         if (tracing_disabled)
7128                 return -EINVAL;
7129
7130         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7131                 return -EINVAL;
7132
7133         /* The marker must at least have a tag id */
7134         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7135                 return -EINVAL;
7136
7137         if (cnt > TRACE_BUF_SIZE)
7138                 cnt = TRACE_BUF_SIZE;
7139
7140         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7141
7142         size = sizeof(*entry) + cnt;
7143         if (cnt < FAULT_SIZE_ID)
7144                 size += FAULT_SIZE_ID - cnt;
7145
7146         buffer = tr->array_buffer.buffer;
7147         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7148                                             tracing_gen_ctx());
7149         if (!event)
7150                 /* Ring buffer disabled, return as if not open for write */
7151                 return -EBADF;
7152
7153         entry = ring_buffer_event_data(event);
7154
7155         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7156         if (len) {
7157                 entry->id = -1;
7158                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7159                 written = -EFAULT;
7160         } else
7161                 written = cnt;
7162
7163         __buffer_unlock_commit(buffer, event);
7164
7165         if (written > 0)
7166                 *fpos += written;
7167
7168         return written;
7169 }
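
/*
 * Illustrative user-space sketch, not part of the kernel source: writes to
 * "trace_marker_raw" must begin with an integer tag id (see the size check
 * above), optionally followed by binary payload.  The tag value, payload
 * and mount point are assumptions of this example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char rec[sizeof(int) + 8];
 *		int tag = 42;			// consumer-defined tag id
 *		int fd;
 *
 *		memcpy(rec, &tag, sizeof(tag));
 *		memcpy(rec + sizeof(tag), "rawdata", 8);
 *
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return 1;
 *		write(fd, rec, sizeof(rec));
 *		close(fd);
 *		return 0;
 *	}
 */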
7170
7171 static int tracing_clock_show(struct seq_file *m, void *v)
7172 {
7173         struct trace_array *tr = m->private;
7174         int i;
7175
7176         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7177                 seq_printf(m,
7178                         "%s%s%s%s", i ? " " : "",
7179                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7180                         i == tr->clock_id ? "]" : "");
7181         seq_putc(m, '\n');
7182
7183         return 0;
7184 }
7185
7186 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7187 {
7188         int i;
7189
7190         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7191                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7192                         break;
7193         }
7194         if (i == ARRAY_SIZE(trace_clocks))
7195                 return -EINVAL;
7196
7197         mutex_lock(&trace_types_lock);
7198
7199         tr->clock_id = i;
7200
7201         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7202
7203         /*
7204          * The new clock may not be consistent with the previous clock.
7205          * Reset the buffer so that it doesn't have incomparable timestamps.
7206          */
7207         tracing_reset_online_cpus(&tr->array_buffer);
7208
7209 #ifdef CONFIG_TRACER_MAX_TRACE
7210         if (tr->max_buffer.buffer)
7211                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7212         tracing_reset_online_cpus(&tr->max_buffer);
7213 #endif
7214
7215         mutex_unlock(&trace_types_lock);
7216
7217         return 0;
7218 }
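
/*
 * Illustrative user-space sketch, not part of the kernel source:
 * tracing_set_clock() is reached by writing a clock name to the
 * "trace_clock" tracefs file; note that switching clocks resets the ring
 * buffer, as described above.  The clock name and mount point are
 * assumptions of this example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *clk = "mono";
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, clk, strlen(clk));
 *		close(fd);
 *		return 0;
 *	}
 */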
7219
7220 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7221                                    size_t cnt, loff_t *fpos)
7222 {
7223         struct seq_file *m = filp->private_data;
7224         struct trace_array *tr = m->private;
7225         char buf[64];
7226         const char *clockstr;
7227         int ret;
7228
7229         if (cnt >= sizeof(buf))
7230                 return -EINVAL;
7231
7232         if (copy_from_user(buf, ubuf, cnt))
7233                 return -EFAULT;
7234
7235         buf[cnt] = 0;
7236
7237         clockstr = strstrip(buf);
7238
7239         ret = tracing_set_clock(tr, clockstr);
7240         if (ret)
7241                 return ret;
7242
7243         *fpos += cnt;
7244
7245         return cnt;
7246 }
7247
7248 static int tracing_clock_open(struct inode *inode, struct file *file)
7249 {
7250         struct trace_array *tr = inode->i_private;
7251         int ret;
7252
7253         ret = tracing_check_open_get_tr(tr);
7254         if (ret)
7255                 return ret;
7256
7257         ret = single_open(file, tracing_clock_show, inode->i_private);
7258         if (ret < 0)
7259                 trace_array_put(tr);
7260
7261         return ret;
7262 }
7263
7264 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7265 {
7266         struct trace_array *tr = m->private;
7267
7268         mutex_lock(&trace_types_lock);
7269
7270         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7271                 seq_puts(m, "delta [absolute]\n");
7272         else
7273                 seq_puts(m, "[delta] absolute\n");
7274
7275         mutex_unlock(&trace_types_lock);
7276
7277         return 0;
7278 }
7279
7280 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7281 {
7282         struct trace_array *tr = inode->i_private;
7283         int ret;
7284
7285         ret = tracing_check_open_get_tr(tr);
7286         if (ret)
7287                 return ret;
7288
7289         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7290         if (ret < 0)
7291                 trace_array_put(tr);
7292
7293         return ret;
7294 }
7295
7296 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7297 {
7298         if (rbe == this_cpu_read(trace_buffered_event))
7299                 return ring_buffer_time_stamp(buffer);
7300
7301         return ring_buffer_event_time_stamp(buffer, rbe);
7302 }
7303
7304 /*
7305  * Set or disable using the per CPU trace_buffered_event when possible.
7306  */
7307 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7308 {
7309         int ret = 0;
7310
7311         mutex_lock(&trace_types_lock);
7312
7313         if (set && tr->no_filter_buffering_ref++)
7314                 goto out;
7315
7316         if (!set) {
7317                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7318                         ret = -EINVAL;
7319                         goto out;
7320                 }
7321
7322                 --tr->no_filter_buffering_ref;
7323         }
7324  out:
7325         mutex_unlock(&trace_types_lock);
7326
7327         return ret;
7328 }
7329
7330 struct ftrace_buffer_info {
7331         struct trace_iterator   iter;
7332         void                    *spare;
7333         unsigned int            spare_cpu;
7334         unsigned int            read;
7335 };
7336
7337 #ifdef CONFIG_TRACER_SNAPSHOT
7338 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7339 {
7340         struct trace_array *tr = inode->i_private;
7341         struct trace_iterator *iter;
7342         struct seq_file *m;
7343         int ret;
7344
7345         ret = tracing_check_open_get_tr(tr);
7346         if (ret)
7347                 return ret;
7348
7349         if (file->f_mode & FMODE_READ) {
7350                 iter = __tracing_open(inode, file, true);
7351                 if (IS_ERR(iter))
7352                         ret = PTR_ERR(iter);
7353         } else {
7354                 /* Writes still need the seq_file to hold the private data */
7355                 ret = -ENOMEM;
7356                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7357                 if (!m)
7358                         goto out;
7359                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7360                 if (!iter) {
7361                         kfree(m);
7362                         goto out;
7363                 }
7364                 ret = 0;
7365
7366                 iter->tr = tr;
7367                 iter->array_buffer = &tr->max_buffer;
7368                 iter->cpu_file = tracing_get_cpu(inode);
7369                 m->private = iter;
7370                 file->private_data = m;
7371         }
7372 out:
7373         if (ret < 0)
7374                 trace_array_put(tr);
7375
7376         return ret;
7377 }
7378
7379 static ssize_t
7380 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7381                        loff_t *ppos)
7382 {
7383         struct seq_file *m = filp->private_data;
7384         struct trace_iterator *iter = m->private;
7385         struct trace_array *tr = iter->tr;
7386         unsigned long val;
7387         int ret;
7388
7389         ret = tracing_update_buffers();
7390         if (ret < 0)
7391                 return ret;
7392
7393         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7394         if (ret)
7395                 return ret;
7396
7397         mutex_lock(&trace_types_lock);
7398
7399         if (tr->current_trace->use_max_tr) {
7400                 ret = -EBUSY;
7401                 goto out;
7402         }
7403
7404         arch_spin_lock(&tr->max_lock);
7405         if (tr->cond_snapshot)
7406                 ret = -EBUSY;
7407         arch_spin_unlock(&tr->max_lock);
7408         if (ret)
7409                 goto out;
7410
7411         switch (val) {
7412         case 0:
7413                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7414                         ret = -EINVAL;
7415                         break;
7416                 }
7417                 if (tr->allocated_snapshot)
7418                         free_snapshot(tr);
7419                 break;
7420         case 1:
7421 /* Only allow per-cpu swap if the ring buffer supports it */
7422 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7423                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7424                         ret = -EINVAL;
7425                         break;
7426                 }
7427 #endif
7428                 if (tr->allocated_snapshot)
7429                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7430                                         &tr->array_buffer, iter->cpu_file);
7431                 else
7432                         ret = tracing_alloc_snapshot_instance(tr);
7433                 if (ret < 0)
7434                         break;
7435                 local_irq_disable();
7436                 /* Now, we're going to swap */
7437                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7438                         update_max_tr(tr, current, smp_processor_id(), NULL);
7439                 else
7440                         update_max_tr_single(tr, current, iter->cpu_file);
7441                 local_irq_enable();
7442                 break;
7443         default:
7444                 if (tr->allocated_snapshot) {
7445                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7446                                 tracing_reset_online_cpus(&tr->max_buffer);
7447                         else
7448                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7449                 }
7450                 break;
7451         }
7452
7453         if (ret >= 0) {
7454                 *ppos += cnt;
7455                 ret = cnt;
7456         }
7457 out:
7458         mutex_unlock(&trace_types_lock);
7459         return ret;
7460 }
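
/*
 * Illustrative user-space sketch, not part of the kernel source: the
 * switch above gives the "snapshot" file its semantics: writing 0 frees
 * the snapshot buffer, 1 takes a snapshot (allocating it if needed), and
 * any other value clears the snapshot contents.  The mount point is an
 * assumption of this example.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);	// take a snapshot of the main buffer
 *		close(fd);
 *		return 0;
 *	}
 */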
7461
7462 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7463 {
7464         struct seq_file *m = file->private_data;
7465         int ret;
7466
7467         ret = tracing_release(inode, file);
7468
7469         if (file->f_mode & FMODE_READ)
7470                 return ret;
7471
7472         /* If write only, the seq_file is just a stub */
7473         if (m)
7474                 kfree(m->private);
7475         kfree(m);
7476
7477         return 0;
7478 }
7479
7480 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7481 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7482                                     size_t count, loff_t *ppos);
7483 static int tracing_buffers_release(struct inode *inode, struct file *file);
7484 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7485                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7486
7487 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7488 {
7489         struct ftrace_buffer_info *info;
7490         int ret;
7491
7492         /* The following checks for tracefs lockdown */
7493         ret = tracing_buffers_open(inode, filp);
7494         if (ret < 0)
7495                 return ret;
7496
7497         info = filp->private_data;
7498
7499         if (info->iter.trace->use_max_tr) {
7500                 tracing_buffers_release(inode, filp);
7501                 return -EBUSY;
7502         }
7503
7504         info->iter.snapshot = true;
7505         info->iter.array_buffer = &info->iter.tr->max_buffer;
7506
7507         return ret;
7508 }
7509
7510 #endif /* CONFIG_TRACER_SNAPSHOT */
7511
7512
7513 static const struct file_operations tracing_thresh_fops = {
7514         .open           = tracing_open_generic,
7515         .read           = tracing_thresh_read,
7516         .write          = tracing_thresh_write,
7517         .llseek         = generic_file_llseek,
7518 };
7519
7520 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7521 static const struct file_operations tracing_max_lat_fops = {
7522         .open           = tracing_open_generic,
7523         .read           = tracing_max_lat_read,
7524         .write          = tracing_max_lat_write,
7525         .llseek         = generic_file_llseek,
7526 };
7527 #endif
7528
7529 static const struct file_operations set_tracer_fops = {
7530         .open           = tracing_open_generic,
7531         .read           = tracing_set_trace_read,
7532         .write          = tracing_set_trace_write,
7533         .llseek         = generic_file_llseek,
7534 };
7535
7536 static const struct file_operations tracing_pipe_fops = {
7537         .open           = tracing_open_pipe,
7538         .poll           = tracing_poll_pipe,
7539         .read           = tracing_read_pipe,
7540         .splice_read    = tracing_splice_read_pipe,
7541         .release        = tracing_release_pipe,
7542         .llseek         = no_llseek,
7543 };
7544
7545 static const struct file_operations tracing_entries_fops = {
7546         .open           = tracing_open_generic_tr,
7547         .read           = tracing_entries_read,
7548         .write          = tracing_entries_write,
7549         .llseek         = generic_file_llseek,
7550         .release        = tracing_release_generic_tr,
7551 };
7552
7553 static const struct file_operations tracing_total_entries_fops = {
7554         .open           = tracing_open_generic_tr,
7555         .read           = tracing_total_entries_read,
7556         .llseek         = generic_file_llseek,
7557         .release        = tracing_release_generic_tr,
7558 };
7559
7560 static const struct file_operations tracing_free_buffer_fops = {
7561         .open           = tracing_open_generic_tr,
7562         .write          = tracing_free_buffer_write,
7563         .release        = tracing_free_buffer_release,
7564 };
7565
7566 static const struct file_operations tracing_mark_fops = {
7567         .open           = tracing_open_generic_tr,
7568         .write          = tracing_mark_write,
7569         .llseek         = generic_file_llseek,
7570         .release        = tracing_release_generic_tr,
7571 };
7572
7573 static const struct file_operations tracing_mark_raw_fops = {
7574         .open           = tracing_open_generic_tr,
7575         .write          = tracing_mark_raw_write,
7576         .llseek         = generic_file_llseek,
7577         .release        = tracing_release_generic_tr,
7578 };
7579
7580 static const struct file_operations trace_clock_fops = {
7581         .open           = tracing_clock_open,
7582         .read           = seq_read,
7583         .llseek         = seq_lseek,
7584         .release        = tracing_single_release_tr,
7585         .write          = tracing_clock_write,
7586 };
7587
7588 static const struct file_operations trace_time_stamp_mode_fops = {
7589         .open           = tracing_time_stamp_mode_open,
7590         .read           = seq_read,
7591         .llseek         = seq_lseek,
7592         .release        = tracing_single_release_tr,
7593 };
7594
7595 #ifdef CONFIG_TRACER_SNAPSHOT
7596 static const struct file_operations snapshot_fops = {
7597         .open           = tracing_snapshot_open,
7598         .read           = seq_read,
7599         .write          = tracing_snapshot_write,
7600         .llseek         = tracing_lseek,
7601         .release        = tracing_snapshot_release,
7602 };
7603
7604 static const struct file_operations snapshot_raw_fops = {
7605         .open           = snapshot_raw_open,
7606         .read           = tracing_buffers_read,
7607         .release        = tracing_buffers_release,
7608         .splice_read    = tracing_buffers_splice_read,
7609         .llseek         = no_llseek,
7610 };
7611
7612 #endif /* CONFIG_TRACER_SNAPSHOT */
7613
7614 /*
7615  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7616  * @filp: The active open file structure
7617  * @ubuf: The user space provided buffer containing the value to write
7618  * @cnt: The number of bytes to write
7619  * @ppos: The current "file" position
7620  *
7621  * This function implements the write interface for a struct trace_min_max_param.
7622  * The filp->private_data must point to a trace_min_max_param structure that
7623  * defines where to write the value, the min and the max acceptable values,
7624  * and a lock to protect the write.
7625  */
7626 static ssize_t
7627 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7628 {
7629         struct trace_min_max_param *param = filp->private_data;
7630         u64 val;
7631         int err;
7632
7633         if (!param)
7634                 return -EFAULT;
7635
7636         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7637         if (err)
7638                 return err;
7639
7640         if (param->lock)
7641                 mutex_lock(param->lock);
7642
7643         if (param->min && val < *param->min)
7644                 err = -EINVAL;
7645
7646         if (param->max && val > *param->max)
7647                 err = -EINVAL;
7648
7649         if (!err)
7650                 *param->val = val;
7651
7652         if (param->lock)
7653                 mutex_unlock(param->lock);
7654
7655         if (err)
7656                 return err;
7657
7658         return cnt;
7659 }
7660
7661 /*
7662  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7663  * @filp: The active open file structure
7664  * @ubuf: The userspace provided buffer to read value into
7665  * @cnt: The maximum number of bytes to read
7666  * @ppos: The current "file" position
7667  *
7668  * This function implements the read interface for a struct trace_min_max_param.
7669  * The filp->private_data must point to a trace_min_max_param struct with valid
7670  * data.
7671  */
7672 static ssize_t
7673 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7674 {
7675         struct trace_min_max_param *param = filp->private_data;
7676         char buf[U64_STR_SIZE];
7677         int len;
7678         u64 val;
7679
7680         if (!param)
7681                 return -EFAULT;
7682
7683         val = *param->val;
7684
7685         if (cnt > sizeof(buf))
7686                 cnt = sizeof(buf);
7687
7688         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7689
7690         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7691 }
7692
7693 const struct file_operations trace_min_max_fops = {
7694         .open           = tracing_open_generic,
7695         .read           = trace_min_max_read,
7696         .write          = trace_min_max_write,
7697 };
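
/*
 * Illustrative in-kernel sketch, not part of this file: a tracer can
 * expose a bounded u64 knob by pointing a trace_min_max_param at its
 * value and limits and handing it to trace_min_max_fops.  All names below
 * except the struct, the fops and tracefs_create_file() are hypothetical.
 *
 *	static u64 example_val = 100, example_min = 1, example_max = 1000;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	// in the tracer's init code:
 *	//	tracefs_create_file("example_knob", 0640, parent_dentry,
 *	//			    &example_param, &trace_min_max_fops);
 */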
7698
7699 #define TRACING_LOG_ERRS_MAX    8
7700 #define TRACING_LOG_LOC_MAX     128
7701
7702 #define CMD_PREFIX "  Command: "
7703
7704 struct err_info {
7705         const char      **errs; /* ptr to loc-specific array of err strings */
7706         u8              type;   /* index into errs -> specific err string */
7707         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7708         u64             ts;
7709 };
7710
7711 struct tracing_log_err {
7712         struct list_head        list;
7713         struct err_info         info;
7714         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7715         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7716 };
7717
7718 static DEFINE_MUTEX(tracing_err_log_lock);
7719
7720 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7721 {
7722         struct tracing_log_err *err;
7723
7724         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7725                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7726                 if (!err)
7727                         err = ERR_PTR(-ENOMEM);
7728                 tr->n_err_log_entries++;
7729
7730                 return err;
7731         }
7732
7733         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7734         list_del(&err->list);
7735
7736         return err;
7737 }
7738
7739 /**
7740  * err_pos - find the position of a string within a command for error careting
7741  * @cmd: The tracing command that caused the error
7742  * @str: The string to position the caret at within @cmd
7743  *
7744  * Finds the position of the first occurrence of @str within @cmd.  The
7745  * return value can be passed to tracing_log_err() for caret placement
7746  * within @cmd.
7747  *
7748  * Returns the index within @cmd of the first occurrence of @str or 0
7749  * if @str was not found.
7750  */
7751 unsigned int err_pos(char *cmd, const char *str)
7752 {
7753         char *found;
7754
7755         if (WARN_ON(!strlen(cmd)))
7756                 return 0;
7757
7758         found = strstr(cmd, str);
7759         if (found)
7760                 return found - cmd;
7761
7762         return 0;
7763 }
7764
7765 /**
7766  * tracing_log_err - write an error to the tracing error log
7767  * @tr: The associated trace array for the error (NULL for top level array)
7768  * @loc: A string describing where the error occurred
7769  * @cmd: The tracing command that caused the error
7770  * @errs: The array of loc-specific static error strings
7771  * @type: The index into errs[], which produces the specific static err string
7772  * @pos: The position the caret should be placed in the cmd
7773  *
7774  * Writes an error into tracing/error_log of the form:
7775  *
7776  * <loc>: error: <text>
7777  *   Command: <cmd>
7778  *              ^
7779  *
7780  * tracing/error_log is a small log file containing the last
7781  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7782  * unless there has been a tracing error, and the error log can be
7783  * cleared and have its memory freed by writing the empty string in
7784  * truncation mode to it i.e. echo > tracing/error_log.
7785  * truncation mode to it, i.e. echo > tracing/error_log.
7786  * NOTE: the @errs array along with the @type param are used to
7787  * produce a static error string - this string is not copied and saved
7788  * when the error is logged - only a pointer to it is saved.  See
7789  * existing callers for examples of how static strings are typically
7790  * defined for use with tracing_log_err().
7791  */
7792 void tracing_log_err(struct trace_array *tr,
7793                      const char *loc, const char *cmd,
7794                      const char **errs, u8 type, u8 pos)
7795 {
7796         struct tracing_log_err *err;
7797
7798         if (!tr)
7799                 tr = &global_trace;
7800
7801         mutex_lock(&tracing_err_log_lock);
7802         err = get_tracing_log_err(tr);
7803         if (PTR_ERR(err) == -ENOMEM) {
7804                 mutex_unlock(&tracing_err_log_lock);
7805                 return;
7806         }
7807
7808         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7809         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7810
7811         err->info.errs = errs;
7812         err->info.type = type;
7813         err->info.pos = pos;
7814         err->info.ts = local_clock();
7815
7816         list_add_tail(&err->list, &tr->err_log);
7817         mutex_unlock(&tracing_err_log_lock);
7818 }
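
/*
 * Illustrative in-kernel sketch, not part of this file: a typical caller
 * keeps a static array of error strings and logs one of them with a caret
 * pointing into the offending command, as the kernel-doc above describes.
 * Everything here except tracing_log_err() and err_pos() is hypothetical.
 *
 *	static const char *example_errs[] = {
 *		"Duplicate field name",
 *		"Unknown keyword",
 *	};
 *
 *	static void example_report(struct trace_array *tr, char *cmd,
 *				   const char *bad_token)
 *	{
 *		tracing_log_err(tr, "example: parse", cmd, example_errs,
 *				1, err_pos(cmd, bad_token));
 *	}
 */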
7819
7820 static void clear_tracing_err_log(struct trace_array *tr)
7821 {
7822         struct tracing_log_err *err, *next;
7823
7824         mutex_lock(&tracing_err_log_lock);
7825         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7826                 list_del(&err->list);
7827                 kfree(err);
7828         }
7829
7830         tr->n_err_log_entries = 0;
7831         mutex_unlock(&tracing_err_log_lock);
7832 }
7833
7834 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7835 {
7836         struct trace_array *tr = m->private;
7837
7838         mutex_lock(&tracing_err_log_lock);
7839
7840         return seq_list_start(&tr->err_log, *pos);
7841 }
7842
7843 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7844 {
7845         struct trace_array *tr = m->private;
7846
7847         return seq_list_next(v, &tr->err_log, pos);
7848 }
7849
7850 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7851 {
7852         mutex_unlock(&tracing_err_log_lock);
7853 }
7854
7855 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7856 {
7857         u8 i;
7858
7859         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7860                 seq_putc(m, ' ');
7861         for (i = 0; i < pos; i++)
7862                 seq_putc(m, ' ');
7863         seq_puts(m, "^\n");
7864 }
7865
7866 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7867 {
7868         struct tracing_log_err *err = v;
7869
7870         if (err) {
7871                 const char *err_text = err->info.errs[err->info.type];
7872                 u64 sec = err->info.ts;
7873                 u32 nsec;
7874
7875                 nsec = do_div(sec, NSEC_PER_SEC);
7876                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7877                            err->loc, err_text);
7878                 seq_printf(m, "%s", err->cmd);
7879                 tracing_err_log_show_pos(m, err->info.pos);
7880         }
7881
7882         return 0;
7883 }
7884
7885 static const struct seq_operations tracing_err_log_seq_ops = {
7886         .start  = tracing_err_log_seq_start,
7887         .next   = tracing_err_log_seq_next,
7888         .stop   = tracing_err_log_seq_stop,
7889         .show   = tracing_err_log_seq_show
7890 };
7891
7892 static int tracing_err_log_open(struct inode *inode, struct file *file)
7893 {
7894         struct trace_array *tr = inode->i_private;
7895         int ret = 0;
7896
7897         ret = tracing_check_open_get_tr(tr);
7898         if (ret)
7899                 return ret;
7900
7901         /* If this file was opened for write, then erase contents */
7902         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7903                 clear_tracing_err_log(tr);
7904
7905         if (file->f_mode & FMODE_READ) {
7906                 ret = seq_open(file, &tracing_err_log_seq_ops);
7907                 if (!ret) {
7908                         struct seq_file *m = file->private_data;
7909                         m->private = tr;
7910                 } else {
7911                         trace_array_put(tr);
7912                 }
7913         }
7914         return ret;
7915 }
7916
7917 static ssize_t tracing_err_log_write(struct file *file,
7918                                      const char __user *buffer,
7919                                      size_t count, loff_t *ppos)
7920 {
7921         return count;
7922 }
7923
7924 static int tracing_err_log_release(struct inode *inode, struct file *file)
7925 {
7926         struct trace_array *tr = inode->i_private;
7927
7928         trace_array_put(tr);
7929
7930         if (file->f_mode & FMODE_READ)
7931                 seq_release(inode, file);
7932
7933         return 0;
7934 }
7935
7936 static const struct file_operations tracing_err_log_fops = {
7937         .open           = tracing_err_log_open,
7938         .write          = tracing_err_log_write,
7939         .read           = seq_read,
7940         .llseek         = seq_lseek,
7941         .release        = tracing_err_log_release,
7942 };
7943
7944 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7945 {
7946         struct trace_array *tr = inode->i_private;
7947         struct ftrace_buffer_info *info;
7948         int ret;
7949
7950         ret = tracing_check_open_get_tr(tr);
7951         if (ret)
7952                 return ret;
7953
7954         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7955         if (!info) {
7956                 trace_array_put(tr);
7957                 return -ENOMEM;
7958         }
7959
7960         mutex_lock(&trace_types_lock);
7961
7962         info->iter.tr           = tr;
7963         info->iter.cpu_file     = tracing_get_cpu(inode);
7964         info->iter.trace        = tr->current_trace;
7965         info->iter.array_buffer = &tr->array_buffer;
7966         info->spare             = NULL;
7967         /* Force reading ring buffer for first read */
7968         info->read              = (unsigned int)-1;
7969
7970         filp->private_data = info;
7971
7972         tr->trace_ref++;
7973
7974         mutex_unlock(&trace_types_lock);
7975
7976         ret = nonseekable_open(inode, filp);
7977         if (ret < 0)
7978                 trace_array_put(tr);
7979
7980         return ret;
7981 }
7982
7983 static __poll_t
7984 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7985 {
7986         struct ftrace_buffer_info *info = filp->private_data;
7987         struct trace_iterator *iter = &info->iter;
7988
7989         return trace_poll(iter, filp, poll_table);
7990 }
7991
7992 static ssize_t
7993 tracing_buffers_read(struct file *filp, char __user *ubuf,
7994                      size_t count, loff_t *ppos)
7995 {
7996         struct ftrace_buffer_info *info = filp->private_data;
7997         struct trace_iterator *iter = &info->iter;
7998         ssize_t ret = 0;
7999         ssize_t size;
8000
8001         if (!count)
8002                 return 0;
8003
8004 #ifdef CONFIG_TRACER_MAX_TRACE
8005         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8006                 return -EBUSY;
8007 #endif
8008
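	/*
	 * Lazily allocate one "spare" page for this reader.  It is handed to
	 * ring_buffer_read_page() below, which fills it either by copying
	 * events into it or by swapping it with a page of the ring buffer.
	 */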
8009         if (!info->spare) {
8010                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8011                                                           iter->cpu_file);
8012                 if (IS_ERR(info->spare)) {
8013                         ret = PTR_ERR(info->spare);
8014                         info->spare = NULL;
8015                 } else {
8016                         info->spare_cpu = iter->cpu_file;
8017                 }
8018         }
8019         if (!info->spare)
8020                 return ret;
8021
8022         /* Do we have previous read data to read? */
8023         if (info->read < PAGE_SIZE)
8024                 goto read;
8025
8026  again:
8027         trace_access_lock(iter->cpu_file);
8028         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8029                                     &info->spare,
8030                                     count,
8031                                     iter->cpu_file, 0);
8032         trace_access_unlock(iter->cpu_file);
8033
8034         if (ret < 0) {
8035                 if (trace_empty(iter)) {
8036                         if ((filp->f_flags & O_NONBLOCK))
8037                                 return -EAGAIN;
8038
8039                         ret = wait_on_pipe(iter, 0);
8040                         if (ret)
8041                                 return ret;
8042
8043                         goto again;
8044                 }
8045                 return 0;
8046         }
8047
8048         info->read = 0;
8049  read:
8050         size = PAGE_SIZE - info->read;
8051         if (size > count)
8052                 size = count;
8053
8054         ret = copy_to_user(ubuf, info->spare + info->read, size);
8055         if (ret == size)
8056                 return -EFAULT;
8057
8058         size -= ret;
8059
8060         *ppos += size;
8061         info->read += size;
8062
8063         return size;
8064 }
8065
8066 static int tracing_buffers_release(struct inode *inode, struct file *file)
8067 {
8068         struct ftrace_buffer_info *info = file->private_data;
8069         struct trace_iterator *iter = &info->iter;
8070
8071         mutex_lock(&trace_types_lock);
8072
8073         iter->tr->trace_ref--;
8074
8075         __trace_array_put(iter->tr);
8076
8077         if (info->spare)
8078                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8079                                            info->spare_cpu, info->spare);
8080         kvfree(info);
8081
8082         mutex_unlock(&trace_types_lock);
8083
8084         return 0;
8085 }
8086
8087 struct buffer_ref {
8088         struct trace_buffer     *buffer;
8089         void                    *page;
8090         int                     cpu;
8091         refcount_t              refcount;
8092 };
8093
8094 static void buffer_ref_release(struct buffer_ref *ref)
8095 {
8096         if (!refcount_dec_and_test(&ref->refcount))
8097                 return;
8098         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8099         kfree(ref);
8100 }
8101
8102 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8103                                     struct pipe_buffer *buf)
8104 {
8105         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8106
8107         buffer_ref_release(ref);
8108         buf->private = 0;
8109 }
8110
8111 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8112                                 struct pipe_buffer *buf)
8113 {
8114         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8115
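	/*
	 * Refuse to hand out new references once the count is implausibly
	 * large, so that a reference leak cannot overflow the refcount.
	 */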
8116         if (refcount_read(&ref->refcount) > INT_MAX/2)
8117                 return false;
8118
8119         refcount_inc(&ref->refcount);
8120         return true;
8121 }
8122
8123 /* Pipe buffer operations for a buffer. */
8124 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8125         .release                = buffer_pipe_buf_release,
8126         .get                    = buffer_pipe_buf_get,
8127 };
8128
8129 /*
8130  * Callback from splice_to_pipe(): release pages left in the spd if we
8131  * errored out while filling the pipe.
8132  */
8133 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8134 {
8135         struct buffer_ref *ref =
8136                 (struct buffer_ref *)spd->partial[i].private;
8137
8138         buffer_ref_release(ref);
8139         spd->partial[i].private = 0;
8140 }
8141
8142 static ssize_t
8143 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8144                             struct pipe_inode_info *pipe, size_t len,
8145                             unsigned int flags)
8146 {
8147         struct ftrace_buffer_info *info = file->private_data;
8148         struct trace_iterator *iter = &info->iter;
8149         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8150         struct page *pages_def[PIPE_DEF_BUFFERS];
8151         struct splice_pipe_desc spd = {
8152                 .pages          = pages_def,
8153                 .partial        = partial_def,
8154                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8155                 .ops            = &buffer_pipe_buf_ops,
8156                 .spd_release    = buffer_spd_release,
8157         };
8158         struct buffer_ref *ref;
8159         int entries, i;
8160         ssize_t ret = 0;
8161
8162 #ifdef CONFIG_TRACER_MAX_TRACE
8163         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8164                 return -EBUSY;
8165 #endif
8166
8167         if (*ppos & (PAGE_SIZE - 1))
8168                 return -EINVAL;
8169
8170         if (len & (PAGE_SIZE - 1)) {
8171                 if (len < PAGE_SIZE)
8172                         return -EINVAL;
8173                 len &= PAGE_MASK;
8174         }
8175
8176         if (splice_grow_spd(pipe, &spd))
8177                 return -ENOMEM;
8178
8179  again:
8180         trace_access_lock(iter->cpu_file);
8181         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8182
8183         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8184                 struct page *page;
8185                 int r;
8186
8187                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8188                 if (!ref) {
8189                         ret = -ENOMEM;
8190                         break;
8191                 }
8192
8193                 refcount_set(&ref->refcount, 1);
8194                 ref->buffer = iter->array_buffer->buffer;
8195                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8196                 if (IS_ERR(ref->page)) {
8197                         ret = PTR_ERR(ref->page);
8198                         ref->page = NULL;
8199                         kfree(ref);
8200                         break;
8201                 }
8202                 ref->cpu = iter->cpu_file;
8203
8204                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8205                                           len, iter->cpu_file, 1);
8206                 if (r < 0) {
8207                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8208                                                    ref->page);
8209                         kfree(ref);
8210                         break;
8211                 }
8212
8213                 page = virt_to_page(ref->page);
8214
8215                 spd.pages[i] = page;
8216                 spd.partial[i].len = PAGE_SIZE;
8217                 spd.partial[i].offset = 0;
8218                 spd.partial[i].private = (unsigned long)ref;
8219                 spd.nr_pages++;
8220                 *ppos += PAGE_SIZE;
8221
8222                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8223         }
8224
8225         trace_access_unlock(iter->cpu_file);
8226         spd.nr_pages = i;
8227
8228         /* did we read anything? */
8229         if (!spd.nr_pages) {
8230                 if (ret)
8231                         goto out;
8232
8233                 ret = -EAGAIN;
8234                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8235                         goto out;
8236
8237                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8238                 if (ret)
8239                         goto out;
8240
8241                 goto again;
8242         }
8243
8244         ret = splice_to_pipe(pipe, &spd);
8245 out:
8246         splice_shrink_spd(&spd);
8247
8248         return ret;
8249 }
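/*
 * User-space sketch (not kernel code; 4096 assumed to be PAGE_SIZE and error
 * handling omitted) of how a consumer typically drains
 * per_cpu/cpuN/trace_pipe_raw through this splice implementation: move whole
 * pages into a pipe, then read them out for parsing.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	int p[2];
 *	char page[4096];
 *
 *	pipe(p);
 *	while (splice(fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK) > 0) {
 *		read(p[0], page, sizeof(page));
 *		// hand the raw ring buffer page to a parser
 *	}
 */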
8250
8251 static const struct file_operations tracing_buffers_fops = {
8252         .open           = tracing_buffers_open,
8253         .read           = tracing_buffers_read,
8254         .poll           = tracing_buffers_poll,
8255         .release        = tracing_buffers_release,
8256         .splice_read    = tracing_buffers_splice_read,
8257         .llseek         = no_llseek,
8258 };
8259
8260 static ssize_t
8261 tracing_stats_read(struct file *filp, char __user *ubuf,
8262                    size_t count, loff_t *ppos)
8263 {
8264         struct inode *inode = file_inode(filp);
8265         struct trace_array *tr = inode->i_private;
8266         struct array_buffer *trace_buf = &tr->array_buffer;
8267         int cpu = tracing_get_cpu(inode);
8268         struct trace_seq *s;
8269         unsigned long cnt;
8270         unsigned long long t;
8271         unsigned long usec_rem;
8272
8273         s = kmalloc(sizeof(*s), GFP_KERNEL);
8274         if (!s)
8275                 return -ENOMEM;
8276
8277         trace_seq_init(s);
8278
8279         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8280         trace_seq_printf(s, "entries: %ld\n", cnt);
8281
8282         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8283         trace_seq_printf(s, "overrun: %ld\n", cnt);
8284
8285         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8286         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8287
8288         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8289         trace_seq_printf(s, "bytes: %ld\n", cnt);
8290
8291         if (trace_clocks[tr->clock_id].in_ns) {
8292                 /* local or global for trace_clock */
8293                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8294                 usec_rem = do_div(t, USEC_PER_SEC);
8295                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8296                                                                 t, usec_rem);
8297
8298                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8299                 usec_rem = do_div(t, USEC_PER_SEC);
8300                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8301         } else {
8302                 /* counter or tsc mode for trace_clock */
8303                 trace_seq_printf(s, "oldest event ts: %llu\n",
8304                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305
8306                 trace_seq_printf(s, "now ts: %llu\n",
8307                                 ring_buffer_time_stamp(trace_buf->buffer));
8308         }
8309
8310         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8311         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8312
8313         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8314         trace_seq_printf(s, "read events: %ld\n", cnt);
8315
8316         count = simple_read_from_buffer(ubuf, count, ppos,
8317                                         s->buffer, trace_seq_used(s));
8318
8319         kfree(s);
8320
8321         return count;
8322 }
8323
8324 static const struct file_operations tracing_stats_fops = {
8325         .open           = tracing_open_generic_tr,
8326         .read           = tracing_stats_read,
8327         .llseek         = generic_file_llseek,
8328         .release        = tracing_release_generic_tr,
8329 };
8330
8331 #ifdef CONFIG_DYNAMIC_FTRACE
8332
8333 static ssize_t
8334 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8335                   size_t cnt, loff_t *ppos)
8336 {
8337         ssize_t ret;
8338         char *buf;
8339         int r;
8340
8341         /* 256 should be plenty to hold the amount needed */
8342         buf = kmalloc(256, GFP_KERNEL);
8343         if (!buf)
8344                 return -ENOMEM;
8345
8346         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8347                       ftrace_update_tot_cnt,
8348                       ftrace_number_of_pages,
8349                       ftrace_number_of_groups);
8350
8351         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8352         kfree(buf);
8353         return ret;
8354 }
8355
8356 static const struct file_operations tracing_dyn_info_fops = {
8357         .open           = tracing_open_generic,
8358         .read           = tracing_read_dyn_info,
8359         .llseek         = generic_file_llseek,
8360 };
8361 #endif /* CONFIG_DYNAMIC_FTRACE */
8362
8363 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8364 static void
8365 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8366                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8367                 void *data)
8368 {
8369         tracing_snapshot_instance(tr);
8370 }
8371
8372 static void
8373 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8374                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8375                       void *data)
8376 {
8377         struct ftrace_func_mapper *mapper = data;
8378         long *count = NULL;
8379
8380         if (mapper)
8381                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8382
8383         if (count) {
8384
8385                 if (*count <= 0)
8386                         return;
8387
8388                 (*count)--;
8389         }
8390
8391         tracing_snapshot_instance(tr);
8392 }
8393
8394 static int
8395 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8396                       struct ftrace_probe_ops *ops, void *data)
8397 {
8398         struct ftrace_func_mapper *mapper = data;
8399         long *count = NULL;
8400
8401         seq_printf(m, "%ps:", (void *)ip);
8402
8403         seq_puts(m, "snapshot");
8404
8405         if (mapper)
8406                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8407
8408         if (count)
8409                 seq_printf(m, ":count=%ld\n", *count);
8410         else
8411                 seq_puts(m, ":unlimited\n");
8412
8413         return 0;
8414 }
8415
8416 static int
8417 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8418                      unsigned long ip, void *init_data, void **data)
8419 {
8420         struct ftrace_func_mapper *mapper = *data;
8421
8422         if (!mapper) {
8423                 mapper = allocate_ftrace_func_mapper();
8424                 if (!mapper)
8425                         return -ENOMEM;
8426                 *data = mapper;
8427         }
8428
8429         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8430 }
8431
8432 static void
8433 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8434                      unsigned long ip, void *data)
8435 {
8436         struct ftrace_func_mapper *mapper = data;
8437
8438         if (!ip) {
8439                 if (!mapper)
8440                         return;
8441                 free_ftrace_func_mapper(mapper, NULL);
8442                 return;
8443         }
8444
8445         ftrace_func_mapper_remove_ip(mapper, ip);
8446 }
8447
8448 static struct ftrace_probe_ops snapshot_probe_ops = {
8449         .func                   = ftrace_snapshot,
8450         .print                  = ftrace_snapshot_print,
8451 };
8452
8453 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8454         .func                   = ftrace_count_snapshot,
8455         .print                  = ftrace_snapshot_print,
8456         .init                   = ftrace_snapshot_init,
8457         .free                   = ftrace_snapshot_free,
8458 };
8459
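/*
 * Illustrative note (command forms as described in
 * Documentation/trace/ftrace.rst): the callback below implements the
 * "snapshot" command written to set_ftrace_filter, e.g.
 *
 *	echo 'some_function:snapshot' > set_ftrace_filter
 *	echo 'some_function:snapshot:3' > set_ftrace_filter
 *	echo '!some_function:snapshot' > set_ftrace_filter
 *
 * The optional count limits how many snapshots are taken for that function,
 * and a leading '!' unregisters the probe.  "some_function" is a placeholder.
 */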
8460 static int
8461 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8462                                char *glob, char *cmd, char *param, int enable)
8463 {
8464         struct ftrace_probe_ops *ops;
8465         void *count = (void *)-1;
8466         char *number;
8467         int ret;
8468
8469         if (!tr)
8470                 return -ENODEV;
8471
8472         /* hash funcs only work with set_ftrace_filter */
8473         if (!enable)
8474                 return -EINVAL;
8475
8476         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8477
8478         if (glob[0] == '!')
8479                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8480
8481         if (!param)
8482                 goto out_reg;
8483
8484         number = strsep(&param, ":");
8485
8486         if (!strlen(number))
8487                 goto out_reg;
8488
8489         /*
8490          * We use the callback data field (which is a pointer)
8491          * as our counter.
8492          */
8493         ret = kstrtoul(number, 0, (unsigned long *)&count);
8494         if (ret)
8495                 return ret;
8496
8497  out_reg:
8498         ret = tracing_alloc_snapshot_instance(tr);
8499         if (ret < 0)
8500                 goto out;
8501
8502         ret = register_ftrace_function_probe(glob, tr, ops, count);
8503
8504  out:
8505         return ret < 0 ? ret : 0;
8506 }
8507
8508 static struct ftrace_func_command ftrace_snapshot_cmd = {
8509         .name                   = "snapshot",
8510         .func                   = ftrace_trace_snapshot_callback,
8511 };
8512
8513 static __init int register_snapshot_cmd(void)
8514 {
8515         return register_ftrace_command(&ftrace_snapshot_cmd);
8516 }
8517 #else
8518 static inline __init int register_snapshot_cmd(void) { return 0; }
8519 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8520
8521 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8522 {
8523         if (WARN_ON(!tr->dir))
8524                 return ERR_PTR(-ENODEV);
8525
8526         /* Top directory uses NULL as the parent */
8527         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8528                 return NULL;
8529
8530         /* All sub buffers have a descriptor */
8531         return tr->dir;
8532 }
8533
8534 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8535 {
8536         struct dentry *d_tracer;
8537
8538         if (tr->percpu_dir)
8539                 return tr->percpu_dir;
8540
8541         d_tracer = tracing_get_dentry(tr);
8542         if (IS_ERR(d_tracer))
8543                 return NULL;
8544
8545         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8546
8547         MEM_FAIL(!tr->percpu_dir,
8548                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8549
8550         return tr->percpu_dir;
8551 }
8552
8553 static struct dentry *
8554 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8555                       void *data, long cpu, const struct file_operations *fops)
8556 {
8557         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8558
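	/*
	 * Store cpu + 1 so that CPU 0 is distinguishable from a NULL i_cdev,
	 * which tracing_get_cpu() maps to RING_BUFFER_ALL_CPUS.
	 */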
8559         if (ret) /* See tracing_get_cpu() */
8560                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8561         return ret;
8562 }
8563
8564 static void
8565 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8566 {
8567         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8568         struct dentry *d_cpu;
8569         char cpu_dir[30]; /* 30 characters should be more than enough */
8570
8571         if (!d_percpu)
8572                 return;
8573
8574         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8575         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8576         if (!d_cpu) {
8577                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8578                 return;
8579         }
8580
8581         /* per cpu trace_pipe */
8582         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8583                                 tr, cpu, &tracing_pipe_fops);
8584
8585         /* per cpu trace */
8586         trace_create_cpu_file("trace", 0644, d_cpu,
8587                                 tr, cpu, &tracing_fops);
8588
8589         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8590                                 tr, cpu, &tracing_buffers_fops);
8591
8592         trace_create_cpu_file("stats", 0444, d_cpu,
8593                                 tr, cpu, &tracing_stats_fops);
8594
8595         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8596                                 tr, cpu, &tracing_entries_fops);
8597
8598 #ifdef CONFIG_TRACER_SNAPSHOT
8599         trace_create_cpu_file("snapshot", 0644, d_cpu,
8600                                 tr, cpu, &snapshot_fops);
8601
8602         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8603                                 tr, cpu, &snapshot_raw_fops);
8604 #endif
8605 }
8606
8607 #ifdef CONFIG_FTRACE_SELFTEST
8608 /* Let selftest have access to static functions in this file */
8609 #include "trace_selftest.c"
8610 #endif
8611
8612 static ssize_t
8613 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8614                         loff_t *ppos)
8615 {
8616         struct trace_option_dentry *topt = filp->private_data;
8617         char *buf;
8618
8619         if (topt->flags->val & topt->opt->bit)
8620                 buf = "1\n";
8621         else
8622                 buf = "0\n";
8623
8624         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8625 }
8626
8627 static ssize_t
8628 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8629                          loff_t *ppos)
8630 {
8631         struct trace_option_dentry *topt = filp->private_data;
8632         unsigned long val;
8633         int ret;
8634
8635         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8636         if (ret)
8637                 return ret;
8638
8639         if (val != 0 && val != 1)
8640                 return -EINVAL;
8641
8642         if (!!(topt->flags->val & topt->opt->bit) != val) {
8643                 mutex_lock(&trace_types_lock);
8644                 ret = __set_tracer_option(topt->tr, topt->flags,
8645                                           topt->opt, !val);
8646                 mutex_unlock(&trace_types_lock);
8647                 if (ret)
8648                         return ret;
8649         }
8650
8651         *ppos += cnt;
8652
8653         return cnt;
8654 }
8655
8656
8657 static const struct file_operations trace_options_fops = {
8658         .open = tracing_open_generic,
8659         .read = trace_options_read,
8660         .write = trace_options_write,
8661         .llseek = generic_file_llseek,
8662 };
8663
8664 /*
8665  * In order to pass in both the trace_array descriptor as well as the index
8666  * to the flag that the trace option file represents, the trace_array
8667  * has a character array of trace_flags_index[], which holds the index
8668  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8669  * The address of this character array is passed to the flag option file
8670  * read/write callbacks.
8671  *
8672  * In order to extract both the index and the trace_array descriptor,
8673  * get_tr_index() uses the following algorithm.
8674  *
8675  *   idx = *ptr;
8676  *
8677  * Since the pointer points into that character array and index[i] == i,
8678  * dereferencing it yields the index of the flag.
8679  *
8680  * To get the trace_array descriptor, subtracting that index from the
8681  * pointer lands back at the start of the array:
8682  *
8683  *   ptr - idx == &index[0]
8684  *
8685  * Then a simple container_of() from that pointer gets us to the
8686  * trace_array descriptor.
8687  */
8688 static void get_tr_index(void *data, struct trace_array **ptr,
8689                          unsigned int *pindex)
8690 {
8691         *pindex = *(unsigned char *)data;
8692
8693         *ptr = container_of(data - *pindex, struct trace_array,
8694                             trace_flags_index);
8695 }
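/*
 * Worked example of the arithmetic above (addresses are hypothetical): if
 * &tr->trace_flags_index[0] is at address A, the option file for flag 5 is
 * created with data == A + 5, and trace_flags_index[5] == 5, so:
 *
 *	*pindex            == 5
 *	data - *pindex     == A == tr->trace_flags_index
 *	container_of(A, struct trace_array, trace_flags_index) == tr
 */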
8696
8697 static ssize_t
8698 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8699                         loff_t *ppos)
8700 {
8701         void *tr_index = filp->private_data;
8702         struct trace_array *tr;
8703         unsigned int index;
8704         char *buf;
8705
8706         get_tr_index(tr_index, &tr, &index);
8707
8708         if (tr->trace_flags & (1 << index))
8709                 buf = "1\n";
8710         else
8711                 buf = "0\n";
8712
8713         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8714 }
8715
8716 static ssize_t
8717 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8718                          loff_t *ppos)
8719 {
8720         void *tr_index = filp->private_data;
8721         struct trace_array *tr;
8722         unsigned int index;
8723         unsigned long val;
8724         int ret;
8725
8726         get_tr_index(tr_index, &tr, &index);
8727
8728         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8729         if (ret)
8730                 return ret;
8731
8732         if (val != 0 && val != 1)
8733                 return -EINVAL;
8734
8735         mutex_lock(&event_mutex);
8736         mutex_lock(&trace_types_lock);
8737         ret = set_tracer_flag(tr, 1 << index, val);
8738         mutex_unlock(&trace_types_lock);
8739         mutex_unlock(&event_mutex);
8740
8741         if (ret < 0)
8742                 return ret;
8743
8744         *ppos += cnt;
8745
8746         return cnt;
8747 }
8748
8749 static const struct file_operations trace_options_core_fops = {
8750         .open = tracing_open_generic,
8751         .read = trace_options_core_read,
8752         .write = trace_options_core_write,
8753         .llseek = generic_file_llseek,
8754 };
8755
8756 struct dentry *trace_create_file(const char *name,
8757                                  umode_t mode,
8758                                  struct dentry *parent,
8759                                  void *data,
8760                                  const struct file_operations *fops)
8761 {
8762         struct dentry *ret;
8763
8764         ret = tracefs_create_file(name, mode, parent, data, fops);
8765         if (!ret)
8766                 pr_warn("Could not create tracefs '%s' entry\n", name);
8767
8768         return ret;
8769 }
8770
8771
8772 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8773 {
8774         struct dentry *d_tracer;
8775
8776         if (tr->options)
8777                 return tr->options;
8778
8779         d_tracer = tracing_get_dentry(tr);
8780         if (IS_ERR(d_tracer))
8781                 return NULL;
8782
8783         tr->options = tracefs_create_dir("options", d_tracer);
8784         if (!tr->options) {
8785                 pr_warn("Could not create tracefs directory 'options'\n");
8786                 return NULL;
8787         }
8788
8789         return tr->options;
8790 }
8791
8792 static void
8793 create_trace_option_file(struct trace_array *tr,
8794                          struct trace_option_dentry *topt,
8795                          struct tracer_flags *flags,
8796                          struct tracer_opt *opt)
8797 {
8798         struct dentry *t_options;
8799
8800         t_options = trace_options_init_dentry(tr);
8801         if (!t_options)
8802                 return;
8803
8804         topt->flags = flags;
8805         topt->opt = opt;
8806         topt->tr = tr;
8807
8808         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8809                                     &trace_options_fops);
8810
8811 }
8812
8813 static void
8814 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8815 {
8816         struct trace_option_dentry *topts;
8817         struct trace_options *tr_topts;
8818         struct tracer_flags *flags;
8819         struct tracer_opt *opts;
8820         int cnt;
8821         int i;
8822
8823         if (!tracer)
8824                 return;
8825
8826         flags = tracer->flags;
8827
8828         if (!flags || !flags->opts)
8829                 return;
8830
8831         /*
8832          * If this is an instance, only create flags for tracers
8833          * the instance may have.
8834          */
8835         if (!trace_ok_for_array(tracer, tr))
8836                 return;
8837
8838         for (i = 0; i < tr->nr_topts; i++) {
8839                 /* Make sure there are no duplicate flags. */
8840                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8841                         return;
8842         }
8843
8844         opts = flags->opts;
8845
8846         for (cnt = 0; opts[cnt].name; cnt++)
8847                 ;
8848
8849         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8850         if (!topts)
8851                 return;
8852
8853         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8854                             GFP_KERNEL);
8855         if (!tr_topts) {
8856                 kfree(topts);
8857                 return;
8858         }
8859
8860         tr->topts = tr_topts;
8861         tr->topts[tr->nr_topts].tracer = tracer;
8862         tr->topts[tr->nr_topts].topts = topts;
8863         tr->nr_topts++;
8864
8865         for (cnt = 0; opts[cnt].name; cnt++) {
8866                 create_trace_option_file(tr, &topts[cnt], flags,
8867                                          &opts[cnt]);
8868                 MEM_FAIL(topts[cnt].entry == NULL,
8869                           "Failed to create trace option: %s",
8870                           opts[cnt].name);
8871         }
8872 }
8873
8874 static struct dentry *
8875 create_trace_option_core_file(struct trace_array *tr,
8876                               const char *option, long index)
8877 {
8878         struct dentry *t_options;
8879
8880         t_options = trace_options_init_dentry(tr);
8881         if (!t_options)
8882                 return NULL;
8883
8884         return trace_create_file(option, 0644, t_options,
8885                                  (void *)&tr->trace_flags_index[index],
8886                                  &trace_options_core_fops);
8887 }
8888
8889 static void create_trace_options_dir(struct trace_array *tr)
8890 {
8891         struct dentry *t_options;
8892         bool top_level = tr == &global_trace;
8893         int i;
8894
8895         t_options = trace_options_init_dentry(tr);
8896         if (!t_options)
8897                 return;
8898
8899         for (i = 0; trace_options[i]; i++) {
8900                 if (top_level ||
8901                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8902                         create_trace_option_core_file(tr, trace_options[i], i);
8903         }
8904 }
8905
8906 static ssize_t
8907 rb_simple_read(struct file *filp, char __user *ubuf,
8908                size_t cnt, loff_t *ppos)
8909 {
8910         struct trace_array *tr = filp->private_data;
8911         char buf[64];
8912         int r;
8913
8914         r = tracer_tracing_is_on(tr);
8915         r = sprintf(buf, "%d\n", r);
8916
8917         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8918 }
8919
8920 static ssize_t
8921 rb_simple_write(struct file *filp, const char __user *ubuf,
8922                 size_t cnt, loff_t *ppos)
8923 {
8924         struct trace_array *tr = filp->private_data;
8925         struct trace_buffer *buffer = tr->array_buffer.buffer;
8926         unsigned long val;
8927         int ret;
8928
8929         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8930         if (ret)
8931                 return ret;
8932
8933         if (buffer) {
8934                 mutex_lock(&trace_types_lock);
8935                 if (!!val == tracer_tracing_is_on(tr)) {
8936                         val = 0; /* do nothing */
8937                 } else if (val) {
8938                         tracer_tracing_on(tr);
8939                         if (tr->current_trace->start)
8940                                 tr->current_trace->start(tr);
8941                 } else {
8942                         tracer_tracing_off(tr);
8943                         if (tr->current_trace->stop)
8944                                 tr->current_trace->stop(tr);
8945                 }
8946                 mutex_unlock(&trace_types_lock);
8947         }
8948
8949         (*ppos)++;
8950
8951         return cnt;
8952 }
8953
8954 static const struct file_operations rb_simple_fops = {
8955         .open           = tracing_open_generic_tr,
8956         .read           = rb_simple_read,
8957         .write          = rb_simple_write,
8958         .release        = tracing_release_generic_tr,
8959         .llseek         = default_llseek,
8960 };
8961
8962 static ssize_t
8963 buffer_percent_read(struct file *filp, char __user *ubuf,
8964                     size_t cnt, loff_t *ppos)
8965 {
8966         struct trace_array *tr = filp->private_data;
8967         char buf[64];
8968         int r;
8969
8970         r = tr->buffer_percent;
8971         r = sprintf(buf, "%d\n", r);
8972
8973         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8974 }
8975
8976 static ssize_t
8977 buffer_percent_write(struct file *filp, const char __user *ubuf,
8978                      size_t cnt, loff_t *ppos)
8979 {
8980         struct trace_array *tr = filp->private_data;
8981         unsigned long val;
8982         int ret;
8983
8984         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8985         if (ret)
8986                 return ret;
8987
8988         if (val > 100)
8989                 return -EINVAL;
8990
8991         if (!val)
8992                 val = 1;
8993
8994         tr->buffer_percent = val;
8995
8996         (*ppos)++;
8997
8998         return cnt;
8999 }
9000
9001 static const struct file_operations buffer_percent_fops = {
9002         .open           = tracing_open_generic_tr,
9003         .read           = buffer_percent_read,
9004         .write          = buffer_percent_write,
9005         .release        = tracing_release_generic_tr,
9006         .llseek         = default_llseek,
9007 };
9008
9009 static struct dentry *trace_instance_dir;
9010
9011 static void
9012 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9013
9014 static int
9015 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9016 {
9017         enum ring_buffer_flags rb_flags;
9018
9019         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9020
9021         buf->tr = tr;
9022
9023         buf->buffer = ring_buffer_alloc(size, rb_flags);
9024         if (!buf->buffer)
9025                 return -ENOMEM;
9026
9027         buf->data = alloc_percpu(struct trace_array_cpu);
9028         if (!buf->data) {
9029                 ring_buffer_free(buf->buffer);
9030                 buf->buffer = NULL;
9031                 return -ENOMEM;
9032         }
9033
9034         /* Allocate the first page for all buffers */
9035         set_buffer_entries(&tr->array_buffer,
9036                            ring_buffer_size(tr->array_buffer.buffer, 0));
9037
9038         return 0;
9039 }
9040
9041 static int allocate_trace_buffers(struct trace_array *tr, int size)
9042 {
9043         int ret;
9044
9045         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9046         if (ret)
9047                 return ret;
9048
9049 #ifdef CONFIG_TRACER_MAX_TRACE
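	/*
	 * The max (snapshot) buffer starts out minimally sized and is only
	 * grown to the working size when a snapshot is requested, unless a
	 * snapshot was already asked for on the kernel command line.
	 */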
9050         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9051                                     allocate_snapshot ? size : 1);
9052         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9053                 ring_buffer_free(tr->array_buffer.buffer);
9054                 tr->array_buffer.buffer = NULL;
9055                 free_percpu(tr->array_buffer.data);
9056                 tr->array_buffer.data = NULL;
9057                 return -ENOMEM;
9058         }
9059         tr->allocated_snapshot = allocate_snapshot;
9060
9061         /*
9062          * Only the top level trace array gets its snapshot allocated
9063          * from the kernel command line.
9064          */
9065         allocate_snapshot = false;
9066 #endif
9067
9068         return 0;
9069 }
9070
9071 static void free_trace_buffer(struct array_buffer *buf)
9072 {
9073         if (buf->buffer) {
9074                 ring_buffer_free(buf->buffer);
9075                 buf->buffer = NULL;
9076                 free_percpu(buf->data);
9077                 buf->data = NULL;
9078         }
9079 }
9080
9081 static void free_trace_buffers(struct trace_array *tr)
9082 {
9083         if (!tr)
9084                 return;
9085
9086         free_trace_buffer(&tr->array_buffer);
9087
9088 #ifdef CONFIG_TRACER_MAX_TRACE
9089         free_trace_buffer(&tr->max_buffer);
9090 #endif
9091 }
9092
9093 static void init_trace_flags_index(struct trace_array *tr)
9094 {
9095         int i;
9096
9097         /* Used by the trace options files */
9098         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9099                 tr->trace_flags_index[i] = i;
9100 }
9101
9102 static void __update_tracer_options(struct trace_array *tr)
9103 {
9104         struct tracer *t;
9105
9106         for (t = trace_types; t; t = t->next)
9107                 add_tracer_options(tr, t);
9108 }
9109
9110 static void update_tracer_options(struct trace_array *tr)
9111 {
9112         mutex_lock(&trace_types_lock);
9113         __update_tracer_options(tr);
9114         mutex_unlock(&trace_types_lock);
9115 }
9116
9117 /* Must have trace_types_lock held */
9118 struct trace_array *trace_array_find(const char *instance)
9119 {
9120         struct trace_array *tr, *found = NULL;
9121
9122         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9123                 if (tr->name && strcmp(tr->name, instance) == 0) {
9124                         found = tr;
9125                         break;
9126                 }
9127         }
9128
9129         return found;
9130 }
9131
9132 struct trace_array *trace_array_find_get(const char *instance)
9133 {
9134         struct trace_array *tr;
9135
9136         mutex_lock(&trace_types_lock);
9137         tr = trace_array_find(instance);
9138         if (tr)
9139                 tr->ref++;
9140         mutex_unlock(&trace_types_lock);
9141
9142         return tr;
9143 }
9144
9145 static int trace_array_create_dir(struct trace_array *tr)
9146 {
9147         int ret;
9148
9149         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9150         if (!tr->dir)
9151                 return -EINVAL;
9152
9153         ret = event_trace_add_tracer(tr->dir, tr);
9154         if (ret) {
9155                 tracefs_remove(tr->dir);
9156                 return ret;
9157         }
9158
9159         init_tracer_tracefs(tr, tr->dir);
9160         __update_tracer_options(tr);
9161
9162         return ret;
9163 }
9164
9165 static struct trace_array *trace_array_create(const char *name)
9166 {
9167         struct trace_array *tr;
9168         int ret;
9169
9170         ret = -ENOMEM;
9171         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9172         if (!tr)
9173                 return ERR_PTR(ret);
9174
9175         tr->name = kstrdup(name, GFP_KERNEL);
9176         if (!tr->name)
9177                 goto out_free_tr;
9178
9179         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9180                 goto out_free_tr;
9181
9182         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9183
9184         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9185
9186         raw_spin_lock_init(&tr->start_lock);
9187
9188         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9189
9190         tr->current_trace = &nop_trace;
9191
9192         INIT_LIST_HEAD(&tr->systems);
9193         INIT_LIST_HEAD(&tr->events);
9194         INIT_LIST_HEAD(&tr->hist_vars);
9195         INIT_LIST_HEAD(&tr->err_log);
9196
9197         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9198                 goto out_free_tr;
9199
9200         if (ftrace_allocate_ftrace_ops(tr) < 0)
9201                 goto out_free_tr;
9202
9203         ftrace_init_trace_array(tr);
9204
9205         init_trace_flags_index(tr);
9206
9207         if (trace_instance_dir) {
9208                 ret = trace_array_create_dir(tr);
9209                 if (ret)
9210                         goto out_free_tr;
9211         } else
9212                 __trace_early_add_events(tr);
9213
9214         list_add(&tr->list, &ftrace_trace_arrays);
9215
9216         tr->ref++;
9217
9218         return tr;
9219
9220  out_free_tr:
9221         ftrace_free_ftrace_ops(tr);
9222         free_trace_buffers(tr);
9223         free_cpumask_var(tr->tracing_cpumask);
9224         kfree(tr->name);
9225         kfree(tr);
9226
9227         return ERR_PTR(ret);
9228 }
9229
9230 static int instance_mkdir(const char *name)
9231 {
9232         struct trace_array *tr;
9233         int ret;
9234
9235         mutex_lock(&event_mutex);
9236         mutex_lock(&trace_types_lock);
9237
9238         ret = -EEXIST;
9239         if (trace_array_find(name))
9240                 goto out_unlock;
9241
9242         tr = trace_array_create(name);
9243
9244         ret = PTR_ERR_OR_ZERO(tr);
9245
9246 out_unlock:
9247         mutex_unlock(&trace_types_lock);
9248         mutex_unlock(&event_mutex);
9249         return ret;
9250 }
9251
9252 /**
9253  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9254  * @name: The name of the trace array to be looked up/created.
9255  *
9256  * Returns a pointer to the trace array with the given name, or
9257  * NULL if it cannot be created.
9258  *
9259  * NOTE: This function increments the reference counter associated with the
9260  * trace array returned. This makes sure it cannot be freed while in use.
9261  * Use trace_array_put() once the trace array is no longer needed.
9262  * If the trace_array is to be freed, trace_array_destroy() needs to
9263  * be called after the trace_array_put(), or simply let user space delete
9264  * it from the tracefs instances directory. But until the
9265  * trace_array_put() is called, user space can not delete it.
9266  * trace_array_put() is called, user space cannot delete it.
9267  */
9268 struct trace_array *trace_array_get_by_name(const char *name)
9269 {
9270         struct trace_array *tr;
9271
9272         mutex_lock(&event_mutex);
9273         mutex_lock(&trace_types_lock);
9274
9275         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9276                 if (tr->name && strcmp(tr->name, name) == 0)
9277                         goto out_unlock;
9278         }
9279
9280         tr = trace_array_create(name);
9281
9282         if (IS_ERR(tr))
9283                 tr = NULL;
9284 out_unlock:
9285         if (tr)
9286                 tr->ref++;
9287
9288         mutex_unlock(&trace_types_lock);
9289         mutex_unlock(&event_mutex);
9290         return tr;
9291 }
9292 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
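/*
 * Hedged usage sketch for in-kernel users (instance name and error code are
 * assumptions): a module can create or look up its own tracing instance,
 * drop its reference when done, and optionally remove the instance:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_module");
 *	if (!tr)
 *		return -ENOMEM;
 *	// ... use the instance, e.g. with trace_array_printk() ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should go away
 */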
9293
9294 static int __remove_instance(struct trace_array *tr)
9295 {
9296         int i;
9297
9298         /* Reference counter for a newly created trace array = 1. */
9299         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9300                 return -EBUSY;
9301
9302         list_del(&tr->list);
9303
9304         /* Disable all the flags that were enabled coming in */
9305         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9306                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9307                         set_tracer_flag(tr, 1 << i, 0);
9308         }
9309
9310         tracing_set_nop(tr);
9311         clear_ftrace_function_probes(tr);
9312         event_trace_del_tracer(tr);
9313         ftrace_clear_pids(tr);
9314         ftrace_destroy_function_files(tr);
9315         tracefs_remove(tr->dir);
9316         free_percpu(tr->last_func_repeats);
9317         free_trace_buffers(tr);
9318
9319         for (i = 0; i < tr->nr_topts; i++) {
9320                 kfree(tr->topts[i].topts);
9321         }
9322         kfree(tr->topts);
9323
9324         free_cpumask_var(tr->tracing_cpumask);
9325         kfree(tr->name);
9326         kfree(tr);
9327
9328         return 0;
9329 }
9330
9331 int trace_array_destroy(struct trace_array *this_tr)
9332 {
9333         struct trace_array *tr;
9334         int ret;
9335
9336         if (!this_tr)
9337                 return -EINVAL;
9338
9339         mutex_lock(&event_mutex);
9340         mutex_lock(&trace_types_lock);
9341
9342         ret = -ENODEV;
9343
9344         /* Make sure the trace array exists before destroying it. */
9345         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9346                 if (tr == this_tr) {
9347                         ret = __remove_instance(tr);
9348                         break;
9349                 }
9350         }
9351
9352         mutex_unlock(&trace_types_lock);
9353         mutex_unlock(&event_mutex);
9354
9355         return ret;
9356 }
9357 EXPORT_SYMBOL_GPL(trace_array_destroy);
9358
9359 static int instance_rmdir(const char *name)
9360 {
9361         struct trace_array *tr;
9362         int ret;
9363
9364         mutex_lock(&event_mutex);
9365         mutex_lock(&trace_types_lock);
9366
9367         ret = -ENODEV;
9368         tr = trace_array_find(name);
9369         if (tr)
9370                 ret = __remove_instance(tr);
9371
9372         mutex_unlock(&trace_types_lock);
9373         mutex_unlock(&event_mutex);
9374
9375         return ret;
9376 }
9377
9378 static __init void create_trace_instances(struct dentry *d_tracer)
9379 {
9380         struct trace_array *tr;
9381
9382         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9383                                                          instance_mkdir,
9384                                                          instance_rmdir);
9385         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9386                 return;
9387
9388         mutex_lock(&event_mutex);
9389         mutex_lock(&trace_types_lock);
9390
9391         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9392                 if (!tr->name)
9393                         continue;
9394                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9395                              "Failed to create instance directory\n"))
9396                         break;
9397         }
9398
9399         mutex_unlock(&trace_types_lock);
9400         mutex_unlock(&event_mutex);
9401 }
9402
9403 static void
9404 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9405 {
9406         struct trace_event_file *file;
9407         int cpu;
9408
9409         trace_create_file("available_tracers", 0444, d_tracer,
9410                         tr, &show_traces_fops);
9411
9412         trace_create_file("current_tracer", 0644, d_tracer,
9413                         tr, &set_tracer_fops);
9414
9415         trace_create_file("tracing_cpumask", 0644, d_tracer,
9416                           tr, &tracing_cpumask_fops);
9417
9418         trace_create_file("trace_options", 0644, d_tracer,
9419                           tr, &tracing_iter_fops);
9420
9421         trace_create_file("trace", 0644, d_tracer,
9422                           tr, &tracing_fops);
9423
9424         trace_create_file("trace_pipe", 0444, d_tracer,
9425                           tr, &tracing_pipe_fops);
9426
9427         trace_create_file("buffer_size_kb", 0644, d_tracer,
9428                           tr, &tracing_entries_fops);
9429
9430         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9431                           tr, &tracing_total_entries_fops);
9432
9433         trace_create_file("free_buffer", 0200, d_tracer,
9434                           tr, &tracing_free_buffer_fops);
9435
9436         trace_create_file("trace_marker", 0220, d_tracer,
9437                           tr, &tracing_mark_fops);
9438
9439         file = __find_event_file(tr, "ftrace", "print");
9440         if (file && file->dir)
9441                 trace_create_file("trigger", 0644, file->dir, file,
9442                                   &event_trigger_fops);
9443         tr->trace_marker_file = file;
9444
9445         trace_create_file("trace_marker_raw", 0220, d_tracer,
9446                           tr, &tracing_mark_raw_fops);
9447
9448         trace_create_file("trace_clock", 0644, d_tracer, tr,
9449                           &trace_clock_fops);
9450
9451         trace_create_file("tracing_on", 0644, d_tracer,
9452                           tr, &rb_simple_fops);
9453
9454         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9455                           &trace_time_stamp_mode_fops);
9456
9457         tr->buffer_percent = 50;
9458
9459         trace_create_file("buffer_percent", 0444, d_tracer,
9460                         tr, &buffer_percent_fops);
9461
9462         create_trace_options_dir(tr);
9463
9464 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9465         trace_create_maxlat_file(tr, d_tracer);
9466 #endif
9467
9468         if (ftrace_create_function_files(tr, d_tracer))
9469                 MEM_FAIL(1, "Could not allocate function filter files");
9470
9471 #ifdef CONFIG_TRACER_SNAPSHOT
9472         trace_create_file("snapshot", 0644, d_tracer,
9473                           tr, &snapshot_fops);
9474 #endif
9475
9476         trace_create_file("error_log", 0644, d_tracer,
9477                           tr, &tracing_err_log_fops);
9478
9479         for_each_tracing_cpu(cpu)
9480                 tracing_init_tracefs_percpu(tr, cpu);
9481
9482         ftrace_init_tracefs(tr, d_tracer);
9483 }
9484
9485 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9486 {
9487         struct vfsmount *mnt;
9488         struct file_system_type *type;
9489
9490         /*
9491          * To maintain backward compatibility for tools that mount
9492          * debugfs to get to the tracing facility, tracefs is automatically
9493          * mounted to the debugfs/tracing directory.
9494          */
9495         type = get_fs_type("tracefs");
9496         if (!type)
9497                 return NULL;
9498         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9499         put_filesystem(type);
9500         if (IS_ERR(mnt))
9501                 return NULL;
9502         mntget(mnt);
9503
9504         return mnt;
9505 }
9506
9507 /**
9508  * tracing_init_dentry - initialize top level trace array
9509  *
9510  * This is called when creating files or directories in the tracing
9511  * directory. It is called via fs_initcall() by any of the boot up code
9512  * and returns 0 on success, or a negative error code on failure.
9513  */
9514 int tracing_init_dentry(void)
9515 {
9516         struct trace_array *tr = &global_trace;
9517
9518         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9519                 pr_warn("Tracing disabled due to lockdown\n");
9520                 return -EPERM;
9521         }
9522
9523         /* The top level trace array uses NULL as parent */
9524         if (tr->dir)
9525                 return 0;
9526
9527         if (WARN_ON(!tracefs_initialized()))
9528                 return -ENODEV;
9529
9530         /*
9531          * As there may still be users that expect the tracing
9532          * files to exist in debugfs/tracing, we must automount
9533          * the tracefs file system there, so older tools still
9534          * work with the newer kernel.
9535          */
9536         tr->dir = debugfs_create_automount("tracing", NULL,
9537                                            trace_automount, NULL);
9538
9539         return 0;
9540 }
9541
9542 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9543 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9544
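/*
 * The eval maps between __start_ftrace_eval_maps and __stop_ftrace_eval_maps
 * are built into the kernel image (e.g. via TRACE_DEFINE_ENUM()). Inserting
 * them into the global list is pushed off to a workqueue so it does not have
 * to run synchronously at boot; trace_eval_sync() below destroys the
 * workqueue, which guarantees the insertion has finished by late initcall.
 */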
9545 static struct workqueue_struct *eval_map_wq __initdata;
9546 static struct work_struct eval_map_work __initdata;
9547
9548 static void __init eval_map_work_func(struct work_struct *work)
9549 {
9550         int len;
9551
9552         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9553         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9554 }
9555
9556 static int __init trace_eval_init(void)
9557 {
9558         INIT_WORK(&eval_map_work, eval_map_work_func);
9559
9560         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9561         if (!eval_map_wq) {
9562                 pr_err("Unable to allocate eval_map_wq\n");
9563                 /* Fall back to doing the work synchronously */
9564                 eval_map_work_func(&eval_map_work);
9565                 return -ENOMEM;
9566         }
9567
9568         queue_work(eval_map_wq, &eval_map_work);
9569         return 0;
9570 }
9571
9572 static int __init trace_eval_sync(void)
9573 {
9574         /* Make sure the eval map updates are finished */
9575         if (eval_map_wq)
9576                 destroy_workqueue(eval_map_wq);
9577         return 0;
9578 }
9579
9580 late_initcall_sync(trace_eval_sync);
9581
9582
9583 #ifdef CONFIG_MODULES
9584 static void trace_module_add_evals(struct module *mod)
9585 {
9586         if (!mod->num_trace_evals)
9587                 return;
9588
9589         /*
9590          * Modules with bad taint do not have events created, do
9591          * not bother with their eval maps either.
9592          */
9593         if (trace_module_has_bad_taint(mod))
9594                 return;
9595
9596         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9597 }
9598
9599 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
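/*
 * When a module goes away, walk the trace_eval_maps list, unlink the
 * entry that belongs to this module and free it.
 */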
9600 static void trace_module_remove_evals(struct module *mod)
9601 {
9602         union trace_eval_map_item *map;
9603         union trace_eval_map_item **last = &trace_eval_maps;
9604
9605         if (!mod->num_trace_evals)
9606                 return;
9607
9608         mutex_lock(&trace_eval_mutex);
9609
9610         map = trace_eval_maps;
9611
9612         while (map) {
9613                 if (map->head.mod == mod)
9614                         break;
9615                 map = trace_eval_jmp_to_tail(map);
9616                 last = &map->tail.next;
9617                 map = map->tail.next;
9618         }
9619         if (!map)
9620                 goto out;
9621
9622         *last = trace_eval_jmp_to_tail(map)->tail.next;
9623         kfree(map);
9624  out:
9625         mutex_unlock(&trace_eval_mutex);
9626 }
9627 #else
9628 static inline void trace_module_remove_evals(struct module *mod) { }
9629 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9630
9631 static int trace_module_notify(struct notifier_block *self,
9632                                unsigned long val, void *data)
9633 {
9634         struct module *mod = data;
9635
9636         switch (val) {
9637         case MODULE_STATE_COMING:
9638                 trace_module_add_evals(mod);
9639                 break;
9640         case MODULE_STATE_GOING:
9641                 trace_module_remove_evals(mod);
9642                 break;
9643         }
9644
9645         return NOTIFY_OK;
9646 }
9647
9648 static struct notifier_block trace_module_nb = {
9649         .notifier_call = trace_module_notify,
9650         .priority = 0,
9651 };
9652 #endif /* CONFIG_MODULES */
9653
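/*
 * Create the top level tracefs files and the files of the global trace
 * array. Registered as an fs_initcall() below.
 */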
9654 static __init int tracer_init_tracefs(void)
9655 {
9656         int ret;
9657
9658         trace_access_lock_init();
9659
9660         ret = tracing_init_dentry();
9661         if (ret)
9662                 return 0;
9663
9664         event_trace_init();
9665
9666         init_tracer_tracefs(&global_trace, NULL);
9667         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9668
9669         trace_create_file("tracing_thresh", 0644, NULL,
9670                         &global_trace, &tracing_thresh_fops);
9671
9672         trace_create_file("README", 0444, NULL,
9673                         NULL, &tracing_readme_fops);
9674
9675         trace_create_file("saved_cmdlines", 0444, NULL,
9676                         NULL, &tracing_saved_cmdlines_fops);
9677
9678         trace_create_file("saved_cmdlines_size", 0644, NULL,
9679                           NULL, &tracing_saved_cmdlines_size_fops);
9680
9681         trace_create_file("saved_tgids", 0444, NULL,
9682                         NULL, &tracing_saved_tgids_fops);
9683
9684         trace_eval_init();
9685
9686         trace_create_eval_file(NULL);
9687
9688 #ifdef CONFIG_MODULES
9689         register_module_notifier(&trace_module_nb);
9690 #endif
9691
9692 #ifdef CONFIG_DYNAMIC_FTRACE
9693         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9694                         NULL, &tracing_dyn_info_fops);
9695 #endif
9696
9697         create_trace_instances(NULL);
9698
9699         update_tracer_options(&global_trace);
9700
9701         return 0;
9702 }
9703
9704 fs_initcall(tracer_init_tracefs);
9705
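/*
 * Panic/die notifiers: if the user asked for it (ftrace_dump_on_oops),
 * dump the ftrace ring buffer to the console when the kernel panics or
 * oopses.
 */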
9706 static int trace_panic_handler(struct notifier_block *this,
9707                                unsigned long event, void *unused)
9708 {
9709         if (ftrace_dump_on_oops)
9710                 ftrace_dump(ftrace_dump_on_oops);
9711         return NOTIFY_OK;
9712 }
9713
9714 static struct notifier_block trace_panic_notifier = {
9715         .notifier_call  = trace_panic_handler,
9716         .next           = NULL,
9717         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9718 };
9719
9720 static int trace_die_handler(struct notifier_block *self,
9721                              unsigned long val,
9722                              void *data)
9723 {
9724         switch (val) {
9725         case DIE_OOPS:
9726                 if (ftrace_dump_on_oops)
9727                         ftrace_dump(ftrace_dump_on_oops);
9728                 break;
9729         default:
9730                 break;
9731         }
9732         return NOTIFY_OK;
9733 }
9734
9735 static struct notifier_block trace_die_notifier = {
9736         .notifier_call = trace_die_handler,
9737         .priority = 200
9738 };
9739
9740 /*
9741  * printk is limited to a max of 1024 characters; we really don't need it that big.
9742  * Nothing should be printing 1000 characters anyway.
9743  */
9744 #define TRACE_MAX_PRINT         1000
9745
9746 /*
9747  * Define here KERN_TRACE so that we have one place to modify
9748  * it if we decide to change what log level the ftrace dump
9749  * should be at.
9750  */
9751 #define KERN_TRACE              KERN_EMERG
9752
9753 void
9754 trace_printk_seq(struct trace_seq *s)
9755 {
9756         /* Probably should print a warning here. */
9757         if (s->seq.len >= TRACE_MAX_PRINT)
9758                 s->seq.len = TRACE_MAX_PRINT;
9759
9760         /*
9761          * More paranoid code. Although the buffer size is set to
9762          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9763          * an extra layer of protection.
9764          */
9765         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9766                 s->seq.len = s->seq.size - 1;
9767
9768         /* should already be NUL-terminated, but we are paranoid. */
9769         s->buffer[s->seq.len] = 0;
9770
9771         printk(KERN_TRACE "%s", s->buffer);
9772
9773         trace_seq_init(s);
9774 }
9775
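/*
 * Set up an iterator over the global trace buffer, as used by
 * ftrace_dump() below.
 */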
9776 void trace_init_global_iter(struct trace_iterator *iter)
9777 {
9778         iter->tr = &global_trace;
9779         iter->trace = iter->tr->current_trace;
9780         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9781         iter->array_buffer = &global_trace.array_buffer;
9782
9783         if (iter->trace && iter->trace->open)
9784                 iter->trace->open(iter);
9785
9786         /* Annotate start of buffers if we had overruns */
9787         if (ring_buffer_overruns(iter->array_buffer->buffer))
9788                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9789
9790         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9791         if (trace_clocks[iter->tr->clock_id].in_ns)
9792                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9793 }
9794
9795 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9796 {
9797         /* use static because iter can be a bit big for the stack */
9798         static struct trace_iterator iter;
9799         static atomic_t dump_running;
9800         struct trace_array *tr = &global_trace;
9801         unsigned int old_userobj;
9802         unsigned long flags;
9803         int cnt = 0, cpu;
9804
9805         /* Only allow one dump user at a time. */
9806         if (atomic_inc_return(&dump_running) != 1) {
9807                 atomic_dec(&dump_running);
9808                 return;
9809         }
9810
9811         /*
9812          * Always turn off tracing when we dump.
9813          * We don't need to show trace output of what happens
9814          * between multiple crashes.
9815          *
9816          * If the user does a sysrq-z, then they can re-enable
9817          * tracing with echo 1 > tracing_on.
9818          */
9819         tracing_off();
9820
9821         local_irq_save(flags);
9822
9823         /* Simulate the iterator */
9824         trace_init_global_iter(&iter);
9825         /* Can not use kmalloc for iter.temp and iter.fmt */
9826         iter.temp = static_temp_buf;
9827         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9828         iter.fmt = static_fmt_buf;
9829         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9830
9831         for_each_tracing_cpu(cpu) {
9832                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9833         }
9834
9835         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9836
9837         /* don't look at user memory in panic mode */
9838         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9839
9840         switch (oops_dump_mode) {
9841         case DUMP_ALL:
9842                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9843                 break;
9844         case DUMP_ORIG:
9845                 iter.cpu_file = raw_smp_processor_id();
9846                 break;
9847         case DUMP_NONE:
9848                 goto out_enable;
9849         default:
9850                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9851                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852         }
9853
9854         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9855
9856         /* Did function tracer already get disabled? */
9857         if (ftrace_is_dead()) {
9858                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9859                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9860         }
9861
9862         /*
9863          * We need to stop all tracing on all CPUs to read
9864          * the next buffer. This is a bit expensive, but it is
9865          * not done often. We read everything we can,
9866          * and then release the locks again.
9867          */
9868
9869         while (!trace_empty(&iter)) {
9870
9871                 if (!cnt)
9872                         printk(KERN_TRACE "---------------------------------\n");
9873
9874                 cnt++;
9875
9876                 trace_iterator_reset(&iter);
9877                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9878
9879                 if (trace_find_next_entry_inc(&iter) != NULL) {
9880                         int ret;
9881
9882                         ret = print_trace_line(&iter);
9883                         if (ret != TRACE_TYPE_NO_CONSUME)
9884                                 trace_consume(&iter);
9885                 }
9886                 touch_nmi_watchdog();
9887
9888                 trace_printk_seq(&iter.seq);
9889         }
9890
9891         if (!cnt)
9892                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9893         else
9894                 printk(KERN_TRACE "---------------------------------\n");
9895
9896  out_enable:
9897         tr->trace_flags |= old_userobj;
9898
9899         for_each_tracing_cpu(cpu) {
9900                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9901         }
9902         atomic_dec(&dump_running);
9903         local_irq_restore(flags);
9904 }
9905 EXPORT_SYMBOL_GPL(ftrace_dump);
9906
9907 #define WRITE_BUFSIZE  4096
9908
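/*
 * Read commands from a user buffer in WRITE_BUFSIZE chunks, split them on
 * newlines, strip everything after a '#', and hand each resulting line to
 * createfn(). Used for command style files such as kprobe_events.
 */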
9909 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9910                                 size_t count, loff_t *ppos,
9911                                 int (*createfn)(const char *))
9912 {
9913         char *kbuf, *buf, *tmp;
9914         int ret = 0;
9915         size_t done = 0;
9916         size_t size;
9917
9918         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9919         if (!kbuf)
9920                 return -ENOMEM;
9921
9922         while (done < count) {
9923                 size = count - done;
9924
9925                 if (size >= WRITE_BUFSIZE)
9926                         size = WRITE_BUFSIZE - 1;
9927
9928                 if (copy_from_user(kbuf, buffer + done, size)) {
9929                         ret = -EFAULT;
9930                         goto out;
9931                 }
9932                 kbuf[size] = '\0';
9933                 buf = kbuf;
9934                 do {
9935                         tmp = strchr(buf, '\n');
9936                         if (tmp) {
9937                                 *tmp = '\0';
9938                                 size = tmp - buf + 1;
9939                         } else {
9940                                 size = strlen(buf);
9941                                 if (done + size < count) {
9942                                         if (buf != kbuf)
9943                                                 break;
9944                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9945                                         pr_warn("Line length is too long: Should be less than %d\n",
9946                                                 WRITE_BUFSIZE - 2);
9947                                         ret = -EINVAL;
9948                                         goto out;
9949                                 }
9950                         }
9951                         done += size;
9952
9953                         /* Remove comments */
9954                         tmp = strchr(buf, '#');
9955
9956                         if (tmp)
9957                                 *tmp = '\0';
9958
9959                         ret = createfn(buf);
9960                         if (ret)
9961                                 goto out;
9962                         buf += size;
9963
9964                 } while (done < count);
9965         }
9966         ret = done;
9967
9968 out:
9969         kfree(kbuf);
9970
9971         return ret;
9972 }
9973
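/*
 * Allocate the ring buffers for the global trace array and set up the core
 * tracing state (nop tracer, panic/die notifiers, boot options). Called
 * from early_trace_init() during boot.
 */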
9974 __init static int tracer_alloc_buffers(void)
9975 {
9976         int ring_buf_size;
9977         int ret = -ENOMEM;
9978
9979
9980         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9981                 pr_warn("Tracing disabled due to lockdown\n");
9982                 return -EPERM;
9983         }
9984
9985         /*
9986          * Make sure we don't accidentally add more trace options
9987          * than we have bits for.
9988          */
9989         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9990
9991         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9992                 goto out;
9993
9994         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9995                 goto out_free_buffer_mask;
9996
9997         /* Only allocate trace_printk buffers if a trace_printk exists */
9998         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9999                 /* Must be called before global_trace.buffer is allocated */
10000                 trace_printk_init_buffers();
10001
10002         /* To save memory, keep the ring buffer size to its minimum */
10003         if (ring_buffer_expanded)
10004                 ring_buf_size = trace_buf_size;
10005         else
10006                 ring_buf_size = 1;
10007
10008         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10009         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10010
10011         raw_spin_lock_init(&global_trace.start_lock);
10012
10013         /*
10014          * The prepare callback allocates some memory for the ring buffer. We
10015          * don't free the buffer if the CPU goes down. If we were to free
10016          * the buffer, then the user would lose any trace that was in the
10017          * buffer. The memory will be removed once the "instance" is removed.
10018          */
10019         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10020                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10021                                       NULL);
10022         if (ret < 0)
10023                 goto out_free_cpumask;
10024         /* Used for event triggers */
10025         ret = -ENOMEM;
10026         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10027         if (!temp_buffer)
10028                 goto out_rm_hp_state;
10029
10030         if (trace_create_savedcmd() < 0)
10031                 goto out_free_temp_buffer;
10032
10033         /* TODO: make the number of buffers hot pluggable with CPUS */
10034         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10035                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10036                 goto out_free_savedcmd;
10037         }
10038
10039         if (global_trace.buffer_disabled)
10040                 tracing_off();
10041
10042         if (trace_boot_clock) {
10043                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10044                 if (ret < 0)
10045                         pr_warn("Trace clock %s not defined, going back to default\n",
10046                                 trace_boot_clock);
10047         }
10048
10049         /*
10050          * register_tracer() might reference current_trace, so it
10051          * needs to be set before we register anything. This is
10052          * just a bootstrap of current_trace anyway.
10053          */
10054         global_trace.current_trace = &nop_trace;
10055
10056         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10057
10058         ftrace_init_global_array_ops(&global_trace);
10059
10060         init_trace_flags_index(&global_trace);
10061
10062         register_tracer(&nop_trace);
10063
10064         /* Function tracing may start here (via kernel command line) */
10065         init_function_trace();
10066
10067         /* All seems OK, enable tracing */
10068         tracing_disabled = 0;
10069
10070         atomic_notifier_chain_register(&panic_notifier_list,
10071                                        &trace_panic_notifier);
10072
10073         register_die_notifier(&trace_die_notifier);
10074
10075         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10076
10077         INIT_LIST_HEAD(&global_trace.systems);
10078         INIT_LIST_HEAD(&global_trace.events);
10079         INIT_LIST_HEAD(&global_trace.hist_vars);
10080         INIT_LIST_HEAD(&global_trace.err_log);
10081         list_add(&global_trace.list, &ftrace_trace_arrays);
10082
10083         apply_trace_boot_options();
10084
10085         register_snapshot_cmd();
10086
10087         test_can_verify();
10088
10089         return 0;
10090
10091 out_free_savedcmd:
10092         free_saved_cmdlines_buffer(savedcmd);
10093 out_free_temp_buffer:
10094         ring_buffer_free(temp_buffer);
10095 out_rm_hp_state:
10096         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10097 out_free_cpumask:
10098         free_cpumask_var(global_trace.tracing_cpumask);
10099 out_free_buffer_mask:
10100         free_cpumask_var(tracing_buffer_mask);
10101 out:
10102         return ret;
10103 }
10104
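/*
 * Called early in boot: allocate the tracing buffers and, if
 * tracepoint_printk was requested, the iterator used to echo tracepoints
 * to printk().
 */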
10105 void __init early_trace_init(void)
10106 {
10107         if (tracepoint_printk) {
10108                 tracepoint_print_iter =
10109                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10110                 if (MEM_FAIL(!tracepoint_print_iter,
10111                              "Failed to allocate trace iterator\n"))
10112                         tracepoint_printk = 0;
10113                 else
10114                         static_key_enable(&tracepoint_printk_key.key);
10115         }
10116         tracer_alloc_buffers();
10117 }
10118
10119 void __init trace_init(void)
10120 {
10121         trace_event_init();
10122 }
10123
10124 __init static void clear_boot_tracer(void)
10125 {
10126         /*
10127          * The default bootup tracer name is stored in an init section.
10128          * This function is called from a late initcall. If we did not
10129          * find the boot tracer, then clear it out, to prevent
10130          * later registration from accessing the buffer that is
10131          * about to be freed.
10132          */
10133         if (!default_bootup_tracer)
10134                 return;
10135
10136         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10137                default_bootup_tracer);
10138         default_bootup_tracer = NULL;
10139 }
10140
10141 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10142 __init static void tracing_set_default_clock(void)
10143 {
10144         /* sched_clock_stable() is determined in late_initcall */
10145         if (!trace_boot_clock && !sched_clock_stable()) {
10146                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10147                         pr_warn("Can not set tracing clock due to lockdown\n");
10148                         return;
10149                 }
10150
10151                 printk(KERN_WARNING
10152                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10153                        "If you want to keep using the local clock, then add:\n"
10154                        "  \"trace_clock=local\"\n"
10155                        "on the kernel command line\n");
10156                 tracing_set_clock(&global_trace, "global");
10157         }
10158 }
10159 #else
10160 static inline void tracing_set_default_clock(void) { }
10161 #endif
10162
10163 __init static int late_trace_init(void)
10164 {
10165         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10166                 static_key_disable(&tracepoint_printk_key.key);
10167                 tracepoint_printk = 0;
10168         }
10169
10170         tracing_set_default_clock();
10171         clear_boot_tracer();
10172         return 0;
10173 }
10174
10175 late_initcall_sync(late_trace_init);