drm/amdgpu: Add support for RAS XGMI err query
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are entering export into the list but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
/*
 * Register @export on @list: first raise the static keys for the classes
 * it wants, then make it visible on the list.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
373
/*
 * Remove @export from @list and drop the static keys it was holding.
 *
 * The keys are only decremented when the entry was actually found on the
 * list. An export that is not on the list was never counted by
 * add_ftrace_export(), so decrementing for it would unbalance the
 * static key reference counts.
 *
 * Returns 0 on success, -1 if @export was not on @list.
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (!ret)
		ftrace_exports_disable(export);

	return ret;
}
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Suitable as a seq_file "show" operation: undoes the +1 encoding of @v
 * and prints the pid followed by a newline. Always returns 0.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long encoded = (unsigned long)v;

	seq_printf(m, "%lu\n", encoded - 1);

	return 0;
}
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all or nothing
698          * operation. Always create a new array when adding new pids by
699          * the user. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If the dump on oops happens, it will be much appreciated
821  * to not have to wait for all that output. Anyway this can be
822  * boot time and run time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * serialize the access of the ring buffer
838  *
839  * ring buffer serializes readers, but it is low level protection.
840  * The validity of the events (as returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to consume
844  * these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not reader page) in ring buffer, and this page will be rewritten
847  *      by events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to system.
850  *
851  * These primitives allow multi process access to different cpu ring buffer
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multi read-only access are also serialized.
856  */
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
/*
 * !CONFIG_SMP variant: access_lock is set up by DEFINE_MUTEX(), so there
 * is nothing to initialize at run time.
 */
static inline void trace_access_lock_init(void)
{
}
915
916 #endif
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
/* !CONFIG_STACKTRACE stub: recording a stack trace is a no-op. */
static inline void
__ftrace_trace_stack(struct trace_buffer *buffer, unsigned int trace_ctx,
		     int skip, struct pt_regs *regs)
{
}
/*
 * !CONFIG_STACKTRACE stub: recording a stack trace is a no-op.
 *
 * The parameter list mirrors the CONFIG_STACKTRACE prototype exactly
 * (@trace_ctx is an unsigned int there), so both configurations present
 * the same signature to callers.
 */
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff), that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races of where it gets disabled but we still do a record.
975          * As the check is in the fast path of the tracers, it is more
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * Behaves like tracing_snapshot(), except that the snapshot is only
 * taken when the cond_snapshot.update() implementation of @tr, which is
 * handed @cond_data, returns true. That callback decides from
 * @cond_data whether to snapshot and, if it does, presumably saves the
 * data along with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	struct trace_array *instance = tr;

	tracing_snapshot_instance_cond(instance, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
/*
 * Forward declarations of helpers used by the snapshot allocation and
 * release code below.
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer. instead, resize it because
1239          * The max_tr ring buffer has some state (e.g. ring->clock) and
1240          * we want preserve it.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), but the snapshot buffer is allocated first
 * if that has not happened yet. The allocation may sleep, so only call
 * this where sleeping is safe.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer,
 * capturing the live trace at the moment some condition triggers while
 * tracing continues. Nothing is snapshotted if the allocation fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() >= 0)
		tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
/* Stub for kernels built without CONFIG_TRACER_SNAPSHOT: warn once, do nothing. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/* Stub for kernels built without CONFIG_TRACER_SNAPSHOT: warn once, do nothing. */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* Stub for kernels built without CONFIG_TRACER_SNAPSHOT. */
void tracing_snapshot_alloc(void)
{
	/* Called only for the warning that the stub tracing_snapshot() emits */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/* Stub for kernels built without CONFIG_TRACER_SNAPSHOT: nothing to disable, returns 0. */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
/* Convert nanoseconds to whole microseconds (the remainder is dropped). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	unsigned long usecs = nsecs / 1000;

	return usecs;
}
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
/*
 * These must match the bit positions in trace_iterator_flags.
 *
 * With C(a, b) defined as 'b' above, TRACE_FLAGS expands to the list of
 * option name strings; the list is terminated by NULL.
 */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
1537
/*
 * Table of the available trace clocks. trace_clock_in_ns() indexes it
 * with a trace array's clock_id.
 */
static struct {
	u64 (*func)(void);	/* function that reads the clock */
	const char *name;	/* name string of the clock */
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	ARCH_TRACE_CLOCKS
};
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * A string that is cut off by the end of the user buffer is kept in the
 * parser with parser->cont set, so that the next call continues it.
 *
 * Returns number of bytes read, the error from get_user() if a byte
 * could not be fetched, or -EINVAL if the string does not fit into the
 * parser buffer.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A write at position zero starts over with a cleared parser */
	if (!*ppos)
		trace_parser_clear(parser);

	/* Fetch the first byte; ch always holds the last byte fetched */
	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * If the parser is not finished with the last write (parser->cont
	 * is set), continue reading the user input without skipping
	 * spaces. Otherwise a new string starts here.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		/* One byte is always kept free for the terminating '\0' */
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Input ran out mid-string: store the last byte and continue later */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
/*
 * Threshold value; the "tracing_thresh=" boot parameter stores its
 * argument multiplied by 1000 here.
 */
unsigned long __read_mostly	tracing_thresh;
/* Forward declaration: file operations of the "tracing_max_latency" file. */
static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
/*
 * Workqueue on which the latency fsnotify work is queued; allocated by
 * latency_fsnotify_init() and NULL until then.
 */
static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752
/*
 * Without LATENCY_FS_NOTIFY only the "tracing_max_latency" file is
 * created. The macro arguments are parenthesized so that any
 * expression can be passed for @tr and @d_tracer.
 */
#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", 0644, (d_tracer),	\
			  &(tr)->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this tasks comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static, they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set by init_trace_selftests(); until then selftests are postponed. */
static bool selftests_can_run;

/* One postponed selftest, queued on postponed_selftests. */
struct trace_selftests {
        struct list_head                list;   /* link in postponed_selftests */
        struct tracer                   *type;  /* tracer whose selftest is pending */
};

/* Selftests saved by save_selftest() and run by init_trace_selftests(). */
static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled, so
2014                  * lets make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
/* Without CONFIG_FTRACE_STARTUP_TEST there is no selftest: report success. */
static inline int run_tracer_selftest(struct tracer *type)
{
        return 0;
}
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
/* Forward declarations: both are called by register_tracer() below. */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /*allocate a dummy tracer_flags*/
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* disable other selftests, since this will break it. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

/* Number of comm slots requested by trace_create_savedcmd(). */
#define SAVED_CMDLINES_DEFAULT 128
/* Value stored in the maps below to mean "no entry". */
#define NO_CMDLINE_MAP UINT_MAX
/* Held around accesses to savedcmd; the save path only try-locks it. */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* Cache of task comms, looked up by pid masked with PID_MAX_DEFAULT - 1. */
struct saved_cmdlines_buffer {
        /* masked pid -> slot index in saved_cmdlines, or NO_CMDLINE_MAP */
        unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
        /* slot index -> full pid that last wrote the slot */
        unsigned *map_cmdline_to_pid;
        /* number of slots in map_cmdline_to_pid and saved_cmdlines */
        unsigned cmdline_num;
        /* slot most recently handed out by trace_save_cmdline() */
        int cmdline_idx;
        /* cmdline_num slots of TASK_COMM_LEN bytes each */
        char *saved_cmdlines;
};
/* The comm cache allocated by trace_create_savedcmd(). */
static struct saved_cmdlines_buffer *savedcmd;
2215
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227                                     struct saved_cmdlines_buffer *s)
2228 {
2229         s->map_cmdline_to_pid = kmalloc_array(val,
2230                                               sizeof(*s->map_cmdline_to_pid),
2231                                               GFP_KERNEL);
2232         if (!s->map_cmdline_to_pid)
2233                 return -ENOMEM;
2234
2235         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236         if (!s->saved_cmdlines) {
2237                 kfree(s->map_cmdline_to_pid);
2238                 return -ENOMEM;
2239         }
2240
2241         s->cmdline_idx = 0;
2242         s->cmdline_num = val;
2243         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244                sizeof(s->map_pid_to_cmdline));
2245         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246                val * sizeof(*s->map_cmdline_to_pid));
2247
2248         return 0;
2249 }
2250
2251 static int trace_create_savedcmd(void)
2252 {
2253         int ret;
2254
2255         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256         if (!savedcmd)
2257                 return -ENOMEM;
2258
2259         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260         if (ret < 0) {
2261                 kfree(savedcmd);
2262                 savedcmd = NULL;
2263                 return -ENOMEM;
2264         }
2265
2266         return 0;
2267 }
2268
/*
 * Return the stop_count of the global trace array. It is incremented by
 * tracing_stop() and decremented by tracing_start(), so a non-zero value
 * means tracing is currently stopped.
 */
int is_tracing_stopped(void)
{
        return global_trace.stop_count;
}
2273
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282         struct trace_buffer *buffer;
2283         unsigned long flags;
2284
2285         if (tracing_disabled)
2286                 return;
2287
2288         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289         if (--global_trace.stop_count) {
2290                 if (global_trace.stop_count < 0) {
2291                         /* Someone screwed up their debugging */
2292                         WARN_ON_ONCE(1);
2293                         global_trace.stop_count = 0;
2294                 }
2295                 goto out;
2296         }
2297
2298         /* Prevent the buffers from switching */
2299         arch_spin_lock(&global_trace.max_lock);
2300
2301         buffer = global_trace.array_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306         buffer = global_trace.max_buffer.buffer;
2307         if (buffer)
2308                 ring_buffer_record_enable(buffer);
2309 #endif
2310
2311         arch_spin_unlock(&global_trace.max_lock);
2312
2313  out:
2314         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319         struct trace_buffer *buffer;
2320         unsigned long flags;
2321
2322         if (tracing_disabled)
2323                 return;
2324
2325         /* If global, we need to also start the max tracer */
2326         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                 return tracing_start();
2328
2329         raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331         if (--tr->stop_count) {
2332                 if (tr->stop_count < 0) {
2333                         /* Someone screwed up their debugging */
2334                         WARN_ON_ONCE(1);
2335                         tr->stop_count = 0;
2336                 }
2337                 goto out;
2338         }
2339
2340         buffer = tr->array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344  out:
2345         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356         struct trace_buffer *buffer;
2357         unsigned long flags;
2358
2359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360         if (global_trace.stop_count++)
2361                 goto out;
2362
2363         /* Prevent the buffers from switching */
2364         arch_spin_lock(&global_trace.max_lock);
2365
2366         buffer = global_trace.array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371         buffer = global_trace.max_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374 #endif
2375
2376         arch_spin_unlock(&global_trace.max_lock);
2377
2378  out:
2379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384         struct trace_buffer *buffer;
2385         unsigned long flags;
2386
2387         /* If global, we need to also stop the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_stop();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392         if (tr->stop_count++)
2393                 goto out;
2394
2395         buffer = tr->array_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_disable(buffer);
2398
2399  out:
2400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405         unsigned tpid, idx;
2406
2407         /* treat recording of idle task as a success */
2408         if (!tsk->pid)
2409                 return 1;
2410
2411         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413         /*
2414          * It's not the end of the world if we don't get
2415          * the lock, but we also don't want to spin
2416          * nor do we want to disable interrupts,
2417          * so if we miss here, then better luck next time.
2418          */
2419         if (!arch_spin_trylock(&trace_cmdline_lock))
2420                 return 0;
2421
2422         idx = savedcmd->map_pid_to_cmdline[tpid];
2423         if (idx == NO_CMDLINE_MAP) {
2424                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                 savedcmd->cmdline_idx = idx;
2428         }
2429
2430         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441         int tpid;
2442
2443         if (!pid) {
2444                 strcpy(comm, "<idle>");
2445                 return;
2446         }
2447
2448         if (WARN_ON_ONCE(pid < 0)) {
2449                 strcpy(comm, "<XXX>");
2450                 return;
2451         }
2452
2453         tpid = pid & (PID_MAX_DEFAULT - 1);
2454         map = savedcmd->map_pid_to_cmdline[tpid];
2455         if (map != NO_CMDLINE_MAP) {
2456                 tpid = savedcmd->map_cmdline_to_pid[map];
2457                 if (tpid == pid) {
2458                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                         return;
2460                 }
2461         }
2462         strcpy(comm, "<...>");
2463 }
2464
/*
 * Locked wrapper around __trace_find_cmdline(): fills @comm with the comm
 * saved for @pid while holding trace_cmdline_lock with preemption disabled.
 */
void trace_find_cmdline(int pid, char comm[])
{
        preempt_disable();
        arch_spin_lock(&trace_cmdline_lock);

        __trace_find_cmdline(pid, comm);

        arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
}
2475
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478         /*
2479          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480          * if we observe a non-NULL tgid_map then we also observe the correct
2481          * tgid_map_max.
2482          */
2483         int *map = smp_load_acquire(&tgid_map);
2484
2485         if (unlikely(!map || pid > tgid_map_max))
2486                 return NULL;
2487
2488         return &map[pid];
2489 }
2490
/* Return the tgid recorded for @pid, or 0 when none can be looked up. */
int trace_find_tgid(int pid)
{
        int *entry = trace_find_tgid_ptr(pid);

        if (!entry)
                return 0;

        return *entry;
}
2497
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500         int *ptr;
2501
2502         /* treat recording of idle task as a success */
2503         if (!tsk->pid)
2504                 return 1;
2505
2506         ptr = trace_find_tgid_ptr(tsk->pid);
2507         if (!ptr)
2508                 return 0;
2509
2510         *ptr = tsk->tgid;
2511         return 1;
2512 }
2513
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                 return true;
2518         if (!__this_cpu_read(trace_taskinfo_save))
2519                 return true;
2520         return false;
2521 }
2522
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532         bool done;
2533
2534         if (tracing_record_taskinfo_skip(flags))
2535                 return;
2536
2537         /*
2538          * Record as much task information as possible. If some fail, continue
2539          * to try to record the others.
2540          */
2541         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544         /* If recording any information failed, retry again soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                           struct task_struct *next, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576         /* If recording any information failed, retry again soon. */
2577         if (!done)
2578                 return;
2579
2580         __this_cpu_write(trace_taskinfo_save, false);
2581 }
2582
/* Helpers to record a specific task information */

/* Record only the comm of @task (TRACE_RECORD_CMDLINE). */
void tracing_record_cmdline(struct task_struct *task)
{
        tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2588
/* Record only the tgid of @task (TRACE_RECORD_TGID). */
void tracing_record_tgid(struct task_struct *task)
{
        tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2593
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601         return trace_seq_has_overflowed(s) ?
2602                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605
2606 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2607 {
2608         unsigned int trace_flags = irqs_status;
2609         unsigned int pc;
2610
2611         pc = preempt_count();
2612
2613         if (pc & NMI_MASK)
2614                 trace_flags |= TRACE_FLAG_NMI;
2615         if (pc & HARDIRQ_MASK)
2616                 trace_flags |= TRACE_FLAG_HARDIRQ;
2617         if (in_serving_softirq())
2618                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2619
2620         if (tif_need_resched())
2621                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2622         if (test_preempt_need_resched())
2623                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2624         return (trace_flags << 16) | (pc & 0xff);
2625 }
2626
/*
 * Out-of-line wrapper that forwards all of its arguments unchanged to
 * __trace_buffer_lock_reserve() and returns its result.
 */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
                          int type,
                          unsigned long len,
                          unsigned int trace_ctx)
{
        return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2635
/* Per-CPU temporary event page set up by trace_buffered_event_enable(). */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU count: non-zero while the temporary event must not be taken. */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of trace_buffered_event_enable() calls not yet disabled. */
static int trace_buffered_event_ref;
2639
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. The discard of the ring buffer
2646  * is not as fast as committing, and is much slower than copying
2647  * a commit.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656         struct ring_buffer_event *event;
2657         struct page *page;
2658         int cpu;
2659
2660         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662         if (trace_buffered_event_ref++)
2663                 return;
2664
2665         for_each_tracing_cpu(cpu) {
2666                 page = alloc_pages_node(cpu_to_node(cpu),
2667                                         GFP_KERNEL | __GFP_NORETRY, 0);
2668                 if (!page)
2669                         goto failed;
2670
2671                 event = page_address(page);
2672                 memset(event, 0, sizeof(*event));
2673
2674                 per_cpu(trace_buffered_event, cpu) = event;
2675
2676                 preempt_disable();
2677                 if (cpu == smp_processor_id() &&
2678                     __this_cpu_read(trace_buffered_event) !=
2679                     per_cpu(trace_buffered_event, cpu))
2680                         WARN_ON_ONCE(1);
2681                 preempt_enable();
2682         }
2683
2684         return;
2685  failed:
2686         trace_buffered_event_disable();
2687 }
2688
/*
 * Cross-CPU callback used by trace_buffered_event_disable(): drops this
 * CPU's trace_buffered_event_cnt again. @data is unused.
 */
static void enable_trace_buffered_event(void *data)
{
        /* Probably not needed, but do it anyway */
        smp_rmb();
        this_cpu_dec(trace_buffered_event_cnt);
}
2695
/*
 * Cross-CPU callback used by trace_buffered_event_disable(): raises this
 * CPU's trace_buffered_event_cnt. @data is unused.
 */
static void disable_trace_buffered_event(void *data)
{
        this_cpu_inc(trace_buffered_event_cnt);
}
2700
2701 /**
2702  * trace_buffered_event_disable - disable buffering events
2703  *
2704  * When a filter is removed, it is faster to not use the buffered
2705  * events, and to commit directly into the ring buffer. Free up
2706  * the temp buffers when there are no more users. This requires
2707  * special synchronization with current events.
2708  */
2709 void trace_buffered_event_disable(void)
2710 {
2711         int cpu;
2712
2713         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2714
2715         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2716                 return;
2717
2718         if (--trace_buffered_event_ref)
2719                 return;
2720
2721         preempt_disable();
2722         /* For each CPU, set the buffer as used. */
2723         smp_call_function_many(tracing_buffer_mask,
2724                                disable_trace_buffered_event, NULL, 1);
2725         preempt_enable();
2726
2727         /* Wait for all current users to finish */
2728         synchronize_rcu();
2729
2730         for_each_tracing_cpu(cpu) {
2731                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2732                 per_cpu(trace_buffered_event, cpu) = NULL;
2733         }
2734         /*
2735          * Make sure trace_buffered_event is NULL before clearing
2736          * trace_buffered_event_cnt.
2737          */
2738         smp_wmb();
2739
2740         preempt_disable();
2741         /* Do the work on each cpu */
2742         smp_call_function_many(tracing_buffer_mask,
2743                                enable_trace_buffered_event, NULL, 1);
2744         preempt_enable();
2745 }
2746
/*
 * NOTE(review): appears to be the fallback ring buffer used by
 * trace_event_buffer_lock_reserve() when tracing is off but triggers still
 * need to look at the event data - confirm against the full function.
 */
static struct trace_buffer *temp_buffer;
2748
2749 struct ring_buffer_event *
2750 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2751                           struct trace_event_file *trace_file,
2752                           int type, unsigned long len,
2753                           unsigned int trace_ctx)
2754 {
2755         struct ring_buffer_event *entry;
2756         struct trace_array *tr = trace_file->tr;
2757         int val;
2758
2759         *current_rb = tr->array_buffer.buffer;
2760
2761         if (!tr->no_filter_buffering_ref &&
2762             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2763             (entry = this_cpu_read(trace_buffered_event))) {
2764                 /*
2765                  * Filtering is on, so try to use the per cpu buffer first.
2766                  * This buffer will simulate a ring_buffer_event,
2767                  * where the type_len is zero and the array[0] will
2768                  * hold the full length.
2769                  * (see include/linux/ring-buffer.h for details on
2770                  *  how the ring_buffer_event is structured).
2771                  *
2772                  * Using a temp buffer during filtering and copying it
2773                  * on a matched filter is quicker than writing directly
2774                  * into the ring buffer and then discarding it when
2775                  * it doesn't match. That is because the discard
2776                  * requires several atomic operations to get right.
2777                  * Copying on match and doing nothing on a failed match
2778                  * is still quicker than no copy on match, but having
2779                  * to discard out of the ring buffer on a failed match.
2780                  */
2781                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                 /*
2786                  * Preemption is disabled, but interrupts and NMIs
2787                  * can still come in now. If that happens after
2788                  * the above increment, then it will have to go
2789                  * back to the old method of allocating the event
2790                  * on the ring buffer, and if the filter fails, it
2791                  * will have to call ring_buffer_discard_commit()
2792                  * to remove it.
2793                  *
2794                  * Need to also check the unlikely case that the
2795                  * length is bigger than the temp buffer size.
2796                  * If that happens, then the reserve is pretty much
2797                  * guaranteed to fail, as the ring buffer currently
2798                  * only allows events less than a page. But that may
2799                  * change in the future, so let the ring buffer reserve
2800                  * handle the failure in that case.
2801                  */
2802                 if (val == 1 && likely(len <= max_len)) {
2803                         trace_event_setup(entry, type, trace_ctx);
2804                         entry->array[0] = len;
2805                         return entry;
2806                 }
2807                 this_cpu_dec(trace_buffered_event_cnt);
2808         }
2809
2810         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2811                                             trace_ctx);
2812         /*
2813          * If tracing is off, but we have triggers enabled
2814          * we still need to look at the event data. Use the temp_buffer
2815          * to store the trace event for the trigger to use. It's recursive
2816          * safe and will not be recorded anywhere.
2817          */
2818         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2819                 *current_rb = temp_buffer;
2820                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                                     trace_ctx);
2822         }
2823         return entry;
2824 }
2825 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2826
2827 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2828 static DEFINE_MUTEX(tracepoint_printk_mutex);
2829
2830 static void output_printk(struct trace_event_buffer *fbuffer)
2831 {
2832         struct trace_event_call *event_call;
2833         struct trace_event_file *file;
2834         struct trace_event *event;
2835         unsigned long flags;
2836         struct trace_iterator *iter = tracepoint_print_iter;
2837
2838         /* We should never get here if iter is NULL */
2839         if (WARN_ON_ONCE(!iter))
2840                 return;
2841
2842         event_call = fbuffer->trace_file->event_call;
2843         if (!event_call || !event_call->event.funcs ||
2844             !event_call->event.funcs->trace)
2845                 return;
2846
2847         file = fbuffer->trace_file;
2848         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2849             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2850              !filter_match_preds(file->filter, fbuffer->entry)))
2851                 return;
2852
2853         event = &fbuffer->trace_file->event_call->event;
2854
2855         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2856         trace_seq_init(&iter->seq);
2857         iter->ent = fbuffer->entry;
2858         event_call->event.funcs->trace(iter, 0, event);
2859         trace_seq_putc(&iter->seq, 0);
2860         printk("%s", iter->seq.buffer);
2861
2862         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2863 }
2864
2865 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2866                              void *buffer, size_t *lenp,
2867                              loff_t *ppos)
2868 {
2869         int save_tracepoint_printk;
2870         int ret;
2871
2872         mutex_lock(&tracepoint_printk_mutex);
2873         save_tracepoint_printk = tracepoint_printk;
2874
2875         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2876
2877         /*
2878          * This will force exiting early, as tracepoint_printk
2879          * is always zero when tracepoint_printk_iter is not allocated
2880          */
2881         if (!tracepoint_print_iter)
2882                 tracepoint_printk = 0;
2883
2884         if (save_tracepoint_printk == tracepoint_printk)
2885                 goto out;
2886
2887         if (tracepoint_printk)
2888                 static_key_enable(&tracepoint_printk_key.key);
2889         else
2890                 static_key_disable(&tracepoint_printk_key.key);
2891
2892  out:
2893         mutex_unlock(&tracepoint_printk_mutex);
2894
2895         return ret;
2896 }
2897
2898 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2899 {
2900         if (static_key_false(&tracepoint_printk_key.key))
2901                 output_printk(fbuffer);
2902
2903         if (static_branch_unlikely(&trace_event_exports_enabled))
2904                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2905         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2906                                     fbuffer->event, fbuffer->entry,
2907                                     fbuffer->trace_ctx, fbuffer->regs);
2908 }
2909 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2910
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event to @buffer, then record the kernel and user stack
 * traces through ftrace_trace_stack() and ftrace_trace_userstack().
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	int skip;

	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	if (regs)
		skip = 0;
	else
		skip = STACK_SKIP;

	ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2937
/*
 * Commit @event to @buffer like trace_buffer_unlock_commit_regs() does,
 * but without dumping any stack.
 */
void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
					struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2947
2948 void
2949 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2950                parent_ip, unsigned int trace_ctx)
2951 {
2952         struct trace_event_call *call = &event_function;
2953         struct trace_buffer *buffer = tr->array_buffer.buffer;
2954         struct ring_buffer_event *event;
2955         struct ftrace_entry *entry;
2956
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2958                                             trace_ctx);
2959         if (!event)
2960                 return;
2961         entry   = ring_buffer_event_data(event);
2962         entry->ip                       = ip;
2963         entry->parent_ip                = parent_ip;
2964
2965         if (!call_filter_check_discard(call, entry, buffer, event)) {
2966                 if (static_branch_unlikely(&trace_function_exports_enabled))
2967                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2968                 __buffer_unlock_commit(buffer, event);
2969         }
2970 }
2971
2972 #ifdef CONFIG_STACKTRACE
2973
2974 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2975 #define FTRACE_KSTACK_NESTING   4
2976
2977 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2978
2979 struct ftrace_stack {
2980         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2981 };
2982
2983
2984 struct ftrace_stacks {
2985         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2986 };
2987
2988 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2989 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2990
2991 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2992                                  unsigned int trace_ctx,
2993                                  int skip, struct pt_regs *regs)
2994 {
2995         struct trace_event_call *call = &event_kernel_stack;
2996         struct ring_buffer_event *event;
2997         unsigned int size, nr_entries;
2998         struct ftrace_stack *fstack;
2999         struct stack_entry *entry;
3000         int stackidx;
3001
3002         /*
3003          * Add one, for this function and the call to save_stack_trace()
3004          * If regs is set, then these functions will not be in the way.
3005          */
3006 #ifndef CONFIG_UNWINDER_ORC
3007         if (!regs)
3008                 skip++;
3009 #endif
3010
3011         preempt_disable_notrace();
3012
3013         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3014
3015         /* This should never happen. If it does, yell once and skip */
3016         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3017                 goto out;
3018
3019         /*
3020          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3021          * interrupt will either see the value pre increment or post
3022          * increment. If the interrupt happens pre increment it will have
3023          * restored the counter when it returns.  We just need a barrier to
3024          * keep gcc from moving things around.
3025          */
3026         barrier();
3027
3028         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3029         size = ARRAY_SIZE(fstack->calls);
3030
3031         if (regs) {
3032                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3033                                                    size, skip);
3034         } else {
3035                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3036         }
3037
3038         size = nr_entries * sizeof(unsigned long);
3039         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3040                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3041                                     trace_ctx);
3042         if (!event)
3043                 goto out;
3044         entry = ring_buffer_event_data(event);
3045
3046         memcpy(&entry->caller, fstack->calls, size);
3047         entry->size = nr_entries;
3048
3049         if (!call_filter_check_discard(call, entry, buffer, event))
3050                 __buffer_unlock_commit(buffer, event);
3051
3052  out:
3053         /* Again, don't let gcc optimize things here */
3054         barrier();
3055         __this_cpu_dec(ftrace_stack_reserve);
3056         preempt_enable_notrace();
3057
3058 }
3059
3060 static inline void ftrace_trace_stack(struct trace_array *tr,
3061                                       struct trace_buffer *buffer,
3062                                       unsigned int trace_ctx,
3063                                       int skip, struct pt_regs *regs)
3064 {
3065         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3066                 return;
3067
3068         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3069 }
3070
3071 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3072                    int skip)
3073 {
3074         struct trace_buffer *buffer = tr->array_buffer.buffer;
3075
3076         if (rcu_is_watching()) {
3077                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3078                 return;
3079         }
3080
3081         /*
3082          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3083          * but if the above rcu_is_watching() failed, then the NMI
3084          * triggered someplace critical, and rcu_irq_enter() should
3085          * not be called from NMI.
3086          */
3087         if (unlikely(in_nmi()))
3088                 return;
3089
3090         rcu_irq_enter_irqson();
3091         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3092         rcu_irq_exit_irqson();
3093 }
3094
3095 /**
3096  * trace_dump_stack - record a stack back trace in the trace buffer
3097  * @skip: Number of functions to skip (helper handlers)
3098  */
3099 void trace_dump_stack(int skip)
3100 {
3101         if (tracing_disabled || tracing_selftest_running)
3102                 return;
3103
3104 #ifndef CONFIG_UNWINDER_ORC
3105         /* Skip 1 to skip this function. */
3106         skip++;
3107 #endif
3108         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3109                              tracing_gen_ctx(), skip, NULL);
3110 }
3111 EXPORT_SYMBOL_GPL(trace_dump_stack);
3112
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Non-zero while this CPU is already recording a user stack trace. */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the user space stack of current as a TRACE_USER_STACK event on
 * @buffer, provided @tr has TRACE_ITER_USERSTACKTRACE set.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	preempt_disable();
	if (!__this_cpu_read(user_stack_count)) {
		__this_cpu_inc(user_stack_count);

		event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
						    sizeof(*entry), trace_ctx);
		if (event) {
			entry = ring_buffer_event_data(event);

			entry->tgid = current->tgid;
			memset(&entry->caller, 0, sizeof(entry->caller));

			stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
			if (!call_filter_check_discard(call, entry, buffer, event))
				__buffer_unlock_commit(buffer, event);
		}

		__this_cpu_dec(user_stack_count);
	}
	preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* Without user stack trace support there is nothing to record. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3169
3170 #endif /* CONFIG_STACKTRACE */
3171
3172 static inline void
3173 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3174                           unsigned long long delta)
3175 {
3176         entry->bottom_delta_ts = delta & U32_MAX;
3177         entry->top_delta_ts = (delta >> 32);
3178 }
3179
3180 void trace_last_func_repeats(struct trace_array *tr,
3181                              struct trace_func_repeats *last_info,
3182                              unsigned int trace_ctx)
3183 {
3184         struct trace_buffer *buffer = tr->array_buffer.buffer;
3185         struct func_repeats_entry *entry;
3186         struct ring_buffer_event *event;
3187         u64 delta;
3188
3189         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3190                                             sizeof(*entry), trace_ctx);
3191         if (!event)
3192                 return;
3193
3194         delta = ring_buffer_event_time_stamp(buffer, event) -
3195                 last_info->ts_last_call;
3196
3197         entry = ring_buffer_event_data(event);
3198         entry->ip = last_info->ip;
3199         entry->parent_ip = last_info->parent_ip;
3200         entry->count = last_info->count;
3201         func_repeats_set_delta_ts(entry, delta);
3202
3203         __buffer_unlock_commit(buffer, event);
3204 }
3205
3206 /* created for use with alloc_percpu */
3207 struct trace_buffer_struct {
3208         int nesting;
3209         char buffer[4][TRACE_BUF_SIZE];
3210 };
3211
3212 static struct trace_buffer_struct *trace_percpu_buffer;
3213
3214 /*
3215  * This allows for lockless recording.  If we're nested too deeply, then
3216  * this returns NULL.
3217  */
3218 static char *get_trace_buf(void)
3219 {
3220         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3221
3222         if (!buffer || buffer->nesting >= 4)
3223                 return NULL;
3224
3225         buffer->nesting++;
3226
3227         /* Interrupts must see nesting incremented before we use the buffer */
3228         barrier();
3229         return &buffer->buffer[buffer->nesting - 1][0];
3230 }
3231
3232 static void put_trace_buf(void)
3233 {
3234         /* Don't let the decrement of nesting leak before this */
3235         barrier();
3236         this_cpu_dec(trace_percpu_buffer->nesting);
3237 }
3238
3239 static int alloc_percpu_trace_buffer(void)
3240 {
3241         struct trace_buffer_struct *buffers;
3242
3243         if (trace_percpu_buffer)
3244                 return 0;
3245
3246         buffers = alloc_percpu(struct trace_buffer_struct);
3247         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3248                 return -ENOMEM;
3249
3250         trace_percpu_buffer = buffers;
3251         return 0;
3252 }
3253
3254 static int buffers_allocated;
3255
3256 void trace_printk_init_buffers(void)
3257 {
3258         if (buffers_allocated)
3259                 return;
3260
3261         if (alloc_percpu_trace_buffer())
3262                 return;
3263
3264         /* trace_printk() is for debug use only. Don't use it in production. */
3265
3266         pr_warn("\n");
3267         pr_warn("**********************************************************\n");
3268         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3269         pr_warn("**                                                      **\n");
3270         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3271         pr_warn("**                                                      **\n");
3272         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3273         pr_warn("** unsafe for production use.                           **\n");
3274         pr_warn("**                                                      **\n");
3275         pr_warn("** If you see this message and you are not debugging    **\n");
3276         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3277         pr_warn("**                                                      **\n");
3278         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3279         pr_warn("**********************************************************\n");
3280
3281         /* Expand the buffers to set size */
3282         tracing_update_buffers();
3283
3284         buffers_allocated = 1;
3285
3286         /*
3287          * trace_printk_init_buffers() can be called by modules.
3288          * If that happens, then we need to start cmdline recording
3289          * directly here. If the global_trace.buffer is already
3290          * allocated here, then this was called by module code.
3291          */
3292         if (global_trace.array_buffer.buffer)
3293                 tracing_start_cmdline_record();
3294 }
3295 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3296
3297 void trace_printk_start_comm(void)
3298 {
3299         /* Start tracing comms if trace printk is set */
3300         if (!buffers_allocated)
3301                 return;
3302         tracing_start_cmdline_record();
3303 }
3304
3305 static void trace_printk_start_stop_comm(int enabled)
3306 {
3307         if (!buffers_allocated)
3308                 return;
3309
3310         if (enabled)
3311                 tracing_start_cmdline_record();
3312         else
3313                 tracing_stop_cmdline_record();
3314 }
3315
3316 /**
3317  * trace_vbprintk - write binary msg to tracing buffer
3318  * @ip:    The address of the caller
3319  * @fmt:   The string format to write to the buffer
3320  * @args:  Arguments for @fmt
3321  */
3322 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3323 {
3324         struct trace_event_call *call = &event_bprint;
3325         struct ring_buffer_event *event;
3326         struct trace_buffer *buffer;
3327         struct trace_array *tr = &global_trace;
3328         struct bprint_entry *entry;
3329         unsigned int trace_ctx;
3330         char *tbuffer;
3331         int len = 0, size;
3332
3333         if (unlikely(tracing_selftest_running || tracing_disabled))
3334                 return 0;
3335
3336         /* Don't pollute graph traces with trace_vprintk internals */
3337         pause_graph_tracing();
3338
3339         trace_ctx = tracing_gen_ctx();
3340         preempt_disable_notrace();
3341
3342         tbuffer = get_trace_buf();
3343         if (!tbuffer) {
3344                 len = 0;
3345                 goto out_nobuffer;
3346         }
3347
3348         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3349
3350         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3351                 goto out_put;
3352
3353         size = sizeof(*entry) + sizeof(u32) * len;
3354         buffer = tr->array_buffer.buffer;
3355         ring_buffer_nest_start(buffer);
3356         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3357                                             trace_ctx);
3358         if (!event)
3359                 goto out;
3360         entry = ring_buffer_event_data(event);
3361         entry->ip                       = ip;
3362         entry->fmt                      = fmt;
3363
3364         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3365         if (!call_filter_check_discard(call, entry, buffer, event)) {
3366                 __buffer_unlock_commit(buffer, event);
3367                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3368         }
3369
3370 out:
3371         ring_buffer_nest_end(buffer);
3372 out_put:
3373         put_trace_buf();
3374
3375 out_nobuffer:
3376         preempt_enable_notrace();
3377         unpause_graph_tracing();
3378
3379         return len;
3380 }
3381 EXPORT_SYMBOL_GPL(trace_vbprintk);
3382
3383 __printf(3, 0)
3384 static int
3385 __trace_array_vprintk(struct trace_buffer *buffer,
3386                       unsigned long ip, const char *fmt, va_list args)
3387 {
3388         struct trace_event_call *call = &event_print;
3389         struct ring_buffer_event *event;
3390         int len = 0, size;
3391         struct print_entry *entry;
3392         unsigned int trace_ctx;
3393         char *tbuffer;
3394
3395         if (tracing_disabled || tracing_selftest_running)
3396                 return 0;
3397
3398         /* Don't pollute graph traces with trace_vprintk internals */
3399         pause_graph_tracing();
3400
3401         trace_ctx = tracing_gen_ctx();
3402         preempt_disable_notrace();
3403
3404
3405         tbuffer = get_trace_buf();
3406         if (!tbuffer) {
3407                 len = 0;
3408                 goto out_nobuffer;
3409         }
3410
3411         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412
3413         size = sizeof(*entry) + len + 1;
3414         ring_buffer_nest_start(buffer);
3415         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416                                             trace_ctx);
3417         if (!event)
3418                 goto out;
3419         entry = ring_buffer_event_data(event);
3420         entry->ip = ip;
3421
3422         memcpy(&entry->buf, tbuffer, len + 1);
3423         if (!call_filter_check_discard(call, entry, buffer, event)) {
3424                 __buffer_unlock_commit(buffer, event);
3425                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3426         }
3427
3428 out:
3429         ring_buffer_nest_end(buffer);
3430         put_trace_buf();
3431
3432 out_nobuffer:
3433         preempt_enable_notrace();
3434         unpause_graph_tracing();
3435
3436         return len;
3437 }
3438
3439 __printf(3, 0)
3440 int trace_array_vprintk(struct trace_array *tr,
3441                         unsigned long ip, const char *fmt, va_list args)
3442 {
3443         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3444 }
3445
3446 /**
3447  * trace_array_printk - Print a message to a specific instance
3448  * @tr: The instance trace_array descriptor
3449  * @ip: The instruction pointer that this is called from.
3450  * @fmt: The format to print (printf format)
3451  *
3452  * If a subsystem sets up its own instance, they have the right to
3453  * printk strings into their tracing instance buffer using this
3454  * function. Note, this function will not write into the top level
3455  * buffer (use trace_printk() for that), as writing into the top level
3456  * buffer should only have events that can be individually disabled.
3457  * trace_printk() is only used for debugging a kernel, and should not
3458  * be ever incorporated in normal use.
3459  *
3460  * trace_array_printk() can be used, as it will not add noise to the
3461  * top level tracing buffer.
3462  *
3463  * Note, trace_array_init_printk() must be called on @tr before this
3464  * can be used.
3465  */
3466 __printf(3, 0)
3467 int trace_array_printk(struct trace_array *tr,
3468                        unsigned long ip, const char *fmt, ...)
3469 {
3470         int ret;
3471         va_list ap;
3472
3473         if (!tr)
3474                 return -ENOENT;
3475
3476         /* This is only allowed for created instances */
3477         if (tr == &global_trace)
3478                 return 0;
3479
3480         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3481                 return 0;
3482
3483         va_start(ap, fmt);
3484         ret = trace_array_vprintk(tr, ip, fmt, ap);
3485         va_end(ap);
3486         return ret;
3487 }
3488 EXPORT_SYMBOL_GPL(trace_array_printk);
3489
3490 /**
3491  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3492  * @tr: The trace array to initialize the buffers for
3493  *
3494  * As trace_array_printk() only writes into instances, they are OK to
3495  * have in the kernel (unlike trace_printk()). This needs to be called
3496  * before trace_array_printk() can be used on a trace_array.
3497  */
3498 int trace_array_init_printk(struct trace_array *tr)
3499 {
3500         if (!tr)
3501                 return -ENOENT;
3502
3503         /* This is only allowed for created instances */
3504         if (tr == &global_trace)
3505                 return -EINVAL;
3506
3507         return alloc_percpu_trace_buffer();
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3510
3511 __printf(3, 4)
3512 int trace_array_printk_buf(struct trace_buffer *buffer,
3513                            unsigned long ip, const char *fmt, ...)
3514 {
3515         int ret;
3516         va_list ap;
3517
3518         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3519                 return 0;
3520
3521         va_start(ap, fmt);
3522         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3523         va_end(ap);
3524         return ret;
3525 }
3526
3527 __printf(2, 0)
3528 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3529 {
3530         return trace_array_vprintk(&global_trace, ip, fmt, args);
3531 }
3532 EXPORT_SYMBOL_GPL(trace_vprintk);
3533
3534 static void trace_iterator_increment(struct trace_iterator *iter)
3535 {
3536         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3537
3538         iter->idx++;
3539         if (buf_iter)
3540                 ring_buffer_iter_advance(buf_iter);
3541 }
3542
3543 static struct trace_entry *
3544 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3545                 unsigned long *lost_events)
3546 {
3547         struct ring_buffer_event *event;
3548         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3549
3550         if (buf_iter) {
3551                 event = ring_buffer_iter_peek(buf_iter, ts);
3552                 if (lost_events)
3553                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3554                                 (unsigned long)-1 : 0;
3555         } else {
3556                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3557                                          lost_events);
3558         }
3559
3560         if (event) {
3561                 iter->ent_size = ring_buffer_event_length(event);
3562                 return ring_buffer_event_data(event);
3563         }
3564         iter->ent_size = 0;
3565         return NULL;
3566 }
3567
3568 static struct trace_entry *
3569 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3570                   unsigned long *missing_events, u64 *ent_ts)
3571 {
3572         struct trace_buffer *buffer = iter->array_buffer->buffer;
3573         struct trace_entry *ent, *next = NULL;
3574         unsigned long lost_events = 0, next_lost = 0;
3575         int cpu_file = iter->cpu_file;
3576         u64 next_ts = 0, ts;
3577         int next_cpu = -1;
3578         int next_size = 0;
3579         int cpu;
3580
3581         /*
3582          * If we are in a per_cpu trace file, don't bother by iterating over
3583          * all cpu and peek directly.
3584          */
3585         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3586                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3587                         return NULL;
3588                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3589                 if (ent_cpu)
3590                         *ent_cpu = cpu_file;
3591
3592                 return ent;
3593         }
3594
3595         for_each_tracing_cpu(cpu) {
3596
3597                 if (ring_buffer_empty_cpu(buffer, cpu))
3598                         continue;
3599
3600                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3601
3602                 /*
3603                  * Pick the entry with the smallest timestamp:
3604                  */
3605                 if (ent && (!next || ts < next_ts)) {
3606                         next = ent;
3607                         next_cpu = cpu;
3608                         next_ts = ts;
3609                         next_lost = lost_events;
3610                         next_size = iter->ent_size;
3611                 }
3612         }
3613
3614         iter->ent_size = next_size;
3615
3616         if (ent_cpu)
3617                 *ent_cpu = next_cpu;
3618
3619         if (ent_ts)
3620                 *ent_ts = next_ts;
3621
3622         if (missing_events)
3623                 *missing_events = next_lost;
3624
3625         return next;
3626 }
3627
3628 #define STATIC_FMT_BUF_SIZE     128
3629 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3630
3631 static char *trace_iter_expand_format(struct trace_iterator *iter)
3632 {
3633         char *tmp;
3634
3635         /*
3636          * iter->tr is NULL when used with tp_printk, which makes
3637          * this get called where it is not safe to call krealloc().
3638          */
3639         if (!iter->tr || iter->fmt == static_fmt_buf)
3640                 return NULL;
3641
3642         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3643                        GFP_KERNEL);
3644         if (tmp) {
3645                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3646                 iter->fmt = tmp;
3647         }
3648
3649         return tmp;
3650 }
3651
3652 /* Returns true if the string is safe to dereference from an event */
3653 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3654 {
3655         unsigned long addr = (unsigned long)str;
3656         struct trace_event *trace_event;
3657         struct trace_event_call *event;
3658
3659         /* OK if part of the event data */
3660         if ((addr >= (unsigned long)iter->ent) &&
3661             (addr < (unsigned long)iter->ent + iter->ent_size))
3662                 return true;
3663
3664         /* OK if part of the temp seq buffer */
3665         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3666             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3667                 return true;
3668
3669         /* Core rodata can not be freed */
3670         if (is_kernel_rodata(addr))
3671                 return true;
3672
3673         if (trace_is_tracepoint_string(str))
3674                 return true;
3675
3676         /*
3677          * Now this could be a module event, referencing core module
3678          * data, which is OK.
3679          */
3680         if (!iter->ent)
3681                 return false;
3682
3683         trace_event = ftrace_find_event(iter->ent->type);
3684         if (!trace_event)
3685                 return false;
3686
3687         event = container_of(trace_event, struct trace_event_call, event);
3688         if (!event->mod)
3689                 return false;
3690
3691         /* Would rather have rodata, but this will suffice */
3692         if (within_module_core(addr, event->mod))
3693                 return true;
3694
3695         return false;
3696 }
3697
3698 static const char *show_buffer(struct trace_seq *s)
3699 {
3700         struct seq_buf *seq = &s->seq;
3701
3702         seq_buf_terminate(seq);
3703
3704         return seq->buffer;
3705 }
3706
/*
 * Incremented by test_can_verify() when its va_list probe fails; while
 * set, trace_check_vprintf() prints events without checking %s pointers.
 */
3707 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3708
/*
 * Probe how this architecture hands a va_list to vsnprintf().
 *
 * One "%d" is formatted from the argument list and then the next int is
 * pulled from that same list; the value pulled is returned.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char scratch[16];
	va_list args;
	int probe;

	/*
	 * The verifier depends on vsnprintf() modifying the va_list
	 * passed to it, which happens when it is sent as a reference.
	 * Some architectures (like x86_32) pass it by value, which means
	 * that vsnprintf() does not modify the caller's va_list, and the
	 * verifier would then need to understand every conversion that
	 * vsnprintf() can consume. If it is passed by value, then the
	 * verifier is disabled.
	 */
	va_start(args, fmt);
	vsnprintf(scratch, sizeof(scratch), "%d", args);
	probe = va_arg(args, int);
	va_end(args);

	return probe;
}
3731
3732 static void test_can_verify(void)
3733 {
3734         if (!test_can_verify_check("%d %d", 0, 1)) {
3735                 pr_info("trace event string verifier disabled\n");
3736                 static_branch_inc(&trace_no_verify);
3737         }
3738 }
3739
3740 /**
3741  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3742  * @iter: The iterator that holds the seq buffer and the event being printed
3743  * @fmt: The format used to print the event
3744  * @ap: The va_list holding the data to print from @fmt.
3745  *
3746  * This writes the data into the @iter->seq buffer using the data from
3747  * @fmt and @ap. If the format has a %s, then the source of the string
3748  * is examined to make sure it is safe to print, otherwise it will
3749  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3750  * pointer.
3751  */
3752 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3753                          va_list ap)
3754 {
3755         const char *p = fmt;
3756         const char *str;
3757         int i, j;
3758
3759         if (WARN_ON_ONCE(!fmt))
3760                 return;
3761
3762         if (static_branch_unlikely(&trace_no_verify))
3763                 goto print;
3764
3765         /* Don't bother checking when doing a ftrace_dump() */
3766         if (iter->fmt == static_fmt_buf)
3767                 goto print;
3768
3769         while (*p) {
3770                 bool star = false;
3771                 int len = 0;
3772
3773                 j = 0;
3774
3775                 /* We only care about %s and variants */
3776                 for (i = 0; p[i]; i++) {
3777                         if (i + 1 >= iter->fmt_size) {
3778                                 /*
3779                                  * If we can't expand the copy buffer,
3780                                  * just print it.
3781                                  */
3782                                 if (!trace_iter_expand_format(iter))
3783                                         goto print;
3784                         }
3785
3786                         if (p[i] == '\\' && p[i+1]) {
3787                                 i++;
3788                                 continue;
3789                         }
3790                         if (p[i] == '%') {
3791                                 /* Need to test cases like %08.*s */
3792                                 for (j = 1; p[i+j]; j++) {
3793                                         if (isdigit(p[i+j]) ||
3794                                             p[i+j] == '.')
3795                                                 continue;
3796                                         if (p[i+j] == '*') {
3797                                                 star = true;
3798                                                 continue;
3799                                         }
3800                                         break;
3801                                 }
3802                                 if (p[i+j] == 's')
3803                                         break;
3804                                 star = false;
3805                         }
3806                         j = 0;
3807                 }
3808                 /* If no %s found then just print normally */
3809                 if (!p[i])
3810                         break;
3811
3812                 /* Copy up to the %s, and print that */
3813                 strncpy(iter->fmt, p, i);
3814                 iter->fmt[i] = '\0';
3815                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3816
3817                 if (star)
3818                         len = va_arg(ap, int);
3819
3820                 /* The ap now points to the string data of the %s */
3821                 str = va_arg(ap, const char *);
3822
3823                 /*
3824                  * If you hit this warning, it is likely that the
3825                  * trace event in question used %s on a string that
3826                  * was saved at the time of the event, but may not be
3827                  * around when the trace is read. Use __string(),
3828                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3829                  * instead. See samples/trace_events/trace-events-sample.h
3830                  * for reference.
3831                  */
3832                 if (WARN_ONCE(!trace_safe_str(iter, str),
3833                               "fmt: '%s' current_buffer: '%s'",
3834                               fmt, show_buffer(&iter->seq))) {
3835                         int ret;
3836
3837                         /* Try to safely read the string */
3838                         if (star) {
3839                                 if (len + 1 > iter->fmt_size)
3840                                         len = iter->fmt_size - 1;
3841                                 if (len < 0)
3842                                         len = 0;
3843                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3844                                 iter->fmt[len] = 0;
3845                                 star = false;
3846                         } else {
3847                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3848                                                                   iter->fmt_size);
3849                         }
3850                         if (ret < 0)
3851                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3852                         else
3853                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3854                                                  str, iter->fmt);
3855                         str = "[UNSAFE-MEMORY]";
3856                         strcpy(iter->fmt, "%s");
3857                 } else {
3858                         strncpy(iter->fmt, p + i, j + 1);
3859                         iter->fmt[j+1] = '\0';
3860                 }
3861                 if (star)
3862                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3863                 else
3864                         trace_seq_printf(&iter->seq, iter->fmt, str);
3865
3866                 p += i + j + 1;
3867         }
3868  print:
3869         if (*p)
3870                 trace_seq_vprintf(&iter->seq, p, ap);
3871 }
3872
3873 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3874 {
3875         const char *p, *new_fmt;
3876         char *q;
3877
3878         if (WARN_ON_ONCE(!fmt))
3879                 return fmt;
3880
3881         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3882                 return fmt;
3883
3884         p = fmt;
3885         new_fmt = q = iter->fmt;
3886         while (*p) {
3887                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3888                         if (!trace_iter_expand_format(iter))
3889                                 return fmt;
3890
3891                         q += iter->fmt - new_fmt;
3892                         new_fmt = iter->fmt;
3893                 }
3894
3895                 *q++ = *p++;
3896
3897                 /* Replace %p with %px */
3898                 if (p[-1] == '%') {
3899                         if (p[0] == '%') {
3900                                 *q++ = *p++;
3901                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3902                                 *q++ = *p++;
3903                                 *q++ = 'x';
3904                         }
3905                 }
3906         }
3907         *q = '\0';
3908
3909         return new_fmt;
3910 }
3911
3912 #define STATIC_TEMP_BUF_SIZE    128
3913 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3914
3915 /* Find the next real entry, without updating the iterator itself */
3916 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3917                                           int *ent_cpu, u64 *ent_ts)
3918 {
3919         /* __find_next_entry will reset ent_size */
3920         int ent_size = iter->ent_size;
3921         struct trace_entry *entry;
3922
3923         /*
3924          * If called from ftrace_dump(), then the iter->temp buffer
3925          * will be the static_temp_buf and not created from kmalloc.
3926          * If the entry size is greater than the buffer, we can
3927          * not save it. Just return NULL in that case. This is only
3928          * used to add markers when two consecutive events' time
3929          * stamps have a large delta. See trace_print_lat_context()
3930          */
3931         if (iter->temp == static_temp_buf &&
3932             STATIC_TEMP_BUF_SIZE < ent_size)
3933                 return NULL;
3934
3935         /*
3936          * The __find_next_entry() may call peek_next_entry(), which may
3937          * call ring_buffer_peek() that may make the contents of iter->ent
3938          * undefined. Need to copy iter->ent now.
3939          */
3940         if (iter->ent && iter->ent != iter->temp) {
3941                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3942                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3943                         void *temp;
3944                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3945                         if (!temp)
3946                                 return NULL;
3947                         kfree(iter->temp);
3948                         iter->temp = temp;
3949                         iter->temp_size = iter->ent_size;
3950                 }
3951                 memcpy(iter->temp, iter->ent, iter->ent_size);
3952                 iter->ent = iter->temp;
3953         }
3954         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3955         /* Put back the original ent_size */
3956         iter->ent_size = ent_size;
3957
3958         return entry;
3959 }
3960
3961 /* Find the next real entry, and increment the iterator to the next entry */
3962 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3963 {
3964         iter->ent = __find_next_entry(iter, &iter->cpu,
3965                                       &iter->lost_events, &iter->ts);
3966
3967         if (iter->ent)
3968                 trace_iterator_increment(iter);
3969
3970         return iter->ent ? iter : NULL;
3971 }
3972
3973 static void trace_consume(struct trace_iterator *iter)
3974 {
3975         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3976                             &iter->lost_events);
3977 }
3978
3979 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3980 {
3981         struct trace_iterator *iter = m->private;
3982         int i = (int)*pos;
3983         void *ent;
3984
3985         WARN_ON_ONCE(iter->leftover);
3986
3987         (*pos)++;
3988
3989         /* can't go backwards */
3990         if (iter->idx > i)
3991                 return NULL;
3992
3993         if (iter->idx < 0)
3994                 ent = trace_find_next_entry_inc(iter);
3995         else
3996                 ent = iter;
3997
3998         while (ent && iter->idx < i)
3999                 ent = trace_find_next_entry_inc(iter);
4000
4001         iter->pos = *pos;
4002
4003         return ent;
4004 }
4005
4006 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4007 {
4008         struct ring_buffer_iter *buf_iter;
4009         unsigned long entries = 0;
4010         u64 ts;
4011
4012         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4013
4014         buf_iter = trace_buffer_iter(iter, cpu);
4015         if (!buf_iter)
4016                 return;
4017
4018         ring_buffer_iter_reset(buf_iter);
4019
4020         /*
4021          * We could have the case with the max latency tracers
4022          * that a reset never took place on a cpu. This is evident
4023          * by the timestamp being before the start of the buffer.
4024          */
4025         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4026                 if (ts >= iter->array_buffer->time_start)
4027                         break;
4028                 entries++;
4029                 ring_buffer_iter_advance(buf_iter);
4030         }
4031
4032         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4033 }
4034
4035 /*
4036  * The current tracer is copied to avoid a global locking
4037  * all around.
4038  */
4039 static void *s_start(struct seq_file *m, loff_t *pos)
4040 {
4041         struct trace_iterator *iter = m->private;
4042         struct trace_array *tr = iter->tr;
4043         int cpu_file = iter->cpu_file;
4044         void *p = NULL;
4045         loff_t l = 0;
4046         int cpu;
4047
4048         /*
4049          * copy the tracer to avoid using a global lock all around.
4050          * iter->trace is a copy of current_trace, the pointer to the
4051          * name may be used instead of a strcmp(), as iter->trace->name
4052          * will point to the same string as current_trace->name.
4053          */
4054         mutex_lock(&trace_types_lock);
4055         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4056                 *iter->trace = *tr->current_trace;
4057         mutex_unlock(&trace_types_lock);
4058
4059 #ifdef CONFIG_TRACER_MAX_TRACE
4060         if (iter->snapshot && iter->trace->use_max_tr)
4061                 return ERR_PTR(-EBUSY);
4062 #endif
4063
4064         if (*pos != iter->pos) {
4065                 iter->ent = NULL;
4066                 iter->cpu = 0;
4067                 iter->idx = -1;
4068
4069                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4070                         for_each_tracing_cpu(cpu)
4071                                 tracing_iter_reset(iter, cpu);
4072                 } else
4073                         tracing_iter_reset(iter, cpu_file);
4074
4075                 iter->leftover = 0;
4076                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4077                         ;
4078
4079         } else {
4080                 /*
4081                  * If we overflowed the seq_file before, then we want
4082                  * to just reuse the trace_seq buffer again.
4083                  */
4084                 if (iter->leftover)
4085                         p = iter;
4086                 else {
4087                         l = *pos - 1;
4088                         p = s_next(m, p, &l);
4089                 }
4090         }
4091
4092         trace_event_read_lock();
4093         trace_access_lock(cpu_file);
4094         return p;
4095 }
4096
4097 static void s_stop(struct seq_file *m, void *p)
4098 {
4099         struct trace_iterator *iter = m->private;
4100
4101 #ifdef CONFIG_TRACER_MAX_TRACE
4102         if (iter->snapshot && iter->trace->use_max_tr)
4103                 return;
4104 #endif
4105
4106         trace_access_unlock(iter->cpu_file);
4107         trace_event_read_unlock();
4108 }
4109
4110 static void
4111 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4112                       unsigned long *entries, int cpu)
4113 {
4114         unsigned long count;
4115
4116         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4117         /*
4118          * If this buffer has skipped entries, then we hold all
4119          * entries for the trace and we need to ignore the
4120          * ones before the time stamp.
4121          */
4122         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4123                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4124                 /* total is the same as the entries */
4125                 *total = count;
4126         } else
4127                 *total = count +
4128                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4129         *entries = count;
4130 }
4131
4132 static void
4133 get_total_entries(struct array_buffer *buf,
4134                   unsigned long *total, unsigned long *entries)
4135 {
4136         unsigned long t, e;
4137         int cpu;
4138
4139         *total = 0;
4140         *entries = 0;
4141
4142         for_each_tracing_cpu(cpu) {
4143                 get_total_entries_cpu(buf, &t, &e, cpu);
4144                 *total += t;
4145                 *entries += e;
4146         }
4147 }
4148
4149 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4150 {
4151         unsigned long total, entries;
4152
4153         if (!tr)
4154                 tr = &global_trace;
4155
4156         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4157
4158         return entries;
4159 }
4160
4161 unsigned long trace_total_entries(struct trace_array *tr)
4162 {
4163         unsigned long total, entries;
4164
4165         if (!tr)
4166                 tr = &global_trace;
4167
4168         get_total_entries(&tr->array_buffer, &total, &entries);
4169
4170         return entries;
4171 }
4172
/* Emit the column legend that precedes latency-format trace output. */
static void print_lat_help_header(struct seq_file *m)
{
	static const char legend[] =
		"#                    _------=> CPU#            \n"
		"#                   / _-----=> irqs-off        \n"
		"#                  | / _----=> need-resched    \n"
		"#                  || / _---=> hardirq/softirq \n"
		"#                  ||| / _--=> preempt-depth   \n"
		"#                  |||| /     delay            \n"
		"#  cmd     pid     ||||| time  |   caller      \n"
		"#     \\   /        |||||  \\    |   /         \n";

	seq_puts(m, legend);
}
4184
/*
 * Print the "entries-in-buffer/entries-written" summary line for @buf,
 * along with the number of online CPUs.
 */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long written, in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
4195
4196 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4197                                    unsigned int flags)
4198 {
4199         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4200
4201         print_event_info(buf, m);
4202
4203         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4204         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4205 }
4206
4207 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4208                                        unsigned int flags)
4209 {
4210         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4211         const char *space = "            ";
4212         int prec = tgid ? 12 : 2;
4213
4214         print_event_info(buf, m);
4215
4216         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4217         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4218         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4219         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4220         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
4221         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4222         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
4223 }
4224
4225 void
4226 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4227 {
4228         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4229         struct array_buffer *buf = iter->array_buffer;
4230         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4231         struct tracer *type = iter->trace;
4232         unsigned long entries;
4233         unsigned long total;
4234         const char *name = "preemption";
4235
4236         name = type->name;
4237
4238         get_total_entries(buf, &total, &entries);
4239
4240         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4241                    name, UTS_RELEASE);
4242         seq_puts(m, "# -----------------------------------"
4243                  "---------------------------------\n");
4244         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4245                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4246                    nsecs_to_usecs(data->saved_latency),
4247                    entries,
4248                    total,
4249                    buf->cpu,
4250 #if defined(CONFIG_PREEMPT_NONE)
4251                    "server",
4252 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4253                    "desktop",
4254 #elif defined(CONFIG_PREEMPT)
4255                    "preempt",
4256 #elif defined(CONFIG_PREEMPT_RT)
4257                    "preempt_rt",
4258 #else
4259                    "unknown",
4260 #endif
4261                    /* These are reserved for later use */
4262                    0, 0, 0, 0);
4263 #ifdef CONFIG_SMP
4264         seq_printf(m, " #P:%d)\n", num_online_cpus());
4265 #else
4266         seq_puts(m, ")\n");
4267 #endif
4268         seq_puts(m, "#    -----------------\n");
4269         seq_printf(m, "#    | task: %.16s-%d "
4270                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4271                    data->comm, data->pid,
4272                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4273                    data->policy, data->rt_priority);
4274         seq_puts(m, "#    -----------------\n");
4275
4276         if (data->critical_start) {
4277                 seq_puts(m, "#  => started at: ");
4278                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4279                 trace_print_seq(m, &iter->seq);
4280                 seq_puts(m, "\n#  => ended at:   ");
4281                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4282                 trace_print_seq(m, &iter->seq);
4283                 seq_puts(m, "\n#\n");
4284         }
4285
4286         seq_puts(m, "#\n");
4287 }
4288
4289 static void test_cpu_buff_start(struct trace_iterator *iter)
4290 {
4291         struct trace_seq *s = &iter->seq;
4292         struct trace_array *tr = iter->tr;
4293
4294         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4295                 return;
4296
4297         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4298                 return;
4299
4300         if (cpumask_available(iter->started) &&
4301             cpumask_test_cpu(iter->cpu, iter->started))
4302                 return;
4303
4304         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4305                 return;
4306
4307         if (cpumask_available(iter->started))
4308                 cpumask_set_cpu(iter->cpu, iter->started);
4309
4310         /* Don't print started cpu buffer for the first entry of the trace */
4311         if (iter->idx > 1)
4312                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4313                                 iter->cpu);
4314 }
4315
4316 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4317 {
4318         struct trace_array *tr = iter->tr;
4319         struct trace_seq *s = &iter->seq;
4320         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4321         struct trace_entry *entry;
4322         struct trace_event *event;
4323
4324         entry = iter->ent;
4325
4326         test_cpu_buff_start(iter);
4327
4328         event = ftrace_find_event(entry->type);
4329
4330         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4332                         trace_print_lat_context(iter);
4333                 else
4334                         trace_print_context(iter);
4335         }
4336
4337         if (trace_seq_has_overflowed(s))
4338                 return TRACE_TYPE_PARTIAL_LINE;
4339
4340         if (event)
4341                 return event->funcs->trace(iter, sym_flags, event);
4342
4343         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4344
4345         return trace_handle_return(s);
4346 }
4347
4348 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4349 {
4350         struct trace_array *tr = iter->tr;
4351         struct trace_seq *s = &iter->seq;
4352         struct trace_entry *entry;
4353         struct trace_event *event;
4354
4355         entry = iter->ent;
4356
4357         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4358                 trace_seq_printf(s, "%d %d %llu ",
4359                                  entry->pid, iter->cpu, iter->ts);
4360
4361         if (trace_seq_has_overflowed(s))
4362                 return TRACE_TYPE_PARTIAL_LINE;
4363
4364         event = ftrace_find_event(entry->type);
4365         if (event)
4366                 return event->funcs->raw(iter, 0, event);
4367
4368         trace_seq_printf(s, "%d ?\n", entry->type);
4369
4370         return trace_handle_return(s);
4371 }
4372
4373 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4374 {
4375         struct trace_array *tr = iter->tr;
4376         struct trace_seq *s = &iter->seq;
4377         unsigned char newline = '\n';
4378         struct trace_entry *entry;
4379         struct trace_event *event;
4380
4381         entry = iter->ent;
4382
4383         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4384                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4385                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4386                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4387                 if (trace_seq_has_overflowed(s))
4388                         return TRACE_TYPE_PARTIAL_LINE;
4389         }
4390
4391         event = ftrace_find_event(entry->type);
4392         if (event) {
4393                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4394                 if (ret != TRACE_TYPE_HANDLED)
4395                         return ret;
4396         }
4397
4398         SEQ_PUT_FIELD(s, newline);
4399
4400         return trace_handle_return(s);
4401 }
4402
4403 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4404 {
4405         struct trace_array *tr = iter->tr;
4406         struct trace_seq *s = &iter->seq;
4407         struct trace_entry *entry;
4408         struct trace_event *event;
4409
4410         entry = iter->ent;
4411
4412         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4413                 SEQ_PUT_FIELD(s, entry->pid);
4414                 SEQ_PUT_FIELD(s, iter->cpu);
4415                 SEQ_PUT_FIELD(s, iter->ts);
4416                 if (trace_seq_has_overflowed(s))
4417                         return TRACE_TYPE_PARTIAL_LINE;
4418         }
4419
4420         event = ftrace_find_event(entry->type);
4421         return event ? event->funcs->binary(iter, 0, event) :
4422                 TRACE_TYPE_HANDLED;
4423 }
4424
4425 int trace_empty(struct trace_iterator *iter)
4426 {
4427         struct ring_buffer_iter *buf_iter;
4428         int cpu;
4429
4430         /* If we are looking at one CPU buffer, only check that one */
4431         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4432                 cpu = iter->cpu_file;
4433                 buf_iter = trace_buffer_iter(iter, cpu);
4434                 if (buf_iter) {
4435                         if (!ring_buffer_iter_empty(buf_iter))
4436                                 return 0;
4437                 } else {
4438                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4439                                 return 0;
4440                 }
4441                 return 1;
4442         }
4443
4444         for_each_tracing_cpu(cpu) {
4445                 buf_iter = trace_buffer_iter(iter, cpu);
4446                 if (buf_iter) {
4447                         if (!ring_buffer_iter_empty(buf_iter))
4448                                 return 0;
4449                 } else {
4450                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4451                                 return 0;
4452                 }
4453         }
4454
4455         return 1;
4456 }
4457
4458 /*  Called with trace_event_read_lock() held. */
4459 enum print_line_t print_trace_line(struct trace_iterator *iter)
4460 {
4461         struct trace_array *tr = iter->tr;
4462         unsigned long trace_flags = tr->trace_flags;
4463         enum print_line_t ret;
4464
4465         if (iter->lost_events) {
4466                 if (iter->lost_events == (unsigned long)-1)
4467                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4468                                          iter->cpu);
4469                 else
4470                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4471                                          iter->cpu, iter->lost_events);
4472                 if (trace_seq_has_overflowed(&iter->seq))
4473                         return TRACE_TYPE_PARTIAL_LINE;
4474         }
4475
4476         if (iter->trace && iter->trace->print_line) {
4477                 ret = iter->trace->print_line(iter);
4478                 if (ret != TRACE_TYPE_UNHANDLED)
4479                         return ret;
4480         }
4481
4482         if (iter->ent->type == TRACE_BPUTS &&
4483                         trace_flags & TRACE_ITER_PRINTK &&
4484                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4485                 return trace_print_bputs_msg_only(iter);
4486
4487         if (iter->ent->type == TRACE_BPRINT &&
4488                         trace_flags & TRACE_ITER_PRINTK &&
4489                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4490                 return trace_print_bprintk_msg_only(iter);
4491
4492         if (iter->ent->type == TRACE_PRINT &&
4493                         trace_flags & TRACE_ITER_PRINTK &&
4494                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4495                 return trace_print_printk_msg_only(iter);
4496
4497         if (trace_flags & TRACE_ITER_BIN)
4498                 return print_bin_fmt(iter);
4499
4500         if (trace_flags & TRACE_ITER_HEX)
4501                 return print_hex_fmt(iter);
4502
4503         if (trace_flags & TRACE_ITER_RAW)
4504                 return print_raw_fmt(iter);
4505
4506         return print_trace_fmt(iter);
4507 }
4508
4509 void trace_latency_header(struct seq_file *m)
4510 {
4511         struct trace_iterator *iter = m->private;
4512         struct trace_array *tr = iter->tr;
4513
4514         /* print nothing if the buffers are empty */
4515         if (trace_empty(iter))
4516                 return;
4517
4518         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4519                 print_trace_header(m, iter);
4520
4521         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4522                 print_lat_help_header(m);
4523 }
4524
4525 void trace_default_header(struct seq_file *m)
4526 {
4527         struct trace_iterator *iter = m->private;
4528         struct trace_array *tr = iter->tr;
4529         unsigned long trace_flags = tr->trace_flags;
4530
4531         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4532                 return;
4533
4534         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4535                 /* print nothing if the buffers are empty */
4536                 if (trace_empty(iter))
4537                         return;
4538                 print_trace_header(m, iter);
4539                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4540                         print_lat_help_header(m);
4541         } else {
4542                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4543                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4544                                 print_func_help_header_irq(iter->array_buffer,
4545                                                            m, trace_flags);
4546                         else
4547                                 print_func_help_header(iter->array_buffer, m,
4548                                                        trace_flags);
4549                 }
4550         }
4551 }
4552
/* Warn in the trace output when ftrace_is_dead() reports a dead ftrace. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4560
4561 #ifdef CONFIG_TRACER_MAX_TRACE
/* Print the usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
	static const char help[] =
		"# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer.\n"
		"# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, help);
}
4571
/*
 * Print the usage text for a per-CPU snapshot file. The "echo 1" text
 * depends on whether CONFIG_RING_BUFFER_ALLOW_SWAP is set.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	static const char echo_one[] =
		"# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		"#                      Takes a snapshot of the main buffer for this cpu.\n";
#else
	static const char echo_one[] =
		"# echo 1 > snapshot : Not supported with this kernel.\n"
		"#                     Must use main snapshot file to allocate.\n";
#endif
	static const char echo_two[] =
		"# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		"#                      (Doesn't have to be '2' works with any number that\n"
		"#                       is not a '0' or '1')\n";

	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
	seq_puts(m, echo_one);
	seq_puts(m, echo_two);
}
4586
4587 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4588 {
4589         if (iter->tr->allocated_snapshot)
4590                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4591         else
4592                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4593
4594         seq_puts(m, "# Snapshot commands:\n");
4595         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4596                 show_snapshot_main_help(m);
4597         else
4598                 show_snapshot_percpu_help(m);
4599 }
4600 #else
/* Empty stand-in so callers still compile; should never be called. */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
4603 #endif
4604
4605 static int s_show(struct seq_file *m, void *v)
4606 {
4607         struct trace_iterator *iter = v;
4608         int ret;
4609
4610         if (iter->ent == NULL) {
4611                 if (iter->tr) {
4612                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4613                         seq_puts(m, "#\n");
4614                         test_ftrace_alive(m);
4615                 }
4616                 if (iter->snapshot && trace_empty(iter))
4617                         print_snapshot_help(m, iter);
4618                 else if (iter->trace && iter->trace->print_header)
4619                         iter->trace->print_header(m);
4620                 else
4621                         trace_default_header(m);
4622
4623         } else if (iter->leftover) {
4624                 /*
4625                  * If we filled the seq_file buffer earlier, we
4626                  * want to just show it now.
4627                  */
4628                 ret = trace_print_seq(m, &iter->seq);
4629
4630                 /* ret should this time be zero, but you never know */
4631                 iter->leftover = ret;
4632
4633         } else {
4634                 print_trace_line(iter);
4635                 ret = trace_print_seq(m, &iter->seq);
4636                 /*
4637                  * If we overflow the seq_file buffer, then it will
4638                  * ask us for this data again at start up.
4639                  * Use that instead.
4640                  *  ret is 0 if seq_file write succeeded.
4641                  *        -1 otherwise.
4642                  */
4643                 iter->leftover = ret;
4644         }
4645
4646         return 0;
4647 }
4648
4649 /*
4650  * Should be used after trace_array_get(), trace_types_lock
4651  * ensures that i_cdev was already initialized.
4652  */
4653 static inline int tracing_get_cpu(struct inode *inode)
4654 {
4655         if (inode->i_cdev) /* See trace_create_cpu_file() */
4656                 return (long)inode->i_cdev - 1;
4657         return RING_BUFFER_ALL_CPUS;
4658 }
4659
4660 static const struct seq_operations tracer_seq_ops = {
4661         .start          = s_start,
4662         .next           = s_next,
4663         .stop           = s_stop,
4664         .show           = s_show,
4665 };
4666
4667 static struct trace_iterator *
4668 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4669 {
4670         struct trace_array *tr = inode->i_private;
4671         struct trace_iterator *iter;
4672         int cpu;
4673
4674         if (tracing_disabled)
4675                 return ERR_PTR(-ENODEV);
4676
4677         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4678         if (!iter)
4679                 return ERR_PTR(-ENOMEM);
4680
4681         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4682                                     GFP_KERNEL);
4683         if (!iter->buffer_iter)
4684                 goto release;
4685
4686         /*
4687          * trace_find_next_entry() may need to save off iter->ent.
4688          * It will place it into the iter->temp buffer. As most
4689          * events are less than 128, allocate a buffer of that size.
4690          * If one is greater, then trace_find_next_entry() will
4691          * allocate a new buffer to adjust for the bigger iter->ent.
4692          * It's not critical if it fails to get allocated here.
4693          */
4694         iter->temp = kmalloc(128, GFP_KERNEL);
4695         if (iter->temp)
4696                 iter->temp_size = 128;
4697
4698         /*
4699          * trace_event_printf() may need to modify given format
4700          * string to replace %p with %px so that it shows real address
4701          * instead of hash value. However, that is only for the event
4702          * tracing, other tracer may not need. Defer the allocation
4703          * until it is needed.
4704          */
4705         iter->fmt = NULL;
4706         iter->fmt_size = 0;
4707
4708         /*
4709          * We make a copy of the current tracer to avoid concurrent
4710          * changes on it while we are reading.
4711          */
4712         mutex_lock(&trace_types_lock);
4713         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4714         if (!iter->trace)
4715                 goto fail;
4716
4717         *iter->trace = *tr->current_trace;
4718
4719         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4720                 goto fail;
4721
4722         iter->tr = tr;
4723
4724 #ifdef CONFIG_TRACER_MAX_TRACE
4725         /* Currently only the top directory has a snapshot */
4726         if (tr->current_trace->print_max || snapshot)
4727                 iter->array_buffer = &tr->max_buffer;
4728         else
4729 #endif
4730                 iter->array_buffer = &tr->array_buffer;
4731         iter->snapshot = snapshot;
4732         iter->pos = -1;
4733         iter->cpu_file = tracing_get_cpu(inode);
4734         mutex_init(&iter->mutex);
4735
4736         /* Notify the tracer early; before we stop tracing. */
4737         if (iter->trace->open)
4738                 iter->trace->open(iter);
4739
4740         /* Annotate start of buffers if we had overruns */
4741         if (ring_buffer_overruns(iter->array_buffer->buffer))
4742                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4743
4744         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4745         if (trace_clocks[tr->clock_id].in_ns)
4746                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4747
4748         /*
4749          * If pause-on-trace is enabled, then stop the trace while
4750          * dumping, unless this is the "snapshot" file
4751          */
4752         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4753                 tracing_stop_tr(tr);
4754
4755         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4756                 for_each_tracing_cpu(cpu) {
4757                         iter->buffer_iter[cpu] =
4758                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4759                                                          cpu, GFP_KERNEL);
4760                 }
4761                 ring_buffer_read_prepare_sync();
4762                 for_each_tracing_cpu(cpu) {
4763                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4764                         tracing_iter_reset(iter, cpu);
4765                 }
4766         } else {
4767                 cpu = iter->cpu_file;
4768                 iter->buffer_iter[cpu] =
4769                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4770                                                  cpu, GFP_KERNEL);
4771                 ring_buffer_read_prepare_sync();
4772                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4773                 tracing_iter_reset(iter, cpu);
4774         }
4775
4776         mutex_unlock(&trace_types_lock);
4777
4778         return iter;
4779
4780  fail:
4781         mutex_unlock(&trace_types_lock);
4782         kfree(iter->trace);
4783         kfree(iter->temp);
4784         kfree(iter->buffer_iter);
4785 release:
4786         seq_release_private(inode, file);
4787         return ERR_PTR(-ENOMEM);
4788 }
4789
4790 int tracing_open_generic(struct inode *inode, struct file *filp)
4791 {
4792         int ret;
4793
4794         ret = tracing_check_open_get_tr(NULL);
4795         if (ret)
4796                 return ret;
4797
4798         filp->private_data = inode->i_private;
4799         return 0;
4800 }
4801
4802 bool tracing_is_disabled(void)
4803 {
4804         return (tracing_disabled) ? true: false;
4805 }
4806
4807 /*
4808  * Open and update trace_array ref count.
4809  * Must have the current trace_array passed to it.
4810  */
4811 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4812 {
4813         struct trace_array *tr = inode->i_private;
4814         int ret;
4815
4816         ret = tracing_check_open_get_tr(tr);
4817         if (ret)
4818                 return ret;
4819
4820         filp->private_data = inode->i_private;
4821
4822         return 0;
4823 }
4824
4825 static int tracing_release(struct inode *inode, struct file *file)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct seq_file *m = file->private_data;
4829         struct trace_iterator *iter;
4830         int cpu;
4831
4832         if (!(file->f_mode & FMODE_READ)) {
4833                 trace_array_put(tr);
4834                 return 0;
4835         }
4836
4837         /* Writes do not use seq_file */
4838         iter = m->private;
4839         mutex_lock(&trace_types_lock);
4840
4841         for_each_tracing_cpu(cpu) {
4842                 if (iter->buffer_iter[cpu])
4843                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4844         }
4845
4846         if (iter->trace && iter->trace->close)
4847                 iter->trace->close(iter);
4848
4849         if (!iter->snapshot && tr->stop_count)
4850                 /* reenable tracing if it was previously enabled */
4851                 tracing_start_tr(tr);
4852
4853         __trace_array_put(tr);
4854
4855         mutex_unlock(&trace_types_lock);
4856
4857         mutex_destroy(&iter->mutex);
4858         free_cpumask_var(iter->started);
4859         kfree(iter->fmt);
4860         kfree(iter->temp);
4861         kfree(iter->trace);
4862         kfree(iter->buffer_iter);
4863         seq_release_private(inode, file);
4864
4865         return 0;
4866 }
4867
4868 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4869 {
4870         struct trace_array *tr = inode->i_private;
4871
4872         trace_array_put(tr);
4873         return 0;
4874 }
4875
4876 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4877 {
4878         struct trace_array *tr = inode->i_private;
4879
4880         trace_array_put(tr);
4881
4882         return single_release(inode, file);
4883 }
4884
4885 static int tracing_open(struct inode *inode, struct file *file)
4886 {
4887         struct trace_array *tr = inode->i_private;
4888         struct trace_iterator *iter;
4889         int ret;
4890
4891         ret = tracing_check_open_get_tr(tr);
4892         if (ret)
4893                 return ret;
4894
4895         /* If this file was open for write, then erase contents */
4896         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4897                 int cpu = tracing_get_cpu(inode);
4898                 struct array_buffer *trace_buf = &tr->array_buffer;
4899
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901                 if (tr->current_trace->print_max)
4902                         trace_buf = &tr->max_buffer;
4903 #endif
4904
4905                 if (cpu == RING_BUFFER_ALL_CPUS)
4906                         tracing_reset_online_cpus(trace_buf);
4907                 else
4908                         tracing_reset_cpu(trace_buf, cpu);
4909         }
4910
4911         if (file->f_mode & FMODE_READ) {
4912                 iter = __tracing_open(inode, file, false);
4913                 if (IS_ERR(iter))
4914                         ret = PTR_ERR(iter);
4915                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4916                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4917         }
4918
4919         if (ret < 0)
4920                 trace_array_put(tr);
4921
4922         return ret;
4923 }
4924
4925 /*
4926  * Some tracers are not suitable for instance buffers.
4927  * A tracer is always available for the global array (toplevel)
4928  * or if it explicitly states that it is.
4929  */
4930 static bool
4931 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4932 {
4933         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4934 }
4935
4936 /* Find the next tracer that this trace array may use */
4937 static struct tracer *
4938 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4939 {
4940         while (t && !trace_ok_for_array(t, tr))
4941                 t = t->next;
4942
4943         return t;
4944 }
4945
4946 static void *
4947 t_next(struct seq_file *m, void *v, loff_t *pos)
4948 {
4949         struct trace_array *tr = m->private;
4950         struct tracer *t = v;
4951
4952         (*pos)++;
4953
4954         if (t)
4955                 t = get_tracer_for_array(tr, t->next);
4956
4957         return t;
4958 }
4959
4960 static void *t_start(struct seq_file *m, loff_t *pos)
4961 {
4962         struct trace_array *tr = m->private;
4963         struct tracer *t;
4964         loff_t l = 0;
4965
4966         mutex_lock(&trace_types_lock);
4967
4968         t = get_tracer_for_array(tr, trace_types);
4969         for (; t && l < *pos; t = t_next(m, t, &l))
4970                         ;
4971
4972         return t;
4973 }
4974
4975 static void t_stop(struct seq_file *m, void *p)
4976 {
4977         mutex_unlock(&trace_types_lock);
4978 }
4979
4980 static int t_show(struct seq_file *m, void *v)
4981 {
4982         struct tracer *t = v;
4983
4984         if (!t)
4985                 return 0;
4986
4987         seq_puts(m, t->name);
4988         if (t->next)
4989                 seq_putc(m, ' ');
4990         else
4991                 seq_putc(m, '\n');
4992
4993         return 0;
4994 }
4995
4996 static const struct seq_operations show_traces_seq_ops = {
4997         .start          = t_start,
4998         .next           = t_next,
4999         .stop           = t_stop,
5000         .show           = t_show,
5001 };
5002
5003 static int show_traces_open(struct inode *inode, struct file *file)
5004 {
5005         struct trace_array *tr = inode->i_private;
5006         struct seq_file *m;
5007         int ret;
5008
5009         ret = tracing_check_open_get_tr(tr);
5010         if (ret)
5011                 return ret;
5012
5013         ret = seq_open(file, &show_traces_seq_ops);
5014         if (ret) {
5015                 trace_array_put(tr);
5016                 return ret;
5017         }
5018
5019         m = file->private_data;
5020         m->private = tr;
5021
5022         return 0;
5023 }
5024
5025 static int show_traces_release(struct inode *inode, struct file *file)
5026 {
5027         struct trace_array *tr = inode->i_private;
5028
5029         trace_array_put(tr);
5030         return seq_release(inode, file);
5031 }
5032
5033 static ssize_t
5034 tracing_write_stub(struct file *filp, const char __user *ubuf,
5035                    size_t count, loff_t *ppos)
5036 {
5037         return count;
5038 }
5039
5040 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5041 {
5042         int ret;
5043
5044         if (file->f_mode & FMODE_READ)
5045                 ret = seq_lseek(file, offset, whence);
5046         else
5047                 file->f_pos = ret = 0;
5048
5049         return ret;
5050 }
5051
5052 static const struct file_operations tracing_fops = {
5053         .open           = tracing_open,
5054         .read           = seq_read,
5055         .write          = tracing_write_stub,
5056         .llseek         = tracing_lseek,
5057         .release        = tracing_release,
5058 };
5059
5060 static const struct file_operations show_traces_fops = {
5061         .open           = show_traces_open,
5062         .read           = seq_read,
5063         .llseek         = seq_lseek,
5064         .release        = show_traces_release,
5065 };
5066
5067 static ssize_t
5068 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5069                      size_t count, loff_t *ppos)
5070 {
5071         struct trace_array *tr = file_inode(filp)->i_private;
5072         char *mask_str;
5073         int len;
5074
5075         len = snprintf(NULL, 0, "%*pb\n",
5076                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5077         mask_str = kmalloc(len, GFP_KERNEL);
5078         if (!mask_str)
5079                 return -ENOMEM;
5080
5081         len = snprintf(mask_str, len, "%*pb\n",
5082                        cpumask_pr_args(tr->tracing_cpumask));
5083         if (len >= count) {
5084                 count = -EINVAL;
5085                 goto out_err;
5086         }
5087         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5088
5089 out_err:
5090         kfree(mask_str);
5091
5092         return count;
5093 }
5094
5095 int tracing_set_cpumask(struct trace_array *tr,
5096                         cpumask_var_t tracing_cpumask_new)
5097 {
5098         int cpu;
5099
5100         if (!tr)
5101                 return -EINVAL;
5102
5103         local_irq_disable();
5104         arch_spin_lock(&tr->max_lock);
5105         for_each_tracing_cpu(cpu) {
5106                 /*
5107                  * Increase/decrease the disabled counter if we are
5108                  * about to flip a bit in the cpumask:
5109                  */
5110                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5111                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5112                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5113                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5114                 }
5115                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5116                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5117                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5118                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5119                 }
5120         }
5121         arch_spin_unlock(&tr->max_lock);
5122         local_irq_enable();
5123
5124         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5125
5126         return 0;
5127 }
5128
5129 static ssize_t
5130 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5131                       size_t count, loff_t *ppos)
5132 {
5133         struct trace_array *tr = file_inode(filp)->i_private;
5134         cpumask_var_t tracing_cpumask_new;
5135         int err;
5136
5137         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5138                 return -ENOMEM;
5139
5140         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5141         if (err)
5142                 goto err_free;
5143
5144         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5145         if (err)
5146                 goto err_free;
5147
5148         free_cpumask_var(tracing_cpumask_new);
5149
5150         return count;
5151
5152 err_free:
5153         free_cpumask_var(tracing_cpumask_new);
5154
5155         return err;
5156 }
5157
5158 static const struct file_operations tracing_cpumask_fops = {
5159         .open           = tracing_open_generic_tr,
5160         .read           = tracing_cpumask_read,
5161         .write          = tracing_cpumask_write,
5162         .release        = tracing_release_generic_tr,
5163         .llseek         = generic_file_llseek,
5164 };
5165
5166 static int tracing_trace_options_show(struct seq_file *m, void *v)
5167 {
5168         struct tracer_opt *trace_opts;
5169         struct trace_array *tr = m->private;
5170         u32 tracer_flags;
5171         int i;
5172
5173         mutex_lock(&trace_types_lock);
5174         tracer_flags = tr->current_trace->flags->val;
5175         trace_opts = tr->current_trace->flags->opts;
5176
5177         for (i = 0; trace_options[i]; i++) {
5178                 if (tr->trace_flags & (1 << i))
5179                         seq_printf(m, "%s\n", trace_options[i]);
5180                 else
5181                         seq_printf(m, "no%s\n", trace_options[i]);
5182         }
5183
5184         for (i = 0; trace_opts[i].name; i++) {
5185                 if (tracer_flags & trace_opts[i].bit)
5186                         seq_printf(m, "%s\n", trace_opts[i].name);
5187                 else
5188                         seq_printf(m, "no%s\n", trace_opts[i].name);
5189         }
5190         mutex_unlock(&trace_types_lock);
5191
5192         return 0;
5193 }
5194
5195 static int __set_tracer_option(struct trace_array *tr,
5196                                struct tracer_flags *tracer_flags,
5197                                struct tracer_opt *opts, int neg)
5198 {
5199         struct tracer *trace = tracer_flags->trace;
5200         int ret;
5201
5202         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5203         if (ret)
5204                 return ret;
5205
5206         if (neg)
5207                 tracer_flags->val &= ~opts->bit;
5208         else
5209                 tracer_flags->val |= opts->bit;
5210         return 0;
5211 }
5212
5213 /* Try to assign a tracer specific option */
5214 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5215 {
5216         struct tracer *trace = tr->current_trace;
5217         struct tracer_flags *tracer_flags = trace->flags;
5218         struct tracer_opt *opts = NULL;
5219         int i;
5220
5221         for (i = 0; tracer_flags->opts[i].name; i++) {
5222                 opts = &tracer_flags->opts[i];
5223
5224                 if (strcmp(cmp, opts->name) == 0)
5225                         return __set_tracer_option(tr, trace->flags, opts, neg);
5226         }
5227
5228         return -EINVAL;
5229 }
5230
5231 /* Some tracers require overwrite to stay enabled */
5232 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5233 {
5234         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5235                 return -1;
5236
5237         return 0;
5238 }
5239
5240 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5241 {
5242         int *map;
5243
5244         if ((mask == TRACE_ITER_RECORD_TGID) ||
5245             (mask == TRACE_ITER_RECORD_CMD))
5246                 lockdep_assert_held(&event_mutex);
5247
5248         /* do nothing if flag is already set */
5249         if (!!(tr->trace_flags & mask) == !!enabled)
5250                 return 0;
5251
5252         /* Give the tracer a chance to approve the change */
5253         if (tr->current_trace->flag_changed)
5254                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5255                         return -EINVAL;
5256
5257         if (enabled)
5258                 tr->trace_flags |= mask;
5259         else
5260                 tr->trace_flags &= ~mask;
5261
5262         if (mask == TRACE_ITER_RECORD_CMD)
5263                 trace_event_enable_cmd_record(enabled);
5264
5265         if (mask == TRACE_ITER_RECORD_TGID) {
5266                 if (!tgid_map) {
5267                         tgid_map_max = pid_max;
5268                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5269                                        GFP_KERNEL);
5270
5271                         /*
5272                          * Pairs with smp_load_acquire() in
5273                          * trace_find_tgid_ptr() to ensure that if it observes
5274                          * the tgid_map we just allocated then it also observes
5275                          * the corresponding tgid_map_max value.
5276                          */
5277                         smp_store_release(&tgid_map, map);
5278                 }
5279                 if (!tgid_map) {
5280                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5281                         return -ENOMEM;
5282                 }
5283
5284                 trace_event_enable_tgid_record(enabled);
5285         }
5286
5287         if (mask == TRACE_ITER_EVENT_FORK)
5288                 trace_event_follow_fork(tr, enabled);
5289
5290         if (mask == TRACE_ITER_FUNC_FORK)
5291                 ftrace_pid_follow_fork(tr, enabled);
5292
5293         if (mask == TRACE_ITER_OVERWRITE) {
5294                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5295 #ifdef CONFIG_TRACER_MAX_TRACE
5296                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5297 #endif
5298         }
5299
5300         if (mask == TRACE_ITER_PRINTK) {
5301                 trace_printk_start_stop_comm(enabled);
5302                 trace_printk_control(enabled);
5303         }
5304
5305         return 0;
5306 }
5307
5308 int trace_set_options(struct trace_array *tr, char *option)
5309 {
5310         char *cmp;
5311         int neg = 0;
5312         int ret;
5313         size_t orig_len = strlen(option);
5314         int len;
5315
5316         cmp = strstrip(option);
5317
5318         len = str_has_prefix(cmp, "no");
5319         if (len)
5320                 neg = 1;
5321
5322         cmp += len;
5323
5324         mutex_lock(&event_mutex);
5325         mutex_lock(&trace_types_lock);
5326
5327         ret = match_string(trace_options, -1, cmp);
5328         /* If no option could be set, test the specific tracer options */
5329         if (ret < 0)
5330                 ret = set_tracer_option(tr, cmp, neg);
5331         else
5332                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5333
5334         mutex_unlock(&trace_types_lock);
5335         mutex_unlock(&event_mutex);
5336
5337         /*
5338          * If the first trailing whitespace is replaced with '\0' by strstrip,
5339          * turn it back into a space.
5340          */
5341         if (orig_len > strlen(option))
5342                 option[strlen(option)] = ' ';
5343
5344         return ret;
5345 }
5346
5347 static void __init apply_trace_boot_options(void)
5348 {
5349         char *buf = trace_boot_options_buf;
5350         char *option;
5351
5352         while (true) {
5353                 option = strsep(&buf, ",");
5354
5355                 if (!option)
5356                         break;
5357
5358                 if (*option)
5359                         trace_set_options(&global_trace, option);
5360
5361                 /* Put back the comma to allow this to be called again */
5362                 if (buf)
5363                         *(buf - 1) = ',';
5364         }
5365 }
5366
5367 static ssize_t
5368 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5369                         size_t cnt, loff_t *ppos)
5370 {
5371         struct seq_file *m = filp->private_data;
5372         struct trace_array *tr = m->private;
5373         char buf[64];
5374         int ret;
5375
5376         if (cnt >= sizeof(buf))
5377                 return -EINVAL;
5378
5379         if (copy_from_user(buf, ubuf, cnt))
5380                 return -EFAULT;
5381
5382         buf[cnt] = 0;
5383
5384         ret = trace_set_options(tr, buf);
5385         if (ret < 0)
5386                 return ret;
5387
5388         *ppos += cnt;
5389
5390         return cnt;
5391 }
5392
5393 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5394 {
5395         struct trace_array *tr = inode->i_private;
5396         int ret;
5397
5398         ret = tracing_check_open_get_tr(tr);
5399         if (ret)
5400                 return ret;
5401
5402         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5403         if (ret < 0)
5404                 trace_array_put(tr);
5405
5406         return ret;
5407 }
5408
5409 static const struct file_operations tracing_iter_fops = {
5410         .open           = tracing_trace_options_open,
5411         .read           = seq_read,
5412         .llseek         = seq_lseek,
5413         .release        = tracing_single_release_tr,
5414         .write          = tracing_trace_options_write,
5415 };
5416
/*
 * Text returned by tracing_readme_read(): a short HOWTO describing the
 * tracing control files.  Sections are compiled in or out according to
 * the tracing features that are configured.
 *
 * NOTE(review): user space tools may pattern-match parts of this text to
 * detect features - confirm before rewording existing lines.
 */
static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
        "# echo 0 > tracing_on : quick way to disable tracing\n"
        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
        " Important files:\n"
        "  trace\t\t\t- The static contents of the buffer\n"
        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
        "  current_tracer\t- function and latency tracers\n"
        "  available_tracers\t- list of configured tracers for current_tracer\n"
        "  error_log\t- error log for failed commands (that support it)\n"
        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
        "  trace_clock\t\t-change the clock used to order events\n"
        "       local:   Per cpu clock but may not be synced across CPUs\n"
        "      global:   Synced across CPUs but slows tracing down.\n"
        "     counter:   Not a clock, but just an increment\n"
        "      uptime:   Jiffy counter from time of boot\n"
        "        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
        "     x86-tsc:   TSC cycle counter\n"
#endif
        "\n  timestamp_mode\t-view the mode used to timestamp events\n"
        "       delta:   Delta difference against a buffer-wide timestamp\n"
        "    absolute:   Absolute (standalone) timestamp\n"
        "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
        "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
        "  tracing_cpumask\t- Limit which CPUs to trace\n"
        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
        "\t\t\t  Remove sub-buffer with rmdir\n"
        "  trace_options\t\t- Set format or modify how tracing happens\n"
        "\t\t\t  Disable an option by prefixing 'no' to the\n"
        "\t\t\t  option name\n"
        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "\n  available_filter_functions - list of functions that can be filtered on\n"
        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
        "\t\t\t  functions\n"
        "\t     accepts: func_full_name or glob-matching-pattern\n"
        "\t     modules: Can select a group via module\n"
        "\t      Format: :mod:<module-name>\n"
        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
        "\t    triggers: a command to perform when function is hit\n"
        "\t      Format: <function>:<trigger>[:count]\n"
        "\t     trigger: traceon, traceoff\n"
        "\t\t      enable_event:<system>:<event>\n"
        "\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
        "\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t      snapshot\n"
#endif
        "\t\t      dump\n"
        "\t\t      cpudump\n"
        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
        "\t     The first one will disable tracing every time do_fault is hit\n"
        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
        "\t       The first time do_trap is hit and it disables tracing, the\n"
        "\t       counter will decrement to 2. If tracing is already disabled,\n"
        "\t       the counter will not decrement. It only decrements when the\n"
        "\t       trigger did work\n"
        "\t     To remove trigger without count:\n"
        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
        "\t     To remove trigger with a count:\n"
        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
        "\t    modules: Can select a group via module command :mod:\n"
        "\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
        "\t\t    (function)\n"
        "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
        "\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
        "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
        "\t\t\t  snapshot buffer. Read the contents for more\n"
        "\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
        "  stack_trace\t\t- Shows the max stack trace when active\n"
        "  stack_max_size\t- Shows current max stack size that was traced\n"
        "\t\t\t  Write into this file to reset the max size (trigger a\n"
        "\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
        "\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
        "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
        "\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
        "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
        "\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
        "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
        "\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
        "\t  accepts: event-definitions (one definition per line)\n"
        "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
        "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
#ifdef CONFIG_HIST_TRIGGERS
        "\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
        "\t           -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
  "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
  "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
        "\t     args: <name>=fetcharg[:type]\n"
        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
        "\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
        "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
        "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
        "\t           <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
        "\t    field: <stype> <name>;\n"
        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
        "\t           [unsigned] char/int/long\n"
#endif
#endif
        "  events/\t\t- Directory containing all trace event subsystems:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
        "\t\t\t  events\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "  events/<system>/<event>/\t- Directory containing control files for\n"
        "\t\t\t  <event>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "      trigger\t\t- If set, a command to perform when event is hit\n"
        "\t    Format: <trigger>[:count][if <filter>]\n"
        "\t   trigger: traceon, traceoff\n"
        "\t            enable_event:<system>:<event>\n"
        "\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
        "\t            enable_hist:<system>:<event>\n"
        "\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
        "\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
        "\t\t    hist (see below)\n"
#endif
        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
        "\t                  events/block/block_unplug/trigger\n"
        "\t   The first disables tracing every time block_unplug is hit.\n"
        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
        "\t   Like function triggers, the counter is only decremented if it\n"
        "\t    enabled or disabled tracing.\n"
        "\t   To remove a trigger without a count:\n"
        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
        "\t   To remove a trigger with a count:\n"
        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
        "\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
        "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
        "\t    Format: hist:keys=<field1[,field2,...]>\n"
        "\t            [:values=<field1[,field2,...]>]\n"
        "\t            [:sort=<field1[,field2,...]>]\n"
        "\t            [:size=#entries]\n"
        "\t            [:pause][:continue][:clear]\n"
        "\t            [:name=histname1]\n"
        "\t            [:<handler>.<action>]\n"
        "\t            [if <filter>]\n\n"
        "\t    Note, special fields can be used as well:\n"
        "\t            common_timestamp - to record current timestamp\n"
        "\t            common_cpu - to record the CPU the event happened on\n"
        "\n"
        "\t    When a matching event is hit, an entry is added to a hash\n"
        "\t    table using the key(s) and value(s) named, and the value of a\n"
        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
        "\t    correspond to fields in the event's format description.  Keys\n"
        "\t    can be any field, or the special string 'stacktrace'.\n"
        "\t    Compound keys consisting of up to two fields can be specified\n"
        "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
        "\t    fields.  Sort keys consisting of up to two fields can be\n"
        "\t    specified using the 'sort' keyword.  The sort direction can\n"
        "\t    be modified by appending '.descending' or '.ascending' to a\n"
        "\t    sort field.  The 'size' parameter can be used to specify more\n"
        "\t    or fewer than the default 2048 entries for the hashtable size.\n"
        "\t    If a hist trigger is given a name using the 'name' parameter,\n"
        "\t    its histogram data will be shared with other triggers of the\n"
        "\t    same name, and trigger hits will update this common data.\n\n"
        "\t    Reading the 'hist' file for the event will dump the hash\n"
        "\t    table in its entirety to stdout.  If there are multiple hist\n"
        "\t    triggers attached to an event, there will be a table for each\n"
        "\t    trigger in the output.  The table displayed for a named\n"
        "\t    trigger will be the same as any other instance having the\n"
        "\t    same name.  The default format used to display a given field\n"
        "\t    can be modified by appending any of the following modifiers\n"
        "\t    to the field name, as applicable:\n\n"
        "\t            .hex        display a number as a hex value\n"
        "\t            .sym        display an address as a symbol\n"
        "\t            .sym-offset display an address as a symbol and offset\n"
        "\t            .execname   display a common_pid as a program name\n"
        "\t            .syscall    display a syscall id as a syscall name\n"
        "\t            .log2       display log2 value rather than raw number\n"
        "\t            .usecs      display a common_timestamp in microseconds\n\n"
        "\t    The 'pause' parameter can be used to pause an existing hist\n"
        "\t    trigger or to start a hist trigger but not log any events\n"
        "\t    until told to do so.  'continue' can be used to start or\n"
        "\t    restart a paused hist trigger.\n\n"
        "\t    The 'clear' parameter will clear the contents of a running\n"
        "\t    hist trigger and leave its current paused/active state\n"
        "\t    unchanged.\n\n"
        "\t    The enable_hist and disable_hist triggers can be used to\n"
        "\t    have one event conditionally start and stop another event's\n"
        "\t    already-attached hist trigger.  The syntax is analogous to\n"
        "\t    the enable_event and disable_event triggers.\n\n"
        "\t    Hist trigger handlers and actions are executed whenever\n"
        "\t    a histogram entry is added or updated.  They take the form:\n\n"
        "\t        <handler>.<action>\n\n"
        "\t    The available handlers are:\n\n"
        "\t        onmatch(matching.event)  - invoke on addition or update\n"
        "\t        onmax(var)               - invoke if var exceeds current max\n"
        "\t        onchange(var)            - invoke action if var changes\n\n"
        "\t    The available actions are:\n\n"
        "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
        "\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
        "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
        "\t  Write into this file to define/undefine new synthetic events.\n"
        "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
#endif
#endif
;
5677
5678 static ssize_t
5679 tracing_readme_read(struct file *filp, char __user *ubuf,
5680                        size_t cnt, loff_t *ppos)
5681 {
5682         return simple_read_from_buffer(ubuf, cnt, ppos,
5683                                         readme_msg, strlen(readme_msg));
5684 }
5685
5686 static const struct file_operations tracing_readme_fops = {
5687         .open           = tracing_open_generic,
5688         .read           = tracing_readme_read,
5689         .llseek         = generic_file_llseek,
5690 };
5691
5692 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5693 {
5694         int pid = ++(*pos);
5695
5696         return trace_find_tgid_ptr(pid);
5697 }
5698
5699 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5700 {
5701         int pid = *pos;
5702
5703         return trace_find_tgid_ptr(pid);
5704 }
5705
/* seq_file ->stop: intentionally empty, nothing to do here */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5709
5710 static int saved_tgids_show(struct seq_file *m, void *v)
5711 {
5712         int *entry = (int *)v;
5713         int pid = entry - tgid_map;
5714         int tgid = *entry;
5715
5716         if (tgid == 0)
5717                 return SEQ_SKIP;
5718
5719         seq_printf(m, "%d %d\n", pid, tgid);
5720         return 0;
5721 }
5722
5723 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5724         .start          = saved_tgids_start,
5725         .stop           = saved_tgids_stop,
5726         .next           = saved_tgids_next,
5727         .show           = saved_tgids_show,
5728 };
5729
5730 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5731 {
5732         int ret;
5733
5734         ret = tracing_check_open_get_tr(NULL);
5735         if (ret)
5736                 return ret;
5737
5738         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5739 }
5740
5741
5742 static const struct file_operations tracing_saved_tgids_fops = {
5743         .open           = tracing_saved_tgids_open,
5744         .read           = seq_read,
5745         .llseek         = seq_lseek,
5746         .release        = seq_release,
5747 };
5748
5749 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5750 {
5751         unsigned int *ptr = v;
5752
5753         if (*pos || m->count)
5754                 ptr++;
5755
5756         (*pos)++;
5757
5758         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5759              ptr++) {
5760                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5761                         continue;
5762
5763                 return ptr;
5764         }
5765
5766         return NULL;
5767 }
5768
5769 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5770 {
5771         void *v;
5772         loff_t l = 0;
5773
5774         preempt_disable();
5775         arch_spin_lock(&trace_cmdline_lock);
5776
5777         v = &savedcmd->map_cmdline_to_pid[0];
5778         while (l <= *pos) {
5779                 v = saved_cmdlines_next(m, v, &l);
5780                 if (!v)
5781                         return NULL;
5782         }
5783
5784         return v;
5785 }
5786
5787 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5788 {
5789         arch_spin_unlock(&trace_cmdline_lock);
5790         preempt_enable();
5791 }
5792
5793 static int saved_cmdlines_show(struct seq_file *m, void *v)
5794 {
5795         char buf[TASK_COMM_LEN];
5796         unsigned int *pid = v;
5797
5798         __trace_find_cmdline(*pid, buf);
5799         seq_printf(m, "%d %s\n", *pid, buf);
5800         return 0;
5801 }
5802
5803 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5804         .start          = saved_cmdlines_start,
5805         .next           = saved_cmdlines_next,
5806         .stop           = saved_cmdlines_stop,
5807         .show           = saved_cmdlines_show,
5808 };
5809
5810 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5811 {
5812         int ret;
5813
5814         ret = tracing_check_open_get_tr(NULL);
5815         if (ret)
5816                 return ret;
5817
5818         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5819 }
5820
5821 static const struct file_operations tracing_saved_cmdlines_fops = {
5822         .open           = tracing_saved_cmdlines_open,
5823         .read           = seq_read,
5824         .llseek         = seq_lseek,
5825         .release        = seq_release,
5826 };
5827
5828 static ssize_t
5829 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5830                                  size_t cnt, loff_t *ppos)
5831 {
5832         char buf[64];
5833         int r;
5834
5835         arch_spin_lock(&trace_cmdline_lock);
5836         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5837         arch_spin_unlock(&trace_cmdline_lock);
5838
5839         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5840 }
5841
5842 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5843 {
5844         kfree(s->saved_cmdlines);
5845         kfree(s->map_cmdline_to_pid);
5846         kfree(s);
5847 }
5848
5849 static int tracing_resize_saved_cmdlines(unsigned int val)
5850 {
5851         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5852
5853         s = kmalloc(sizeof(*s), GFP_KERNEL);
5854         if (!s)
5855                 return -ENOMEM;
5856
5857         if (allocate_cmdlines_buffer(val, s) < 0) {
5858                 kfree(s);
5859                 return -ENOMEM;
5860         }
5861
5862         arch_spin_lock(&trace_cmdline_lock);
5863         savedcmd_temp = savedcmd;
5864         savedcmd = s;
5865         arch_spin_unlock(&trace_cmdline_lock);
5866         free_saved_cmdlines_buffer(savedcmd_temp);
5867
5868         return 0;
5869 }
5870
5871 static ssize_t
5872 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5873                                   size_t cnt, loff_t *ppos)
5874 {
5875         unsigned long val;
5876         int ret;
5877
5878         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5879         if (ret)
5880                 return ret;
5881
5882         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5883         if (!val || val > PID_MAX_DEFAULT)
5884                 return -EINVAL;
5885
5886         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5887         if (ret < 0)
5888                 return ret;
5889
5890         *ppos += cnt;
5891
5892         return cnt;
5893 }
5894
5895 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5896         .open           = tracing_open_generic,
5897         .read           = tracing_saved_cmdlines_size_read,
5898         .write          = tracing_saved_cmdlines_size_write,
5899 };
5900
5901 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5902 static union trace_eval_map_item *
5903 update_eval_map(union trace_eval_map_item *ptr)
5904 {
5905         if (!ptr->map.eval_string) {
5906                 if (ptr->tail.next) {
5907                         ptr = ptr->tail.next;
5908                         /* Set ptr to the next real item (skip head) */
5909                         ptr++;
5910                 } else
5911                         return NULL;
5912         }
5913         return ptr;
5914 }
5915
5916 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5917 {
5918         union trace_eval_map_item *ptr = v;
5919
5920         /*
5921          * Paranoid! If ptr points to end, we don't want to increment past it.
5922          * This really should never happen.
5923          */
5924         (*pos)++;
5925         ptr = update_eval_map(ptr);
5926         if (WARN_ON_ONCE(!ptr))
5927                 return NULL;
5928
5929         ptr++;
5930         ptr = update_eval_map(ptr);
5931
5932         return ptr;
5933 }
5934
5935 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5936 {
5937         union trace_eval_map_item *v;
5938         loff_t l = 0;
5939
5940         mutex_lock(&trace_eval_mutex);
5941
5942         v = trace_eval_maps;
5943         if (v)
5944                 v++;
5945
5946         while (v && l < *pos) {
5947                 v = eval_map_next(m, v, &l);
5948         }
5949
5950         return v;
5951 }
5952
5953 static void eval_map_stop(struct seq_file *m, void *v)
5954 {
5955         mutex_unlock(&trace_eval_mutex);
5956 }
5957
5958 static int eval_map_show(struct seq_file *m, void *v)
5959 {
5960         union trace_eval_map_item *ptr = v;
5961
5962         seq_printf(m, "%s %ld (%s)\n",
5963                    ptr->map.eval_string, ptr->map.eval_value,
5964                    ptr->map.system);
5965
5966         return 0;
5967 }
5968
5969 static const struct seq_operations tracing_eval_map_seq_ops = {
5970         .start          = eval_map_start,
5971         .next           = eval_map_next,
5972         .stop           = eval_map_stop,
5973         .show           = eval_map_show,
5974 };
5975
5976 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5977 {
5978         int ret;
5979
5980         ret = tracing_check_open_get_tr(NULL);
5981         if (ret)
5982                 return ret;
5983
5984         return seq_open(filp, &tracing_eval_map_seq_ops);
5985 }
5986
5987 static const struct file_operations tracing_eval_map_fops = {
5988         .open           = tracing_eval_map_open,
5989         .read           = seq_read,
5990         .llseek         = seq_lseek,
5991         .release        = seq_release,
5992 };
5993
5994 static inline union trace_eval_map_item *
5995 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5996 {
5997         /* Return tail of array given the head */
5998         return ptr + ptr->head.length + 1;
5999 }
6000
6001 static void
6002 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6003                            int len)
6004 {
6005         struct trace_eval_map **stop;
6006         struct trace_eval_map **map;
6007         union trace_eval_map_item *map_array;
6008         union trace_eval_map_item *ptr;
6009
6010         stop = start + len;
6011
6012         /*
6013          * The trace_eval_maps contains the map plus a head and tail item,
6014          * where the head holds the module and length of array, and the
6015          * tail holds a pointer to the next list.
6016          */
6017         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6018         if (!map_array) {
6019                 pr_warn("Unable to allocate trace eval mapping\n");
6020                 return;
6021         }
6022
6023         mutex_lock(&trace_eval_mutex);
6024
6025         if (!trace_eval_maps)
6026                 trace_eval_maps = map_array;
6027         else {
6028                 ptr = trace_eval_maps;
6029                 for (;;) {
6030                         ptr = trace_eval_jmp_to_tail(ptr);
6031                         if (!ptr->tail.next)
6032                                 break;
6033                         ptr = ptr->tail.next;
6034
6035                 }
6036                 ptr->tail.next = map_array;
6037         }
6038         map_array->head.mod = mod;
6039         map_array->head.length = len;
6040         map_array++;
6041
6042         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6043                 map_array->map = **map;
6044                 map_array++;
6045         }
6046         memset(map_array, 0, sizeof(*map_array));
6047
6048         mutex_unlock(&trace_eval_mutex);
6049 }
6050
6051 static void trace_create_eval_file(struct dentry *d_tracer)
6052 {
6053         trace_create_file("eval_map", 0444, d_tracer,
6054                           NULL, &tracing_eval_map_fops);
6055 }
6056
6057 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No eval_map file without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void trace_create_eval_file(struct dentry *d_tracer)
{
}
/* Nothing to record without CONFIG_TRACE_EVAL_MAP_FILE */
static inline void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
                           int len)
{
}
6061 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6062
/*
 * Hand @len eval maps starting at @start to trace_event_eval_update()
 * and then to trace_insert_eval_map_file().  Does nothing when @len is
 * not positive.
 */
static void trace_insert_eval_map(struct module *mod,
                                  struct trace_eval_map **start, int len)
{
        if (len > 0) {
                trace_event_eval_update(start, len);
                trace_insert_eval_map_file(mod, start, len);
        }
}
6077
6078 static ssize_t
6079 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6080                        size_t cnt, loff_t *ppos)
6081 {
6082         struct trace_array *tr = filp->private_data;
6083         char buf[MAX_TRACER_SIZE+2];
6084         int r;
6085
6086         mutex_lock(&trace_types_lock);
6087         r = sprintf(buf, "%s\n", tr->current_trace->name);
6088         mutex_unlock(&trace_types_lock);
6089
6090         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6091 }
6092
6093 int tracer_init(struct tracer *t, struct trace_array *tr)
6094 {
6095         tracing_reset_online_cpus(&tr->array_buffer);
6096         return t->init(tr);
6097 }
6098
6099 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6100 {
6101         int cpu;
6102
6103         for_each_tracing_cpu(cpu)
6104                 per_cpu_ptr(buf->data, cpu)->entries = val;
6105 }
6106
6107 #ifdef CONFIG_TRACER_MAX_TRACE
6108 /* resize @tr's buffer to the size of @size_tr's entries */
6109 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6110                                         struct array_buffer *size_buf, int cpu_id)
6111 {
6112         int cpu, ret = 0;
6113
6114         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6115                 for_each_tracing_cpu(cpu) {
6116                         ret = ring_buffer_resize(trace_buf->buffer,
6117                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6118                         if (ret < 0)
6119                                 break;
6120                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6121                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6122                 }
6123         } else {
6124                 ret = ring_buffer_resize(trace_buf->buffer,
6125                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6126                 if (ret == 0)
6127                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6128                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6129         }
6130
6131         return ret;
6132 }
6133 #endif /* CONFIG_TRACER_MAX_TRACE */
6134
/*
 * Resize the ring buffer of @tr to @size for @cpu (which may be
 * RING_BUFFER_ALL_CPUS) and record the new size in the per-CPU entries.
 *
 * With CONFIG_TRACER_MAX_TRACE the max buffer is resized too, but only
 * when @tr is the global array and its current tracer uses the max buffer.
 *
 * Calling this marks the ring buffer as expanded, even when the buffers
 * have not been allocated yet.
 *
 * The callers visible in this file all take trace_types_lock first.
 *
 * Returns 0 if the buffers are not allocated yet, otherwise the result of
 * the last ring_buffer_resize() call (negative on failure).
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
                                        unsigned long size, int cpu)
{
        int ret;

        /*
         * If kernel or user changes the size of the ring buffer
         * we use the size that was given, and we can forget about
         * expanding it later.
         */
        ring_buffer_expanded = true;

        /* May be called before buffers are initialized */
        if (!tr->array_buffer.buffer)
                return 0;

        ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
        if (ret < 0)
                return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
        /* Only the global array with a max_tr tracer keeps a max buffer in sync */
        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
            !tr->current_trace->use_max_tr)
                goto out;

        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
        if (ret < 0) {
                /*
                 * The per-CPU entries of the main buffer have not been
                 * updated yet and still hold the previous size, so using
                 * the main buffer as its own size reference resizes it
                 * back to what it was.
                 */
                int r = resize_buffer_duplicate_size(&tr->array_buffer,
                                                     &tr->array_buffer, cpu);
                if (r < 0) {
                        /*
                         * AARGH! We are left with different
                         * size max buffer!!!!
                         * The max buffer is our "snapshot" buffer.
                         * When a tracer needs a snapshot (one of the
                         * latency tracers), it swaps the max buffer
                         * with the saved snap shot. We succeeded to
                         * update the size of the main buffer, but failed to
                         * update the size of the max buffer. But when we tried
                         * to reset the main buffer to the original size, we
                         * failed there too. This is very unlikely to
                         * happen, but if it does, warn and kill all
                         * tracing.
                         */
                        WARN_ON(1);
                        tracing_disabled = 1;
                }
                /* Report the max buffer failure, not the rollback result */
                return ret;
        }

        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->max_buffer, size);
        else
                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

        /* All resizes succeeded: remember the new size of the main buffer */
        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->array_buffer, size);
        else
                per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;

        return ret;
}
6200
6201 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6202                                   unsigned long size, int cpu_id)
6203 {
6204         int ret;
6205
6206         mutex_lock(&trace_types_lock);
6207
6208         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6209                 /* make sure, this cpu is enabled in the mask */
6210                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6211                         ret = -EINVAL;
6212                         goto out;
6213                 }
6214         }
6215
6216         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6217         if (ret < 0)
6218                 ret = -ENOMEM;
6219
6220 out:
6221         mutex_unlock(&trace_types_lock);
6222
6223         return ret;
6224 }
6225
6226
6227 /**
6228  * tracing_update_buffers - used by tracing facility to expand ring buffers
6229  *
6230  * To save on memory when the tracing is never used on a system with it
6231  * configured in. The ring buffers are set to a minimum size. But once
6232  * a user starts to use the tracing facility, then they need to grow
6233  * to their default size.
6234  *
6235  * This function is to be called when a tracer is about to be used.
6236  */
6237 int tracing_update_buffers(void)
6238 {
6239         int ret = 0;
6240
6241         mutex_lock(&trace_types_lock);
6242         if (!ring_buffer_expanded)
6243                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6244                                                 RING_BUFFER_ALL_CPUS);
6245         mutex_unlock(&trace_types_lock);
6246
6247         return ret;
6248 }
6249
6250 struct trace_option_dentry;
6251
6252 static void
6253 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6254
6255 /*
6256  * Used to clear out the tracer before deletion of an instance.
6257  * Must have trace_types_lock held.
6258  */
6259 static void tracing_set_nop(struct trace_array *tr)
6260 {
6261         if (tr->current_trace == &nop_trace)
6262                 return;
6263         
6264         tr->current_trace->enabled--;
6265
6266         if (tr->current_trace->reset)
6267                 tr->current_trace->reset(tr);
6268
6269         tr->current_trace = &nop_trace;
6270 }
6271
6272 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6273 {
6274         /* Only enable if the directory has been created already. */
6275         if (!tr->dir)
6276                 return;
6277
6278         create_trace_option_files(tr, t);
6279 }
6280
6281 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6282 {
6283         struct tracer *t;
6284 #ifdef CONFIG_TRACER_MAX_TRACE
6285         bool had_max_tr;
6286 #endif
6287         int ret = 0;
6288
6289         mutex_lock(&trace_types_lock);
6290
6291         if (!ring_buffer_expanded) {
6292                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6293                                                 RING_BUFFER_ALL_CPUS);
6294                 if (ret < 0)
6295                         goto out;
6296                 ret = 0;
6297         }
6298
6299         for (t = trace_types; t; t = t->next) {
6300                 if (strcmp(t->name, buf) == 0)
6301                         break;
6302         }
6303         if (!t) {
6304                 ret = -EINVAL;
6305                 goto out;
6306         }
6307         if (t == tr->current_trace)
6308                 goto out;
6309
6310 #ifdef CONFIG_TRACER_SNAPSHOT
6311         if (t->use_max_tr) {
6312                 arch_spin_lock(&tr->max_lock);
6313                 if (tr->cond_snapshot)
6314                         ret = -EBUSY;
6315                 arch_spin_unlock(&tr->max_lock);
6316                 if (ret)
6317                         goto out;
6318         }
6319 #endif
6320         /* Some tracers won't work on kernel command line */
6321         if (system_state < SYSTEM_RUNNING && t->noboot) {
6322                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6323                         t->name);
6324                 goto out;
6325         }
6326
6327         /* Some tracers are only allowed for the top level buffer */
6328         if (!trace_ok_for_array(t, tr)) {
6329                 ret = -EINVAL;
6330                 goto out;
6331         }
6332
6333         /* If trace pipe files are being read, we can't change the tracer */
6334         if (tr->trace_ref) {
6335                 ret = -EBUSY;
6336                 goto out;
6337         }
6338
6339         trace_branch_disable();
6340
6341         tr->current_trace->enabled--;
6342
6343         if (tr->current_trace->reset)
6344                 tr->current_trace->reset(tr);
6345
6346         /* Current trace needs to be nop_trace before synchronize_rcu */
6347         tr->current_trace = &nop_trace;
6348
6349 #ifdef CONFIG_TRACER_MAX_TRACE
6350         had_max_tr = tr->allocated_snapshot;
6351
6352         if (had_max_tr && !t->use_max_tr) {
6353                 /*
6354                  * We need to make sure that the update_max_tr sees that
6355                  * current_trace changed to nop_trace to keep it from
6356                  * swapping the buffers after we resize it.
6357                  * The update_max_tr is called from interrupts disabled
6358                  * so a synchronized_sched() is sufficient.
6359                  */
6360                 synchronize_rcu();
6361                 free_snapshot(tr);
6362         }
6363 #endif
6364
6365 #ifdef CONFIG_TRACER_MAX_TRACE
6366         if (t->use_max_tr && !had_max_tr) {
6367                 ret = tracing_alloc_snapshot_instance(tr);
6368                 if (ret < 0)
6369                         goto out;
6370         }
6371 #endif
6372
6373         if (t->init) {
6374                 ret = tracer_init(t, tr);
6375                 if (ret)
6376                         goto out;
6377         }
6378
6379         tr->current_trace = t;
6380         tr->current_trace->enabled++;
6381         trace_branch_enable(tr);
6382  out:
6383         mutex_unlock(&trace_types_lock);
6384
6385         return ret;
6386 }
6387
6388 static ssize_t
6389 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6390                         size_t cnt, loff_t *ppos)
6391 {
6392         struct trace_array *tr = filp->private_data;
6393         char buf[MAX_TRACER_SIZE+1];
6394         int i;
6395         size_t ret;
6396         int err;
6397
6398         ret = cnt;
6399
6400         if (cnt > MAX_TRACER_SIZE)
6401                 cnt = MAX_TRACER_SIZE;
6402
6403         if (copy_from_user(buf, ubuf, cnt))
6404                 return -EFAULT;
6405
6406         buf[cnt] = 0;
6407
6408         /* strip ending whitespace. */
6409         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6410                 buf[i] = 0;
6411
6412         err = tracing_set_tracer(tr, buf);
6413         if (err)
6414                 return err;
6415
6416         *ppos += ret;
6417
6418         return ret;
6419 }
6420
6421 static ssize_t
6422 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6423                    size_t cnt, loff_t *ppos)
6424 {
6425         char buf[64];
6426         int r;
6427
6428         r = snprintf(buf, sizeof(buf), "%ld\n",
6429                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6430         if (r > sizeof(buf))
6431                 r = sizeof(buf);
6432         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6433 }
6434
6435 static ssize_t
6436 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6437                     size_t cnt, loff_t *ppos)
6438 {
6439         unsigned long val;
6440         int ret;
6441
6442         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6443         if (ret)
6444                 return ret;
6445
6446         *ptr = val * 1000;
6447
6448         return cnt;
6449 }
6450
6451 static ssize_t
6452 tracing_thresh_read(struct file *filp, char __user *ubuf,
6453                     size_t cnt, loff_t *ppos)
6454 {
6455         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6456 }
6457
6458 static ssize_t
6459 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6460                      size_t cnt, loff_t *ppos)
6461 {
6462         struct trace_array *tr = filp->private_data;
6463         int ret;
6464
6465         mutex_lock(&trace_types_lock);
6466         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6467         if (ret < 0)
6468                 goto out;
6469
6470         if (tr->current_trace->update_thresh) {
6471                 ret = tr->current_trace->update_thresh(tr);
6472                 if (ret < 0)
6473                         goto out;
6474         }
6475
6476         ret = cnt;
6477 out:
6478         mutex_unlock(&trace_types_lock);
6479
6480         return ret;
6481 }
6482
6483 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6484
6485 static ssize_t
6486 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6487                      size_t cnt, loff_t *ppos)
6488 {
6489         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6490 }
6491
6492 static ssize_t
6493 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6494                       size_t cnt, loff_t *ppos)
6495 {
6496         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6497 }
6498
6499 #endif
6500
6501 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6502 {
6503         struct trace_array *tr = inode->i_private;
6504         struct trace_iterator *iter;
6505         int ret;
6506
6507         ret = tracing_check_open_get_tr(tr);
6508         if (ret)
6509                 return ret;
6510
6511         mutex_lock(&trace_types_lock);
6512
6513         /* create a buffer to store the information to pass to userspace */
6514         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6515         if (!iter) {
6516                 ret = -ENOMEM;
6517                 __trace_array_put(tr);
6518                 goto out;
6519         }
6520
6521         trace_seq_init(&iter->seq);
6522         iter->trace = tr->current_trace;
6523
6524         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6525                 ret = -ENOMEM;
6526                 goto fail;
6527         }
6528
6529         /* trace pipe does not show start of buffer */
6530         cpumask_setall(iter->started);
6531
6532         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6533                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6534
6535         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6536         if (trace_clocks[tr->clock_id].in_ns)
6537                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6538
6539         iter->tr = tr;
6540         iter->array_buffer = &tr->array_buffer;
6541         iter->cpu_file = tracing_get_cpu(inode);
6542         mutex_init(&iter->mutex);
6543         filp->private_data = iter;
6544
6545         if (iter->trace->pipe_open)
6546                 iter->trace->pipe_open(iter);
6547
6548         nonseekable_open(inode, filp);
6549
6550         tr->trace_ref++;
6551 out:
6552         mutex_unlock(&trace_types_lock);
6553         return ret;
6554
6555 fail:
6556         kfree(iter);
6557         __trace_array_put(tr);
6558         mutex_unlock(&trace_types_lock);
6559         return ret;
6560 }
6561
6562 static int tracing_release_pipe(struct inode *inode, struct file *file)
6563 {
6564         struct trace_iterator *iter = file->private_data;
6565         struct trace_array *tr = inode->i_private;
6566
6567         mutex_lock(&trace_types_lock);
6568
6569         tr->trace_ref--;
6570
6571         if (iter->trace->pipe_close)
6572                 iter->trace->pipe_close(iter);
6573
6574         mutex_unlock(&trace_types_lock);
6575
6576         free_cpumask_var(iter->started);
6577         mutex_destroy(&iter->mutex);
6578         kfree(iter);
6579
6580         trace_array_put(tr);
6581
6582         return 0;
6583 }
6584
6585 static __poll_t
6586 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6587 {
6588         struct trace_array *tr = iter->tr;
6589
6590         /* Iterators are static, they should be filled or empty */
6591         if (trace_buffer_iter(iter, iter->cpu_file))
6592                 return EPOLLIN | EPOLLRDNORM;
6593
6594         if (tr->trace_flags & TRACE_ITER_BLOCK)
6595                 /*
6596                  * Always select as readable when in blocking mode
6597                  */
6598                 return EPOLLIN | EPOLLRDNORM;
6599         else
6600                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6601                                              filp, poll_table);
6602 }
6603
6604 static __poll_t
6605 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6606 {
6607         struct trace_iterator *iter = filp->private_data;
6608
6609         return trace_poll(iter, filp, poll_table);
6610 }
6611
6612 /* Must be called with iter->mutex held. */
6613 static int tracing_wait_pipe(struct file *filp)
6614 {
6615         struct trace_iterator *iter = filp->private_data;
6616         int ret;
6617
6618         while (trace_empty(iter)) {
6619
6620                 if ((filp->f_flags & O_NONBLOCK)) {
6621                         return -EAGAIN;
6622                 }
6623
6624                 /*
6625                  * We block until we read something and tracing is disabled.
6626                  * We still block if tracing is disabled, but we have never
6627                  * read anything. This allows a user to cat this file, and
6628                  * then enable tracing. But after we have read something,
6629                  * we give an EOF when tracing is again disabled.
6630                  *
6631                  * iter->pos will be 0 if we haven't read anything.
6632                  */
6633                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6634                         break;
6635
6636                 mutex_unlock(&iter->mutex);
6637
6638                 ret = wait_on_pipe(iter, 0);
6639
6640                 mutex_lock(&iter->mutex);
6641
6642                 if (ret)
6643                         return ret;
6644         }
6645
6646         return 1;
6647 }
6648
/*
 * Consumer reader.
 *
 * Read handler of the trace pipe. Text left over in iter->seq from an
 * earlier call is handed out first. Otherwise the function waits for
 * entries, formats as many complete lines as fit into iter->seq (stopping
 * once @cnt bytes are reached), consumes the printed entries from the ring
 * buffer and copies the text to @ubuf.
 *
 * Returns the result of trace_seq_to_user(), a non-zero result of the
 * tracer's own read callback, a result <= 0 from tracing_wait_pipe(), or
 * 0 once the buffer is empty after waiting.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
{
        struct trace_iterator *iter = filp->private_data;
        ssize_t sret;

        /*
         * Avoid more than one consumer on a single file descriptor
         * This is just a matter of traces coherency, the ring buffer itself
         * is protected.
         */
        mutex_lock(&iter->mutex);

        /* return any leftover data; -EBUSY means there was none */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        if (sret != -EBUSY)
                goto out;

        trace_seq_init(&iter->seq);

        /* A tracer may supply its own reader; a non-zero result ends the read */
        if (iter->trace->read) {
                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
                if (sret)
                        goto out;
        }

waitagain:
        sret = tracing_wait_pipe(filp);
        if (sret <= 0)
                goto out;

        /* stop when tracing is finished */
        if (trace_empty(iter)) {
                sret = 0;
                goto out;
        }

        /* Never hand out a full page or more in one call */
        if (cnt >= PAGE_SIZE)
                cnt = PAGE_SIZE - 1;

        /*
         * reset all but tr, trace, and overruns: everything from the seq
         * member to the end of the iterator is cleared
         */
        memset(&iter->seq, 0,
               sizeof(struct trace_iterator) -
               offsetof(struct trace_iterator, seq));
        cpumask_clear(iter->started);
        trace_seq_init(&iter->seq);
        iter->pos = -1;

        trace_event_read_lock();
        trace_access_lock(iter->cpu_file);
        while (trace_find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                /* Remember where this line starts so it can be dropped again */
                int save_len = iter->seq.seq.len;

                ret = print_trace_line(iter);
                if (ret == TRACE_TYPE_PARTIAL_LINE) {
                        /* don't print partial lines */
                        iter->seq.seq.len = save_len;
                        break;
                }
                if (ret != TRACE_TYPE_NO_CONSUME)
                        trace_consume(iter);

                /* Enough text to satisfy the request */
                if (trace_seq_used(&iter->seq) >= cnt)
                        break;

                /*
                 * Setting the full flag means we reached the trace_seq buffer
                 * size and we should leave by partial output condition above.
                 * One of the trace_seq_* functions is not used properly.
                 */
                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
                          iter->ent->type);
        }
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();

        /* Now copy what we have to the user */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        /* Everything was read out, start with an empty seq next time */
        if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
                trace_seq_init(&iter->seq);

        /*
         * If there was nothing to send to user, in spite of consuming trace
         * entries, go back to wait for more entries.
         */
        if (sret == -EBUSY)
                goto waitagain;

out:
        mutex_unlock(&iter->mutex);

        return sret;
}
6747
6748 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6749                                      unsigned int idx)
6750 {
6751         __free_page(spd->pages[idx]);
6752 }
6753
/*
 * Format trace entries into iter->seq, starting with the entry @iter
 * currently points at, for as long as whole lines fit.
 *
 * @rem is the number of bytes the caller still wants. The function
 * returns what is left of that budget: it is set to 0 when the next line
 * would exceed it or when there are no more entries, and it is returned
 * unchanged (minus the lines already added) when the seq buffer is full.
 * A line that does not fit is removed from the seq again and its entry is
 * not consumed.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
        size_t count;
        int save_len;
        int ret;

        /* Seq buffer is page-sized, exactly what we need. */
        for (;;) {
                /* Start of the line about to be printed, for rollback */
                save_len = iter->seq.seq.len;
                ret = print_trace_line(iter);

                if (trace_seq_has_overflowed(&iter->seq)) {
                        iter->seq.seq.len = save_len;
                        break;
                }

                /*
                 * This should not be hit, because it should only
                 * be set if the iter->seq overflowed. But check it
                 * anyway to be safe.
                 */
                if (ret == TRACE_TYPE_PARTIAL_LINE) {
                        iter->seq.seq.len = save_len;
                        break;
                }

                /* Length of the line that was just added */
                count = trace_seq_used(&iter->seq) - save_len;
                if (rem < count) {
                        rem = 0;
                        iter->seq.seq.len = save_len;
                        break;
                }

                if (ret != TRACE_TYPE_NO_CONSUME)
                        trace_consume(iter);
                rem -= count;
                /* Advance to the next entry; stop when there is none */
                if (!trace_find_next_entry_inc(iter))   {
                        rem = 0;
                        iter->ent = NULL;
                        break;
                }
        }

        return rem;
}
6800
/*
 * splice_read handler of the trace pipe: format pending trace entries one
 * page at a time and splice the filled pages into @pipe.
 *
 * Returns -ENOMEM if splice_grow_spd() fails, a non-zero result of the
 * tracer's own splice_read callback, a result <= 0 from
 * tracing_wait_pipe(), -EFAULT if no entry can be found after waiting,
 * 0 if no page was filled, and the result of splice_to_pipe() otherwise.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
                                        loff_t *ppos,
                                        struct pipe_inode_info *pipe,
                                        size_t len,
                                        unsigned int flags)
{
        struct page *pages_def[PIPE_DEF_BUFFERS];
        struct partial_page partial_def[PIPE_DEF_BUFFERS];
        struct trace_iterator *iter = filp->private_data;
        struct splice_pipe_desc spd = {
                .pages          = pages_def,
                .partial        = partial_def,
                .nr_pages       = 0, /* This gets updated below. */
                .nr_pages_max   = PIPE_DEF_BUFFERS,
                .ops            = &default_pipe_buf_ops,
                .spd_release    = tracing_spd_release_pipe,
        };
        ssize_t ret;
        size_t rem;
        unsigned int i;

        if (splice_grow_spd(pipe, &spd))
                return -ENOMEM;

        mutex_lock(&iter->mutex);

        /* A tracer may supply its own splice reader; non-zero ends the call */
        if (iter->trace->splice_read) {
                ret = iter->trace->splice_read(iter, filp,
                                               ppos, pipe, len, flags);
                if (ret)
                        goto out_err;
        }

        ret = tracing_wait_pipe(filp);
        if (ret <= 0)
                goto out_err;

        /* Make sure the iterator points at an entry before filling pages */
        if (!iter->ent && !trace_find_next_entry_inc(iter)) {
                ret = -EFAULT;
                goto out_err;
        }

        trace_event_read_lock();
        trace_access_lock(iter->cpu_file);

        /* Fill as many pages as possible. */
        for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
                spd.pages[i] = alloc_page(GFP_KERNEL);
                if (!spd.pages[i])
                        break;

                rem = tracing_fill_pipe_page(rem, iter);

                /* Copy the data into the page, so we can start over. */
                ret = trace_seq_to_buffer(&iter->seq,
                                          page_address(spd.pages[i]),
                                          trace_seq_used(&iter->seq));
                if (ret < 0) {
                        /* This page is not counted in i, so free it here */
                        __free_page(spd.pages[i]);
                        break;
                }
                spd.partial[i].offset = 0;
                spd.partial[i].len = trace_seq_used(&iter->seq);

                trace_seq_init(&iter->seq);
        }

        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
        mutex_unlock(&iter->mutex);

        /* i is the number of pages that were filled successfully */
        spd.nr_pages = i;

        if (i)
                ret = splice_to_pipe(pipe, &spd);
        else
                ret = 0;
out:
        splice_shrink_spd(&spd);
        return ret;

out_err:
        /* Error before the fill loop: only iter->mutex is held here */
        mutex_unlock(&iter->mutex);
        goto out;
}
6886
6887 static ssize_t
6888 tracing_entries_read(struct file *filp, char __user *ubuf,
6889                      size_t cnt, loff_t *ppos)
6890 {
6891         struct inode *inode = file_inode(filp);
6892         struct trace_array *tr = inode->i_private;
6893         int cpu = tracing_get_cpu(inode);
6894         char buf[64];
6895         int r = 0;
6896         ssize_t ret;
6897
6898         mutex_lock(&trace_types_lock);
6899
6900         if (cpu == RING_BUFFER_ALL_CPUS) {
6901                 int cpu, buf_size_same;
6902                 unsigned long size;
6903
6904                 size = 0;
6905                 buf_size_same = 1;
6906                 /* check if all cpu sizes are same */
6907                 for_each_tracing_cpu(cpu) {
6908                         /* fill in the size from first enabled cpu */
6909                         if (size == 0)
6910                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6911                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6912                                 buf_size_same = 0;
6913                                 break;
6914                         }
6915                 }
6916
6917                 if (buf_size_same) {
6918                         if (!ring_buffer_expanded)
6919                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6920                                             size >> 10,
6921                                             trace_buf_size >> 10);
6922                         else
6923                                 r = sprintf(buf, "%lu\n", size >> 10);
6924                 } else
6925                         r = sprintf(buf, "X\n");
6926         } else
6927                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6928
6929         mutex_unlock(&trace_types_lock);
6930
6931         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6932         return ret;
6933 }
6934
6935 static ssize_t
6936 tracing_entries_write(struct file *filp, const char __user *ubuf,
6937                       size_t cnt, loff_t *ppos)
6938 {
6939         struct inode *inode = file_inode(filp);
6940         struct trace_array *tr = inode->i_private;
6941         unsigned long val;
6942         int ret;
6943
6944         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6945         if (ret)
6946                 return ret;
6947
6948         /* must have at least 1 entry */
6949         if (!val)
6950                 return -EINVAL;
6951
6952         /* value is in KB */
6953         val <<= 10;
6954         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6955         if (ret < 0)
6956                 return ret;
6957
6958         *ppos += cnt;
6959
6960         return cnt;
6961 }
6962
6963 static ssize_t
6964 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6965                                 size_t cnt, loff_t *ppos)
6966 {
6967         struct trace_array *tr = filp->private_data;
6968         char buf[64];
6969         int r, cpu;
6970         unsigned long size = 0, expanded_size = 0;
6971
6972         mutex_lock(&trace_types_lock);
6973         for_each_tracing_cpu(cpu) {
6974                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6975                 if (!ring_buffer_expanded)
6976                         expanded_size += trace_buf_size >> 10;
6977         }
6978         if (ring_buffer_expanded)
6979                 r = sprintf(buf, "%lu\n", size);
6980         else
6981                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6982         mutex_unlock(&trace_types_lock);
6983
6984         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6985 }
6986
6987 static ssize_t
6988 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6989                           size_t cnt, loff_t *ppos)
6990 {
6991         /*
6992          * There is no need to read what the user has written, this function
6993          * is just to make sure that there is no error when "echo" is used
6994          */
6995
6996         *ppos += cnt;
6997
6998         return cnt;
6999 }
7000
7001 static int
7002 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7003 {
7004         struct trace_array *tr = inode->i_private;
7005
7006         /* disable tracing ? */
7007         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7008                 tracer_tracing_off(tr);
7009         /* resize the ring buffer to 0 */
7010         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7011
7012         trace_array_put(tr);
7013
7014         return 0;
7015 }
7016
7017 static ssize_t
7018 tracing_mark_write(struct file *filp, const char __user *ubuf,
7019                                         size_t cnt, loff_t *fpos)
7020 {
7021         struct trace_array *tr = filp->private_data;
7022         struct ring_buffer_event *event;
7023         enum event_trigger_type tt = ETT_NONE;
7024         struct trace_buffer *buffer;
7025         struct print_entry *entry;
7026         ssize_t written;
7027         int size;
7028         int len;
7029
7030 /* Used in tracing_mark_raw_write() as well */
7031 #define FAULTED_STR "<faulted>"
7032 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7033
7034         if (tracing_disabled)
7035                 return -EINVAL;
7036
7037         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7038                 return -EINVAL;
7039
7040         if (cnt > TRACE_BUF_SIZE)
7041                 cnt = TRACE_BUF_SIZE;
7042
7043         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7044
7045         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7046
7047         /* If less than "<faulted>", then make sure we can still add that */
7048         if (cnt < FAULTED_SIZE)
7049                 size += FAULTED_SIZE - cnt;
7050
7051         buffer = tr->array_buffer.buffer;
7052         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7053                                             tracing_gen_ctx());
7054         if (unlikely(!event))
7055                 /* Ring buffer disabled, return as if not open for write */
7056                 return -EBADF;
7057
7058         entry = ring_buffer_event_data(event);
7059         entry->ip = _THIS_IP_;
7060
7061         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7062         if (len) {
7063                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7064                 cnt = FAULTED_SIZE;
7065                 written = -EFAULT;
7066         } else
7067                 written = cnt;
7068
7069         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7070                 /* do not add \n before testing triggers, but add \0 */
7071                 entry->buf[cnt] = '\0';
7072                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7073         }
7074
7075         if (entry->buf[cnt - 1] != '\n') {
7076                 entry->buf[cnt] = '\n';
7077                 entry->buf[cnt + 1] = '\0';
7078         } else
7079                 entry->buf[cnt] = '\0';
7080
7081         if (static_branch_unlikely(&trace_marker_exports_enabled))
7082                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7083         __buffer_unlock_commit(buffer, event);
7084
7085         if (tt)
7086                 event_triggers_post_call(tr->trace_marker_file, tt);
7087
7088         if (written > 0)
7089                 *fpos += written;
7090
7091         return written;
7092 }
7093
7094 /* Limit it for now to 3K (including tag) */
7095 #define RAW_DATA_MAX_SIZE (1024*3)
7096
7097 static ssize_t
7098 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7099                                         size_t cnt, loff_t *fpos)
7100 {
7101         struct trace_array *tr = filp->private_data;
7102         struct ring_buffer_event *event;
7103         struct trace_buffer *buffer;
7104         struct raw_data_entry *entry;
7105         ssize_t written;
7106         int size;
7107         int len;
7108
7109 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7110
7111         if (tracing_disabled)
7112                 return -EINVAL;
7113
7114         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7115                 return -EINVAL;
7116
7117         /* The marker must at least have a tag id */
7118         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7119                 return -EINVAL;
7120
7121         if (cnt > TRACE_BUF_SIZE)
7122                 cnt = TRACE_BUF_SIZE;
7123
7124         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7125
7126         size = sizeof(*entry) + cnt;
7127         if (cnt < FAULT_SIZE_ID)
7128                 size += FAULT_SIZE_ID - cnt;
7129
7130         buffer = tr->array_buffer.buffer;
7131         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7132                                             tracing_gen_ctx());
7133         if (!event)
7134                 /* Ring buffer disabled, return as if not open for write */
7135                 return -EBADF;
7136
7137         entry = ring_buffer_event_data(event);
7138
7139         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7140         if (len) {
7141                 entry->id = -1;
7142                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7143                 written = -EFAULT;
7144         } else
7145                 written = cnt;
7146
7147         __buffer_unlock_commit(buffer, event);
7148
7149         if (written > 0)
7150                 *fpos += written;
7151
7152         return written;
7153 }
7154
7155 static int tracing_clock_show(struct seq_file *m, void *v)
7156 {
7157         struct trace_array *tr = m->private;
7158         int i;
7159
7160         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7161                 seq_printf(m,
7162                         "%s%s%s%s", i ? " " : "",
7163                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7164                         i == tr->clock_id ? "]" : "");
7165         seq_putc(m, '\n');
7166
7167         return 0;
7168 }
7169
7170 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7171 {
7172         int i;
7173
7174         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7175                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7176                         break;
7177         }
7178         if (i == ARRAY_SIZE(trace_clocks))
7179                 return -EINVAL;
7180
7181         mutex_lock(&trace_types_lock);
7182
7183         tr->clock_id = i;
7184
7185         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7186
7187         /*
7188          * New clock may not be consistent with the previous clock.
7189          * Reset the buffer so that it doesn't have incomparable timestamps.
7190          */
7191         tracing_reset_online_cpus(&tr->array_buffer);
7192
7193 #ifdef CONFIG_TRACER_MAX_TRACE
7194         if (tr->max_buffer.buffer)
7195                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7196         tracing_reset_online_cpus(&tr->max_buffer);
7197 #endif
7198
7199         mutex_unlock(&trace_types_lock);
7200
7201         return 0;
7202 }
7203
7204 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7205                                    size_t cnt, loff_t *fpos)
7206 {
7207         struct seq_file *m = filp->private_data;
7208         struct trace_array *tr = m->private;
7209         char buf[64];
7210         const char *clockstr;
7211         int ret;
7212
7213         if (cnt >= sizeof(buf))
7214                 return -EINVAL;
7215
7216         if (copy_from_user(buf, ubuf, cnt))
7217                 return -EFAULT;
7218
7219         buf[cnt] = 0;
7220
7221         clockstr = strstrip(buf);
7222
7223         ret = tracing_set_clock(tr, clockstr);
7224         if (ret)
7225                 return ret;
7226
7227         *fpos += cnt;
7228
7229         return cnt;
7230 }
7231
7232 static int tracing_clock_open(struct inode *inode, struct file *file)
7233 {
7234         struct trace_array *tr = inode->i_private;
7235         int ret;
7236
7237         ret = tracing_check_open_get_tr(tr);
7238         if (ret)
7239                 return ret;
7240
7241         ret = single_open(file, tracing_clock_show, inode->i_private);
7242         if (ret < 0)
7243                 trace_array_put(tr);
7244
7245         return ret;
7246 }
7247
7248 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7249 {
7250         struct trace_array *tr = m->private;
7251
7252         mutex_lock(&trace_types_lock);
7253
7254         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7255                 seq_puts(m, "delta [absolute]\n");
7256         else
7257                 seq_puts(m, "[delta] absolute\n");
7258
7259         mutex_unlock(&trace_types_lock);
7260
7261         return 0;
7262 }
7263
7264 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7265 {
7266         struct trace_array *tr = inode->i_private;
7267         int ret;
7268
7269         ret = tracing_check_open_get_tr(tr);
7270         if (ret)
7271                 return ret;
7272
7273         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7274         if (ret < 0)
7275                 trace_array_put(tr);
7276
7277         return ret;
7278 }
7279
7280 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7281 {
7282         if (rbe == this_cpu_read(trace_buffered_event))
7283                 return ring_buffer_time_stamp(buffer);
7284
7285         return ring_buffer_event_time_stamp(buffer, rbe);
7286 }
7287
7288 /*
7289  * Set or disable using the per CPU trace_buffer_event when possible.
7290  */
7291 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7292 {
7293         int ret = 0;
7294
7295         mutex_lock(&trace_types_lock);
7296
7297         if (set && tr->no_filter_buffering_ref++)
7298                 goto out;
7299
7300         if (!set) {
7301                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7302                         ret = -EINVAL;
7303                         goto out;
7304                 }
7305
7306                 --tr->no_filter_buffering_ref;
7307         }
7308  out:
7309         mutex_unlock(&trace_types_lock);
7310
7311         return ret;
7312 }
7313
/*
 * Per-open state of the raw ring buffer files; stored in
 * filp->private_data by tracing_buffers_open().
 */
struct ftrace_buffer_info {
        struct trace_iterator   iter;           /* trace array, CPU and buffer being read */
        void                    *spare;         /* page from ring_buffer_alloc_read_page(), NULL until first read */
        unsigned int            spare_cpu;      /* iter.cpu_file value @spare was allocated for */
        unsigned int            read;           /* set to (unsigned int)-1 at open to force a ring buffer read */
};
7320
7321 #ifdef CONFIG_TRACER_SNAPSHOT
7322 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7323 {
7324         struct trace_array *tr = inode->i_private;
7325         struct trace_iterator *iter;
7326         struct seq_file *m;
7327         int ret;
7328
7329         ret = tracing_check_open_get_tr(tr);
7330         if (ret)
7331                 return ret;
7332
7333         if (file->f_mode & FMODE_READ) {
7334                 iter = __tracing_open(inode, file, true);
7335                 if (IS_ERR(iter))
7336                         ret = PTR_ERR(iter);
7337         } else {
7338                 /* Writes still need the seq_file to hold the private data */
7339                 ret = -ENOMEM;
7340                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7341                 if (!m)
7342                         goto out;
7343                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7344                 if (!iter) {
7345                         kfree(m);
7346                         goto out;
7347                 }
7348                 ret = 0;
7349
7350                 iter->tr = tr;
7351                 iter->array_buffer = &tr->max_buffer;
7352                 iter->cpu_file = tracing_get_cpu(inode);
7353                 m->private = iter;
7354                 file->private_data = m;
7355         }
7356 out:
7357         if (ret < 0)
7358                 trace_array_put(tr);
7359
7360         return ret;
7361 }
7362
7363 static ssize_t
7364 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7365                        loff_t *ppos)
7366 {
7367         struct seq_file *m = filp->private_data;
7368         struct trace_iterator *iter = m->private;
7369         struct trace_array *tr = iter->tr;
7370         unsigned long val;
7371         int ret;
7372
7373         ret = tracing_update_buffers();
7374         if (ret < 0)
7375                 return ret;
7376
7377         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7378         if (ret)
7379                 return ret;
7380
7381         mutex_lock(&trace_types_lock);
7382
7383         if (tr->current_trace->use_max_tr) {
7384                 ret = -EBUSY;
7385                 goto out;
7386         }
7387
7388         arch_spin_lock(&tr->max_lock);
7389         if (tr->cond_snapshot)
7390                 ret = -EBUSY;
7391         arch_spin_unlock(&tr->max_lock);
7392         if (ret)
7393                 goto out;
7394
7395         switch (val) {
7396         case 0:
7397                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7398                         ret = -EINVAL;
7399                         break;
7400                 }
7401                 if (tr->allocated_snapshot)
7402                         free_snapshot(tr);
7403                 break;
7404         case 1:
7405 /* Only allow per-cpu swap if the ring buffer supports it */
7406 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7407                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7408                         ret = -EINVAL;
7409                         break;
7410                 }
7411 #endif
7412                 if (tr->allocated_snapshot)
7413                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7414                                         &tr->array_buffer, iter->cpu_file);
7415                 else
7416                         ret = tracing_alloc_snapshot_instance(tr);
7417                 if (ret < 0)
7418                         break;
7419                 local_irq_disable();
7420                 /* Now, we're going to swap */
7421                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7422                         update_max_tr(tr, current, smp_processor_id(), NULL);
7423                 else
7424                         update_max_tr_single(tr, current, iter->cpu_file);
7425                 local_irq_enable();
7426                 break;
7427         default:
7428                 if (tr->allocated_snapshot) {
7429                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7430                                 tracing_reset_online_cpus(&tr->max_buffer);
7431                         else
7432                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7433                 }
7434                 break;
7435         }
7436
7437         if (ret >= 0) {
7438                 *ppos += cnt;
7439                 ret = cnt;
7440         }
7441 out:
7442         mutex_unlock(&trace_types_lock);
7443         return ret;
7444 }
7445
7446 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7447 {
7448         struct seq_file *m = file->private_data;
7449         int ret;
7450
7451         ret = tracing_release(inode, file);
7452
7453         if (file->f_mode & FMODE_READ)
7454                 return ret;
7455
7456         /* If write only, the seq_file is just a stub */
7457         if (m)
7458                 kfree(m->private);
7459         kfree(m);
7460
7461         return 0;
7462 }
7463
7464 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7465 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7466                                     size_t count, loff_t *ppos);
7467 static int tracing_buffers_release(struct inode *inode, struct file *file);
7468 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7469                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7470
7471 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7472 {
7473         struct ftrace_buffer_info *info;
7474         int ret;
7475
7476         /* The following checks for tracefs lockdown */
7477         ret = tracing_buffers_open(inode, filp);
7478         if (ret < 0)
7479                 return ret;
7480
7481         info = filp->private_data;
7482
7483         if (info->iter.trace->use_max_tr) {
7484                 tracing_buffers_release(inode, filp);
7485                 return -EBUSY;
7486         }
7487
7488         info->iter.snapshot = true;
7489         info->iter.array_buffer = &info->iter.tr->max_buffer;
7490
7491         return ret;
7492 }
7493
7494 #endif /* CONFIG_TRACER_SNAPSHOT */
7495
7496
/* File operations built around tracing_thresh_read()/tracing_thresh_write() */
static const struct file_operations tracing_thresh_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_thresh_read,
        .write          = tracing_thresh_write,
        .llseek         = generic_file_llseek,
};
7503
7504 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
/* File operations built around tracing_max_lat_read()/tracing_max_lat_write() */
static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
        .write          = tracing_max_lat_write,
        .llseek         = generic_file_llseek,
};
7511 #endif
7512
/* File operations built around tracing_set_trace_read()/tracing_set_trace_write() */
static const struct file_operations set_tracer_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_set_trace_read,
        .write          = tracing_set_trace_write,
        .llseek         = generic_file_llseek,
};
7519
/*
 * File operations for the pipe-style trace reader: read-only (no .write),
 * pollable, splice capable and not seekable.
 */
static const struct file_operations tracing_pipe_fops = {
        .open           = tracing_open_pipe,
        .poll           = tracing_poll_pipe,
        .read           = tracing_read_pipe,
        .splice_read    = tracing_splice_read_pipe,
        .release        = tracing_release_pipe,
        .llseek         = no_llseek,
};
7528
/* File operations built around tracing_entries_read()/tracing_entries_write() */
static const struct file_operations tracing_entries_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_entries_read,
        .write          = tracing_entries_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
7536
/* Read-only file operations built around tracing_total_entries_read() */
static const struct file_operations tracing_total_entries_fops = {
        .open           = tracing_open_generic_tr,
        .read           = tracing_total_entries_read,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
7543
/*
 * Write-only file operations for the free_buffer file; the buffer is
 * actually resized in the release handler, the write handler only
 * accepts and discards the data.
 */
static const struct file_operations tracing_free_buffer_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_free_buffer_write,
        .release        = tracing_free_buffer_release,
};
7549
/* Write-only file operations for text markers, see tracing_mark_write() */
static const struct file_operations tracing_mark_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_mark_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
7556
/* Write-only file operations for binary markers, see tracing_mark_raw_write() */
static const struct file_operations tracing_mark_raw_fops = {
        .open           = tracing_open_generic_tr,
        .write          = tracing_mark_raw_write,
        .llseek         = generic_file_llseek,
        .release        = tracing_release_generic_tr,
};
7563
/*
 * File operations for the trace clock file: reads go through the
 * seq_file set up by tracing_clock_open(), writes select a new clock.
 */
static const struct file_operations trace_clock_fops = {
        .open           = tracing_clock_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = tracing_single_release_tr,
        .write          = tracing_clock_write,
};
7571
/*
 * Read-only file operations for the time stamp mode file, served by the
 * seq_file set up in tracing_time_stamp_mode_open().
 */
static const struct file_operations trace_time_stamp_mode_fops = {
        .open           = tracing_time_stamp_mode_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = tracing_single_release_tr,
};
7578
7579 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * File operations for the snapshot file: reads use seq_file, writes
 * allocate, swap, clear or free the snapshot buffer (see
 * tracing_snapshot_write()).
 */
static const struct file_operations snapshot_fops = {
        .open           = tracing_snapshot_open,
        .read           = seq_read,
        .write          = tracing_snapshot_write,
        .llseek         = tracing_lseek,
        .release        = tracing_snapshot_release,
};
7587
/*
 * Read-only, non-seekable file operations for the raw snapshot file;
 * shares the read, splice and release handlers of the raw buffer files.
 */
static const struct file_operations snapshot_raw_fops = {
        .open           = snapshot_raw_open,
        .read           = tracing_buffers_read,
        .release        = tracing_buffers_release,
        .splice_read    = tracing_buffers_splice_read,
        .llseek         = no_llseek,
};
7595
7596 #endif /* CONFIG_TRACER_SNAPSHOT */
7597
7598 /*
7599  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7600  * @filp: The active open file structure
7601  * @ubuf: The userspace provided buffer to read value into
7602  * @cnt: The maximum number of bytes to read
7603  * @ppos: The current "file" position
7604  *
7605  * This function implements the write interface for a struct trace_min_max_param.
7606  * The filp->private_data must point to a trace_min_max_param structure that
7607  * defines where to write the value, the min and the max acceptable values,
7608  * and a lock to protect the write.
7609  */
7610 static ssize_t
7611 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7612 {
7613         struct trace_min_max_param *param = filp->private_data;
7614         u64 val;
7615         int err;
7616
7617         if (!param)
7618                 return -EFAULT;
7619
7620         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7621         if (err)
7622                 return err;
7623
7624         if (param->lock)
7625                 mutex_lock(param->lock);
7626
7627         if (param->min && val < *param->min)
7628                 err = -EINVAL;
7629
7630         if (param->max && val > *param->max)
7631                 err = -EINVAL;
7632
7633         if (!err)
7634                 *param->val = val;
7635
7636         if (param->lock)
7637                 mutex_unlock(param->lock);
7638
7639         if (err)
7640                 return err;
7641
7642         return cnt;
7643 }
7644
7645 /*
7646  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7647  * @filp: The active open file structure
7648  * @ubuf: The userspace provided buffer to read value into
7649  * @cnt: The maximum number of bytes to read
7650  * @ppos: The current "file" position
7651  *
7652  * This function implements the read interface for a struct trace_min_max_param.
7653  * The filp->private_data must point to a trace_min_max_param struct with valid
7654  * data.
7655  */
7656 static ssize_t
7657 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7658 {
7659         struct trace_min_max_param *param = filp->private_data;
7660         char buf[U64_STR_SIZE];
7661         int len;
7662         u64 val;
7663
7664         if (!param)
7665                 return -EFAULT;
7666
7667         val = *param->val;
7668
7669         if (cnt > sizeof(buf))
7670                 cnt = sizeof(buf);
7671
7672         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7673
7674         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7675 }
7676
/*
 * File operations for files backed by a struct trace_min_max_param;
 * not static, so available outside this file.
 */
const struct file_operations trace_min_max_fops = {
        .open           = tracing_open_generic,
        .read           = trace_min_max_read,
        .write          = trace_min_max_write,
};
7682
/* Maximum number of entries kept in a trace array's error log */
#define TRACING_LOG_ERRS_MAX    8
/* Size of the buffer holding the "<loc>: error: " prefix of an entry */
#define TRACING_LOG_LOC_MAX     128

/* Printed in front of the offending command; also sets the caret indent */
#define CMD_PREFIX "  Command: "
7687
/* What went wrong and when, for one entry of the tracing error log */
struct err_info {
        const char      **errs; /* ptr to loc-specific array of err strings */
        u8              type;   /* index into errs -> specific err string */
        u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
        u64             ts;     /* local_clock() value taken when the error was logged */
};
7694
/* One entry of a trace array's error log, linked into tr->err_log */
struct tracing_log_err {
        struct list_head        list;
        struct err_info         info;
        char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
        char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
};
7701
/*
 * Held while an error log list or its entry count is modified and while
 * the log is walked by the seq_file iterator.
 */
static DEFINE_MUTEX(tracing_err_log_lock);
7703
7704 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7705 {
7706         struct tracing_log_err *err;
7707
7708         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7709                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7710                 if (!err)
7711                         err = ERR_PTR(-ENOMEM);
7712                 tr->n_err_log_entries++;
7713
7714                 return err;
7715         }
7716
7717         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7718         list_del(&err->list);
7719
7720         return err;
7721 }
7722
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Looks up the first occurrence of @str within @cmd.  The result is
 * meant to be handed to tracing_log_err() as the caret position.
 * An empty @cmd triggers a warning.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
        char *hit;

        if (WARN_ON(!strlen(cmd)))
                return 0;

        hit = strstr(cmd, str);

        return hit ? hit - cmd : 0;
}
7748
7749 /**
7750  * tracing_log_err - write an error to the tracing error log
7751  * @tr: The associated trace array for the error (NULL for top level array)
7752  * @loc: A string describing where the error occurred
7753  * @cmd: The tracing command that caused the error
7754  * @errs: The array of loc-specific static error strings
7755  * @type: The index into errs[], which produces the specific static err string
7756  * @pos: The position the caret should be placed in the cmd
7757  *
7758  * Writes an error into tracing/error_log of the form:
7759  *
7760  * <loc>: error: <text>
7761  *   Command: <cmd>
7762  *              ^
7763  *
7764  * tracing/error_log is a small log file containing the last
7765  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7766  * unless there has been a tracing error, and the error log can be
7767  * cleared and have its memory freed by writing the empty string in
7768  * truncation mode to it i.e. echo > tracing/error_log.
7769  *
7770  * NOTE: the @errs array along with the @type param are used to
7771  * produce a static error string - this string is not copied and saved
7772  * when the error is logged - only a pointer to it is saved.  See
7773  * existing callers for examples of how static strings are typically
7774  * defined for use with tracing_log_err().
7775  */
7776 void tracing_log_err(struct trace_array *tr,
7777                      const char *loc, const char *cmd,
7778                      const char **errs, u8 type, u8 pos)
7779 {
7780         struct tracing_log_err *err;
7781
7782         if (!tr)
7783                 tr = &global_trace;
7784
7785         mutex_lock(&tracing_err_log_lock);
7786         err = get_tracing_log_err(tr);
7787         if (PTR_ERR(err) == -ENOMEM) {
7788                 mutex_unlock(&tracing_err_log_lock);
7789                 return;
7790         }
7791
7792         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7793         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7794
7795         err->info.errs = errs;
7796         err->info.type = type;
7797         err->info.pos = pos;
7798         err->info.ts = local_clock();
7799
7800         list_add_tail(&err->list, &tr->err_log);
7801         mutex_unlock(&tracing_err_log_lock);
7802 }
7803
7804 static void clear_tracing_err_log(struct trace_array *tr)
7805 {
7806         struct tracing_log_err *err, *next;
7807
7808         mutex_lock(&tracing_err_log_lock);
7809         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7810                 list_del(&err->list);
7811                 kfree(err);
7812         }
7813
7814         tr->n_err_log_entries = 0;
7815         mutex_unlock(&tracing_err_log_lock);
7816 }
7817
7818 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7819 {
7820         struct trace_array *tr = m->private;
7821
7822         mutex_lock(&tracing_err_log_lock);
7823
7824         return seq_list_start(&tr->err_log, *pos);
7825 }
7826
7827 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7828 {
7829         struct trace_array *tr = m->private;
7830
7831         return seq_list_next(v, &tr->err_log, pos);
7832 }
7833
/*
 * seq_file stop callback for the error log: releases the lock taken in
 * tracing_err_log_seq_start().
 */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
        mutex_unlock(&tracing_err_log_lock);
}
7838
7839 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7840 {
7841         u8 i;
7842
7843         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7844                 seq_putc(m, ' ');
7845         for (i = 0; i < pos; i++)
7846                 seq_putc(m, ' ');
7847         seq_puts(m, "^\n");
7848 }
7849
7850 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7851 {
7852         struct tracing_log_err *err = v;
7853
7854         if (err) {
7855                 const char *err_text = err->info.errs[err->info.type];
7856                 u64 sec = err->info.ts;
7857                 u32 nsec;
7858
7859                 nsec = do_div(sec, NSEC_PER_SEC);
7860                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7861                            err->loc, err_text);
7862                 seq_printf(m, "%s", err->cmd);
7863                 tracing_err_log_show_pos(m, err->info.pos);
7864         }
7865
7866         return 0;
7867 }
7868
/* seq_file iterator over a trace array's error log */
static const struct seq_operations tracing_err_log_seq_ops = {
        .start  = tracing_err_log_seq_start,
        .next   = tracing_err_log_seq_next,
        .stop   = tracing_err_log_seq_stop,
        .show   = tracing_err_log_seq_show
};
7875
7876 static int tracing_err_log_open(struct inode *inode, struct file *file)
7877 {
7878         struct trace_array *tr = inode->i_private;
7879         int ret = 0;
7880
7881         ret = tracing_check_open_get_tr(tr);
7882         if (ret)
7883                 return ret;
7884
7885         /* If this file was opened for write, then erase contents */
7886         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7887                 clear_tracing_err_log(tr);
7888
7889         if (file->f_mode & FMODE_READ) {
7890                 ret = seq_open(file, &tracing_err_log_seq_ops);
7891                 if (!ret) {
7892                         struct seq_file *m = file->private_data;
7893                         m->private = tr;
7894                 } else {
7895                         trace_array_put(tr);
7896                 }
7897         }
7898         return ret;
7899 }
7900
/*
 * Write handler for the error log file.  The data is not examined and
 * the file position is not moved; the write is simply reported as
 * complete.  Clearing the log happens at open time, see
 * tracing_err_log_open().
 */
static ssize_t tracing_err_log_write(struct file *file,
                                     const char __user *buffer,
                                     size_t count, loff_t *ppos)
{
        return count;
}
7907
7908 static int tracing_err_log_release(struct inode *inode, struct file *file)
7909 {
7910         struct trace_array *tr = inode->i_private;
7911
7912         trace_array_put(tr);
7913
7914         if (file->f_mode & FMODE_READ)
7915                 seq_release(inode, file);
7916
7917         return 0;
7918 }
7919
/*
 * File operations for the error log file: reads go through seq_file,
 * writes are accepted but ignored.
 */
static const struct file_operations tracing_err_log_fops = {
        .open           = tracing_err_log_open,
        .write          = tracing_err_log_write,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = tracing_err_log_release,
};
7927
7928 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7929 {
7930         struct trace_array *tr = inode->i_private;
7931         struct ftrace_buffer_info *info;
7932         int ret;
7933
7934         ret = tracing_check_open_get_tr(tr);
7935         if (ret)
7936                 return ret;
7937
7938         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7939         if (!info) {
7940                 trace_array_put(tr);
7941                 return -ENOMEM;
7942         }
7943
7944         mutex_lock(&trace_types_lock);
7945
7946         info->iter.tr           = tr;
7947         info->iter.cpu_file     = tracing_get_cpu(inode);
7948         info->iter.trace        = tr->current_trace;
7949         info->iter.array_buffer = &tr->array_buffer;
7950         info->spare             = NULL;
7951         /* Force reading ring buffer for first read */
7952         info->read              = (unsigned int)-1;
7953
7954         filp->private_data = info;
7955
7956         tr->trace_ref++;
7957
7958         mutex_unlock(&trace_types_lock);
7959
7960         ret = nonseekable_open(inode, filp);
7961         if (ret < 0)
7962                 trace_array_put(tr);
7963
7964         return ret;
7965 }
7966
7967 static __poll_t
7968 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7969 {
7970         struct ftrace_buffer_info *info = filp->private_data;
7971         struct trace_iterator *iter = &info->iter;
7972
7973         return trace_poll(iter, filp, poll_table);
7974 }
7975
/*
 * Read handler for the raw ring buffer files.
 *
 * Data is staged in info->spare, a page obtained from
 * ring_buffer_alloc_read_page(); info->read is the offset of the next
 * unread byte in that page.  Each call copies at most the remainder of the
 * staged page to user space and refills the page from the ring buffer once
 * it has been consumed.
 *
 * Returns the number of bytes copied, 0 when @count is 0 or when the page
 * read failed although the buffer is not empty, -EBUSY while the snapshot
 * buffer is in use by the current tracer, -EAGAIN for a non-blocking read
 * of an empty buffer, -EFAULT when nothing could be copied, or the error
 * from page allocation / wait_on_pipe().
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Lazily allocate the staging page on the first read of this open file */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			/* Remember the CPU so release can free the page against it */
			info->spare_cpu = iter->cpu_file;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing could be read: either wait for data or give up */
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was staged; start consuming it from the beginning */
	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes it could NOT copy */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	/* Partial copies are reported as a short read */
	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8049
8050 static int tracing_buffers_release(struct inode *inode, struct file *file)
8051 {
8052         struct ftrace_buffer_info *info = file->private_data;
8053         struct trace_iterator *iter = &info->iter;
8054
8055         mutex_lock(&trace_types_lock);
8056
8057         iter->tr->trace_ref--;
8058
8059         __trace_array_put(iter->tr);
8060
8061         if (info->spare)
8062                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8063                                            info->spare_cpu, info->spare);
8064         kvfree(info);
8065
8066         mutex_unlock(&trace_types_lock);
8067
8068         return 0;
8069 }
8070
/*
 * Reference-counted handle for one ring buffer read page that has been
 * handed to a pipe by the splice read path.
 */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* ring buffer the page belongs to */
	void			*page;		/* page from ring_buffer_alloc_read_page() */
	int			cpu;		/* CPU passed when the page is freed */
	refcount_t		refcount;	/* page is freed when this drops to zero */
};
8077
8078 static void buffer_ref_release(struct buffer_ref *ref)
8079 {
8080         if (!refcount_dec_and_test(&ref->refcount))
8081                 return;
8082         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8083         kfree(ref);
8084 }
8085
8086 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8087                                     struct pipe_buffer *buf)
8088 {
8089         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8090
8091         buffer_ref_release(ref);
8092         buf->private = 0;
8093 }
8094
8095 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8096                                 struct pipe_buffer *buf)
8097 {
8098         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8099
8100         if (refcount_read(&ref->refcount) > INT_MAX/2)
8101                 return false;
8102
8103         refcount_inc(&ref->refcount);
8104         return true;
8105 }
8106
8107 /* Pipe buffer operations for a buffer. */
8108 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8109         .release                = buffer_pipe_buf_release,
8110         .get                    = buffer_pipe_buf_get,
8111 };
8112
8113 /*
8114  * Callback from splice_to_pipe(), if we need to release some pages
8115  * at the end of the spd in case we error'ed out in filling the pipe.
8116  */
8117 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8118 {
8119         struct buffer_ref *ref =
8120                 (struct buffer_ref *)spd->partial[i].private;
8121
8122         buffer_ref_release(ref);
8123         spd->partial[i].private = 0;
8124 }
8125
/*
 * Splice handler for the raw ring buffer files: moves whole ring buffer
 * pages into @pipe.
 *
 * *ppos must be page aligned and @len must cover at least one page; @len
 * is rounded down to a multiple of PAGE_SIZE.  Each page placed in the
 * pipe is tracked by a buffer_ref, which is released through
 * buffer_pipe_buf_ops / buffer_spd_release.
 *
 * Returns the result of splice_to_pipe() when at least one page was read.
 * Otherwise returns -EBUSY (snapshot buffer in use by the current tracer),
 * -EINVAL (bad alignment or length), -ENOMEM, a page allocation error,
 * -EAGAIN (non-blocking and nothing to read) or the error from
 * wait_on_pipe().
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	/* Only whole pages are transferred: trim a ragged length */
	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* One iteration per page; stop when out of slots, length or entries */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/*
		 * NOTE(review): the last argument is 1 here but 0 in
		 * tracing_buffers_read(); presumably it asks for full pages
		 * only - confirm against ring_buffer_read_page().
		 */
		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			/* Nothing was read into this page; give it back */
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	/* i counts the pages filled in; every break above leaves before i++ */
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		/* An allocation error from the loop takes precedence */
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
		if (ret)
			goto out;

		goto again;
	}

	/* Any error recorded after some pages were read is overwritten here */
	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8234
8235 static const struct file_operations tracing_buffers_fops = {
8236         .open           = tracing_buffers_open,
8237         .read           = tracing_buffers_read,
8238         .poll           = tracing_buffers_poll,
8239         .release        = tracing_buffers_release,
8240         .splice_read    = tracing_buffers_splice_read,
8241         .llseek         = no_llseek,
8242 };
8243
8244 static ssize_t
8245 tracing_stats_read(struct file *filp, char __user *ubuf,
8246                    size_t count, loff_t *ppos)
8247 {
8248         struct inode *inode = file_inode(filp);
8249         struct trace_array *tr = inode->i_private;
8250         struct array_buffer *trace_buf = &tr->array_buffer;
8251         int cpu = tracing_get_cpu(inode);
8252         struct trace_seq *s;
8253         unsigned long cnt;
8254         unsigned long long t;
8255         unsigned long usec_rem;
8256
8257         s = kmalloc(sizeof(*s), GFP_KERNEL);
8258         if (!s)
8259                 return -ENOMEM;
8260
8261         trace_seq_init(s);
8262
8263         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8264         trace_seq_printf(s, "entries: %ld\n", cnt);
8265
8266         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8267         trace_seq_printf(s, "overrun: %ld\n", cnt);
8268
8269         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8270         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8271
8272         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8273         trace_seq_printf(s, "bytes: %ld\n", cnt);
8274
8275         if (trace_clocks[tr->clock_id].in_ns) {
8276                 /* local or global for trace_clock */
8277                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8278                 usec_rem = do_div(t, USEC_PER_SEC);
8279                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8280                                                                 t, usec_rem);
8281
8282                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8283                 usec_rem = do_div(t, USEC_PER_SEC);
8284                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8285         } else {
8286                 /* counter or tsc mode for trace_clock */
8287                 trace_seq_printf(s, "oldest event ts: %llu\n",
8288                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8289
8290                 trace_seq_printf(s, "now ts: %llu\n",
8291                                 ring_buffer_time_stamp(trace_buf->buffer));
8292         }
8293
8294         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8295         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8296
8297         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8298         trace_seq_printf(s, "read events: %ld\n", cnt);
8299
8300         count = simple_read_from_buffer(ubuf, count, ppos,
8301                                         s->buffer, trace_seq_used(s));
8302
8303         kfree(s);
8304
8305         return count;
8306 }
8307
8308 static const struct file_operations tracing_stats_fops = {
8309         .open           = tracing_open_generic_tr,
8310         .read           = tracing_stats_read,
8311         .llseek         = generic_file_llseek,
8312         .release        = tracing_release_generic_tr,
8313 };
8314
8315 #ifdef CONFIG_DYNAMIC_FTRACE
8316
8317 static ssize_t
8318 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8319                   size_t cnt, loff_t *ppos)
8320 {
8321         ssize_t ret;
8322         char *buf;
8323         int r;
8324
8325         /* 256 should be plenty to hold the amount needed */
8326         buf = kmalloc(256, GFP_KERNEL);
8327         if (!buf)
8328                 return -ENOMEM;
8329
8330         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8331                       ftrace_update_tot_cnt,
8332                       ftrace_number_of_pages,
8333                       ftrace_number_of_groups);
8334
8335         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8336         kfree(buf);
8337         return ret;
8338 }
8339
8340 static const struct file_operations tracing_dyn_info_fops = {
8341         .open           = tracing_open_generic,
8342         .read           = tracing_read_dyn_info,
8343         .llseek         = generic_file_llseek,
8344 };
8345 #endif /* CONFIG_DYNAMIC_FTRACE */
8346
8347 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * Probe callback for the "snapshot" function command without a count:
 * takes a snapshot of @tr on every invocation.
 */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8355
8356 static void
8357 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8358                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8359                       void *data)
8360 {
8361         struct ftrace_func_mapper *mapper = data;
8362         long *count = NULL;
8363
8364         if (mapper)
8365                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8366
8367         if (count) {
8368
8369                 if (*count <= 0)
8370                         return;
8371
8372                 (*count)--;
8373         }
8374
8375         tracing_snapshot_instance(tr);
8376 }
8377
8378 static int
8379 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8380                       struct ftrace_probe_ops *ops, void *data)
8381 {
8382         struct ftrace_func_mapper *mapper = data;
8383         long *count = NULL;
8384
8385         seq_printf(m, "%ps:", (void *)ip);
8386
8387         seq_puts(m, "snapshot");
8388
8389         if (mapper)
8390                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8391
8392         if (count)
8393                 seq_printf(m, ":count=%ld\n", *count);
8394         else
8395                 seq_puts(m, ":unlimited\n");
8396
8397         return 0;
8398 }
8399
8400 static int
8401 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8402                      unsigned long ip, void *init_data, void **data)
8403 {
8404         struct ftrace_func_mapper *mapper = *data;
8405
8406         if (!mapper) {
8407                 mapper = allocate_ftrace_func_mapper();
8408                 if (!mapper)
8409                         return -ENOMEM;
8410                 *data = mapper;
8411         }
8412
8413         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8414 }
8415
8416 static void
8417 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8418                      unsigned long ip, void *data)
8419 {
8420         struct ftrace_func_mapper *mapper = data;
8421
8422         if (!ip) {
8423                 if (!mapper)
8424                         return;
8425                 free_ftrace_func_mapper(mapper, NULL);
8426                 return;
8427         }
8428
8429         ftrace_func_mapper_remove_ip(mapper, ip);
8430 }
8431
8432 static struct ftrace_probe_ops snapshot_probe_ops = {
8433         .func                   = ftrace_snapshot,
8434         .print                  = ftrace_snapshot_print,
8435 };
8436
8437 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8438         .func                   = ftrace_count_snapshot,
8439         .print                  = ftrace_snapshot_print,
8440         .init                   = ftrace_snapshot_init,
8441         .free                   = ftrace_snapshot_free,
8442 };
8443
8444 static int
8445 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8446                                char *glob, char *cmd, char *param, int enable)
8447 {
8448         struct ftrace_probe_ops *ops;
8449         void *count = (void *)-1;
8450         char *number;
8451         int ret;
8452
8453         if (!tr)
8454                 return -ENODEV;
8455
8456         /* hash funcs only work with set_ftrace_filter */
8457         if (!enable)
8458                 return -EINVAL;
8459
8460         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8461
8462         if (glob[0] == '!')
8463                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8464
8465         if (!param)
8466                 goto out_reg;
8467
8468         number = strsep(&param, ":");
8469
8470         if (!strlen(number))
8471                 goto out_reg;
8472
8473         /*
8474          * We use the callback data field (which is a pointer)
8475          * as our counter.
8476          */
8477         ret = kstrtoul(number, 0, (unsigned long *)&count);
8478         if (ret)
8479                 return ret;
8480
8481  out_reg:
8482         ret = tracing_alloc_snapshot_instance(tr);
8483         if (ret < 0)
8484                 goto out;
8485
8486         ret = register_ftrace_function_probe(glob, tr, ops, count);
8487
8488  out:
8489         return ret < 0 ? ret : 0;
8490 }
8491
8492 static struct ftrace_func_command ftrace_snapshot_cmd = {
8493         .name                   = "snapshot",
8494         .func                   = ftrace_trace_snapshot_callback,
8495 };
8496
/*
 * Register the "snapshot" function command with ftrace.
 * Returns the result of register_ftrace_command().
 */
static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
8501 #else
/* Stub used when snapshot or dynamic ftrace support is not configured. */
static inline __init int register_snapshot_cmd(void) { return 0; }
8503 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8504
8505 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8506 {
8507         if (WARN_ON(!tr->dir))
8508                 return ERR_PTR(-ENODEV);
8509
8510         /* Top directory uses NULL as the parent */
8511         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8512                 return NULL;
8513
8514         /* All sub buffers have a descriptor */
8515         return tr->dir;
8516 }
8517
8518 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8519 {
8520         struct dentry *d_tracer;
8521
8522         if (tr->percpu_dir)
8523                 return tr->percpu_dir;
8524
8525         d_tracer = tracing_get_dentry(tr);
8526         if (IS_ERR(d_tracer))
8527                 return NULL;
8528
8529         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8530
8531         MEM_FAIL(!tr->percpu_dir,
8532                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8533
8534         return tr->percpu_dir;
8535 }
8536
8537 static struct dentry *
8538 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8539                       void *data, long cpu, const struct file_operations *fops)
8540 {
8541         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8542
8543         if (ret) /* See tracing_get_cpu() */
8544                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8545         return ret;
8546 }
8547
8548 static void
8549 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8550 {
8551         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8552         struct dentry *d_cpu;
8553         char cpu_dir[30]; /* 30 characters should be more than enough */
8554
8555         if (!d_percpu)
8556                 return;
8557
8558         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8559         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8560         if (!d_cpu) {
8561                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8562                 return;
8563         }
8564
8565         /* per cpu trace_pipe */
8566         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8567                                 tr, cpu, &tracing_pipe_fops);
8568
8569         /* per cpu trace */
8570         trace_create_cpu_file("trace", 0644, d_cpu,
8571                                 tr, cpu, &tracing_fops);
8572
8573         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8574                                 tr, cpu, &tracing_buffers_fops);
8575
8576         trace_create_cpu_file("stats", 0444, d_cpu,
8577                                 tr, cpu, &tracing_stats_fops);
8578
8579         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8580                                 tr, cpu, &tracing_entries_fops);
8581
8582 #ifdef CONFIG_TRACER_SNAPSHOT
8583         trace_create_cpu_file("snapshot", 0644, d_cpu,
8584                                 tr, cpu, &snapshot_fops);
8585
8586         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8587                                 tr, cpu, &snapshot_raw_fops);
8588 #endif
8589 }
8590
8591 #ifdef CONFIG_FTRACE_SELFTEST
8592 /* Let selftest have access to static functions in this file */
8593 #include "trace_selftest.c"
8594 #endif
8595
8596 static ssize_t
8597 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8598                         loff_t *ppos)
8599 {
8600         struct trace_option_dentry *topt = filp->private_data;
8601         char *buf;
8602
8603         if (topt->flags->val & topt->opt->bit)
8604                 buf = "1\n";
8605         else
8606                 buf = "0\n";
8607
8608         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8609 }
8610
8611 static ssize_t
8612 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8613                          loff_t *ppos)
8614 {
8615         struct trace_option_dentry *topt = filp->private_data;
8616         unsigned long val;
8617         int ret;
8618
8619         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8620         if (ret)
8621                 return ret;
8622
8623         if (val != 0 && val != 1)
8624                 return -EINVAL;
8625
8626         if (!!(topt->flags->val & topt->opt->bit) != val) {
8627                 mutex_lock(&trace_types_lock);
8628                 ret = __set_tracer_option(topt->tr, topt->flags,
8629                                           topt->opt, !val);
8630                 mutex_unlock(&trace_types_lock);
8631                 if (ret)
8632                         return ret;
8633         }
8634
8635         *ppos += cnt;
8636
8637         return cnt;
8638 }
8639
8640
8641 static const struct file_operations trace_options_fops = {
8642         .open = tracing_open_generic,
8643         .read = trace_options_read,
8644         .write = trace_options_write,
8645         .llseek = generic_file_llseek,
8646 };
8647
8648 /*
8649  * In order to pass in both the trace_array descriptor as well as the index
8650  * to the flag that the trace option file represents, the trace_array
8651  * has a character array of trace_flags_index[], which holds the index
8652  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8653  * The address of this character array is passed to the flag option file
8654  * read/write callbacks.
8655  *
8656  * In order to extract both the index and the trace_array descriptor,
8657  * get_tr_index() uses the following algorithm.
8658  *
8659  *   idx = *ptr;
8660  *
8661  * As the pointer itself contains the address of the index (remember
8662  * index[1] == 1).
8663  *
8664  * Then to get the trace_array descriptor, by subtracting that index
8665  * from the ptr, we get to the start of the index itself.
8666  *
8667  *   ptr - idx == &index[0]
8668  *
8669  * Then a simple container_of() from that pointer gets us to the
8670  * trace_array descriptor.
8671  */
8672 static void get_tr_index(void *data, struct trace_array **ptr,
8673                          unsigned int *pindex)
8674 {
8675         *pindex = *(unsigned char *)data;
8676
8677         *ptr = container_of(data - *pindex, struct trace_array,
8678                             trace_flags_index);
8679 }
8680
8681 static ssize_t
8682 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8683                         loff_t *ppos)
8684 {
8685         void *tr_index = filp->private_data;
8686         struct trace_array *tr;
8687         unsigned int index;
8688         char *buf;
8689
8690         get_tr_index(tr_index, &tr, &index);
8691
8692         if (tr->trace_flags & (1 << index))
8693                 buf = "1\n";
8694         else
8695                 buf = "0\n";
8696
8697         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8698 }
8699
8700 static ssize_t
8701 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8702                          loff_t *ppos)
8703 {
8704         void *tr_index = filp->private_data;
8705         struct trace_array *tr;
8706         unsigned int index;
8707         unsigned long val;
8708         int ret;
8709
8710         get_tr_index(tr_index, &tr, &index);
8711
8712         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8713         if (ret)
8714                 return ret;
8715
8716         if (val != 0 && val != 1)
8717                 return -EINVAL;
8718
8719         mutex_lock(&event_mutex);
8720         mutex_lock(&trace_types_lock);
8721         ret = set_tracer_flag(tr, 1 << index, val);
8722         mutex_unlock(&trace_types_lock);
8723         mutex_unlock(&event_mutex);
8724
8725         if (ret < 0)
8726                 return ret;
8727
8728         *ppos += cnt;
8729
8730         return cnt;
8731 }
8732
8733 static const struct file_operations trace_options_core_fops = {
8734         .open = tracing_open_generic,
8735         .read = trace_options_core_read,
8736         .write = trace_options_core_write,
8737         .llseek = generic_file_llseek,
8738 };
8739
8740 struct dentry *trace_create_file(const char *name,
8741                                  umode_t mode,
8742                                  struct dentry *parent,
8743                                  void *data,
8744                                  const struct file_operations *fops)
8745 {
8746         struct dentry *ret;
8747
8748         ret = tracefs_create_file(name, mode, parent, data, fops);
8749         if (!ret)
8750                 pr_warn("Could not create tracefs '%s' entry\n", name);
8751
8752         return ret;
8753 }
8754
8755
8756 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8757 {
8758         struct dentry *d_tracer;
8759
8760         if (tr->options)
8761                 return tr->options;
8762
8763         d_tracer = tracing_get_dentry(tr);
8764         if (IS_ERR(d_tracer))
8765                 return NULL;
8766
8767         tr->options = tracefs_create_dir("options", d_tracer);
8768         if (!tr->options) {
8769                 pr_warn("Could not create tracefs directory 'options'\n");
8770                 return NULL;
8771         }
8772
8773         return tr->options;
8774 }
8775
8776 static void
8777 create_trace_option_file(struct trace_array *tr,
8778                          struct trace_option_dentry *topt,
8779                          struct tracer_flags *flags,
8780                          struct tracer_opt *opt)
8781 {
8782         struct dentry *t_options;
8783
8784         t_options = trace_options_init_dentry(tr);
8785         if (!t_options)
8786                 return;
8787
8788         topt->flags = flags;
8789         topt->opt = opt;
8790         topt->tr = tr;
8791
8792         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8793                                     &trace_options_fops);
8794
8795 }
8796
8797 static void
8798 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8799 {
8800         struct trace_option_dentry *topts;
8801         struct trace_options *tr_topts;
8802         struct tracer_flags *flags;
8803         struct tracer_opt *opts;
8804         int cnt;
8805         int i;
8806
8807         if (!tracer)
8808                 return;
8809
8810         flags = tracer->flags;
8811
8812         if (!flags || !flags->opts)
8813                 return;
8814
8815         /*
8816          * If this is an instance, only create flags for tracers
8817          * the instance may have.
8818          */
8819         if (!trace_ok_for_array(tracer, tr))
8820                 return;
8821
8822         for (i = 0; i < tr->nr_topts; i++) {
8823                 /* Make sure there's no duplicate flags. */
8824                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8825                         return;
8826         }
8827
8828         opts = flags->opts;
8829
8830         for (cnt = 0; opts[cnt].name; cnt++)
8831                 ;
8832
8833         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8834         if (!topts)
8835                 return;
8836
8837         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8838                             GFP_KERNEL);
8839         if (!tr_topts) {
8840                 kfree(topts);
8841                 return;
8842         }
8843
8844         tr->topts = tr_topts;
8845         tr->topts[tr->nr_topts].tracer = tracer;
8846         tr->topts[tr->nr_topts].topts = topts;
8847         tr->nr_topts++;
8848
8849         for (cnt = 0; opts[cnt].name; cnt++) {
8850                 create_trace_option_file(tr, &topts[cnt], flags,
8851                                          &opts[cnt]);
8852                 MEM_FAIL(topts[cnt].entry == NULL,
8853                           "Failed to create trace option: %s",
8854                           opts[cnt].name);
8855         }
8856 }
8857
8858 static struct dentry *
8859 create_trace_option_core_file(struct trace_array *tr,
8860                               const char *option, long index)
8861 {
8862         struct dentry *t_options;
8863
8864         t_options = trace_options_init_dentry(tr);
8865         if (!t_options)
8866                 return NULL;
8867
8868         return trace_create_file(option, 0644, t_options,
8869                                  (void *)&tr->trace_flags_index[index],
8870                                  &trace_options_core_fops);
8871 }
8872
8873 static void create_trace_options_dir(struct trace_array *tr)
8874 {
8875         struct dentry *t_options;
8876         bool top_level = tr == &global_trace;
8877         int i;
8878
8879         t_options = trace_options_init_dentry(tr);
8880         if (!t_options)
8881                 return;
8882
8883         for (i = 0; trace_options[i]; i++) {
8884                 if (top_level ||
8885                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8886                         create_trace_option_core_file(tr, trace_options[i], i);
8887         }
8888 }
8889
8890 static ssize_t
8891 rb_simple_read(struct file *filp, char __user *ubuf,
8892                size_t cnt, loff_t *ppos)
8893 {
8894         struct trace_array *tr = filp->private_data;
8895         char buf[64];
8896         int r;
8897
8898         r = tracer_tracing_is_on(tr);
8899         r = sprintf(buf, "%d\n", r);
8900
8901         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8902 }
8903
8904 static ssize_t
8905 rb_simple_write(struct file *filp, const char __user *ubuf,
8906                 size_t cnt, loff_t *ppos)
8907 {
8908         struct trace_array *tr = filp->private_data;
8909         struct trace_buffer *buffer = tr->array_buffer.buffer;
8910         unsigned long val;
8911         int ret;
8912
8913         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8914         if (ret)
8915                 return ret;
8916
8917         if (buffer) {
8918                 mutex_lock(&trace_types_lock);
8919                 if (!!val == tracer_tracing_is_on(tr)) {
8920                         val = 0; /* do nothing */
8921                 } else if (val) {
8922                         tracer_tracing_on(tr);
8923                         if (tr->current_trace->start)
8924                                 tr->current_trace->start(tr);
8925                 } else {
8926                         tracer_tracing_off(tr);
8927                         if (tr->current_trace->stop)
8928                                 tr->current_trace->stop(tr);
8929                 }
8930                 mutex_unlock(&trace_types_lock);
8931         }
8932
8933         (*ppos)++;
8934
8935         return cnt;
8936 }
8937
8938 static const struct file_operations rb_simple_fops = {
8939         .open           = tracing_open_generic_tr,
8940         .read           = rb_simple_read,
8941         .write          = rb_simple_write,
8942         .release        = tracing_release_generic_tr,
8943         .llseek         = default_llseek,
8944 };
8945
8946 static ssize_t
8947 buffer_percent_read(struct file *filp, char __user *ubuf,
8948                     size_t cnt, loff_t *ppos)
8949 {
8950         struct trace_array *tr = filp->private_data;
8951         char buf[64];
8952         int r;
8953
8954         r = tr->buffer_percent;
8955         r = sprintf(buf, "%d\n", r);
8956
8957         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8958 }
8959
8960 static ssize_t
8961 buffer_percent_write(struct file *filp, const char __user *ubuf,
8962                      size_t cnt, loff_t *ppos)
8963 {
8964         struct trace_array *tr = filp->private_data;
8965         unsigned long val;
8966         int ret;
8967
8968         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8969         if (ret)
8970                 return ret;
8971
8972         if (val > 100)
8973                 return -EINVAL;
8974
8975         if (!val)
8976                 val = 1;
8977
8978         tr->buffer_percent = val;
8979
8980         (*ppos)++;
8981
8982         return cnt;
8983 }
8984
8985 static const struct file_operations buffer_percent_fops = {
8986         .open           = tracing_open_generic_tr,
8987         .read           = buffer_percent_read,
8988         .write          = buffer_percent_write,
8989         .release        = tracing_release_generic_tr,
8990         .llseek         = default_llseek,
8991 };
8992
/* Directory that holds the trace instances; NULL until it is created */
static struct dentry *trace_instance_dir;

/* Defined further down, needed by the instance creation code */
static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8997
8998 static int
8999 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9000 {
9001         enum ring_buffer_flags rb_flags;
9002
9003         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9004
9005         buf->tr = tr;
9006
9007         buf->buffer = ring_buffer_alloc(size, rb_flags);
9008         if (!buf->buffer)
9009                 return -ENOMEM;
9010
9011         buf->data = alloc_percpu(struct trace_array_cpu);
9012         if (!buf->data) {
9013                 ring_buffer_free(buf->buffer);
9014                 buf->buffer = NULL;
9015                 return -ENOMEM;
9016         }
9017
9018         /* Allocate the first page for all buffers */
9019         set_buffer_entries(&tr->array_buffer,
9020                            ring_buffer_size(tr->array_buffer.buffer, 0));
9021
9022         return 0;
9023 }
9024
9025 static int allocate_trace_buffers(struct trace_array *tr, int size)
9026 {
9027         int ret;
9028
9029         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9030         if (ret)
9031                 return ret;
9032
9033 #ifdef CONFIG_TRACER_MAX_TRACE
9034         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9035                                     allocate_snapshot ? size : 1);
9036         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9037                 ring_buffer_free(tr->array_buffer.buffer);
9038                 tr->array_buffer.buffer = NULL;
9039                 free_percpu(tr->array_buffer.data);
9040                 tr->array_buffer.data = NULL;
9041                 return -ENOMEM;
9042         }
9043         tr->allocated_snapshot = allocate_snapshot;
9044
9045         /*
9046          * Only the top level trace array gets its snapshot allocated
9047          * from the kernel command line.
9048          */
9049         allocate_snapshot = false;
9050 #endif
9051
9052         return 0;
9053 }
9054
9055 static void free_trace_buffer(struct array_buffer *buf)
9056 {
9057         if (buf->buffer) {
9058                 ring_buffer_free(buf->buffer);
9059                 buf->buffer = NULL;
9060                 free_percpu(buf->data);
9061                 buf->data = NULL;
9062         }
9063 }
9064
9065 static void free_trace_buffers(struct trace_array *tr)
9066 {
9067         if (!tr)
9068                 return;
9069
9070         free_trace_buffer(&tr->array_buffer);
9071
9072 #ifdef CONFIG_TRACER_MAX_TRACE
9073         free_trace_buffer(&tr->max_buffer);
9074 #endif
9075 }
9076
9077 static void init_trace_flags_index(struct trace_array *tr)
9078 {
9079         int i;
9080
9081         /* Used by the trace options files */
9082         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9083                 tr->trace_flags_index[i] = i;
9084 }
9085
9086 static void __update_tracer_options(struct trace_array *tr)
9087 {
9088         struct tracer *t;
9089
9090         for (t = trace_types; t; t = t->next)
9091                 add_tracer_options(tr, t);
9092 }
9093
9094 static void update_tracer_options(struct trace_array *tr)
9095 {
9096         mutex_lock(&trace_types_lock);
9097         __update_tracer_options(tr);
9098         mutex_unlock(&trace_types_lock);
9099 }
9100
9101 /* Must have trace_types_lock held */
9102 struct trace_array *trace_array_find(const char *instance)
9103 {
9104         struct trace_array *tr, *found = NULL;
9105
9106         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9107                 if (tr->name && strcmp(tr->name, instance) == 0) {
9108                         found = tr;
9109                         break;
9110                 }
9111         }
9112
9113         return found;
9114 }
9115
9116 struct trace_array *trace_array_find_get(const char *instance)
9117 {
9118         struct trace_array *tr;
9119
9120         mutex_lock(&trace_types_lock);
9121         tr = trace_array_find(instance);
9122         if (tr)
9123                 tr->ref++;
9124         mutex_unlock(&trace_types_lock);
9125
9126         return tr;
9127 }
9128
9129 static int trace_array_create_dir(struct trace_array *tr)
9130 {
9131         int ret;
9132
9133         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9134         if (!tr->dir)
9135                 return -EINVAL;
9136
9137         ret = event_trace_add_tracer(tr->dir, tr);
9138         if (ret)
9139                 tracefs_remove(tr->dir);
9140
9141         init_tracer_tracefs(tr, tr->dir);
9142         __update_tracer_options(tr);
9143
9144         return ret;
9145 }
9146
9147 static struct trace_array *trace_array_create(const char *name)
9148 {
9149         struct trace_array *tr;
9150         int ret;
9151
9152         ret = -ENOMEM;
9153         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9154         if (!tr)
9155                 return ERR_PTR(ret);
9156
9157         tr->name = kstrdup(name, GFP_KERNEL);
9158         if (!tr->name)
9159                 goto out_free_tr;
9160
9161         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9162                 goto out_free_tr;
9163
9164         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9165
9166         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9167
9168         raw_spin_lock_init(&tr->start_lock);
9169
9170         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9171
9172         tr->current_trace = &nop_trace;
9173
9174         INIT_LIST_HEAD(&tr->systems);
9175         INIT_LIST_HEAD(&tr->events);
9176         INIT_LIST_HEAD(&tr->hist_vars);
9177         INIT_LIST_HEAD(&tr->err_log);
9178
9179         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9180                 goto out_free_tr;
9181
9182         if (ftrace_allocate_ftrace_ops(tr) < 0)
9183                 goto out_free_tr;
9184
9185         ftrace_init_trace_array(tr);
9186
9187         init_trace_flags_index(tr);
9188
9189         if (trace_instance_dir) {
9190                 ret = trace_array_create_dir(tr);
9191                 if (ret)
9192                         goto out_free_tr;
9193         } else
9194                 __trace_early_add_events(tr);
9195
9196         list_add(&tr->list, &ftrace_trace_arrays);
9197
9198         tr->ref++;
9199
9200         return tr;
9201
9202  out_free_tr:
9203         ftrace_free_ftrace_ops(tr);
9204         free_trace_buffers(tr);
9205         free_cpumask_var(tr->tracing_cpumask);
9206         kfree(tr->name);
9207         kfree(tr);
9208
9209         return ERR_PTR(ret);
9210 }
9211
9212 static int instance_mkdir(const char *name)
9213 {
9214         struct trace_array *tr;
9215         int ret;
9216
9217         mutex_lock(&event_mutex);
9218         mutex_lock(&trace_types_lock);
9219
9220         ret = -EEXIST;
9221         if (trace_array_find(name))
9222                 goto out_unlock;
9223
9224         tr = trace_array_create(name);
9225
9226         ret = PTR_ERR_OR_ZERO(tr);
9227
9228 out_unlock:
9229         mutex_unlock(&trace_types_lock);
9230         mutex_unlock(&event_mutex);
9231         return ret;
9232 }
9233
9234 /**
9235  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9236  * @name: The name of the trace array to be looked up/created.
9237  *
9238  * Returns pointer to trace array with given name.
9239  * NULL, if it cannot be created.
9240  *
9241  * NOTE: This function increments the reference counter associated with the
9242  * trace array returned. This makes sure it cannot be freed while in use.
9243  * Use trace_array_put() once the trace array is no longer needed.
9244  * If the trace_array is to be freed, trace_array_destroy() needs to
9245  * be called after the trace_array_put(), or simply let user space delete
9246  * it from the tracefs instances directory. But until the
9247  * trace_array_put() is called, user space can not delete it.
9248  *
9249  */
9250 struct trace_array *trace_array_get_by_name(const char *name)
9251 {
9252         struct trace_array *tr;
9253
9254         mutex_lock(&event_mutex);
9255         mutex_lock(&trace_types_lock);
9256
9257         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9258                 if (tr->name && strcmp(tr->name, name) == 0)
9259                         goto out_unlock;
9260         }
9261
9262         tr = trace_array_create(name);
9263
9264         if (IS_ERR(tr))
9265                 tr = NULL;
9266 out_unlock:
9267         if (tr)
9268                 tr->ref++;
9269
9270         mutex_unlock(&trace_types_lock);
9271         mutex_unlock(&event_mutex);
9272         return tr;
9273 }
9274 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9275
9276 static int __remove_instance(struct trace_array *tr)
9277 {
9278         int i;
9279
9280         /* Reference counter for a newly created trace array = 1. */
9281         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9282                 return -EBUSY;
9283
9284         list_del(&tr->list);
9285
9286         /* Disable all the flags that were enabled coming in */
9287         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9288                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9289                         set_tracer_flag(tr, 1 << i, 0);
9290         }
9291
9292         tracing_set_nop(tr);
9293         clear_ftrace_function_probes(tr);
9294         event_trace_del_tracer(tr);
9295         ftrace_clear_pids(tr);
9296         ftrace_destroy_function_files(tr);
9297         tracefs_remove(tr->dir);
9298         free_percpu(tr->last_func_repeats);
9299         free_trace_buffers(tr);
9300
9301         for (i = 0; i < tr->nr_topts; i++) {
9302                 kfree(tr->topts[i].topts);
9303         }
9304         kfree(tr->topts);
9305
9306         free_cpumask_var(tr->tracing_cpumask);
9307         kfree(tr->name);
9308         kfree(tr);
9309
9310         return 0;
9311 }
9312
9313 int trace_array_destroy(struct trace_array *this_tr)
9314 {
9315         struct trace_array *tr;
9316         int ret;
9317
9318         if (!this_tr)
9319                 return -EINVAL;
9320
9321         mutex_lock(&event_mutex);
9322         mutex_lock(&trace_types_lock);
9323
9324         ret = -ENODEV;
9325
9326         /* Making sure trace array exists before destroying it. */
9327         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9328                 if (tr == this_tr) {
9329                         ret = __remove_instance(tr);
9330                         break;
9331                 }
9332         }
9333
9334         mutex_unlock(&trace_types_lock);
9335         mutex_unlock(&event_mutex);
9336
9337         return ret;
9338 }
9339 EXPORT_SYMBOL_GPL(trace_array_destroy);
9340
9341 static int instance_rmdir(const char *name)
9342 {
9343         struct trace_array *tr;
9344         int ret;
9345
9346         mutex_lock(&event_mutex);
9347         mutex_lock(&trace_types_lock);
9348
9349         ret = -ENODEV;
9350         tr = trace_array_find(name);
9351         if (tr)
9352                 ret = __remove_instance(tr);
9353
9354         mutex_unlock(&trace_types_lock);
9355         mutex_unlock(&event_mutex);
9356
9357         return ret;
9358 }
9359
9360 static __init void create_trace_instances(struct dentry *d_tracer)
9361 {
9362         struct trace_array *tr;
9363
9364         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9365                                                          instance_mkdir,
9366                                                          instance_rmdir);
9367         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9368                 return;
9369
9370         mutex_lock(&event_mutex);
9371         mutex_lock(&trace_types_lock);
9372
9373         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9374                 if (!tr->name)
9375                         continue;
9376                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9377                              "Failed to create instance directory\n"))
9378                         break;
9379         }
9380
9381         mutex_unlock(&trace_types_lock);
9382         mutex_unlock(&event_mutex);
9383 }
9384
9385 static void
9386 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9387 {
9388         struct trace_event_file *file;
9389         int cpu;
9390
9391         trace_create_file("available_tracers", 0444, d_tracer,
9392                         tr, &show_traces_fops);
9393
9394         trace_create_file("current_tracer", 0644, d_tracer,
9395                         tr, &set_tracer_fops);
9396
9397         trace_create_file("tracing_cpumask", 0644, d_tracer,
9398                           tr, &tracing_cpumask_fops);
9399
9400         trace_create_file("trace_options", 0644, d_tracer,
9401                           tr, &tracing_iter_fops);
9402
9403         trace_create_file("trace", 0644, d_tracer,
9404                           tr, &tracing_fops);
9405
9406         trace_create_file("trace_pipe", 0444, d_tracer,
9407                           tr, &tracing_pipe_fops);
9408
9409         trace_create_file("buffer_size_kb", 0644, d_tracer,
9410                           tr, &tracing_entries_fops);
9411
9412         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9413                           tr, &tracing_total_entries_fops);
9414
9415         trace_create_file("free_buffer", 0200, d_tracer,
9416                           tr, &tracing_free_buffer_fops);
9417
9418         trace_create_file("trace_marker", 0220, d_tracer,
9419                           tr, &tracing_mark_fops);
9420
9421         file = __find_event_file(tr, "ftrace", "print");
9422         if (file && file->dir)
9423                 trace_create_file("trigger", 0644, file->dir, file,
9424                                   &event_trigger_fops);
9425         tr->trace_marker_file = file;
9426
9427         trace_create_file("trace_marker_raw", 0220, d_tracer,
9428                           tr, &tracing_mark_raw_fops);
9429
9430         trace_create_file("trace_clock", 0644, d_tracer, tr,
9431                           &trace_clock_fops);
9432
9433         trace_create_file("tracing_on", 0644, d_tracer,
9434                           tr, &rb_simple_fops);
9435
9436         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9437                           &trace_time_stamp_mode_fops);
9438
9439         tr->buffer_percent = 50;
9440
9441         trace_create_file("buffer_percent", 0444, d_tracer,
9442                         tr, &buffer_percent_fops);
9443
9444         create_trace_options_dir(tr);
9445
9446 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9447         trace_create_maxlat_file(tr, d_tracer);
9448 #endif
9449
9450         if (ftrace_create_function_files(tr, d_tracer))
9451                 MEM_FAIL(1, "Could not allocate function filter files");
9452
9453 #ifdef CONFIG_TRACER_SNAPSHOT
9454         trace_create_file("snapshot", 0644, d_tracer,
9455                           tr, &snapshot_fops);
9456 #endif
9457
9458         trace_create_file("error_log", 0644, d_tracer,
9459                           tr, &tracing_err_log_fops);
9460
9461         for_each_tracing_cpu(cpu)
9462                 tracing_init_tracefs_percpu(tr, cpu);
9463
9464         ftrace_init_tracefs(tr, d_tracer);
9465 }
9466
9467 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9468 {
9469         struct vfsmount *mnt;
9470         struct file_system_type *type;
9471
9472         /*
9473          * To maintain backward compatibility for tools that mount
9474          * debugfs to get to the tracing facility, tracefs is automatically
9475          * mounted to the debugfs/tracing directory.
9476          */
9477         type = get_fs_type("tracefs");
9478         if (!type)
9479                 return NULL;
9480         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9481         put_filesystem(type);
9482         if (IS_ERR(mnt))
9483                 return NULL;
9484         mntget(mnt);
9485
9486         return mnt;
9487 }
9488
9489 /**
9490  * tracing_init_dentry - initialize top level trace array
9491  *
9492  * This is called when creating files or directories in the tracing
9493  * directory. It is called via fs_initcall() by any of the boot up code
9494  * and expects to return the dentry of the top level tracing directory.
9495  */
9496 int tracing_init_dentry(void)
9497 {
9498         struct trace_array *tr = &global_trace;
9499
9500         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9501                 pr_warn("Tracing disabled due to lockdown\n");
9502                 return -EPERM;
9503         }
9504
9505         /* The top level trace array uses  NULL as parent */
9506         if (tr->dir)
9507                 return 0;
9508
9509         if (WARN_ON(!tracefs_initialized()))
9510                 return -ENODEV;
9511
9512         /*
9513          * As there may still be users that expect the tracing
9514          * files to exist in debugfs/tracing, we must automount
9515          * the tracefs file system there, so older tools still
9516          * work with the newer kernel.
9517          */
9518         tr->dir = debugfs_create_automount("tracing", NULL,
9519                                            trace_automount, NULL);
9520
9521         return 0;
9522 }
9523
9524 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9525 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9526
9527 static struct workqueue_struct *eval_map_wq __initdata;
9528 static struct work_struct eval_map_work __initdata;
9529
9530 static void __init eval_map_work_func(struct work_struct *work)
9531 {
9532         int len;
9533
9534         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9535         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9536 }
9537
9538 static int __init trace_eval_init(void)
9539 {
9540         INIT_WORK(&eval_map_work, eval_map_work_func);
9541
9542         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9543         if (!eval_map_wq) {
9544                 pr_err("Unable to allocate eval_map_wq\n");
9545                 /* Do work here */
9546                 eval_map_work_func(&eval_map_work);
9547                 return -ENOMEM;
9548         }
9549
9550         queue_work(eval_map_wq, &eval_map_work);
9551         return 0;
9552 }
9553
9554 static int __init trace_eval_sync(void)
9555 {
9556         /* Make sure the eval map updates are finished */
9557         if (eval_map_wq)
9558                 destroy_workqueue(eval_map_wq);
9559         return 0;
9560 }
9561
9562 late_initcall_sync(trace_eval_sync);
9563
9564
9565 #ifdef CONFIG_MODULES
9566 static void trace_module_add_evals(struct module *mod)
9567 {
9568         if (!mod->num_trace_evals)
9569                 return;
9570
9571         /*
9572          * Modules with bad taint do not have events created, do
9573          * not bother with enums either.
9574          */
9575         if (trace_module_has_bad_taint(mod))
9576                 return;
9577
9578         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9579 }
9580
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map that belongs to @mod.
 *
 * The list starting at trace_eval_maps is searched under
 * trace_eval_mutex for the map whose head names @mod. If there is
 * one, the link pointing to it is redirected to whatever follows its
 * tail and the map is freed.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item **link = &trace_eval_maps;
	union trace_eval_map_item *map;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	for (map = trace_eval_maps; map; map = *link) {
		if (map->head.mod == mod)
			break;
		/* Not ours, continue after the tail of this map */
		link = &trace_eval_jmp_to_tail(map)->tail.next;
	}

	if (map) {
		*link = trace_eval_jmp_to_tail(map)->tail.next;
		kfree(map);
	}

	mutex_unlock(&trace_eval_mutex);
}
#else
/* No eval map file, so there is nothing to remove */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9612
9613 static int trace_module_notify(struct notifier_block *self,
9614                                unsigned long val, void *data)
9615 {
9616         struct module *mod = data;
9617
9618         switch (val) {
9619         case MODULE_STATE_COMING:
9620                 trace_module_add_evals(mod);
9621                 break;
9622         case MODULE_STATE_GOING:
9623                 trace_module_remove_evals(mod);
9624                 break;
9625         }
9626
9627         return NOTIFY_OK;
9628 }
9629
9630 static struct notifier_block trace_module_nb = {
9631         .notifier_call = trace_module_notify,
9632         .priority = 0,
9633 };
9634 #endif /* CONFIG_MODULES */
9635
9636 static __init int tracer_init_tracefs(void)
9637 {
9638         int ret;
9639
9640         trace_access_lock_init();
9641
9642         ret = tracing_init_dentry();
9643         if (ret)
9644                 return 0;
9645
9646         event_trace_init();
9647
9648         init_tracer_tracefs(&global_trace, NULL);
9649         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9650
9651         trace_create_file("tracing_thresh", 0644, NULL,
9652                         &global_trace, &tracing_thresh_fops);
9653
9654         trace_create_file("README", 0444, NULL,
9655                         NULL, &tracing_readme_fops);
9656
9657         trace_create_file("saved_cmdlines", 0444, NULL,
9658                         NULL, &tracing_saved_cmdlines_fops);
9659
9660         trace_create_file("saved_cmdlines_size", 0644, NULL,
9661                           NULL, &tracing_saved_cmdlines_size_fops);
9662
9663         trace_create_file("saved_tgids", 0444, NULL,
9664                         NULL, &tracing_saved_tgids_fops);
9665
9666         trace_eval_init();
9667
9668         trace_create_eval_file(NULL);
9669
9670 #ifdef CONFIG_MODULES
9671         register_module_notifier(&trace_module_nb);
9672 #endif
9673
9674 #ifdef CONFIG_DYNAMIC_FTRACE
9675         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9676                         NULL, &tracing_dyn_info_fops);
9677 #endif
9678
9679         create_trace_instances(NULL);
9680
9681         update_tracer_options(&global_trace);
9682
9683         return 0;
9684 }
9685
9686 fs_initcall(tracer_init_tracefs);
9687
9688 static int trace_panic_handler(struct notifier_block *this,
9689                                unsigned long event, void *unused)
9690 {
9691         if (ftrace_dump_on_oops)
9692                 ftrace_dump(ftrace_dump_on_oops);
9693         return NOTIFY_OK;
9694 }
9695
9696 static struct notifier_block trace_panic_notifier = {
9697         .notifier_call  = trace_panic_handler,
9698         .next           = NULL,
9699         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9700 };
9701
9702 static int trace_die_handler(struct notifier_block *self,
9703                              unsigned long val,
9704                              void *data)
9705 {
9706         switch (val) {
9707         case DIE_OOPS:
9708                 if (ftrace_dump_on_oops)
9709                         ftrace_dump(ftrace_dump_on_oops);
9710                 break;
9711         default:
9712                 break;
9713         }
9714         return NOTIFY_OK;
9715 }
9716
9717 static struct notifier_block trace_die_notifier = {
9718         .notifier_call = trace_die_handler,
9719         .priority = 200
9720 };
9721
/*
 * printk is limited to a maximum of 1024 characters, which is more
 * than we really need. Nothing should be printing 1000 characters
 * anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * KERN_TRACE is defined here so that there is a single place to modify
 * if we ever decide to change the log level the ftrace dump is
 * printed at.
 */
#define KERN_TRACE		KERN_EMERG
9734
9735 void
9736 trace_printk_seq(struct trace_seq *s)
9737 {
9738         /* Probably should print a warning here. */
9739         if (s->seq.len >= TRACE_MAX_PRINT)
9740                 s->seq.len = TRACE_MAX_PRINT;
9741
9742         /*
9743          * More paranoid code. Although the buffer size is set to
9744          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9745          * an extra layer of protection.
9746          */
9747         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9748                 s->seq.len = s->seq.size - 1;
9749
9750         /* should be zero ended, but we are paranoid. */
9751         s->buffer[s->seq.len] = 0;
9752
9753         printk(KERN_TRACE "%s", s->buffer);
9754
9755         trace_seq_init(s);
9756 }
9757
9758 void trace_init_global_iter(struct trace_iterator *iter)
9759 {
9760         iter->tr = &global_trace;
9761         iter->trace = iter->tr->current_trace;
9762         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9763         iter->array_buffer = &global_trace.array_buffer;
9764
9765         if (iter->trace && iter->trace->open)
9766                 iter->trace->open(iter);
9767
9768         /* Annotate start of buffers if we had overruns */
9769         if (ring_buffer_overruns(iter->array_buffer->buffer))
9770                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9771
9772         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9773         if (trace_clocks[iter->tr->clock_id].in_ns)
9774                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9775 }
9776
9777 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9778 {
9779         /* use static because iter can be a bit big for the stack */
9780         static struct trace_iterator iter;
9781         static atomic_t dump_running;
9782         struct trace_array *tr = &global_trace;
9783         unsigned int old_userobj;
9784         unsigned long flags;
9785         int cnt = 0, cpu;
9786
9787         /* Only allow one dump user at a time. */
9788         if (atomic_inc_return(&dump_running) != 1) {
9789                 atomic_dec(&dump_running);
9790                 return;
9791         }
9792
9793         /*
9794          * Always turn off tracing when we dump.
9795          * We don't need to show trace output of what happens
9796          * between multiple crashes.
9797          *
9798          * If the user does a sysrq-z, then they can re-enable
9799          * tracing with echo 1 > tracing_on.
9800          */
9801         tracing_off();
9802
9803         local_irq_save(flags);
9804         printk_nmi_direct_enter();
9805
9806         /* Simulate the iterator */
9807         trace_init_global_iter(&iter);
9808         /* Can not use kmalloc for iter.temp and iter.fmt */
9809         iter.temp = static_temp_buf;
9810         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9811         iter.fmt = static_fmt_buf;
9812         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9813
9814         for_each_tracing_cpu(cpu) {
9815                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9816         }
9817
9818         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9819
9820         /* don't look at user memory in panic mode */
9821         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9822
9823         switch (oops_dump_mode) {
9824         case DUMP_ALL:
9825                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9826                 break;
9827         case DUMP_ORIG:
9828                 iter.cpu_file = raw_smp_processor_id();
9829                 break;
9830         case DUMP_NONE:
9831                 goto out_enable;
9832         default:
9833                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9834                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9835         }
9836
9837         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9838
9839         /* Did function tracer already get disabled? */
9840         if (ftrace_is_dead()) {
9841                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9842                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9843         }
9844
9845         /*
9846          * We need to stop all tracing on all CPUS to read
9847          * the next buffer. This is a bit expensive, but is
9848          * not done often. We fill all what we can read,
9849          * and then release the locks again.
9850          */
9851
9852         while (!trace_empty(&iter)) {
9853
9854                 if (!cnt)
9855                         printk(KERN_TRACE "---------------------------------\n");
9856
9857                 cnt++;
9858
9859                 trace_iterator_reset(&iter);
9860                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9861
9862                 if (trace_find_next_entry_inc(&iter) != NULL) {
9863                         int ret;
9864
9865                         ret = print_trace_line(&iter);
9866                         if (ret != TRACE_TYPE_NO_CONSUME)
9867                                 trace_consume(&iter);
9868                 }
9869                 touch_nmi_watchdog();
9870
9871                 trace_printk_seq(&iter.seq);
9872         }
9873
9874         if (!cnt)
9875                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9876         else
9877                 printk(KERN_TRACE "---------------------------------\n");
9878
9879  out_enable:
9880         tr->trace_flags |= old_userobj;
9881
9882         for_each_tracing_cpu(cpu) {
9883                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9884         }
9885         atomic_dec(&dump_running);
9886         printk_nmi_direct_exit();
9887         local_irq_restore(flags);
9888 }
9889 EXPORT_SYMBOL_GPL(ftrace_dump);
9890
9891 #define WRITE_BUFSIZE  4096
9892
9893 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9894                                 size_t count, loff_t *ppos,
9895                                 int (*createfn)(const char *))
9896 {
9897         char *kbuf, *buf, *tmp;
9898         int ret = 0;
9899         size_t done = 0;
9900         size_t size;
9901
9902         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9903         if (!kbuf)
9904                 return -ENOMEM;
9905
9906         while (done < count) {
9907                 size = count - done;
9908
9909                 if (size >= WRITE_BUFSIZE)
9910                         size = WRITE_BUFSIZE - 1;
9911
9912                 if (copy_from_user(kbuf, buffer + done, size)) {
9913                         ret = -EFAULT;
9914                         goto out;
9915                 }
9916                 kbuf[size] = '\0';
9917                 buf = kbuf;
9918                 do {
9919                         tmp = strchr(buf, '\n');
9920                         if (tmp) {
9921                                 *tmp = '\0';
9922                                 size = tmp - buf + 1;
9923                         } else {
9924                                 size = strlen(buf);
9925                                 if (done + size < count) {
9926                                         if (buf != kbuf)
9927                                                 break;
9928                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9929                                         pr_warn("Line length is too long: Should be less than %d\n",
9930                                                 WRITE_BUFSIZE - 2);
9931                                         ret = -EINVAL;
9932                                         goto out;
9933                                 }
9934                         }
9935                         done += size;
9936
9937                         /* Remove comments */
9938                         tmp = strchr(buf, '#');
9939
9940                         if (tmp)
9941                                 *tmp = '\0';
9942
9943                         ret = createfn(buf);
9944                         if (ret)
9945                                 goto out;
9946                         buf += size;
9947
9948                 } while (done < count);
9949         }
9950         ret = done;
9951
9952 out:
9953         kfree(kbuf);
9954
9955         return ret;
9956 }
9957
9958 __init static int tracer_alloc_buffers(void)
9959 {
9960         int ring_buf_size;
9961         int ret = -ENOMEM;
9962
9963
9964         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9965                 pr_warn("Tracing disabled due to lockdown\n");
9966                 return -EPERM;
9967         }
9968
9969         /*
9970          * Make sure we don't accidentally add more trace options
9971          * than we have bits for.
9972          */
9973         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9974
9975         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9976                 goto out;
9977
9978         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9979                 goto out_free_buffer_mask;
9980
9981         /* Only allocate trace_printk buffers if a trace_printk exists */
9982         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9983                 /* Must be called before global_trace.buffer is allocated */
9984                 trace_printk_init_buffers();
9985
9986         /* To save memory, keep the ring buffer size to its minimum */
9987         if (ring_buffer_expanded)
9988                 ring_buf_size = trace_buf_size;
9989         else
9990                 ring_buf_size = 1;
9991
9992         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9993         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9994
9995         raw_spin_lock_init(&global_trace.start_lock);
9996
9997         /*
9998          * The prepare callbacks allocates some memory for the ring buffer. We
9999          * don't free the buffer if the CPU goes down. If we were to free
10000          * the buffer, then the user would lose any trace that was in the
10001          * buffer. The memory will be removed once the "instance" is removed.
10002          */
10003         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10004                                       "trace/RB:preapre", trace_rb_cpu_prepare,
10005                                       NULL);
10006         if (ret < 0)
10007                 goto out_free_cpumask;
10008         /* Used for event triggers */
10009         ret = -ENOMEM;
10010         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10011         if (!temp_buffer)
10012                 goto out_rm_hp_state;
10013
10014         if (trace_create_savedcmd() < 0)
10015                 goto out_free_temp_buffer;
10016
10017         /* TODO: make the number of buffers hot pluggable with CPUS */
10018         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10019                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10020                 goto out_free_savedcmd;
10021         }
10022
10023         if (global_trace.buffer_disabled)
10024                 tracing_off();
10025
10026         if (trace_boot_clock) {
10027                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10028                 if (ret < 0)
10029                         pr_warn("Trace clock %s not defined, going back to default\n",
10030                                 trace_boot_clock);
10031         }
10032
10033         /*
10034          * register_tracer() might reference current_trace, so it
10035          * needs to be set before we register anything. This is
10036          * just a bootstrap of current_trace anyway.
10037          */
10038         global_trace.current_trace = &nop_trace;
10039
10040         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10041
10042         ftrace_init_global_array_ops(&global_trace);
10043
10044         init_trace_flags_index(&global_trace);
10045
10046         register_tracer(&nop_trace);
10047
10048         /* Function tracing may start here (via kernel command line) */
10049         init_function_trace();
10050
10051         /* All seems OK, enable tracing */
10052         tracing_disabled = 0;
10053
10054         atomic_notifier_chain_register(&panic_notifier_list,
10055                                        &trace_panic_notifier);
10056
10057         register_die_notifier(&trace_die_notifier);
10058
10059         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10060
10061         INIT_LIST_HEAD(&global_trace.systems);
10062         INIT_LIST_HEAD(&global_trace.events);
10063         INIT_LIST_HEAD(&global_trace.hist_vars);
10064         INIT_LIST_HEAD(&global_trace.err_log);
10065         list_add(&global_trace.list, &ftrace_trace_arrays);
10066
10067         apply_trace_boot_options();
10068
10069         register_snapshot_cmd();
10070
10071         test_can_verify();
10072
10073         return 0;
10074
10075 out_free_savedcmd:
10076         free_saved_cmdlines_buffer(savedcmd);
10077 out_free_temp_buffer:
10078         ring_buffer_free(temp_buffer);
10079 out_rm_hp_state:
10080         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10081 out_free_cpumask:
10082         free_cpumask_var(global_trace.tracing_cpumask);
10083 out_free_buffer_mask:
10084         free_cpumask_var(tracing_buffer_mask);
10085 out:
10086         return ret;
10087 }
10088
10089 void __init early_trace_init(void)
10090 {
10091         if (tracepoint_printk) {
10092                 tracepoint_print_iter =
10093                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10094                 if (MEM_FAIL(!tracepoint_print_iter,
10095                              "Failed to allocate trace iterator\n"))
10096                         tracepoint_printk = 0;
10097                 else
10098                         static_key_enable(&tracepoint_printk_key.key);
10099         }
10100         tracer_alloc_buffers();
10101 }
10102
/*
 * trace_init - boot-time (__init) entry point for trace event setup
 *
 * Does nothing besides calling trace_event_init().
 */
void __init trace_init(void)
{
        trace_event_init();
}
10107
10108 __init static void clear_boot_tracer(void)
10109 {
10110         /*
10111          * The default tracer at boot buffer is an init section.
10112          * This function is called in lateinit. If we did not
10113          * find the boot tracer, then clear it out, to prevent
10114          * later registration from accessing the buffer that is
10115          * about to be freed.
10116          */
10117         if (!default_bootup_tracer)
10118                 return;
10119
10120         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10121                default_bootup_tracer);
10122         default_bootup_tracer = NULL;
10123 }
10124
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * tracing_set_default_clock - fall back to the "global" trace clock
 *
 * If no clock was selected through trace_boot_clock and
 * sched_clock_stable() reports an unstable clock, switch global_trace
 * to the "global" clock. Nothing is changed when tracefs is locked
 * down; only a warning is printed in that case.
 */
__init static void tracing_set_default_clock(void)
{
        /* sched_clock_stable() is determined in late_initcall */
        if (trace_boot_clock || sched_clock_stable())
                return;

        if (security_locked_down(LOCKDOWN_TRACEFS)) {
                pr_warn("Can not set tracing clock due to lockdown\n");
                return;
        }

        printk(KERN_WARNING
               "Unstable clock detected, switching default tracing clock to \"global\"\n"
               "If you want to keep using the local clock, then add:\n"
               "  \"trace_clock=local\"\n"
               "on the kernel command line\n");
        tracing_set_clock(&global_trace, "global");
}
#else
/* Without CONFIG_HAVE_UNSTABLE_SCHED_CLOCK there is nothing to adjust */
static inline void tracing_set_default_clock(void) { }
#endif
10146
10147 __init static int late_trace_init(void)
10148 {
10149         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10150                 static_key_disable(&tracepoint_printk_key.key);
10151                 tracepoint_printk = 0;
10152         }
10153
10154         tracing_set_default_clock();
10155         clear_boot_tracer();
10156         return 0;
10157 }
10158
10159 late_initcall_sync(late_trace_init);