Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux-2.6-microblaze.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occurred
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are entering export into the list but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included into the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all or nothing
698          * operation. Always create a new array when adding new pids by
699          * the user. If the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If the dump on oops happens, it will be much appreciated
821  * to not have to wait for all that output. Anyway this can be
822  * boot time and run time configurable.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * serialize the access of the ring buffer
838  *
839  * ring buffer serializes readers, but it is low level protection.
840  * The validity of the events (which returns by ring_buffer_peek() ..etc)
841  * are not protected by ring buffer.
842  *
843  * The content of events may become garbage if we allow other process consumes
844  * these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not reader page) in ring buffer, and this page will be rewritten
847  *      by events producer.
848  *   B) The page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to system.
850  *
851  * These primitives allow multi process access to different cpu ring buffer
852  * concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multi read-only access are also serialized.
856  */
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
912 static inline void trace_access_lock_init(void)
913 {
914 }
915
916 #endif
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929                                         unsigned int trace_ctx,
930                                         int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934                                       struct trace_buffer *buffer,
935                                       unsigned long trace_ctx,
936                                       int skip, struct pt_regs *regs)
937 {
938 }
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff), that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races of where it gets disabled but we still do a record.
975          * As the check is in the fast path of the tracers, it is more
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot with either
1151  * a tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, it will stop tracing.
1155  * Basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:         The tracing instance to snapshot
1168  * @cond_data:  The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180         tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already done.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                         struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer. instead, resize it because
1239          * The max_tr ring buffer has some state (e.g. ring->clock) and
1240          * we want preserve it.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283         int ret;
1284
1285         ret = tracing_alloc_snapshot();
1286         if (ret < 0)
1287                 return;
1288
1289         tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520         return nsecs / 1000;
1521 }
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534         TRACE_FLAGS
1535         NULL
1536 };
1537
1538 static struct {
1539         u64 (*func)(void);
1540         const char *name;
1541         int in_ns;              /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543         { trace_clock_local,            "local",        1 },
1544         { trace_clock_global,           "global",       1 },
1545         { trace_clock_counter,          "counter",      0 },
1546         { trace_clock_jiffies,          "uptime",       0 },
1547         { trace_clock,                  "perf",         1 },
1548         { ktime_get_mono_fast_ns,       "mono",         1 },
1549         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550         { ktime_get_boot_fast_ns,       "boot",         1 },
1551         ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598         size_t cnt, loff_t *ppos)
1599 {
1600         char ch;
1601         size_t read = 0;
1602         ssize_t ret;
1603
1604         if (!*ppos)
1605                 trace_parser_clear(parser);
1606
1607         ret = get_user(ch, ubuf++);
1608         if (ret)
1609                 goto out;
1610
1611         read++;
1612         cnt--;
1613
1614         /*
1615          * The parser is not finished with the last write,
1616          * continue reading the user input without skipping spaces.
1617          */
1618         if (!parser->cont) {
1619                 /* skip white space */
1620                 while (cnt && isspace(ch)) {
1621                         ret = get_user(ch, ubuf++);
1622                         if (ret)
1623                                 goto out;
1624                         read++;
1625                         cnt--;
1626                 }
1627
1628                 parser->idx = 0;
1629
1630                 /* only spaces were written */
1631                 if (isspace(ch) || !ch) {
1632                         *ppos += read;
1633                         ret = read;
1634                         goto out;
1635                 }
1636         }
1637
1638         /* read the non-space input */
1639         while (cnt && !isspace(ch) && ch) {
1640                 if (parser->idx < parser->size - 1)
1641                         parser->buffer[parser->idx++] = ch;
1642                 else {
1643                         ret = -EINVAL;
1644                         goto out;
1645                 }
1646                 ret = get_user(ch, ubuf++);
1647                 if (ret)
1648                         goto out;
1649                 read++;
1650                 cnt--;
1651         }
1652
1653         /* We either got finished input or we have to wait for another call. */
1654         if (isspace(ch) || !ch) {
1655                 parser->buffer[parser->idx] = 0;
1656                 parser->cont = false;
1657         } else if (parser->idx < parser->size - 1) {
1658                 parser->cont = true;
1659                 parser->buffer[parser->idx++] = ch;
1660                 /* Make sure the parsed string always terminates with '\0'. */
1661                 parser->buffer[parser->idx] = 0;
1662         } else {
1663                 ret = -EINVAL;
1664                 goto out;
1665         }
1666
1667         *ppos += read;
1668         ret = read;
1669
1670 out:
1671         return ret;
1672 }
1673
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
1691 unsigned long __read_mostly     tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746
1747 /*
1748  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1749  *  defined(CONFIG_FSNOTIFY)
1750  */
1751 #else
1752
1753 #define trace_create_maxlat_file(tr, d_tracer)                          \
1754         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1755                           &tr->max_latency, &tracing_max_lat_fops)
1756
1757 #endif
1758
1759 #ifdef CONFIG_TRACER_MAX_TRACE
1760 /*
1761  * Copy the new maximum trace into the separate maximum-trace
1762  * structure. (this way the maximum trace is permanently saved,
1763  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1764  */
1765 static void
1766 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1767 {
1768         struct array_buffer *trace_buf = &tr->array_buffer;
1769         struct array_buffer *max_buf = &tr->max_buffer;
1770         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1771         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1772
1773         max_buf->cpu = cpu;
1774         max_buf->time_start = data->preempt_timestamp;
1775
1776         max_data->saved_latency = tr->max_latency;
1777         max_data->critical_start = data->critical_start;
1778         max_data->critical_end = data->critical_end;
1779
1780         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1781         max_data->pid = tsk->pid;
1782         /*
1783          * If tsk == current, then use current_uid(), as that does not use
1784          * RCU. The irq tracer can be called out of RCU scope.
1785          */
1786         if (tsk == current)
1787                 max_data->uid = current_uid();
1788         else
1789                 max_data->uid = task_uid(tsk);
1790
1791         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1792         max_data->policy = tsk->policy;
1793         max_data->rt_priority = tsk->rt_priority;
1794
1795         /* record this tasks comm */
1796         tracing_record_cmdline(tsk);
1797         latency_fsnotify(tr);
1798 }
1799
1800 /**
1801  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1802  * @tr: tracer
1803  * @tsk: the task with the latency
1804  * @cpu: The cpu that initiated the trace.
1805  * @cond_data: User data associated with a conditional snapshot
1806  *
1807  * Flip the buffers between the @tr and the max_tr and record information
1808  * about which task was the cause of this latency.
1809  */
1810 void
1811 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1812               void *cond_data)
1813 {
1814         if (tr->stop_count)
1815                 return;
1816
1817         WARN_ON_ONCE(!irqs_disabled());
1818
1819         if (!tr->allocated_snapshot) {
1820                 /* Only the nop tracer should hit this when disabling */
1821                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1822                 return;
1823         }
1824
1825         arch_spin_lock(&tr->max_lock);
1826
1827         /* Inherit the recordable setting from array_buffer */
1828         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1829                 ring_buffer_record_on(tr->max_buffer.buffer);
1830         else
1831                 ring_buffer_record_off(tr->max_buffer.buffer);
1832
1833 #ifdef CONFIG_TRACER_SNAPSHOT
1834         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1835                 goto out_unlock;
1836 #endif
1837         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838
1839         __update_max_tr(tr, tsk, cpu);
1840
1841  out_unlock:
1842         arch_spin_unlock(&tr->max_lock);
1843 }
1844
1845 /**
1846  * update_max_tr_single - only copy one trace over, and reset the rest
1847  * @tr: tracer
1848  * @tsk: task with the latency
1849  * @cpu: the cpu of the buffer to copy.
1850  *
1851  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1852  */
1853 void
1854 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1855 {
1856         int ret;
1857
1858         if (tr->stop_count)
1859                 return;
1860
1861         WARN_ON_ONCE(!irqs_disabled());
1862         if (!tr->allocated_snapshot) {
1863                 /* Only the nop tracer should hit this when disabling */
1864                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1865                 return;
1866         }
1867
1868         arch_spin_lock(&tr->max_lock);
1869
1870         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1871
1872         if (ret == -EBUSY) {
1873                 /*
1874                  * We failed to swap the buffer due to a commit taking
1875                  * place on this CPU. We fail to record, but we reset
1876                  * the max trace buffer (no one writes directly to it)
1877                  * and flag that it failed.
1878                  */
1879                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1880                         "Failed to swap buffers due to commit in progress\n");
1881         }
1882
1883         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1884
1885         __update_max_tr(tr, tsk, cpu);
1886         arch_spin_unlock(&tr->max_lock);
1887 }
1888 #endif /* CONFIG_TRACER_MAX_TRACE */
1889
1890 static int wait_on_pipe(struct trace_iterator *iter, int full)
1891 {
1892         /* Iterators are static, they should be filled or empty */
1893         if (trace_buffer_iter(iter, iter->cpu_file))
1894                 return 0;
1895
1896         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1897                                 full);
1898 }
1899
1900 #ifdef CONFIG_FTRACE_STARTUP_TEST
1901 static bool selftests_can_run;
1902
1903 struct trace_selftests {
1904         struct list_head                list;
1905         struct tracer                   *type;
1906 };
1907
1908 static LIST_HEAD(postponed_selftests);
1909
1910 static int save_selftest(struct tracer *type)
1911 {
1912         struct trace_selftests *selftest;
1913
1914         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1915         if (!selftest)
1916                 return -ENOMEM;
1917
1918         selftest->type = type;
1919         list_add(&selftest->list, &postponed_selftests);
1920         return 0;
1921 }
1922
1923 static int run_tracer_selftest(struct tracer *type)
1924 {
1925         struct trace_array *tr = &global_trace;
1926         struct tracer *saved_tracer = tr->current_trace;
1927         int ret;
1928
1929         if (!type->selftest || tracing_selftest_disabled)
1930                 return 0;
1931
1932         /*
1933          * If a tracer registers early in boot up (before scheduling is
1934          * initialized and such), then do not run its selftests yet.
1935          * Instead, run it a little later in the boot process.
1936          */
1937         if (!selftests_can_run)
1938                 return save_selftest(type);
1939
1940         if (!tracing_is_on()) {
1941                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1942                         type->name);
1943                 return 0;
1944         }
1945
1946         /*
1947          * Run a selftest on this tracer.
1948          * Here we reset the trace buffer, and set the current
1949          * tracer to be this tracer. The tracer can then run some
1950          * internal tracing to verify that everything is in order.
1951          * If we fail, we do not register this tracer.
1952          */
1953         tracing_reset_online_cpus(&tr->array_buffer);
1954
1955         tr->current_trace = type;
1956
1957 #ifdef CONFIG_TRACER_MAX_TRACE
1958         if (type->use_max_tr) {
1959                 /* If we expanded the buffers, make sure the max is expanded too */
1960                 if (ring_buffer_expanded)
1961                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1962                                            RING_BUFFER_ALL_CPUS);
1963                 tr->allocated_snapshot = true;
1964         }
1965 #endif
1966
1967         /* the test is responsible for initializing and enabling */
1968         pr_info("Testing tracer %s: ", type->name);
1969         ret = type->selftest(type, tr);
1970         /* the test is responsible for resetting too */
1971         tr->current_trace = saved_tracer;
1972         if (ret) {
1973                 printk(KERN_CONT "FAILED!\n");
1974                 /* Add the warning after printing 'FAILED' */
1975                 WARN_ON(1);
1976                 return -1;
1977         }
1978         /* Only reset on passing, to avoid touching corrupted buffers */
1979         tracing_reset_online_cpus(&tr->array_buffer);
1980
1981 #ifdef CONFIG_TRACER_MAX_TRACE
1982         if (type->use_max_tr) {
1983                 tr->allocated_snapshot = false;
1984
1985                 /* Shrink the max buffer again */
1986                 if (ring_buffer_expanded)
1987                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1988                                            RING_BUFFER_ALL_CPUS);
1989         }
1990 #endif
1991
1992         printk(KERN_CONT "PASSED\n");
1993         return 0;
1994 }
1995
1996 static __init int init_trace_selftests(void)
1997 {
1998         struct trace_selftests *p, *n;
1999         struct tracer *t, **last;
2000         int ret;
2001
2002         selftests_can_run = true;
2003
2004         mutex_lock(&trace_types_lock);
2005
2006         if (list_empty(&postponed_selftests))
2007                 goto out;
2008
2009         pr_info("Running postponed tracer tests:\n");
2010
2011         tracing_selftest_running = true;
2012         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2013                 /* This loop can take minutes when sanitizers are enabled, so
2014                  * lets make sure we allow RCU processing.
2015                  */
2016                 cond_resched();
2017                 ret = run_tracer_selftest(p->type);
2018                 /* If the test fails, then warn and remove from available_tracers */
2019                 if (ret < 0) {
2020                         WARN(1, "tracer: %s failed selftest, disabling\n",
2021                              p->type->name);
2022                         last = &trace_types;
2023                         for (t = trace_types; t; t = t->next) {
2024                                 if (t == p->type) {
2025                                         *last = t->next;
2026                                         break;
2027                                 }
2028                                 last = &t->next;
2029                         }
2030                 }
2031                 list_del(&p->list);
2032                 kfree(p);
2033         }
2034         tracing_selftest_running = false;
2035
2036  out:
2037         mutex_unlock(&trace_types_lock);
2038
2039         return 0;
2040 }
2041 core_initcall(init_trace_selftests);
2042 #else
2043 static inline int run_tracer_selftest(struct tracer *type)
2044 {
2045         return 0;
2046 }
2047 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2048
2049 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2050
2051 static void __init apply_trace_boot_options(void);
2052
2053 /**
2054  * register_tracer - register a tracer with the ftrace system.
2055  * @type: the plugin for the tracer
2056  *
2057  * Register a new plugin tracer.
2058  */
2059 int __init register_tracer(struct tracer *type)
2060 {
2061         struct tracer *t;
2062         int ret = 0;
2063
2064         if (!type->name) {
2065                 pr_info("Tracer must have a name\n");
2066                 return -1;
2067         }
2068
2069         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2070                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2071                 return -1;
2072         }
2073
2074         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2075                 pr_warn("Can not register tracer %s due to lockdown\n",
2076                            type->name);
2077                 return -EPERM;
2078         }
2079
2080         mutex_lock(&trace_types_lock);
2081
2082         tracing_selftest_running = true;
2083
2084         for (t = trace_types; t; t = t->next) {
2085                 if (strcmp(type->name, t->name) == 0) {
2086                         /* already found */
2087                         pr_info("Tracer %s already registered\n",
2088                                 type->name);
2089                         ret = -1;
2090                         goto out;
2091                 }
2092         }
2093
2094         if (!type->set_flag)
2095                 type->set_flag = &dummy_set_flag;
2096         if (!type->flags) {
2097                 /*allocate a dummy tracer_flags*/
2098                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2099                 if (!type->flags) {
2100                         ret = -ENOMEM;
2101                         goto out;
2102                 }
2103                 type->flags->val = 0;
2104                 type->flags->opts = dummy_tracer_opt;
2105         } else
2106                 if (!type->flags->opts)
2107                         type->flags->opts = dummy_tracer_opt;
2108
2109         /* store the tracer for __set_tracer_option */
2110         type->flags->trace = type;
2111
2112         ret = run_tracer_selftest(type);
2113         if (ret < 0)
2114                 goto out;
2115
2116         type->next = trace_types;
2117         trace_types = type;
2118         add_tracer_options(&global_trace, type);
2119
2120  out:
2121         tracing_selftest_running = false;
2122         mutex_unlock(&trace_types_lock);
2123
2124         if (ret || !default_bootup_tracer)
2125                 goto out_unlock;
2126
2127         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2128                 goto out_unlock;
2129
2130         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2131         /* Do we want this tracer to start on bootup? */
2132         tracing_set_tracer(&global_trace, type->name);
2133         default_bootup_tracer = NULL;
2134
2135         apply_trace_boot_options();
2136
2137         /* disable other selftests, since this will break it. */
2138         disable_tracing_selftest("running a tracer");
2139
2140  out_unlock:
2141         return ret;
2142 }
2143
2144 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2145 {
2146         struct trace_buffer *buffer = buf->buffer;
2147
2148         if (!buffer)
2149                 return;
2150
2151         ring_buffer_record_disable(buffer);
2152
2153         /* Make sure all commits have finished */
2154         synchronize_rcu();
2155         ring_buffer_reset_cpu(buffer, cpu);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 void tracing_reset_online_cpus(struct array_buffer *buf)
2161 {
2162         struct trace_buffer *buffer = buf->buffer;
2163
2164         if (!buffer)
2165                 return;
2166
2167         ring_buffer_record_disable(buffer);
2168
2169         /* Make sure all commits have finished */
2170         synchronize_rcu();
2171
2172         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2173
2174         ring_buffer_reset_online_cpus(buffer);
2175
2176         ring_buffer_record_enable(buffer);
2177 }
2178
2179 /* Must have trace_types_lock held */
2180 void tracing_reset_all_online_cpus(void)
2181 {
2182         struct trace_array *tr;
2183
2184         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2185                 if (!tr->clear_trace)
2186                         continue;
2187                 tr->clear_trace = false;
2188                 tracing_reset_online_cpus(&tr->array_buffer);
2189 #ifdef CONFIG_TRACER_MAX_TRACE
2190                 tracing_reset_online_cpus(&tr->max_buffer);
2191 #endif
2192         }
2193 }
2194
2195 /*
2196  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2197  * is the tgid last observed corresponding to pid=i.
2198  */
2199 static int *tgid_map;
2200
2201 /* The maximum valid index into tgid_map. */
2202 static size_t tgid_map_max;
2203
2204 #define SAVED_CMDLINES_DEFAULT 128
2205 #define NO_CMDLINE_MAP UINT_MAX
2206 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2207 struct saved_cmdlines_buffer {
2208         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2209         unsigned *map_cmdline_to_pid;
2210         unsigned cmdline_num;
2211         int cmdline_idx;
2212         char *saved_cmdlines;
2213 };
2214 static struct saved_cmdlines_buffer *savedcmd;
2215
2216 static inline char *get_saved_cmdlines(int idx)
2217 {
2218         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2219 }
2220
2221 static inline void set_cmdline(int idx, const char *cmdline)
2222 {
2223         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2224 }
2225
2226 static int allocate_cmdlines_buffer(unsigned int val,
2227                                     struct saved_cmdlines_buffer *s)
2228 {
2229         s->map_cmdline_to_pid = kmalloc_array(val,
2230                                               sizeof(*s->map_cmdline_to_pid),
2231                                               GFP_KERNEL);
2232         if (!s->map_cmdline_to_pid)
2233                 return -ENOMEM;
2234
2235         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2236         if (!s->saved_cmdlines) {
2237                 kfree(s->map_cmdline_to_pid);
2238                 return -ENOMEM;
2239         }
2240
2241         s->cmdline_idx = 0;
2242         s->cmdline_num = val;
2243         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2244                sizeof(s->map_pid_to_cmdline));
2245         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2246                val * sizeof(*s->map_cmdline_to_pid));
2247
2248         return 0;
2249 }
2250
2251 static int trace_create_savedcmd(void)
2252 {
2253         int ret;
2254
2255         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2256         if (!savedcmd)
2257                 return -ENOMEM;
2258
2259         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2260         if (ret < 0) {
2261                 kfree(savedcmd);
2262                 savedcmd = NULL;
2263                 return -ENOMEM;
2264         }
2265
2266         return 0;
2267 }
2268
2269 int is_tracing_stopped(void)
2270 {
2271         return global_trace.stop_count;
2272 }
2273
2274 /**
2275  * tracing_start - quick start of the tracer
2276  *
2277  * If tracing is enabled but was stopped by tracing_stop,
2278  * this will start the tracer back up.
2279  */
2280 void tracing_start(void)
2281 {
2282         struct trace_buffer *buffer;
2283         unsigned long flags;
2284
2285         if (tracing_disabled)
2286                 return;
2287
2288         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2289         if (--global_trace.stop_count) {
2290                 if (global_trace.stop_count < 0) {
2291                         /* Someone screwed up their debugging */
2292                         WARN_ON_ONCE(1);
2293                         global_trace.stop_count = 0;
2294                 }
2295                 goto out;
2296         }
2297
2298         /* Prevent the buffers from switching */
2299         arch_spin_lock(&global_trace.max_lock);
2300
2301         buffer = global_trace.array_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304
2305 #ifdef CONFIG_TRACER_MAX_TRACE
2306         buffer = global_trace.max_buffer.buffer;
2307         if (buffer)
2308                 ring_buffer_record_enable(buffer);
2309 #endif
2310
2311         arch_spin_unlock(&global_trace.max_lock);
2312
2313  out:
2314         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2315 }
2316
2317 static void tracing_start_tr(struct trace_array *tr)
2318 {
2319         struct trace_buffer *buffer;
2320         unsigned long flags;
2321
2322         if (tracing_disabled)
2323                 return;
2324
2325         /* If global, we need to also start the max tracer */
2326         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2327                 return tracing_start();
2328
2329         raw_spin_lock_irqsave(&tr->start_lock, flags);
2330
2331         if (--tr->stop_count) {
2332                 if (tr->stop_count < 0) {
2333                         /* Someone screwed up their debugging */
2334                         WARN_ON_ONCE(1);
2335                         tr->stop_count = 0;
2336                 }
2337                 goto out;
2338         }
2339
2340         buffer = tr->array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344  out:
2345         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2346 }
2347
2348 /**
2349  * tracing_stop - quick stop of the tracer
2350  *
2351  * Light weight way to stop tracing. Use in conjunction with
2352  * tracing_start.
2353  */
2354 void tracing_stop(void)
2355 {
2356         struct trace_buffer *buffer;
2357         unsigned long flags;
2358
2359         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2360         if (global_trace.stop_count++)
2361                 goto out;
2362
2363         /* Prevent the buffers from switching */
2364         arch_spin_lock(&global_trace.max_lock);
2365
2366         buffer = global_trace.array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369
2370 #ifdef CONFIG_TRACER_MAX_TRACE
2371         buffer = global_trace.max_buffer.buffer;
2372         if (buffer)
2373                 ring_buffer_record_disable(buffer);
2374 #endif
2375
2376         arch_spin_unlock(&global_trace.max_lock);
2377
2378  out:
2379         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2380 }
2381
2382 static void tracing_stop_tr(struct trace_array *tr)
2383 {
2384         struct trace_buffer *buffer;
2385         unsigned long flags;
2386
2387         /* If global, we need to also stop the max tracer */
2388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2389                 return tracing_stop();
2390
2391         raw_spin_lock_irqsave(&tr->start_lock, flags);
2392         if (tr->stop_count++)
2393                 goto out;
2394
2395         buffer = tr->array_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_disable(buffer);
2398
2399  out:
2400         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2401 }
2402
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405         unsigned tpid, idx;
2406
2407         /* treat recording of idle task as a success */
2408         if (!tsk->pid)
2409                 return 1;
2410
2411         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412
2413         /*
2414          * It's not the end of the world if we don't get
2415          * the lock, but we also don't want to spin
2416          * nor do we want to disable interrupts,
2417          * so if we miss here, then better luck next time.
2418          */
2419         if (!arch_spin_trylock(&trace_cmdline_lock))
2420                 return 0;
2421
2422         idx = savedcmd->map_pid_to_cmdline[tpid];
2423         if (idx == NO_CMDLINE_MAP) {
2424                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2425
2426                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2427                 savedcmd->cmdline_idx = idx;
2428         }
2429
2430         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2431         set_cmdline(idx, tsk->comm);
2432
2433         arch_spin_unlock(&trace_cmdline_lock);
2434
2435         return 1;
2436 }
2437
2438 static void __trace_find_cmdline(int pid, char comm[])
2439 {
2440         unsigned map;
2441         int tpid;
2442
2443         if (!pid) {
2444                 strcpy(comm, "<idle>");
2445                 return;
2446         }
2447
2448         if (WARN_ON_ONCE(pid < 0)) {
2449                 strcpy(comm, "<XXX>");
2450                 return;
2451         }
2452
2453         tpid = pid & (PID_MAX_DEFAULT - 1);
2454         map = savedcmd->map_pid_to_cmdline[tpid];
2455         if (map != NO_CMDLINE_MAP) {
2456                 tpid = savedcmd->map_cmdline_to_pid[map];
2457                 if (tpid == pid) {
2458                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2459                         return;
2460                 }
2461         }
2462         strcpy(comm, "<...>");
2463 }
2464
2465 void trace_find_cmdline(int pid, char comm[])
2466 {
2467         preempt_disable();
2468         arch_spin_lock(&trace_cmdline_lock);
2469
2470         __trace_find_cmdline(pid, comm);
2471
2472         arch_spin_unlock(&trace_cmdline_lock);
2473         preempt_enable();
2474 }
2475
2476 static int *trace_find_tgid_ptr(int pid)
2477 {
2478         /*
2479          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2480          * if we observe a non-NULL tgid_map then we also observe the correct
2481          * tgid_map_max.
2482          */
2483         int *map = smp_load_acquire(&tgid_map);
2484
2485         if (unlikely(!map || pid > tgid_map_max))
2486                 return NULL;
2487
2488         return &map[pid];
2489 }
2490
2491 int trace_find_tgid(int pid)
2492 {
2493         int *ptr = trace_find_tgid_ptr(pid);
2494
2495         return ptr ? *ptr : 0;
2496 }
2497
2498 static int trace_save_tgid(struct task_struct *tsk)
2499 {
2500         int *ptr;
2501
2502         /* treat recording of idle task as a success */
2503         if (!tsk->pid)
2504                 return 1;
2505
2506         ptr = trace_find_tgid_ptr(tsk->pid);
2507         if (!ptr)
2508                 return 0;
2509
2510         *ptr = tsk->tgid;
2511         return 1;
2512 }
2513
2514 static bool tracing_record_taskinfo_skip(int flags)
2515 {
2516         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2517                 return true;
2518         if (!__this_cpu_read(trace_taskinfo_save))
2519                 return true;
2520         return false;
2521 }
2522
2523 /**
2524  * tracing_record_taskinfo - record the task info of a task
2525  *
2526  * @task:  task to record
2527  * @flags: TRACE_RECORD_CMDLINE for recording comm
2528  *         TRACE_RECORD_TGID for recording tgid
2529  */
2530 void tracing_record_taskinfo(struct task_struct *task, int flags)
2531 {
2532         bool done;
2533
2534         if (tracing_record_taskinfo_skip(flags))
2535                 return;
2536
2537         /*
2538          * Record as much task information as possible. If some fail, continue
2539          * to try to record the others.
2540          */
2541         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2542         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2543
2544         /* If recording any information failed, retry again soon. */
2545         if (!done)
2546                 return;
2547
2548         __this_cpu_write(trace_taskinfo_save, false);
2549 }
2550
2551 /**
2552  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2553  *
2554  * @prev: previous task during sched_switch
2555  * @next: next task during sched_switch
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2560                                           struct task_struct *next, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2572         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2574         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2575
2576         /* If recording any information failed, retry again soon. */
2577         if (!done)
2578                 return;
2579
2580         __this_cpu_write(trace_taskinfo_save, false);
2581 }
2582
2583 /* Helpers to record a specific task information */
2584 void tracing_record_cmdline(struct task_struct *task)
2585 {
2586         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2587 }
2588
2589 void tracing_record_tgid(struct task_struct *task)
2590 {
2591         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2592 }
2593
2594 /*
2595  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2596  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2597  * simplifies those functions and keeps them in sync.
2598  */
2599 enum print_line_t trace_handle_return(struct trace_seq *s)
2600 {
2601         return trace_seq_has_overflowed(s) ?
2602                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2603 }
2604 EXPORT_SYMBOL_GPL(trace_handle_return);
2605
2606 static unsigned short migration_disable_value(void)
2607 {
2608 #if defined(CONFIG_SMP)
2609         return current->migration_disabled;
2610 #else
2611         return 0;
2612 #endif
2613 }
2614
2615 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2616 {
2617         unsigned int trace_flags = irqs_status;
2618         unsigned int pc;
2619
2620         pc = preempt_count();
2621
2622         if (pc & NMI_MASK)
2623                 trace_flags |= TRACE_FLAG_NMI;
2624         if (pc & HARDIRQ_MASK)
2625                 trace_flags |= TRACE_FLAG_HARDIRQ;
2626         if (in_serving_softirq())
2627                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2628
2629         if (tif_need_resched())
2630                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2631         if (test_preempt_need_resched())
2632                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2633         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2634                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2635 }
2636
2637 struct ring_buffer_event *
2638 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2639                           int type,
2640                           unsigned long len,
2641                           unsigned int trace_ctx)
2642 {
2643         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2644 }
2645
2646 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2647 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2648 static int trace_buffered_event_ref;
2649
2650 /**
2651  * trace_buffered_event_enable - enable buffering events
2652  *
2653  * When events are being filtered, it is quicker to use a temporary
2654  * buffer to write the event data into if there's a likely chance
2655  * that it will not be committed. The discard of the ring buffer
2656  * is not as fast as committing, and is much slower than copying
2657  * a commit.
2658  *
2659  * When an event is to be filtered, allocate per cpu buffers to
2660  * write the event data into, and if the event is filtered and discarded
2661  * it is simply dropped, otherwise, the entire data is to be committed
2662  * in one shot.
2663  */
2664 void trace_buffered_event_enable(void)
2665 {
2666         struct ring_buffer_event *event;
2667         struct page *page;
2668         int cpu;
2669
2670         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2671
2672         if (trace_buffered_event_ref++)
2673                 return;
2674
2675         for_each_tracing_cpu(cpu) {
2676                 page = alloc_pages_node(cpu_to_node(cpu),
2677                                         GFP_KERNEL | __GFP_NORETRY, 0);
2678                 if (!page)
2679                         goto failed;
2680
2681                 event = page_address(page);
2682                 memset(event, 0, sizeof(*event));
2683
2684                 per_cpu(trace_buffered_event, cpu) = event;
2685
2686                 preempt_disable();
2687                 if (cpu == smp_processor_id() &&
2688                     __this_cpu_read(trace_buffered_event) !=
2689                     per_cpu(trace_buffered_event, cpu))
2690                         WARN_ON_ONCE(1);
2691                 preempt_enable();
2692         }
2693
2694         return;
2695  failed:
2696         trace_buffered_event_disable();
2697 }
2698
2699 static void enable_trace_buffered_event(void *data)
2700 {
2701         /* Probably not needed, but do it anyway */
2702         smp_rmb();
2703         this_cpu_dec(trace_buffered_event_cnt);
2704 }
2705
2706 static void disable_trace_buffered_event(void *data)
2707 {
2708         this_cpu_inc(trace_buffered_event_cnt);
2709 }
2710
2711 /**
2712  * trace_buffered_event_disable - disable buffering events
2713  *
2714  * When a filter is removed, it is faster to not use the buffered
2715  * events, and to commit directly into the ring buffer. Free up
2716  * the temp buffers when there are no more users. This requires
2717  * special synchronization with current events.
2718  */
2719 void trace_buffered_event_disable(void)
2720 {
2721         int cpu;
2722
2723         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2724
2725         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2726                 return;
2727
2728         if (--trace_buffered_event_ref)
2729                 return;
2730
2731         preempt_disable();
2732         /* For each CPU, set the buffer as used. */
2733         smp_call_function_many(tracing_buffer_mask,
2734                                disable_trace_buffered_event, NULL, 1);
2735         preempt_enable();
2736
2737         /* Wait for all current users to finish */
2738         synchronize_rcu();
2739
2740         for_each_tracing_cpu(cpu) {
2741                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2742                 per_cpu(trace_buffered_event, cpu) = NULL;
2743         }
2744         /*
2745          * Make sure trace_buffered_event is NULL before clearing
2746          * trace_buffered_event_cnt.
2747          */
2748         smp_wmb();
2749
2750         preempt_disable();
2751         /* Do the work on each cpu */
2752         smp_call_function_many(tracing_buffer_mask,
2753                                enable_trace_buffered_event, NULL, 1);
2754         preempt_enable();
2755 }
2756
2757 static struct trace_buffer *temp_buffer;
2758
2759 struct ring_buffer_event *
2760 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2761                           struct trace_event_file *trace_file,
2762                           int type, unsigned long len,
2763                           unsigned int trace_ctx)
2764 {
2765         struct ring_buffer_event *entry;
2766         struct trace_array *tr = trace_file->tr;
2767         int val;
2768
2769         *current_rb = tr->array_buffer.buffer;
2770
2771         if (!tr->no_filter_buffering_ref &&
2772             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2773             (entry = this_cpu_read(trace_buffered_event))) {
2774                 /*
2775                  * Filtering is on, so try to use the per cpu buffer first.
2776                  * This buffer will simulate a ring_buffer_event,
2777                  * where the type_len is zero and the array[0] will
2778                  * hold the full length.
2779                  * (see include/linux/ring-buffer.h for details on
2780                  *  how the ring_buffer_event is structured).
2781                  *
2782                  * Using a temp buffer during filtering and copying it
2783                  * on a matched filter is quicker than writing directly
2784                  * into the ring buffer and then discarding it when
2785                  * it doesn't match. That is because the discard
2786                  * requires several atomic operations to get right.
2787                  * Copying on match and doing nothing on a failed match
2788                  * is still quicker than no copy on match, but having
2789                  * to discard out of the ring buffer on a failed match.
2790                  */
2791                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2792
2793                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2794
2795                 /*
2796                  * Preemption is disabled, but interrupts and NMIs
2797                  * can still come in now. If that happens after
2798                  * the above increment, then it will have to go
2799                  * back to the old method of allocating the event
2800                  * on the ring buffer, and if the filter fails, it
2801                  * will have to call ring_buffer_discard_commit()
2802                  * to remove it.
2803                  *
2804                  * Need to also check the unlikely case that the
2805                  * length is bigger than the temp buffer size.
2806                  * If that happens, then the reserve is pretty much
2807                  * guaranteed to fail, as the ring buffer currently
2808                  * only allows events less than a page. But that may
2809                  * change in the future, so let the ring buffer reserve
2810                  * handle the failure in that case.
2811                  */
2812                 if (val == 1 && likely(len <= max_len)) {
2813                         trace_event_setup(entry, type, trace_ctx);
2814                         entry->array[0] = len;
2815                         return entry;
2816                 }
2817                 this_cpu_dec(trace_buffered_event_cnt);
2818         }
2819
2820         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2821                                             trace_ctx);
2822         /*
2823          * If tracing is off, but we have triggers enabled
2824          * we still need to look at the event data. Use the temp_buffer
2825          * to store the trace event for the trigger to use. It's recursive
2826          * safe and will not be recorded anywhere.
2827          */
2828         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2829                 *current_rb = temp_buffer;
2830                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2831                                                     trace_ctx);
2832         }
2833         return entry;
2834 }
2835 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2836
2837 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2838 static DEFINE_MUTEX(tracepoint_printk_mutex);
2839
2840 static void output_printk(struct trace_event_buffer *fbuffer)
2841 {
2842         struct trace_event_call *event_call;
2843         struct trace_event_file *file;
2844         struct trace_event *event;
2845         unsigned long flags;
2846         struct trace_iterator *iter = tracepoint_print_iter;
2847
2848         /* We should never get here if iter is NULL */
2849         if (WARN_ON_ONCE(!iter))
2850                 return;
2851
2852         event_call = fbuffer->trace_file->event_call;
2853         if (!event_call || !event_call->event.funcs ||
2854             !event_call->event.funcs->trace)
2855                 return;
2856
2857         file = fbuffer->trace_file;
2858         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2859             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2860              !filter_match_preds(file->filter, fbuffer->entry)))
2861                 return;
2862
2863         event = &fbuffer->trace_file->event_call->event;
2864
2865         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2866         trace_seq_init(&iter->seq);
2867         iter->ent = fbuffer->entry;
2868         event_call->event.funcs->trace(iter, 0, event);
2869         trace_seq_putc(&iter->seq, 0);
2870         printk("%s", iter->seq.buffer);
2871
2872         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2873 }
2874
2875 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2876                              void *buffer, size_t *lenp,
2877                              loff_t *ppos)
2878 {
2879         int save_tracepoint_printk;
2880         int ret;
2881
2882         mutex_lock(&tracepoint_printk_mutex);
2883         save_tracepoint_printk = tracepoint_printk;
2884
2885         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2886
2887         /*
2888          * This will force exiting early, as tracepoint_printk
2889          * is always zero when tracepoint_printk_iter is not allocated
2890          */
2891         if (!tracepoint_print_iter)
2892                 tracepoint_printk = 0;
2893
2894         if (save_tracepoint_printk == tracepoint_printk)
2895                 goto out;
2896
2897         if (tracepoint_printk)
2898                 static_key_enable(&tracepoint_printk_key.key);
2899         else
2900                 static_key_disable(&tracepoint_printk_key.key);
2901
2902  out:
2903         mutex_unlock(&tracepoint_printk_mutex);
2904
2905         return ret;
2906 }
2907
2908 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2909 {
2910         enum event_trigger_type tt = ETT_NONE;
2911         struct trace_event_file *file = fbuffer->trace_file;
2912
2913         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2914                         fbuffer->entry, &tt))
2915                 goto discard;
2916
2917         if (static_key_false(&tracepoint_printk_key.key))
2918                 output_printk(fbuffer);
2919
2920         if (static_branch_unlikely(&trace_event_exports_enabled))
2921                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2922
2923         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2924                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2925
2926 discard:
2927         if (tt)
2928                 event_triggers_post_call(file, tt);
2929
2930 }
2931 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2932
2933 /*
2934  * Skip 3:
2935  *
2936  *   trace_buffer_unlock_commit_regs()
2937  *   trace_event_buffer_commit()
2938  *   trace_event_raw_event_xxx()
2939  */
2940 # define STACK_SKIP 3
2941
2942 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2943                                      struct trace_buffer *buffer,
2944                                      struct ring_buffer_event *event,
2945                                      unsigned int trace_ctx,
2946                                      struct pt_regs *regs)
2947 {
2948         __buffer_unlock_commit(buffer, event);
2949
2950         /*
2951          * If regs is not set, then skip the necessary functions.
2952          * Note, we can still get here via blktrace, wakeup tracer
2953          * and mmiotrace, but that's ok if they lose a function or
2954          * two. They are not that meaningful.
2955          */
2956         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2957         ftrace_trace_userstack(tr, buffer, trace_ctx);
2958 }
2959
2960 /*
2961  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2962  */
2963 void
2964 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2965                                    struct ring_buffer_event *event)
2966 {
2967         __buffer_unlock_commit(buffer, event);
2968 }
2969
2970 void
2971 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2972                parent_ip, unsigned int trace_ctx)
2973 {
2974         struct trace_event_call *call = &event_function;
2975         struct trace_buffer *buffer = tr->array_buffer.buffer;
2976         struct ring_buffer_event *event;
2977         struct ftrace_entry *entry;
2978
2979         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2980                                             trace_ctx);
2981         if (!event)
2982                 return;
2983         entry   = ring_buffer_event_data(event);
2984         entry->ip                       = ip;
2985         entry->parent_ip                = parent_ip;
2986
2987         if (!call_filter_check_discard(call, entry, buffer, event)) {
2988                 if (static_branch_unlikely(&trace_function_exports_enabled))
2989                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2990                 __buffer_unlock_commit(buffer, event);
2991         }
2992 }
2993
2994 #ifdef CONFIG_STACKTRACE
2995
2996 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2997 #define FTRACE_KSTACK_NESTING   4
2998
2999 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3000
3001 struct ftrace_stack {
3002         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3003 };
3004
3005
3006 struct ftrace_stacks {
3007         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3008 };
3009
3010 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3011 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3012
3013 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3014                                  unsigned int trace_ctx,
3015                                  int skip, struct pt_regs *regs)
3016 {
3017         struct trace_event_call *call = &event_kernel_stack;
3018         struct ring_buffer_event *event;
3019         unsigned int size, nr_entries;
3020         struct ftrace_stack *fstack;
3021         struct stack_entry *entry;
3022         int stackidx;
3023
3024         /*
3025          * Add one, for this function and the call to save_stack_trace()
3026          * If regs is set, then these functions will not be in the way.
3027          */
3028 #ifndef CONFIG_UNWINDER_ORC
3029         if (!regs)
3030                 skip++;
3031 #endif
3032
3033         preempt_disable_notrace();
3034
3035         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3036
3037         /* This should never happen. If it does, yell once and skip */
3038         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3039                 goto out;
3040
3041         /*
3042          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3043          * interrupt will either see the value pre increment or post
3044          * increment. If the interrupt happens pre increment it will have
3045          * restored the counter when it returns.  We just need a barrier to
3046          * keep gcc from moving things around.
3047          */
3048         barrier();
3049
3050         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3051         size = ARRAY_SIZE(fstack->calls);
3052
3053         if (regs) {
3054                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3055                                                    size, skip);
3056         } else {
3057                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3058         }
3059
3060         size = nr_entries * sizeof(unsigned long);
3061         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3062                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3063                                     trace_ctx);
3064         if (!event)
3065                 goto out;
3066         entry = ring_buffer_event_data(event);
3067
3068         memcpy(&entry->caller, fstack->calls, size);
3069         entry->size = nr_entries;
3070
3071         if (!call_filter_check_discard(call, entry, buffer, event))
3072                 __buffer_unlock_commit(buffer, event);
3073
3074  out:
3075         /* Again, don't let gcc optimize things here */
3076         barrier();
3077         __this_cpu_dec(ftrace_stack_reserve);
3078         preempt_enable_notrace();
3079
3080 }
3081
3082 static inline void ftrace_trace_stack(struct trace_array *tr,
3083                                       struct trace_buffer *buffer,
3084                                       unsigned int trace_ctx,
3085                                       int skip, struct pt_regs *regs)
3086 {
3087         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3088                 return;
3089
3090         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3091 }
3092
3093 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3094                    int skip)
3095 {
3096         struct trace_buffer *buffer = tr->array_buffer.buffer;
3097
3098         if (rcu_is_watching()) {
3099                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3100                 return;
3101         }
3102
3103         /*
3104          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3105          * but if the above rcu_is_watching() failed, then the NMI
3106          * triggered someplace critical, and rcu_irq_enter() should
3107          * not be called from NMI.
3108          */
3109         if (unlikely(in_nmi()))
3110                 return;
3111
3112         rcu_irq_enter_irqson();
3113         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3114         rcu_irq_exit_irqson();
3115 }
3116
3117 /**
3118  * trace_dump_stack - record a stack back trace in the trace buffer
3119  * @skip: Number of functions to skip (helper handlers)
3120  */
3121 void trace_dump_stack(int skip)
3122 {
3123         if (tracing_disabled || tracing_selftest_running)
3124                 return;
3125
3126 #ifndef CONFIG_UNWINDER_ORC
3127         /* Skip 1 to skip this function. */
3128         skip++;
3129 #endif
3130         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3131                              tracing_gen_ctx(), skip, NULL);
3132 }
3133 EXPORT_SYMBOL_GPL(trace_dump_stack);
3134
3135 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3136 static DEFINE_PER_CPU(int, user_stack_count);
3137
3138 static void
3139 ftrace_trace_userstack(struct trace_array *tr,
3140                        struct trace_buffer *buffer, unsigned int trace_ctx)
3141 {
3142         struct trace_event_call *call = &event_user_stack;
3143         struct ring_buffer_event *event;
3144         struct userstack_entry *entry;
3145
3146         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3147                 return;
3148
3149         /*
3150          * NMIs can not handle page faults, even with fix ups.
3151          * The save user stack can (and often does) fault.
3152          */
3153         if (unlikely(in_nmi()))
3154                 return;
3155
3156         /*
3157          * prevent recursion, since the user stack tracing may
3158          * trigger other kernel events.
3159          */
3160         preempt_disable();
3161         if (__this_cpu_read(user_stack_count))
3162                 goto out;
3163
3164         __this_cpu_inc(user_stack_count);
3165
3166         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3167                                             sizeof(*entry), trace_ctx);
3168         if (!event)
3169                 goto out_drop_count;
3170         entry   = ring_buffer_event_data(event);
3171
3172         entry->tgid             = current->tgid;
3173         memset(&entry->caller, 0, sizeof(entry->caller));
3174
3175         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3176         if (!call_filter_check_discard(call, entry, buffer, event))
3177                 __buffer_unlock_commit(buffer, event);
3178
3179  out_drop_count:
3180         __this_cpu_dec(user_stack_count);
3181  out:
3182         preempt_enable();
3183 }
3184 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3185 static void ftrace_trace_userstack(struct trace_array *tr,
3186                                    struct trace_buffer *buffer,
3187                                    unsigned int trace_ctx)
3188 {
3189 }
3190 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3191
3192 #endif /* CONFIG_STACKTRACE */
3193
3194 static inline void
3195 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3196                           unsigned long long delta)
3197 {
3198         entry->bottom_delta_ts = delta & U32_MAX;
3199         entry->top_delta_ts = (delta >> 32);
3200 }
3201
3202 void trace_last_func_repeats(struct trace_array *tr,
3203                              struct trace_func_repeats *last_info,
3204                              unsigned int trace_ctx)
3205 {
3206         struct trace_buffer *buffer = tr->array_buffer.buffer;
3207         struct func_repeats_entry *entry;
3208         struct ring_buffer_event *event;
3209         u64 delta;
3210
3211         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3212                                             sizeof(*entry), trace_ctx);
3213         if (!event)
3214                 return;
3215
3216         delta = ring_buffer_event_time_stamp(buffer, event) -
3217                 last_info->ts_last_call;
3218
3219         entry = ring_buffer_event_data(event);
3220         entry->ip = last_info->ip;
3221         entry->parent_ip = last_info->parent_ip;
3222         entry->count = last_info->count;
3223         func_repeats_set_delta_ts(entry, delta);
3224
3225         __buffer_unlock_commit(buffer, event);
3226 }
3227
3228 /* created for use with alloc_percpu */
3229 struct trace_buffer_struct {
3230         int nesting;
3231         char buffer[4][TRACE_BUF_SIZE];
3232 };
3233
3234 static struct trace_buffer_struct *trace_percpu_buffer;
3235
3236 /*
3237  * This allows for lockless recording.  If we're nested too deeply, then
3238  * this returns NULL.
3239  */
3240 static char *get_trace_buf(void)
3241 {
3242         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3243
3244         if (!buffer || buffer->nesting >= 4)
3245                 return NULL;
3246
3247         buffer->nesting++;
3248
3249         /* Interrupts must see nesting incremented before we use the buffer */
3250         barrier();
3251         return &buffer->buffer[buffer->nesting - 1][0];
3252 }
3253
3254 static void put_trace_buf(void)
3255 {
3256         /* Don't let the decrement of nesting leak before this */
3257         barrier();
3258         this_cpu_dec(trace_percpu_buffer->nesting);
3259 }
3260
3261 static int alloc_percpu_trace_buffer(void)
3262 {
3263         struct trace_buffer_struct *buffers;
3264
3265         if (trace_percpu_buffer)
3266                 return 0;
3267
3268         buffers = alloc_percpu(struct trace_buffer_struct);
3269         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3270                 return -ENOMEM;
3271
3272         trace_percpu_buffer = buffers;
3273         return 0;
3274 }
3275
3276 static int buffers_allocated;
3277
3278 void trace_printk_init_buffers(void)
3279 {
3280         if (buffers_allocated)
3281                 return;
3282
3283         if (alloc_percpu_trace_buffer())
3284                 return;
3285
3286         /* trace_printk() is for debug use only. Don't use it in production. */
3287
3288         pr_warn("\n");
3289         pr_warn("**********************************************************\n");
3290         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3291         pr_warn("**                                                      **\n");
3292         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3293         pr_warn("**                                                      **\n");
3294         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3295         pr_warn("** unsafe for production use.                           **\n");
3296         pr_warn("**                                                      **\n");
3297         pr_warn("** If you see this message and you are not debugging    **\n");
3298         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3299         pr_warn("**                                                      **\n");
3300         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3301         pr_warn("**********************************************************\n");
3302
3303         /* Expand the buffers to set size */
3304         tracing_update_buffers();
3305
3306         buffers_allocated = 1;
3307
3308         /*
3309          * trace_printk_init_buffers() can be called by modules.
3310          * If that happens, then we need to start cmdline recording
3311          * directly here. If the global_trace.buffer is already
3312          * allocated here, then this was called by module code.
3313          */
3314         if (global_trace.array_buffer.buffer)
3315                 tracing_start_cmdline_record();
3316 }
3317 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3318
3319 void trace_printk_start_comm(void)
3320 {
3321         /* Start tracing comms if trace printk is set */
3322         if (!buffers_allocated)
3323                 return;
3324         tracing_start_cmdline_record();
3325 }
3326
3327 static void trace_printk_start_stop_comm(int enabled)
3328 {
3329         if (!buffers_allocated)
3330                 return;
3331
3332         if (enabled)
3333                 tracing_start_cmdline_record();
3334         else
3335                 tracing_stop_cmdline_record();
3336 }
3337
3338 /**
3339  * trace_vbprintk - write binary msg to tracing buffer
3340  * @ip:    The address of the caller
3341  * @fmt:   The string format to write to the buffer
3342  * @args:  Arguments for @fmt
3343  */
3344 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3345 {
3346         struct trace_event_call *call = &event_bprint;
3347         struct ring_buffer_event *event;
3348         struct trace_buffer *buffer;
3349         struct trace_array *tr = &global_trace;
3350         struct bprint_entry *entry;
3351         unsigned int trace_ctx;
3352         char *tbuffer;
3353         int len = 0, size;
3354
3355         if (unlikely(tracing_selftest_running || tracing_disabled))
3356                 return 0;
3357
3358         /* Don't pollute graph traces with trace_vprintk internals */
3359         pause_graph_tracing();
3360
3361         trace_ctx = tracing_gen_ctx();
3362         preempt_disable_notrace();
3363
3364         tbuffer = get_trace_buf();
3365         if (!tbuffer) {
3366                 len = 0;
3367                 goto out_nobuffer;
3368         }
3369
3370         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3371
3372         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3373                 goto out_put;
3374
3375         size = sizeof(*entry) + sizeof(u32) * len;
3376         buffer = tr->array_buffer.buffer;
3377         ring_buffer_nest_start(buffer);
3378         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3379                                             trace_ctx);
3380         if (!event)
3381                 goto out;
3382         entry = ring_buffer_event_data(event);
3383         entry->ip                       = ip;
3384         entry->fmt                      = fmt;
3385
3386         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3387         if (!call_filter_check_discard(call, entry, buffer, event)) {
3388                 __buffer_unlock_commit(buffer, event);
3389                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3390         }
3391
3392 out:
3393         ring_buffer_nest_end(buffer);
3394 out_put:
3395         put_trace_buf();
3396
3397 out_nobuffer:
3398         preempt_enable_notrace();
3399         unpause_graph_tracing();
3400
3401         return len;
3402 }
3403 EXPORT_SYMBOL_GPL(trace_vbprintk);
3404
3405 __printf(3, 0)
3406 static int
3407 __trace_array_vprintk(struct trace_buffer *buffer,
3408                       unsigned long ip, const char *fmt, va_list args)
3409 {
3410         struct trace_event_call *call = &event_print;
3411         struct ring_buffer_event *event;
3412         int len = 0, size;
3413         struct print_entry *entry;
3414         unsigned int trace_ctx;
3415         char *tbuffer;
3416
3417         if (tracing_disabled || tracing_selftest_running)
3418                 return 0;
3419
3420         /* Don't pollute graph traces with trace_vprintk internals */
3421         pause_graph_tracing();
3422
3423         trace_ctx = tracing_gen_ctx();
3424         preempt_disable_notrace();
3425
3426
3427         tbuffer = get_trace_buf();
3428         if (!tbuffer) {
3429                 len = 0;
3430                 goto out_nobuffer;
3431         }
3432
3433         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3434
3435         size = sizeof(*entry) + len + 1;
3436         ring_buffer_nest_start(buffer);
3437         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3438                                             trace_ctx);
3439         if (!event)
3440                 goto out;
3441         entry = ring_buffer_event_data(event);
3442         entry->ip = ip;
3443
3444         memcpy(&entry->buf, tbuffer, len + 1);
3445         if (!call_filter_check_discard(call, entry, buffer, event)) {
3446                 __buffer_unlock_commit(buffer, event);
3447                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3448         }
3449
3450 out:
3451         ring_buffer_nest_end(buffer);
3452         put_trace_buf();
3453
3454 out_nobuffer:
3455         preempt_enable_notrace();
3456         unpause_graph_tracing();
3457
3458         return len;
3459 }
3460
3461 __printf(3, 0)
3462 int trace_array_vprintk(struct trace_array *tr,
3463                         unsigned long ip, const char *fmt, va_list args)
3464 {
3465         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3466 }
3467
3468 /**
3469  * trace_array_printk - Print a message to a specific instance
3470  * @tr: The instance trace_array descriptor
3471  * @ip: The instruction pointer that this is called from.
3472  * @fmt: The format to print (printf format)
3473  *
3474  * If a subsystem sets up its own instance, they have the right to
3475  * printk strings into their tracing instance buffer using this
3476  * function. Note, this function will not write into the top level
3477  * buffer (use trace_printk() for that), as writing into the top level
3478  * buffer should only have events that can be individually disabled.
3479  * trace_printk() is only used for debugging a kernel, and should not
3480  * be ever incorporated in normal use.
3481  *
3482  * trace_array_printk() can be used, as it will not add noise to the
3483  * top level tracing buffer.
3484  *
3485  * Note, trace_array_init_printk() must be called on @tr before this
3486  * can be used.
3487  */
3488 __printf(3, 0)
3489 int trace_array_printk(struct trace_array *tr,
3490                        unsigned long ip, const char *fmt, ...)
3491 {
3492         int ret;
3493         va_list ap;
3494
3495         if (!tr)
3496                 return -ENOENT;
3497
3498         /* This is only allowed for created instances */
3499         if (tr == &global_trace)
3500                 return 0;
3501
3502         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3503                 return 0;
3504
3505         va_start(ap, fmt);
3506         ret = trace_array_vprintk(tr, ip, fmt, ap);
3507         va_end(ap);
3508         return ret;
3509 }
3510 EXPORT_SYMBOL_GPL(trace_array_printk);
3511
3512 /**
3513  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3514  * @tr: The trace array to initialize the buffers for
3515  *
3516  * As trace_array_printk() only writes into instances, they are OK to
3517  * have in the kernel (unlike trace_printk()). This needs to be called
3518  * before trace_array_printk() can be used on a trace_array.
3519  */
3520 int trace_array_init_printk(struct trace_array *tr)
3521 {
3522         if (!tr)
3523                 return -ENOENT;
3524
3525         /* This is only allowed for created instances */
3526         if (tr == &global_trace)
3527                 return -EINVAL;
3528
3529         return alloc_percpu_trace_buffer();
3530 }
3531 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3532
3533 __printf(3, 4)
3534 int trace_array_printk_buf(struct trace_buffer *buffer,
3535                            unsigned long ip, const char *fmt, ...)
3536 {
3537         int ret;
3538         va_list ap;
3539
3540         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3541                 return 0;
3542
3543         va_start(ap, fmt);
3544         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3545         va_end(ap);
3546         return ret;
3547 }
3548
3549 __printf(2, 0)
3550 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3551 {
3552         return trace_array_vprintk(&global_trace, ip, fmt, args);
3553 }
3554 EXPORT_SYMBOL_GPL(trace_vprintk);
3555
3556 static void trace_iterator_increment(struct trace_iterator *iter)
3557 {
3558         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3559
3560         iter->idx++;
3561         if (buf_iter)
3562                 ring_buffer_iter_advance(buf_iter);
3563 }
3564
3565 static struct trace_entry *
3566 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3567                 unsigned long *lost_events)
3568 {
3569         struct ring_buffer_event *event;
3570         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3571
3572         if (buf_iter) {
3573                 event = ring_buffer_iter_peek(buf_iter, ts);
3574                 if (lost_events)
3575                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3576                                 (unsigned long)-1 : 0;
3577         } else {
3578                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3579                                          lost_events);
3580         }
3581
3582         if (event) {
3583                 iter->ent_size = ring_buffer_event_length(event);
3584                 return ring_buffer_event_data(event);
3585         }
3586         iter->ent_size = 0;
3587         return NULL;
3588 }
3589
3590 static struct trace_entry *
3591 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3592                   unsigned long *missing_events, u64 *ent_ts)
3593 {
3594         struct trace_buffer *buffer = iter->array_buffer->buffer;
3595         struct trace_entry *ent, *next = NULL;
3596         unsigned long lost_events = 0, next_lost = 0;
3597         int cpu_file = iter->cpu_file;
3598         u64 next_ts = 0, ts;
3599         int next_cpu = -1;
3600         int next_size = 0;
3601         int cpu;
3602
3603         /*
3604          * If we are in a per_cpu trace file, don't bother by iterating over
3605          * all cpu and peek directly.
3606          */
3607         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3608                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3609                         return NULL;
3610                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3611                 if (ent_cpu)
3612                         *ent_cpu = cpu_file;
3613
3614                 return ent;
3615         }
3616
3617         for_each_tracing_cpu(cpu) {
3618
3619                 if (ring_buffer_empty_cpu(buffer, cpu))
3620                         continue;
3621
3622                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3623
3624                 /*
3625                  * Pick the entry with the smallest timestamp:
3626                  */
3627                 if (ent && (!next || ts < next_ts)) {
3628                         next = ent;
3629                         next_cpu = cpu;
3630                         next_ts = ts;
3631                         next_lost = lost_events;
3632                         next_size = iter->ent_size;
3633                 }
3634         }
3635
3636         iter->ent_size = next_size;
3637
3638         if (ent_cpu)
3639                 *ent_cpu = next_cpu;
3640
3641         if (ent_ts)
3642                 *ent_ts = next_ts;
3643
3644         if (missing_events)
3645                 *missing_events = next_lost;
3646
3647         return next;
3648 }
3649
3650 #define STATIC_FMT_BUF_SIZE     128
3651 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3652
3653 static char *trace_iter_expand_format(struct trace_iterator *iter)
3654 {
3655         char *tmp;
3656
3657         /*
3658          * iter->tr is NULL when used with tp_printk, which makes
3659          * this get called where it is not safe to call krealloc().
3660          */
3661         if (!iter->tr || iter->fmt == static_fmt_buf)
3662                 return NULL;
3663
3664         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3665                        GFP_KERNEL);
3666         if (tmp) {
3667                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3668                 iter->fmt = tmp;
3669         }
3670
3671         return tmp;
3672 }
3673
3674 /* Returns true if the string is safe to dereference from an event */
3675 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3676 {
3677         unsigned long addr = (unsigned long)str;
3678         struct trace_event *trace_event;
3679         struct trace_event_call *event;
3680
3681         /* OK if part of the event data */
3682         if ((addr >= (unsigned long)iter->ent) &&
3683             (addr < (unsigned long)iter->ent + iter->ent_size))
3684                 return true;
3685
3686         /* OK if part of the temp seq buffer */
3687         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3688             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3689                 return true;
3690
3691         /* Core rodata can not be freed */
3692         if (is_kernel_rodata(addr))
3693                 return true;
3694
3695         if (trace_is_tracepoint_string(str))
3696                 return true;
3697
3698         /*
3699          * Now this could be a module event, referencing core module
3700          * data, which is OK.
3701          */
3702         if (!iter->ent)
3703                 return false;
3704
3705         trace_event = ftrace_find_event(iter->ent->type);
3706         if (!trace_event)
3707                 return false;
3708
3709         event = container_of(trace_event, struct trace_event_call, event);
3710         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3711                 return false;
3712
3713         /* Would rather have rodata, but this will suffice */
3714         if (within_module_core(addr, event->module))
3715                 return true;
3716
3717         return false;
3718 }
3719
3720 static const char *show_buffer(struct trace_seq *s)
3721 {
3722         struct seq_buf *seq = &s->seq;
3723
3724         seq_buf_terminate(seq);
3725
3726         return seq->buffer;
3727 }
3728
3729 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3730
3731 static int test_can_verify_check(const char *fmt, ...)
3732 {
3733         char buf[16];
3734         va_list ap;
3735         int ret;
3736
3737         /*
3738          * The verifier is dependent on vsnprintf() modifies the va_list
3739          * passed to it, where it is sent as a reference. Some architectures
3740          * (like x86_32) passes it by value, which means that vsnprintf()
3741          * does not modify the va_list passed to it, and the verifier
3742          * would then need to be able to understand all the values that
3743          * vsnprintf can use. If it is passed by value, then the verifier
3744          * is disabled.
3745          */
3746         va_start(ap, fmt);
3747         vsnprintf(buf, 16, "%d", ap);
3748         ret = va_arg(ap, int);
3749         va_end(ap);
3750
3751         return ret;
3752 }
3753
3754 static void test_can_verify(void)
3755 {
3756         if (!test_can_verify_check("%d %d", 0, 1)) {
3757                 pr_info("trace event string verifier disabled\n");
3758                 static_branch_inc(&trace_no_verify);
3759         }
3760 }
3761
3762 /**
3763  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3764  * @iter: The iterator that holds the seq buffer and the event being printed
3765  * @fmt: The format used to print the event
3766  * @ap: The va_list holding the data to print from @fmt.
3767  *
3768  * This writes the data into the @iter->seq buffer using the data from
3769  * @fmt and @ap. If the format has a %s, then the source of the string
3770  * is examined to make sure it is safe to print, otherwise it will
3771  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3772  * pointer.
3773  */
3774 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3775                          va_list ap)
3776 {
3777         const char *p = fmt;
3778         const char *str;
3779         int i, j;
3780
3781         if (WARN_ON_ONCE(!fmt))
3782                 return;
3783
3784         if (static_branch_unlikely(&trace_no_verify))
3785                 goto print;
3786
3787         /* Don't bother checking when doing a ftrace_dump() */
3788         if (iter->fmt == static_fmt_buf)
3789                 goto print;
3790
3791         while (*p) {
3792                 bool star = false;
3793                 int len = 0;
3794
3795                 j = 0;
3796
3797                 /* We only care about %s and variants */
3798                 for (i = 0; p[i]; i++) {
3799                         if (i + 1 >= iter->fmt_size) {
3800                                 /*
3801                                  * If we can't expand the copy buffer,
3802                                  * just print it.
3803                                  */
3804                                 if (!trace_iter_expand_format(iter))
3805                                         goto print;
3806                         }
3807
3808                         if (p[i] == '\\' && p[i+1]) {
3809                                 i++;
3810                                 continue;
3811                         }
3812                         if (p[i] == '%') {
3813                                 /* Need to test cases like %08.*s */
3814                                 for (j = 1; p[i+j]; j++) {
3815                                         if (isdigit(p[i+j]) ||
3816                                             p[i+j] == '.')
3817                                                 continue;
3818                                         if (p[i+j] == '*') {
3819                                                 star = true;
3820                                                 continue;
3821                                         }
3822                                         break;
3823                                 }
3824                                 if (p[i+j] == 's')
3825                                         break;
3826                                 star = false;
3827                         }
3828                         j = 0;
3829                 }
3830                 /* If no %s found then just print normally */
3831                 if (!p[i])
3832                         break;
3833
3834                 /* Copy up to the %s, and print that */
3835                 strncpy(iter->fmt, p, i);
3836                 iter->fmt[i] = '\0';
3837                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3838
3839                 if (star)
3840                         len = va_arg(ap, int);
3841
3842                 /* The ap now points to the string data of the %s */
3843                 str = va_arg(ap, const char *);
3844
3845                 /*
3846                  * If you hit this warning, it is likely that the
3847                  * trace event in question used %s on a string that
3848                  * was saved at the time of the event, but may not be
3849                  * around when the trace is read. Use __string(),
3850                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3851                  * instead. See samples/trace_events/trace-events-sample.h
3852                  * for reference.
3853                  */
3854                 if (WARN_ONCE(!trace_safe_str(iter, str),
3855                               "fmt: '%s' current_buffer: '%s'",
3856                               fmt, show_buffer(&iter->seq))) {
3857                         int ret;
3858
3859                         /* Try to safely read the string */
3860                         if (star) {
3861                                 if (len + 1 > iter->fmt_size)
3862                                         len = iter->fmt_size - 1;
3863                                 if (len < 0)
3864                                         len = 0;
3865                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3866                                 iter->fmt[len] = 0;
3867                                 star = false;
3868                         } else {
3869                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3870                                                                   iter->fmt_size);
3871                         }
3872                         if (ret < 0)
3873                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3874                         else
3875                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3876                                                  str, iter->fmt);
3877                         str = "[UNSAFE-MEMORY]";
3878                         strcpy(iter->fmt, "%s");
3879                 } else {
3880                         strncpy(iter->fmt, p + i, j + 1);
3881                         iter->fmt[j+1] = '\0';
3882                 }
3883                 if (star)
3884                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3885                 else
3886                         trace_seq_printf(&iter->seq, iter->fmt, str);
3887
3888                 p += i + j + 1;
3889         }
3890  print:
3891         if (*p)
3892                 trace_seq_vprintf(&iter->seq, p, ap);
3893 }
3894
3895 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3896 {
3897         const char *p, *new_fmt;
3898         char *q;
3899
3900         if (WARN_ON_ONCE(!fmt))
3901                 return fmt;
3902
3903         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3904                 return fmt;
3905
3906         p = fmt;
3907         new_fmt = q = iter->fmt;
3908         while (*p) {
3909                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3910                         if (!trace_iter_expand_format(iter))
3911                                 return fmt;
3912
3913                         q += iter->fmt - new_fmt;
3914                         new_fmt = iter->fmt;
3915                 }
3916
3917                 *q++ = *p++;
3918
3919                 /* Replace %p with %px */
3920                 if (p[-1] == '%') {
3921                         if (p[0] == '%') {
3922                                 *q++ = *p++;
3923                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3924                                 *q++ = *p++;
3925                                 *q++ = 'x';
3926                         }
3927                 }
3928         }
3929         *q = '\0';
3930
3931         return new_fmt;
3932 }
3933
3934 #define STATIC_TEMP_BUF_SIZE    128
3935 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3936
3937 /* Find the next real entry, without updating the iterator itself */
3938 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3939                                           int *ent_cpu, u64 *ent_ts)
3940 {
3941         /* __find_next_entry will reset ent_size */
3942         int ent_size = iter->ent_size;
3943         struct trace_entry *entry;
3944
3945         /*
3946          * If called from ftrace_dump(), then the iter->temp buffer
3947          * will be the static_temp_buf and not created from kmalloc.
3948          * If the entry size is greater than the buffer, we can
3949          * not save it. Just return NULL in that case. This is only
3950          * used to add markers when two consecutive events' time
3951          * stamps have a large delta. See trace_print_lat_context()
3952          */
3953         if (iter->temp == static_temp_buf &&
3954             STATIC_TEMP_BUF_SIZE < ent_size)
3955                 return NULL;
3956
3957         /*
3958          * The __find_next_entry() may call peek_next_entry(), which may
3959          * call ring_buffer_peek() that may make the contents of iter->ent
3960          * undefined. Need to copy iter->ent now.
3961          */
3962         if (iter->ent && iter->ent != iter->temp) {
3963                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3964                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3965                         void *temp;
3966                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3967                         if (!temp)
3968                                 return NULL;
3969                         kfree(iter->temp);
3970                         iter->temp = temp;
3971                         iter->temp_size = iter->ent_size;
3972                 }
3973                 memcpy(iter->temp, iter->ent, iter->ent_size);
3974                 iter->ent = iter->temp;
3975         }
3976         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3977         /* Put back the original ent_size */
3978         iter->ent_size = ent_size;
3979
3980         return entry;
3981 }
3982
3983 /* Find the next real entry, and increment the iterator to the next entry */
3984 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3985 {
3986         iter->ent = __find_next_entry(iter, &iter->cpu,
3987                                       &iter->lost_events, &iter->ts);
3988
3989         if (iter->ent)
3990                 trace_iterator_increment(iter);
3991
3992         return iter->ent ? iter : NULL;
3993 }
3994
3995 static void trace_consume(struct trace_iterator *iter)
3996 {
3997         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3998                             &iter->lost_events);
3999 }
4000
4001 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4002 {
4003         struct trace_iterator *iter = m->private;
4004         int i = (int)*pos;
4005         void *ent;
4006
4007         WARN_ON_ONCE(iter->leftover);
4008
4009         (*pos)++;
4010
4011         /* can't go backwards */
4012         if (iter->idx > i)
4013                 return NULL;
4014
4015         if (iter->idx < 0)
4016                 ent = trace_find_next_entry_inc(iter);
4017         else
4018                 ent = iter;
4019
4020         while (ent && iter->idx < i)
4021                 ent = trace_find_next_entry_inc(iter);
4022
4023         iter->pos = *pos;
4024
4025         return ent;
4026 }
4027
4028 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4029 {
4030         struct ring_buffer_iter *buf_iter;
4031         unsigned long entries = 0;
4032         u64 ts;
4033
4034         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4035
4036         buf_iter = trace_buffer_iter(iter, cpu);
4037         if (!buf_iter)
4038                 return;
4039
4040         ring_buffer_iter_reset(buf_iter);
4041
4042         /*
4043          * We could have the case with the max latency tracers
4044          * that a reset never took place on a cpu. This is evident
4045          * by the timestamp being before the start of the buffer.
4046          */
4047         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4048                 if (ts >= iter->array_buffer->time_start)
4049                         break;
4050                 entries++;
4051                 ring_buffer_iter_advance(buf_iter);
4052         }
4053
4054         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4055 }
4056
4057 /*
4058  * The current tracer is copied to avoid a global locking
4059  * all around.
4060  */
4061 static void *s_start(struct seq_file *m, loff_t *pos)
4062 {
4063         struct trace_iterator *iter = m->private;
4064         struct trace_array *tr = iter->tr;
4065         int cpu_file = iter->cpu_file;
4066         void *p = NULL;
4067         loff_t l = 0;
4068         int cpu;
4069
4070         /*
4071          * copy the tracer to avoid using a global lock all around.
4072          * iter->trace is a copy of current_trace, the pointer to the
4073          * name may be used instead of a strcmp(), as iter->trace->name
4074          * will point to the same string as current_trace->name.
4075          */
4076         mutex_lock(&trace_types_lock);
4077         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4078                 *iter->trace = *tr->current_trace;
4079         mutex_unlock(&trace_types_lock);
4080
4081 #ifdef CONFIG_TRACER_MAX_TRACE
4082         if (iter->snapshot && iter->trace->use_max_tr)
4083                 return ERR_PTR(-EBUSY);
4084 #endif
4085
4086         if (*pos != iter->pos) {
4087                 iter->ent = NULL;
4088                 iter->cpu = 0;
4089                 iter->idx = -1;
4090
4091                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4092                         for_each_tracing_cpu(cpu)
4093                                 tracing_iter_reset(iter, cpu);
4094                 } else
4095                         tracing_iter_reset(iter, cpu_file);
4096
4097                 iter->leftover = 0;
4098                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4099                         ;
4100
4101         } else {
4102                 /*
4103                  * If we overflowed the seq_file before, then we want
4104                  * to just reuse the trace_seq buffer again.
4105                  */
4106                 if (iter->leftover)
4107                         p = iter;
4108                 else {
4109                         l = *pos - 1;
4110                         p = s_next(m, p, &l);
4111                 }
4112         }
4113
4114         trace_event_read_lock();
4115         trace_access_lock(cpu_file);
4116         return p;
4117 }
4118
4119 static void s_stop(struct seq_file *m, void *p)
4120 {
4121         struct trace_iterator *iter = m->private;
4122
4123 #ifdef CONFIG_TRACER_MAX_TRACE
4124         if (iter->snapshot && iter->trace->use_max_tr)
4125                 return;
4126 #endif
4127
4128         trace_access_unlock(iter->cpu_file);
4129         trace_event_read_unlock();
4130 }
4131
4132 static void
4133 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4134                       unsigned long *entries, int cpu)
4135 {
4136         unsigned long count;
4137
4138         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4139         /*
4140          * If this buffer has skipped entries, then we hold all
4141          * entries for the trace and we need to ignore the
4142          * ones before the time stamp.
4143          */
4144         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4145                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4146                 /* total is the same as the entries */
4147                 *total = count;
4148         } else
4149                 *total = count +
4150                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4151         *entries = count;
4152 }
4153
4154 static void
4155 get_total_entries(struct array_buffer *buf,
4156                   unsigned long *total, unsigned long *entries)
4157 {
4158         unsigned long t, e;
4159         int cpu;
4160
4161         *total = 0;
4162         *entries = 0;
4163
4164         for_each_tracing_cpu(cpu) {
4165                 get_total_entries_cpu(buf, &t, &e, cpu);
4166                 *total += t;
4167                 *entries += e;
4168         }
4169 }
4170
4171 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4172 {
4173         unsigned long total, entries;
4174
4175         if (!tr)
4176                 tr = &global_trace;
4177
4178         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4179
4180         return entries;
4181 }
4182
4183 unsigned long trace_total_entries(struct trace_array *tr)
4184 {
4185         unsigned long total, entries;
4186
4187         if (!tr)
4188                 tr = &global_trace;
4189
4190         get_total_entries(&tr->array_buffer, &total, &entries);
4191
4192         return entries;
4193 }
4194
4195 static void print_lat_help_header(struct seq_file *m)
4196 {
4197         seq_puts(m, "#                    _------=> CPU#            \n"
4198                     "#                   / _-----=> irqs-off        \n"
4199                     "#                  | / _----=> need-resched    \n"
4200                     "#                  || / _---=> hardirq/softirq \n"
4201                     "#                  ||| / _--=> preempt-depth   \n"
4202                     "#                  |||| / _-=> migrate-disable \n"
4203                     "#                  ||||| /     delay           \n"
4204                     "#  cmd     pid     |||||| time  |   caller     \n"
4205                     "#     \\   /        ||||||  \\    |    /       \n");
4206 }
4207
4208 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4209 {
4210         unsigned long total;
4211         unsigned long entries;
4212
4213         get_total_entries(buf, &total, &entries);
4214         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4215                    entries, total, num_online_cpus());
4216         seq_puts(m, "#\n");
4217 }
4218
4219 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4220                                    unsigned int flags)
4221 {
4222         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4223
4224         print_event_info(buf, m);
4225
4226         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4227         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4228 }
4229
4230 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4231                                        unsigned int flags)
4232 {
4233         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4234         const char *space = "            ";
4235         int prec = tgid ? 12 : 2;
4236
4237         print_event_info(buf, m);
4238
4239         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4240         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4241         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4242         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4243         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4244         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4245         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4246         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4247 }
4248
4249 void
4250 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4251 {
4252         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4253         struct array_buffer *buf = iter->array_buffer;
4254         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4255         struct tracer *type = iter->trace;
4256         unsigned long entries;
4257         unsigned long total;
4258         const char *name = "preemption";
4259
4260         name = type->name;
4261
4262         get_total_entries(buf, &total, &entries);
4263
4264         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4265                    name, UTS_RELEASE);
4266         seq_puts(m, "# -----------------------------------"
4267                  "---------------------------------\n");
4268         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4269                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4270                    nsecs_to_usecs(data->saved_latency),
4271                    entries,
4272                    total,
4273                    buf->cpu,
4274 #if defined(CONFIG_PREEMPT_NONE)
4275                    "server",
4276 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4277                    "desktop",
4278 #elif defined(CONFIG_PREEMPT)
4279                    "preempt",
4280 #elif defined(CONFIG_PREEMPT_RT)
4281                    "preempt_rt",
4282 #else
4283                    "unknown",
4284 #endif
4285                    /* These are reserved for later use */
4286                    0, 0, 0, 0);
4287 #ifdef CONFIG_SMP
4288         seq_printf(m, " #P:%d)\n", num_online_cpus());
4289 #else
4290         seq_puts(m, ")\n");
4291 #endif
4292         seq_puts(m, "#    -----------------\n");
4293         seq_printf(m, "#    | task: %.16s-%d "
4294                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4295                    data->comm, data->pid,
4296                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4297                    data->policy, data->rt_priority);
4298         seq_puts(m, "#    -----------------\n");
4299
4300         if (data->critical_start) {
4301                 seq_puts(m, "#  => started at: ");
4302                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4303                 trace_print_seq(m, &iter->seq);
4304                 seq_puts(m, "\n#  => ended at:   ");
4305                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4306                 trace_print_seq(m, &iter->seq);
4307                 seq_puts(m, "\n#\n");
4308         }
4309
4310         seq_puts(m, "#\n");
4311 }
4312
4313 static void test_cpu_buff_start(struct trace_iterator *iter)
4314 {
4315         struct trace_seq *s = &iter->seq;
4316         struct trace_array *tr = iter->tr;
4317
4318         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4319                 return;
4320
4321         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4322                 return;
4323
4324         if (cpumask_available(iter->started) &&
4325             cpumask_test_cpu(iter->cpu, iter->started))
4326                 return;
4327
4328         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4329                 return;
4330
4331         if (cpumask_available(iter->started))
4332                 cpumask_set_cpu(iter->cpu, iter->started);
4333
4334         /* Don't print started cpu buffer for the first entry of the trace */
4335         if (iter->idx > 1)
4336                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4337                                 iter->cpu);
4338 }
4339
4340 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4341 {
4342         struct trace_array *tr = iter->tr;
4343         struct trace_seq *s = &iter->seq;
4344         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4345         struct trace_entry *entry;
4346         struct trace_event *event;
4347
4348         entry = iter->ent;
4349
4350         test_cpu_buff_start(iter);
4351
4352         event = ftrace_find_event(entry->type);
4353
4354         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4355                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4356                         trace_print_lat_context(iter);
4357                 else
4358                         trace_print_context(iter);
4359         }
4360
4361         if (trace_seq_has_overflowed(s))
4362                 return TRACE_TYPE_PARTIAL_LINE;
4363
4364         if (event)
4365                 return event->funcs->trace(iter, sym_flags, event);
4366
4367         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4368
4369         return trace_handle_return(s);
4370 }
4371
4372 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4373 {
4374         struct trace_array *tr = iter->tr;
4375         struct trace_seq *s = &iter->seq;
4376         struct trace_entry *entry;
4377         struct trace_event *event;
4378
4379         entry = iter->ent;
4380
4381         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4382                 trace_seq_printf(s, "%d %d %llu ",
4383                                  entry->pid, iter->cpu, iter->ts);
4384
4385         if (trace_seq_has_overflowed(s))
4386                 return TRACE_TYPE_PARTIAL_LINE;
4387
4388         event = ftrace_find_event(entry->type);
4389         if (event)
4390                 return event->funcs->raw(iter, 0, event);
4391
4392         trace_seq_printf(s, "%d ?\n", entry->type);
4393
4394         return trace_handle_return(s);
4395 }
4396
4397 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4398 {
4399         struct trace_array *tr = iter->tr;
4400         struct trace_seq *s = &iter->seq;
4401         unsigned char newline = '\n';
4402         struct trace_entry *entry;
4403         struct trace_event *event;
4404
4405         entry = iter->ent;
4406
4407         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4408                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4409                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4410                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4411                 if (trace_seq_has_overflowed(s))
4412                         return TRACE_TYPE_PARTIAL_LINE;
4413         }
4414
4415         event = ftrace_find_event(entry->type);
4416         if (event) {
4417                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4418                 if (ret != TRACE_TYPE_HANDLED)
4419                         return ret;
4420         }
4421
4422         SEQ_PUT_FIELD(s, newline);
4423
4424         return trace_handle_return(s);
4425 }
4426
4427 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4428 {
4429         struct trace_array *tr = iter->tr;
4430         struct trace_seq *s = &iter->seq;
4431         struct trace_entry *entry;
4432         struct trace_event *event;
4433
4434         entry = iter->ent;
4435
4436         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4437                 SEQ_PUT_FIELD(s, entry->pid);
4438                 SEQ_PUT_FIELD(s, iter->cpu);
4439                 SEQ_PUT_FIELD(s, iter->ts);
4440                 if (trace_seq_has_overflowed(s))
4441                         return TRACE_TYPE_PARTIAL_LINE;
4442         }
4443
4444         event = ftrace_find_event(entry->type);
4445         return event ? event->funcs->binary(iter, 0, event) :
4446                 TRACE_TYPE_HANDLED;
4447 }
4448
4449 int trace_empty(struct trace_iterator *iter)
4450 {
4451         struct ring_buffer_iter *buf_iter;
4452         int cpu;
4453
4454         /* If we are looking at one CPU buffer, only check that one */
4455         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4456                 cpu = iter->cpu_file;
4457                 buf_iter = trace_buffer_iter(iter, cpu);
4458                 if (buf_iter) {
4459                         if (!ring_buffer_iter_empty(buf_iter))
4460                                 return 0;
4461                 } else {
4462                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4463                                 return 0;
4464                 }
4465                 return 1;
4466         }
4467
4468         for_each_tracing_cpu(cpu) {
4469                 buf_iter = trace_buffer_iter(iter, cpu);
4470                 if (buf_iter) {
4471                         if (!ring_buffer_iter_empty(buf_iter))
4472                                 return 0;
4473                 } else {
4474                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4475                                 return 0;
4476                 }
4477         }
4478
4479         return 1;
4480 }
4481
4482 /*  Called with trace_event_read_lock() held. */
4483 enum print_line_t print_trace_line(struct trace_iterator *iter)
4484 {
4485         struct trace_array *tr = iter->tr;
4486         unsigned long trace_flags = tr->trace_flags;
4487         enum print_line_t ret;
4488
4489         if (iter->lost_events) {
4490                 if (iter->lost_events == (unsigned long)-1)
4491                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4492                                          iter->cpu);
4493                 else
4494                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4495                                          iter->cpu, iter->lost_events);
4496                 if (trace_seq_has_overflowed(&iter->seq))
4497                         return TRACE_TYPE_PARTIAL_LINE;
4498         }
4499
4500         if (iter->trace && iter->trace->print_line) {
4501                 ret = iter->trace->print_line(iter);
4502                 if (ret != TRACE_TYPE_UNHANDLED)
4503                         return ret;
4504         }
4505
4506         if (iter->ent->type == TRACE_BPUTS &&
4507                         trace_flags & TRACE_ITER_PRINTK &&
4508                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4509                 return trace_print_bputs_msg_only(iter);
4510
4511         if (iter->ent->type == TRACE_BPRINT &&
4512                         trace_flags & TRACE_ITER_PRINTK &&
4513                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4514                 return trace_print_bprintk_msg_only(iter);
4515
4516         if (iter->ent->type == TRACE_PRINT &&
4517                         trace_flags & TRACE_ITER_PRINTK &&
4518                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4519                 return trace_print_printk_msg_only(iter);
4520
4521         if (trace_flags & TRACE_ITER_BIN)
4522                 return print_bin_fmt(iter);
4523
4524         if (trace_flags & TRACE_ITER_HEX)
4525                 return print_hex_fmt(iter);
4526
4527         if (trace_flags & TRACE_ITER_RAW)
4528                 return print_raw_fmt(iter);
4529
4530         return print_trace_fmt(iter);
4531 }
4532
4533 void trace_latency_header(struct seq_file *m)
4534 {
4535         struct trace_iterator *iter = m->private;
4536         struct trace_array *tr = iter->tr;
4537
4538         /* print nothing if the buffers are empty */
4539         if (trace_empty(iter))
4540                 return;
4541
4542         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4543                 print_trace_header(m, iter);
4544
4545         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4546                 print_lat_help_header(m);
4547 }
4548
4549 void trace_default_header(struct seq_file *m)
4550 {
4551         struct trace_iterator *iter = m->private;
4552         struct trace_array *tr = iter->tr;
4553         unsigned long trace_flags = tr->trace_flags;
4554
4555         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4556                 return;
4557
4558         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4559                 /* print nothing if the buffers are empty */
4560                 if (trace_empty(iter))
4561                         return;
4562                 print_trace_header(m, iter);
4563                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4564                         print_lat_help_header(m);
4565         } else {
4566                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4567                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4568                                 print_func_help_header_irq(iter->array_buffer,
4569                                                            m, trace_flags);
4570                         else
4571                                 print_func_help_header(iter->array_buffer, m,
4572                                                        trace_flags);
4573                 }
4574         }
4575 }
4576
4577 static void test_ftrace_alive(struct seq_file *m)
4578 {
4579         if (!ftrace_is_dead())
4580                 return;
4581         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4582                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4583 }
4584
4585 #ifdef CONFIG_TRACER_MAX_TRACE
4586 static void show_snapshot_main_help(struct seq_file *m)
4587 {
4588         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4589                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4590                     "#                      Takes a snapshot of the main buffer.\n"
4591                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4592                     "#                      (Doesn't have to be '2' works with any number that\n"
4593                     "#                       is not a '0' or '1')\n");
4594 }
4595
4596 static void show_snapshot_percpu_help(struct seq_file *m)
4597 {
4598         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4599 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4600         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4601                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4602 #else
4603         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4604                     "#                     Must use main snapshot file to allocate.\n");
4605 #endif
4606         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4607                     "#                      (Doesn't have to be '2' works with any number that\n"
4608                     "#                       is not a '0' or '1')\n");
4609 }
4610
4611 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4612 {
4613         if (iter->tr->allocated_snapshot)
4614                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4615         else
4616                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4617
4618         seq_puts(m, "# Snapshot commands:\n");
4619         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4620                 show_snapshot_main_help(m);
4621         else
4622                 show_snapshot_percpu_help(m);
4623 }
4624 #else
4625 /* Should never be called */
4626 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4627 #endif
4628
4629 static int s_show(struct seq_file *m, void *v)
4630 {
4631         struct trace_iterator *iter = v;
4632         int ret;
4633
4634         if (iter->ent == NULL) {
4635                 if (iter->tr) {
4636                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4637                         seq_puts(m, "#\n");
4638                         test_ftrace_alive(m);
4639                 }
4640                 if (iter->snapshot && trace_empty(iter))
4641                         print_snapshot_help(m, iter);
4642                 else if (iter->trace && iter->trace->print_header)
4643                         iter->trace->print_header(m);
4644                 else
4645                         trace_default_header(m);
4646
4647         } else if (iter->leftover) {
4648                 /*
4649                  * If we filled the seq_file buffer earlier, we
4650                  * want to just show it now.
4651                  */
4652                 ret = trace_print_seq(m, &iter->seq);
4653
4654                 /* ret should this time be zero, but you never know */
4655                 iter->leftover = ret;
4656
4657         } else {
4658                 print_trace_line(iter);
4659                 ret = trace_print_seq(m, &iter->seq);
4660                 /*
4661                  * If we overflow the seq_file buffer, then it will
4662                  * ask us for this data again at start up.
4663                  * Use that instead.
4664                  *  ret is 0 if seq_file write succeeded.
4665                  *        -1 otherwise.
4666                  */
4667                 iter->leftover = ret;
4668         }
4669
4670         return 0;
4671 }
4672
4673 /*
4674  * Should be used after trace_array_get(), trace_types_lock
4675  * ensures that i_cdev was already initialized.
4676  */
4677 static inline int tracing_get_cpu(struct inode *inode)
4678 {
4679         if (inode->i_cdev) /* See trace_create_cpu_file() */
4680                 return (long)inode->i_cdev - 1;
4681         return RING_BUFFER_ALL_CPUS;
4682 }
4683
4684 static const struct seq_operations tracer_seq_ops = {
4685         .start          = s_start,
4686         .next           = s_next,
4687         .stop           = s_stop,
4688         .show           = s_show,
4689 };
4690
4691 static struct trace_iterator *
4692 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4693 {
4694         struct trace_array *tr = inode->i_private;
4695         struct trace_iterator *iter;
4696         int cpu;
4697
4698         if (tracing_disabled)
4699                 return ERR_PTR(-ENODEV);
4700
4701         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4702         if (!iter)
4703                 return ERR_PTR(-ENOMEM);
4704
4705         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4706                                     GFP_KERNEL);
4707         if (!iter->buffer_iter)
4708                 goto release;
4709
4710         /*
4711          * trace_find_next_entry() may need to save off iter->ent.
4712          * It will place it into the iter->temp buffer. As most
4713          * events are less than 128, allocate a buffer of that size.
4714          * If one is greater, then trace_find_next_entry() will
4715          * allocate a new buffer to adjust for the bigger iter->ent.
4716          * It's not critical if it fails to get allocated here.
4717          */
4718         iter->temp = kmalloc(128, GFP_KERNEL);
4719         if (iter->temp)
4720                 iter->temp_size = 128;
4721
4722         /*
4723          * trace_event_printf() may need to modify given format
4724          * string to replace %p with %px so that it shows real address
4725          * instead of hash value. However, that is only for the event
4726          * tracing, other tracer may not need. Defer the allocation
4727          * until it is needed.
4728          */
4729         iter->fmt = NULL;
4730         iter->fmt_size = 0;
4731
4732         /*
4733          * We make a copy of the current tracer to avoid concurrent
4734          * changes on it while we are reading.
4735          */
4736         mutex_lock(&trace_types_lock);
4737         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4738         if (!iter->trace)
4739                 goto fail;
4740
4741         *iter->trace = *tr->current_trace;
4742
4743         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4744                 goto fail;
4745
4746         iter->tr = tr;
4747
4748 #ifdef CONFIG_TRACER_MAX_TRACE
4749         /* Currently only the top directory has a snapshot */
4750         if (tr->current_trace->print_max || snapshot)
4751                 iter->array_buffer = &tr->max_buffer;
4752         else
4753 #endif
4754                 iter->array_buffer = &tr->array_buffer;
4755         iter->snapshot = snapshot;
4756         iter->pos = -1;
4757         iter->cpu_file = tracing_get_cpu(inode);
4758         mutex_init(&iter->mutex);
4759
4760         /* Notify the tracer early; before we stop tracing. */
4761         if (iter->trace->open)
4762                 iter->trace->open(iter);
4763
4764         /* Annotate start of buffers if we had overruns */
4765         if (ring_buffer_overruns(iter->array_buffer->buffer))
4766                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4767
4768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4769         if (trace_clocks[tr->clock_id].in_ns)
4770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4771
4772         /*
4773          * If pause-on-trace is enabled, then stop the trace while
4774          * dumping, unless this is the "snapshot" file
4775          */
4776         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4777                 tracing_stop_tr(tr);
4778
4779         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4780                 for_each_tracing_cpu(cpu) {
4781                         iter->buffer_iter[cpu] =
4782                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4783                                                          cpu, GFP_KERNEL);
4784                 }
4785                 ring_buffer_read_prepare_sync();
4786                 for_each_tracing_cpu(cpu) {
4787                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4788                         tracing_iter_reset(iter, cpu);
4789                 }
4790         } else {
4791                 cpu = iter->cpu_file;
4792                 iter->buffer_iter[cpu] =
4793                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4794                                                  cpu, GFP_KERNEL);
4795                 ring_buffer_read_prepare_sync();
4796                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4797                 tracing_iter_reset(iter, cpu);
4798         }
4799
4800         mutex_unlock(&trace_types_lock);
4801
4802         return iter;
4803
4804  fail:
4805         mutex_unlock(&trace_types_lock);
4806         kfree(iter->trace);
4807         kfree(iter->temp);
4808         kfree(iter->buffer_iter);
4809 release:
4810         seq_release_private(inode, file);
4811         return ERR_PTR(-ENOMEM);
4812 }
4813
4814 int tracing_open_generic(struct inode *inode, struct file *filp)
4815 {
4816         int ret;
4817
4818         ret = tracing_check_open_get_tr(NULL);
4819         if (ret)
4820                 return ret;
4821
4822         filp->private_data = inode->i_private;
4823         return 0;
4824 }
4825
4826 bool tracing_is_disabled(void)
4827 {
4828         return (tracing_disabled) ? true: false;
4829 }
4830
4831 /*
4832  * Open and update trace_array ref count.
4833  * Must have the current trace_array passed to it.
4834  */
4835 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4836 {
4837         struct trace_array *tr = inode->i_private;
4838         int ret;
4839
4840         ret = tracing_check_open_get_tr(tr);
4841         if (ret)
4842                 return ret;
4843
4844         filp->private_data = inode->i_private;
4845
4846         return 0;
4847 }
4848
4849 static int tracing_release(struct inode *inode, struct file *file)
4850 {
4851         struct trace_array *tr = inode->i_private;
4852         struct seq_file *m = file->private_data;
4853         struct trace_iterator *iter;
4854         int cpu;
4855
4856         if (!(file->f_mode & FMODE_READ)) {
4857                 trace_array_put(tr);
4858                 return 0;
4859         }
4860
4861         /* Writes do not use seq_file */
4862         iter = m->private;
4863         mutex_lock(&trace_types_lock);
4864
4865         for_each_tracing_cpu(cpu) {
4866                 if (iter->buffer_iter[cpu])
4867                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4868         }
4869
4870         if (iter->trace && iter->trace->close)
4871                 iter->trace->close(iter);
4872
4873         if (!iter->snapshot && tr->stop_count)
4874                 /* reenable tracing if it was previously enabled */
4875                 tracing_start_tr(tr);
4876
4877         __trace_array_put(tr);
4878
4879         mutex_unlock(&trace_types_lock);
4880
4881         mutex_destroy(&iter->mutex);
4882         free_cpumask_var(iter->started);
4883         kfree(iter->fmt);
4884         kfree(iter->temp);
4885         kfree(iter->trace);
4886         kfree(iter->buffer_iter);
4887         seq_release_private(inode, file);
4888
4889         return 0;
4890 }
4891
4892 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4893 {
4894         struct trace_array *tr = inode->i_private;
4895
4896         trace_array_put(tr);
4897         return 0;
4898 }
4899
4900 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4901 {
4902         struct trace_array *tr = inode->i_private;
4903
4904         trace_array_put(tr);
4905
4906         return single_release(inode, file);
4907 }
4908
4909 static int tracing_open(struct inode *inode, struct file *file)
4910 {
4911         struct trace_array *tr = inode->i_private;
4912         struct trace_iterator *iter;
4913         int ret;
4914
4915         ret = tracing_check_open_get_tr(tr);
4916         if (ret)
4917                 return ret;
4918
4919         /* If this file was open for write, then erase contents */
4920         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4921                 int cpu = tracing_get_cpu(inode);
4922                 struct array_buffer *trace_buf = &tr->array_buffer;
4923
4924 #ifdef CONFIG_TRACER_MAX_TRACE
4925                 if (tr->current_trace->print_max)
4926                         trace_buf = &tr->max_buffer;
4927 #endif
4928
4929                 if (cpu == RING_BUFFER_ALL_CPUS)
4930                         tracing_reset_online_cpus(trace_buf);
4931                 else
4932                         tracing_reset_cpu(trace_buf, cpu);
4933         }
4934
4935         if (file->f_mode & FMODE_READ) {
4936                 iter = __tracing_open(inode, file, false);
4937                 if (IS_ERR(iter))
4938                         ret = PTR_ERR(iter);
4939                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4940                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4941         }
4942
4943         if (ret < 0)
4944                 trace_array_put(tr);
4945
4946         return ret;
4947 }
4948
4949 /*
4950  * Some tracers are not suitable for instance buffers.
4951  * A tracer is always available for the global array (toplevel)
4952  * or if it explicitly states that it is.
4953  */
4954 static bool
4955 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4956 {
4957         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4958 }
4959
4960 /* Find the next tracer that this trace array may use */
4961 static struct tracer *
4962 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4963 {
4964         while (t && !trace_ok_for_array(t, tr))
4965                 t = t->next;
4966
4967         return t;
4968 }
4969
4970 static void *
4971 t_next(struct seq_file *m, void *v, loff_t *pos)
4972 {
4973         struct trace_array *tr = m->private;
4974         struct tracer *t = v;
4975
4976         (*pos)++;
4977
4978         if (t)
4979                 t = get_tracer_for_array(tr, t->next);
4980
4981         return t;
4982 }
4983
4984 static void *t_start(struct seq_file *m, loff_t *pos)
4985 {
4986         struct trace_array *tr = m->private;
4987         struct tracer *t;
4988         loff_t l = 0;
4989
4990         mutex_lock(&trace_types_lock);
4991
4992         t = get_tracer_for_array(tr, trace_types);
4993         for (; t && l < *pos; t = t_next(m, t, &l))
4994                         ;
4995
4996         return t;
4997 }
4998
4999 static void t_stop(struct seq_file *m, void *p)
5000 {
5001         mutex_unlock(&trace_types_lock);
5002 }
5003
5004 static int t_show(struct seq_file *m, void *v)
5005 {
5006         struct tracer *t = v;
5007
5008         if (!t)
5009                 return 0;
5010
5011         seq_puts(m, t->name);
5012         if (t->next)
5013                 seq_putc(m, ' ');
5014         else
5015                 seq_putc(m, '\n');
5016
5017         return 0;
5018 }
5019
5020 static const struct seq_operations show_traces_seq_ops = {
5021         .start          = t_start,
5022         .next           = t_next,
5023         .stop           = t_stop,
5024         .show           = t_show,
5025 };
5026
5027 static int show_traces_open(struct inode *inode, struct file *file)
5028 {
5029         struct trace_array *tr = inode->i_private;
5030         struct seq_file *m;
5031         int ret;
5032
5033         ret = tracing_check_open_get_tr(tr);
5034         if (ret)
5035                 return ret;
5036
5037         ret = seq_open(file, &show_traces_seq_ops);
5038         if (ret) {
5039                 trace_array_put(tr);
5040                 return ret;
5041         }
5042
5043         m = file->private_data;
5044         m->private = tr;
5045
5046         return 0;
5047 }
5048
5049 static int show_traces_release(struct inode *inode, struct file *file)
5050 {
5051         struct trace_array *tr = inode->i_private;
5052
5053         trace_array_put(tr);
5054         return seq_release(inode, file);
5055 }
5056
5057 static ssize_t
5058 tracing_write_stub(struct file *filp, const char __user *ubuf,
5059                    size_t count, loff_t *ppos)
5060 {
5061         return count;
5062 }
5063
5064 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5065 {
5066         int ret;
5067
5068         if (file->f_mode & FMODE_READ)
5069                 ret = seq_lseek(file, offset, whence);
5070         else
5071                 file->f_pos = ret = 0;
5072
5073         return ret;
5074 }
5075
5076 static const struct file_operations tracing_fops = {
5077         .open           = tracing_open,
5078         .read           = seq_read,
5079         .write          = tracing_write_stub,
5080         .llseek         = tracing_lseek,
5081         .release        = tracing_release,
5082 };
5083
5084 static const struct file_operations show_traces_fops = {
5085         .open           = show_traces_open,
5086         .read           = seq_read,
5087         .llseek         = seq_lseek,
5088         .release        = show_traces_release,
5089 };
5090
5091 static ssize_t
5092 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5093                      size_t count, loff_t *ppos)
5094 {
5095         struct trace_array *tr = file_inode(filp)->i_private;
5096         char *mask_str;
5097         int len;
5098
5099         len = snprintf(NULL, 0, "%*pb\n",
5100                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5101         mask_str = kmalloc(len, GFP_KERNEL);
5102         if (!mask_str)
5103                 return -ENOMEM;
5104
5105         len = snprintf(mask_str, len, "%*pb\n",
5106                        cpumask_pr_args(tr->tracing_cpumask));
5107         if (len >= count) {
5108                 count = -EINVAL;
5109                 goto out_err;
5110         }
5111         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5112
5113 out_err:
5114         kfree(mask_str);
5115
5116         return count;
5117 }
5118
5119 int tracing_set_cpumask(struct trace_array *tr,
5120                         cpumask_var_t tracing_cpumask_new)
5121 {
5122         int cpu;
5123
5124         if (!tr)
5125                 return -EINVAL;
5126
5127         local_irq_disable();
5128         arch_spin_lock(&tr->max_lock);
5129         for_each_tracing_cpu(cpu) {
5130                 /*
5131                  * Increase/decrease the disabled counter if we are
5132                  * about to flip a bit in the cpumask:
5133                  */
5134                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5135                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5136                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5137                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5138                 }
5139                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5140                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5141                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5142                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5143                 }
5144         }
5145         arch_spin_unlock(&tr->max_lock);
5146         local_irq_enable();
5147
5148         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5149
5150         return 0;
5151 }
5152
5153 static ssize_t
5154 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5155                       size_t count, loff_t *ppos)
5156 {
5157         struct trace_array *tr = file_inode(filp)->i_private;
5158         cpumask_var_t tracing_cpumask_new;
5159         int err;
5160
5161         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5162                 return -ENOMEM;
5163
5164         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5165         if (err)
5166                 goto err_free;
5167
5168         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5169         if (err)
5170                 goto err_free;
5171
5172         free_cpumask_var(tracing_cpumask_new);
5173
5174         return count;
5175
5176 err_free:
5177         free_cpumask_var(tracing_cpumask_new);
5178
5179         return err;
5180 }
5181
5182 static const struct file_operations tracing_cpumask_fops = {
5183         .open           = tracing_open_generic_tr,
5184         .read           = tracing_cpumask_read,
5185         .write          = tracing_cpumask_write,
5186         .release        = tracing_release_generic_tr,
5187         .llseek         = generic_file_llseek,
5188 };
5189
5190 static int tracing_trace_options_show(struct seq_file *m, void *v)
5191 {
5192         struct tracer_opt *trace_opts;
5193         struct trace_array *tr = m->private;
5194         u32 tracer_flags;
5195         int i;
5196
5197         mutex_lock(&trace_types_lock);
5198         tracer_flags = tr->current_trace->flags->val;
5199         trace_opts = tr->current_trace->flags->opts;
5200
5201         for (i = 0; trace_options[i]; i++) {
5202                 if (tr->trace_flags & (1 << i))
5203                         seq_printf(m, "%s\n", trace_options[i]);
5204                 else
5205                         seq_printf(m, "no%s\n", trace_options[i]);
5206         }
5207
5208         for (i = 0; trace_opts[i].name; i++) {
5209                 if (tracer_flags & trace_opts[i].bit)
5210                         seq_printf(m, "%s\n", trace_opts[i].name);
5211                 else
5212                         seq_printf(m, "no%s\n", trace_opts[i].name);
5213         }
5214         mutex_unlock(&trace_types_lock);
5215
5216         return 0;
5217 }
5218
5219 static int __set_tracer_option(struct trace_array *tr,
5220                                struct tracer_flags *tracer_flags,
5221                                struct tracer_opt *opts, int neg)
5222 {
5223         struct tracer *trace = tracer_flags->trace;
5224         int ret;
5225
5226         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5227         if (ret)
5228                 return ret;
5229
5230         if (neg)
5231                 tracer_flags->val &= ~opts->bit;
5232         else
5233                 tracer_flags->val |= opts->bit;
5234         return 0;
5235 }
5236
5237 /* Try to assign a tracer specific option */
5238 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5239 {
5240         struct tracer *trace = tr->current_trace;
5241         struct tracer_flags *tracer_flags = trace->flags;
5242         struct tracer_opt *opts = NULL;
5243         int i;
5244
5245         for (i = 0; tracer_flags->opts[i].name; i++) {
5246                 opts = &tracer_flags->opts[i];
5247
5248                 if (strcmp(cmp, opts->name) == 0)
5249                         return __set_tracer_option(tr, trace->flags, opts, neg);
5250         }
5251
5252         return -EINVAL;
5253 }
5254
5255 /* Some tracers require overwrite to stay enabled */
5256 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5257 {
5258         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5259                 return -1;
5260
5261         return 0;
5262 }
5263
5264 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5265 {
5266         int *map;
5267
5268         if ((mask == TRACE_ITER_RECORD_TGID) ||
5269             (mask == TRACE_ITER_RECORD_CMD))
5270                 lockdep_assert_held(&event_mutex);
5271
5272         /* do nothing if flag is already set */
5273         if (!!(tr->trace_flags & mask) == !!enabled)
5274                 return 0;
5275
5276         /* Give the tracer a chance to approve the change */
5277         if (tr->current_trace->flag_changed)
5278                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5279                         return -EINVAL;
5280
5281         if (enabled)
5282                 tr->trace_flags |= mask;
5283         else
5284                 tr->trace_flags &= ~mask;
5285
5286         if (mask == TRACE_ITER_RECORD_CMD)
5287                 trace_event_enable_cmd_record(enabled);
5288
5289         if (mask == TRACE_ITER_RECORD_TGID) {
5290                 if (!tgid_map) {
5291                         tgid_map_max = pid_max;
5292                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5293                                        GFP_KERNEL);
5294
5295                         /*
5296                          * Pairs with smp_load_acquire() in
5297                          * trace_find_tgid_ptr() to ensure that if it observes
5298                          * the tgid_map we just allocated then it also observes
5299                          * the corresponding tgid_map_max value.
5300                          */
5301                         smp_store_release(&tgid_map, map);
5302                 }
5303                 if (!tgid_map) {
5304                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5305                         return -ENOMEM;
5306                 }
5307
5308                 trace_event_enable_tgid_record(enabled);
5309         }
5310
5311         if (mask == TRACE_ITER_EVENT_FORK)
5312                 trace_event_follow_fork(tr, enabled);
5313
5314         if (mask == TRACE_ITER_FUNC_FORK)
5315                 ftrace_pid_follow_fork(tr, enabled);
5316
5317         if (mask == TRACE_ITER_OVERWRITE) {
5318                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5319 #ifdef CONFIG_TRACER_MAX_TRACE
5320                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5321 #endif
5322         }
5323
5324         if (mask == TRACE_ITER_PRINTK) {
5325                 trace_printk_start_stop_comm(enabled);
5326                 trace_printk_control(enabled);
5327         }
5328
5329         return 0;
5330 }
5331
5332 int trace_set_options(struct trace_array *tr, char *option)
5333 {
5334         char *cmp;
5335         int neg = 0;
5336         int ret;
5337         size_t orig_len = strlen(option);
5338         int len;
5339
5340         cmp = strstrip(option);
5341
5342         len = str_has_prefix(cmp, "no");
5343         if (len)
5344                 neg = 1;
5345
5346         cmp += len;
5347
5348         mutex_lock(&event_mutex);
5349         mutex_lock(&trace_types_lock);
5350
5351         ret = match_string(trace_options, -1, cmp);
5352         /* If no option could be set, test the specific tracer options */
5353         if (ret < 0)
5354                 ret = set_tracer_option(tr, cmp, neg);
5355         else
5356                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5357
5358         mutex_unlock(&trace_types_lock);
5359         mutex_unlock(&event_mutex);
5360
5361         /*
5362          * If the first trailing whitespace is replaced with '\0' by strstrip,
5363          * turn it back into a space.
5364          */
5365         if (orig_len > strlen(option))
5366                 option[strlen(option)] = ' ';
5367
5368         return ret;
5369 }
5370
5371 static void __init apply_trace_boot_options(void)
5372 {
5373         char *buf = trace_boot_options_buf;
5374         char *option;
5375
5376         while (true) {
5377                 option = strsep(&buf, ",");
5378
5379                 if (!option)
5380                         break;
5381
5382                 if (*option)
5383                         trace_set_options(&global_trace, option);
5384
5385                 /* Put back the comma to allow this to be called again */
5386                 if (buf)
5387                         *(buf - 1) = ',';
5388         }
5389 }
5390
5391 static ssize_t
5392 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5393                         size_t cnt, loff_t *ppos)
5394 {
5395         struct seq_file *m = filp->private_data;
5396         struct trace_array *tr = m->private;
5397         char buf[64];
5398         int ret;
5399
5400         if (cnt >= sizeof(buf))
5401                 return -EINVAL;
5402
5403         if (copy_from_user(buf, ubuf, cnt))
5404                 return -EFAULT;
5405
5406         buf[cnt] = 0;
5407
5408         ret = trace_set_options(tr, buf);
5409         if (ret < 0)
5410                 return ret;
5411
5412         *ppos += cnt;
5413
5414         return cnt;
5415 }
5416
5417 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5418 {
5419         struct trace_array *tr = inode->i_private;
5420         int ret;
5421
5422         ret = tracing_check_open_get_tr(tr);
5423         if (ret)
5424                 return ret;
5425
5426         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5427         if (ret < 0)
5428                 trace_array_put(tr);
5429
5430         return ret;
5431 }
5432
5433 static const struct file_operations tracing_iter_fops = {
5434         .open           = tracing_trace_options_open,
5435         .read           = seq_read,
5436         .llseek         = seq_lseek,
5437         .release        = tracing_single_release_tr,
5438         .write          = tracing_trace_options_write,
5439 };
5440
5441 static const char readme_msg[] =
5442         "tracing mini-HOWTO:\n\n"
5443         "# echo 0 > tracing_on : quick way to disable tracing\n"
5444         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5445         " Important files:\n"
5446         "  trace\t\t\t- The static contents of the buffer\n"
5447         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5448         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5449         "  current_tracer\t- function and latency tracers\n"
5450         "  available_tracers\t- list of configured tracers for current_tracer\n"
5451         "  error_log\t- error log for failed commands (that support it)\n"
5452         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5453         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5454         "  trace_clock\t\t-change the clock used to order events\n"
5455         "       local:   Per cpu clock but may not be synced across CPUs\n"
5456         "      global:   Synced across CPUs but slows tracing down.\n"
5457         "     counter:   Not a clock, but just an increment\n"
5458         "      uptime:   Jiffy counter from time of boot\n"
5459         "        perf:   Same clock that perf events use\n"
5460 #ifdef CONFIG_X86_64
5461         "     x86-tsc:   TSC cycle counter\n"
5462 #endif
5463         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5464         "       delta:   Delta difference against a buffer-wide timestamp\n"
5465         "    absolute:   Absolute (standalone) timestamp\n"
5466         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5467         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5468         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5469         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5470         "\t\t\t  Remove sub-buffer with rmdir\n"
5471         "  trace_options\t\t- Set format or modify how tracing happens\n"
5472         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5473         "\t\t\t  option name\n"
5474         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5475 #ifdef CONFIG_DYNAMIC_FTRACE
5476         "\n  available_filter_functions - list of functions that can be filtered on\n"
5477         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5478         "\t\t\t  functions\n"
5479         "\t     accepts: func_full_name or glob-matching-pattern\n"
5480         "\t     modules: Can select a group via module\n"
5481         "\t      Format: :mod:<module-name>\n"
5482         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5483         "\t    triggers: a command to perform when function is hit\n"
5484         "\t      Format: <function>:<trigger>[:count]\n"
5485         "\t     trigger: traceon, traceoff\n"
5486         "\t\t      enable_event:<system>:<event>\n"
5487         "\t\t      disable_event:<system>:<event>\n"
5488 #ifdef CONFIG_STACKTRACE
5489         "\t\t      stacktrace\n"
5490 #endif
5491 #ifdef CONFIG_TRACER_SNAPSHOT
5492         "\t\t      snapshot\n"
5493 #endif
5494         "\t\t      dump\n"
5495         "\t\t      cpudump\n"
5496         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5497         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5498         "\t     The first one will disable tracing every time do_fault is hit\n"
5499         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5500         "\t       The first time do trap is hit and it disables tracing, the\n"
5501         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5502         "\t       the counter will not decrement. It only decrements when the\n"
5503         "\t       trigger did work\n"
5504         "\t     To remove trigger without count:\n"
5505         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5506         "\t     To remove trigger with a count:\n"
5507         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5508         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5509         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5510         "\t    modules: Can select a group via module command :mod:\n"
5511         "\t    Does not accept triggers\n"
5512 #endif /* CONFIG_DYNAMIC_FTRACE */
5513 #ifdef CONFIG_FUNCTION_TRACER
5514         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5515         "\t\t    (function)\n"
5516         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5517         "\t\t    (function)\n"
5518 #endif
5519 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5520         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5521         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5522         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5523 #endif
5524 #ifdef CONFIG_TRACER_SNAPSHOT
5525         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5526         "\t\t\t  snapshot buffer. Read the contents for more\n"
5527         "\t\t\t  information\n"
5528 #endif
5529 #ifdef CONFIG_STACK_TRACER
5530         "  stack_trace\t\t- Shows the max stack trace when active\n"
5531         "  stack_max_size\t- Shows current max stack size that was traced\n"
5532         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5533         "\t\t\t  new trace)\n"
5534 #ifdef CONFIG_DYNAMIC_FTRACE
5535         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5536         "\t\t\t  traces\n"
5537 #endif
5538 #endif /* CONFIG_STACK_TRACER */
5539 #ifdef CONFIG_DYNAMIC_EVENTS
5540         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5541         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5542 #endif
5543 #ifdef CONFIG_KPROBE_EVENTS
5544         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5545         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5546 #endif
5547 #ifdef CONFIG_UPROBE_EVENTS
5548         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5549         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5550 #endif
5551 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5552         "\t  accepts: event-definitions (one definition per line)\n"
5553         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5554         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5555 #ifdef CONFIG_HIST_TRIGGERS
5556         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5557 #endif
5558         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5559         "\t           -:[<group>/]<event>\n"
5560 #ifdef CONFIG_KPROBE_EVENTS
5561         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5562   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5563 #endif
5564 #ifdef CONFIG_UPROBE_EVENTS
5565   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5566 #endif
5567         "\t     args: <name>=fetcharg[:type]\n"
5568         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5569 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5570         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5571 #else
5572         "\t           $stack<index>, $stack, $retval, $comm,\n"
5573 #endif
5574         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5575         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5576         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5577         "\t           <type>\\[<array-size>\\]\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579         "\t    field: <stype> <name>;\n"
5580         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5581         "\t           [unsigned] char/int/long\n"
5582 #endif
5583         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5584         "\t            of the <attached-group>/<attached-event>.\n"
5585 #endif
5586         "  events/\t\t- Directory containing all trace event subsystems:\n"
5587         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5588         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5589         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5590         "\t\t\t  events\n"
5591         "      filter\t\t- If set, only events passing filter are traced\n"
5592         "  events/<system>/<event>/\t- Directory containing control files for\n"
5593         "\t\t\t  <event>:\n"
5594         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5595         "      filter\t\t- If set, only events passing filter are traced\n"
5596         "      trigger\t\t- If set, a command to perform when event is hit\n"
5597         "\t    Format: <trigger>[:count][if <filter>]\n"
5598         "\t   trigger: traceon, traceoff\n"
5599         "\t            enable_event:<system>:<event>\n"
5600         "\t            disable_event:<system>:<event>\n"
5601 #ifdef CONFIG_HIST_TRIGGERS
5602         "\t            enable_hist:<system>:<event>\n"
5603         "\t            disable_hist:<system>:<event>\n"
5604 #endif
5605 #ifdef CONFIG_STACKTRACE
5606         "\t\t    stacktrace\n"
5607 #endif
5608 #ifdef CONFIG_TRACER_SNAPSHOT
5609         "\t\t    snapshot\n"
5610 #endif
5611 #ifdef CONFIG_HIST_TRIGGERS
5612         "\t\t    hist (see below)\n"
5613 #endif
5614         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5615         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5616         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5617         "\t                  events/block/block_unplug/trigger\n"
5618         "\t   The first disables tracing every time block_unplug is hit.\n"
5619         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5620         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5621         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5622         "\t   Like function triggers, the counter is only decremented if it\n"
5623         "\t    enabled or disabled tracing.\n"
5624         "\t   To remove a trigger without a count:\n"
5625         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5626         "\t   To remove a trigger with a count:\n"
5627         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5628         "\t   Filters can be ignored when removing a trigger.\n"
5629 #ifdef CONFIG_HIST_TRIGGERS
5630         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5631         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5632         "\t            [:values=<field1[,field2,...]>]\n"
5633         "\t            [:sort=<field1[,field2,...]>]\n"
5634         "\t            [:size=#entries]\n"
5635         "\t            [:pause][:continue][:clear]\n"
5636         "\t            [:name=histname1]\n"
5637         "\t            [:<handler>.<action>]\n"
5638         "\t            [if <filter>]\n\n"
5639         "\t    Note, special fields can be used as well:\n"
5640         "\t            common_timestamp - to record current timestamp\n"
5641         "\t            common_cpu - to record the CPU the event happened on\n"
5642         "\n"
5643         "\t    When a matching event is hit, an entry is added to a hash\n"
5644         "\t    table using the key(s) and value(s) named, and the value of a\n"
5645         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5646         "\t    correspond to fields in the event's format description.  Keys\n"
5647         "\t    can be any field, or the special string 'stacktrace'.\n"
5648         "\t    Compound keys consisting of up to two fields can be specified\n"
5649         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5650         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5651         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5652         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5653         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5654         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5655         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5656         "\t    its histogram data will be shared with other triggers of the\n"
5657         "\t    same name, and trigger hits will update this common data.\n\n"
5658         "\t    Reading the 'hist' file for the event will dump the hash\n"
5659         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5660         "\t    triggers attached to an event, there will be a table for each\n"
5661         "\t    trigger in the output.  The table displayed for a named\n"
5662         "\t    trigger will be the same as any other instance having the\n"
5663         "\t    same name.  The default format used to display a given field\n"
5664         "\t    can be modified by appending any of the following modifiers\n"
5665         "\t    to the field name, as applicable:\n\n"
5666         "\t            .hex        display a number as a hex value\n"
5667         "\t            .sym        display an address as a symbol\n"
5668         "\t            .sym-offset display an address as a symbol and offset\n"
5669         "\t            .execname   display a common_pid as a program name\n"
5670         "\t            .syscall    display a syscall id as a syscall name\n"
5671         "\t            .log2       display log2 value rather than raw number\n"
5672         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5673         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5674         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5675         "\t    trigger or to start a hist trigger but not log any events\n"
5676         "\t    until told to do so.  'continue' can be used to start or\n"
5677         "\t    restart a paused hist trigger.\n\n"
5678         "\t    The 'clear' parameter will clear the contents of a running\n"
5679         "\t    hist trigger and leave its current paused/active state\n"
5680         "\t    unchanged.\n\n"
5681         "\t    The enable_hist and disable_hist triggers can be used to\n"
5682         "\t    have one event conditionally start and stop another event's\n"
5683         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5684         "\t    the enable_event and disable_event triggers.\n\n"
5685         "\t    Hist trigger handlers and actions are executed whenever a\n"
5686         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5687         "\t        <handler>.<action>\n\n"
5688         "\t    The available handlers are:\n\n"
5689         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5690         "\t        onmax(var)               - invoke if var exceeds current max\n"
5691         "\t        onchange(var)            - invoke action if var changes\n\n"
5692         "\t    The available actions are:\n\n"
5693         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5694         "\t        save(field,...)                      - save current event fields\n"
5695 #ifdef CONFIG_TRACER_SNAPSHOT
5696         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5697 #endif
5698 #ifdef CONFIG_SYNTH_EVENTS
5699         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5700         "\t  Write into this file to define/undefine new synthetic events.\n"
5701         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5702 #endif
5703 #endif
5704 ;
5705
5706 static ssize_t
5707 tracing_readme_read(struct file *filp, char __user *ubuf,
5708                        size_t cnt, loff_t *ppos)
5709 {
5710         return simple_read_from_buffer(ubuf, cnt, ppos,
5711                                         readme_msg, strlen(readme_msg));
5712 }
5713
5714 static const struct file_operations tracing_readme_fops = {
5715         .open           = tracing_open_generic,
5716         .read           = tracing_readme_read,
5717         .llseek         = generic_file_llseek,
5718 };
5719
5720 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5721 {
5722         int pid = ++(*pos);
5723
5724         return trace_find_tgid_ptr(pid);
5725 }
5726
5727 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5728 {
5729         int pid = *pos;
5730
5731         return trace_find_tgid_ptr(pid);
5732 }
5733
5734 static void saved_tgids_stop(struct seq_file *m, void *v)
5735 {
5736 }
5737
5738 static int saved_tgids_show(struct seq_file *m, void *v)
5739 {
5740         int *entry = (int *)v;
5741         int pid = entry - tgid_map;
5742         int tgid = *entry;
5743
5744         if (tgid == 0)
5745                 return SEQ_SKIP;
5746
5747         seq_printf(m, "%d %d\n", pid, tgid);
5748         return 0;
5749 }
5750
5751 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5752         .start          = saved_tgids_start,
5753         .stop           = saved_tgids_stop,
5754         .next           = saved_tgids_next,
5755         .show           = saved_tgids_show,
5756 };
5757
5758 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5759 {
5760         int ret;
5761
5762         ret = tracing_check_open_get_tr(NULL);
5763         if (ret)
5764                 return ret;
5765
5766         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5767 }
5768
5769
5770 static const struct file_operations tracing_saved_tgids_fops = {
5771         .open           = tracing_saved_tgids_open,
5772         .read           = seq_read,
5773         .llseek         = seq_lseek,
5774         .release        = seq_release,
5775 };
5776
5777 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5778 {
5779         unsigned int *ptr = v;
5780
5781         if (*pos || m->count)
5782                 ptr++;
5783
5784         (*pos)++;
5785
5786         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5787              ptr++) {
5788                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5789                         continue;
5790
5791                 return ptr;
5792         }
5793
5794         return NULL;
5795 }
5796
5797 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5798 {
5799         void *v;
5800         loff_t l = 0;
5801
5802         preempt_disable();
5803         arch_spin_lock(&trace_cmdline_lock);
5804
5805         v = &savedcmd->map_cmdline_to_pid[0];
5806         while (l <= *pos) {
5807                 v = saved_cmdlines_next(m, v, &l);
5808                 if (!v)
5809                         return NULL;
5810         }
5811
5812         return v;
5813 }
5814
5815 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5816 {
5817         arch_spin_unlock(&trace_cmdline_lock);
5818         preempt_enable();
5819 }
5820
5821 static int saved_cmdlines_show(struct seq_file *m, void *v)
5822 {
5823         char buf[TASK_COMM_LEN];
5824         unsigned int *pid = v;
5825
5826         __trace_find_cmdline(*pid, buf);
5827         seq_printf(m, "%d %s\n", *pid, buf);
5828         return 0;
5829 }
5830
5831 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5832         .start          = saved_cmdlines_start,
5833         .next           = saved_cmdlines_next,
5834         .stop           = saved_cmdlines_stop,
5835         .show           = saved_cmdlines_show,
5836 };
5837
5838 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5839 {
5840         int ret;
5841
5842         ret = tracing_check_open_get_tr(NULL);
5843         if (ret)
5844                 return ret;
5845
5846         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5847 }
5848
5849 static const struct file_operations tracing_saved_cmdlines_fops = {
5850         .open           = tracing_saved_cmdlines_open,
5851         .read           = seq_read,
5852         .llseek         = seq_lseek,
5853         .release        = seq_release,
5854 };
5855
5856 static ssize_t
5857 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5858                                  size_t cnt, loff_t *ppos)
5859 {
5860         char buf[64];
5861         int r;
5862
5863         arch_spin_lock(&trace_cmdline_lock);
5864         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5865         arch_spin_unlock(&trace_cmdline_lock);
5866
5867         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5868 }
5869
5870 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5871 {
5872         kfree(s->saved_cmdlines);
5873         kfree(s->map_cmdline_to_pid);
5874         kfree(s);
5875 }
5876
5877 static int tracing_resize_saved_cmdlines(unsigned int val)
5878 {
5879         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5880
5881         s = kmalloc(sizeof(*s), GFP_KERNEL);
5882         if (!s)
5883                 return -ENOMEM;
5884
5885         if (allocate_cmdlines_buffer(val, s) < 0) {
5886                 kfree(s);
5887                 return -ENOMEM;
5888         }
5889
5890         arch_spin_lock(&trace_cmdline_lock);
5891         savedcmd_temp = savedcmd;
5892         savedcmd = s;
5893         arch_spin_unlock(&trace_cmdline_lock);
5894         free_saved_cmdlines_buffer(savedcmd_temp);
5895
5896         return 0;
5897 }
5898
5899 static ssize_t
5900 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5901                                   size_t cnt, loff_t *ppos)
5902 {
5903         unsigned long val;
5904         int ret;
5905
5906         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5907         if (ret)
5908                 return ret;
5909
5910         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5911         if (!val || val > PID_MAX_DEFAULT)
5912                 return -EINVAL;
5913
5914         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5915         if (ret < 0)
5916                 return ret;
5917
5918         *ppos += cnt;
5919
5920         return cnt;
5921 }
5922
5923 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5924         .open           = tracing_open_generic,
5925         .read           = tracing_saved_cmdlines_size_read,
5926         .write          = tracing_saved_cmdlines_size_write,
5927 };
5928
5929 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5930 static union trace_eval_map_item *
5931 update_eval_map(union trace_eval_map_item *ptr)
5932 {
5933         if (!ptr->map.eval_string) {
5934                 if (ptr->tail.next) {
5935                         ptr = ptr->tail.next;
5936                         /* Set ptr to the next real item (skip head) */
5937                         ptr++;
5938                 } else
5939                         return NULL;
5940         }
5941         return ptr;
5942 }
5943
5944 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5945 {
5946         union trace_eval_map_item *ptr = v;
5947
5948         /*
5949          * Paranoid! If ptr points to end, we don't want to increment past it.
5950          * This really should never happen.
5951          */
5952         (*pos)++;
5953         ptr = update_eval_map(ptr);
5954         if (WARN_ON_ONCE(!ptr))
5955                 return NULL;
5956
5957         ptr++;
5958         ptr = update_eval_map(ptr);
5959
5960         return ptr;
5961 }
5962
5963 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5964 {
5965         union trace_eval_map_item *v;
5966         loff_t l = 0;
5967
5968         mutex_lock(&trace_eval_mutex);
5969
5970         v = trace_eval_maps;
5971         if (v)
5972                 v++;
5973
5974         while (v && l < *pos) {
5975                 v = eval_map_next(m, v, &l);
5976         }
5977
5978         return v;
5979 }
5980
5981 static void eval_map_stop(struct seq_file *m, void *v)
5982 {
5983         mutex_unlock(&trace_eval_mutex);
5984 }
5985
5986 static int eval_map_show(struct seq_file *m, void *v)
5987 {
5988         union trace_eval_map_item *ptr = v;
5989
5990         seq_printf(m, "%s %ld (%s)\n",
5991                    ptr->map.eval_string, ptr->map.eval_value,
5992                    ptr->map.system);
5993
5994         return 0;
5995 }
5996
5997 static const struct seq_operations tracing_eval_map_seq_ops = {
5998         .start          = eval_map_start,
5999         .next           = eval_map_next,
6000         .stop           = eval_map_stop,
6001         .show           = eval_map_show,
6002 };
6003
6004 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6005 {
6006         int ret;
6007
6008         ret = tracing_check_open_get_tr(NULL);
6009         if (ret)
6010                 return ret;
6011
6012         return seq_open(filp, &tracing_eval_map_seq_ops);
6013 }
6014
6015 static const struct file_operations tracing_eval_map_fops = {
6016         .open           = tracing_eval_map_open,
6017         .read           = seq_read,
6018         .llseek         = seq_lseek,
6019         .release        = seq_release,
6020 };
6021
6022 static inline union trace_eval_map_item *
6023 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6024 {
6025         /* Return tail of array given the head */
6026         return ptr + ptr->head.length + 1;
6027 }
6028
6029 static void
6030 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6031                            int len)
6032 {
6033         struct trace_eval_map **stop;
6034         struct trace_eval_map **map;
6035         union trace_eval_map_item *map_array;
6036         union trace_eval_map_item *ptr;
6037
6038         stop = start + len;
6039
6040         /*
6041          * The trace_eval_maps contains the map plus a head and tail item,
6042          * where the head holds the module and length of array, and the
6043          * tail holds a pointer to the next list.
6044          */
6045         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6046         if (!map_array) {
6047                 pr_warn("Unable to allocate trace eval mapping\n");
6048                 return;
6049         }
6050
6051         mutex_lock(&trace_eval_mutex);
6052
6053         if (!trace_eval_maps)
6054                 trace_eval_maps = map_array;
6055         else {
6056                 ptr = trace_eval_maps;
6057                 for (;;) {
6058                         ptr = trace_eval_jmp_to_tail(ptr);
6059                         if (!ptr->tail.next)
6060                                 break;
6061                         ptr = ptr->tail.next;
6062
6063                 }
6064                 ptr->tail.next = map_array;
6065         }
6066         map_array->head.mod = mod;
6067         map_array->head.length = len;
6068         map_array++;
6069
6070         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6071                 map_array->map = **map;
6072                 map_array++;
6073         }
6074         memset(map_array, 0, sizeof(*map_array));
6075
6076         mutex_unlock(&trace_eval_mutex);
6077 }
6078
6079 static void trace_create_eval_file(struct dentry *d_tracer)
6080 {
6081         trace_create_file("eval_map", 0444, d_tracer,
6082                           NULL, &tracing_eval_map_fops);
6083 }
6084
6085 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6086 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6087 static inline void trace_insert_eval_map_file(struct module *mod,
6088                               struct trace_eval_map **start, int len) { }
6089 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6090
6091 static void trace_insert_eval_map(struct module *mod,
6092                                   struct trace_eval_map **start, int len)
6093 {
6094         struct trace_eval_map **map;
6095
6096         if (len <= 0)
6097                 return;
6098
6099         map = start;
6100
6101         trace_event_eval_update(map, len);
6102
6103         trace_insert_eval_map_file(mod, start, len);
6104 }
6105
6106 static ssize_t
6107 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6108                        size_t cnt, loff_t *ppos)
6109 {
6110         struct trace_array *tr = filp->private_data;
6111         char buf[MAX_TRACER_SIZE+2];
6112         int r;
6113
6114         mutex_lock(&trace_types_lock);
6115         r = sprintf(buf, "%s\n", tr->current_trace->name);
6116         mutex_unlock(&trace_types_lock);
6117
6118         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6119 }
6120
6121 int tracer_init(struct tracer *t, struct trace_array *tr)
6122 {
6123         tracing_reset_online_cpus(&tr->array_buffer);
6124         return t->init(tr);
6125 }
6126
6127 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6128 {
6129         int cpu;
6130
6131         for_each_tracing_cpu(cpu)
6132                 per_cpu_ptr(buf->data, cpu)->entries = val;
6133 }
6134
6135 #ifdef CONFIG_TRACER_MAX_TRACE
6136 /* resize @tr's buffer to the size of @size_tr's entries */
6137 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6138                                         struct array_buffer *size_buf, int cpu_id)
6139 {
6140         int cpu, ret = 0;
6141
6142         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6143                 for_each_tracing_cpu(cpu) {
6144                         ret = ring_buffer_resize(trace_buf->buffer,
6145                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6146                         if (ret < 0)
6147                                 break;
6148                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6149                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6150                 }
6151         } else {
6152                 ret = ring_buffer_resize(trace_buf->buffer,
6153                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6154                 if (ret == 0)
6155                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6156                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6157         }
6158
6159         return ret;
6160 }
6161 #endif /* CONFIG_TRACER_MAX_TRACE */
6162
6163 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6164                                         unsigned long size, int cpu)
6165 {
6166         int ret;
6167
6168         /*
6169          * If kernel or user changes the size of the ring buffer
6170          * we use the size that was given, and we can forget about
6171          * expanding it later.
6172          */
6173         ring_buffer_expanded = true;
6174
6175         /* May be called before buffers are initialized */
6176         if (!tr->array_buffer.buffer)
6177                 return 0;
6178
6179         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6180         if (ret < 0)
6181                 return ret;
6182
6183 #ifdef CONFIG_TRACER_MAX_TRACE
6184         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6185             !tr->current_trace->use_max_tr)
6186                 goto out;
6187
6188         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6189         if (ret < 0) {
6190                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6191                                                      &tr->array_buffer, cpu);
6192                 if (r < 0) {
6193                         /*
6194                          * AARGH! We are left with different
6195                          * size max buffer!!!!
6196                          * The max buffer is our "snapshot" buffer.
6197                          * When a tracer needs a snapshot (one of the
6198                          * latency tracers), it swaps the max buffer
6199                          * with the saved snap shot. We succeeded to
6200                          * update the size of the main buffer, but failed to
6201                          * update the size of the max buffer. But when we tried
6202                          * to reset the main buffer to the original size, we
6203                          * failed there too. This is very unlikely to
6204                          * happen, but if it does, warn and kill all
6205                          * tracing.
6206                          */
6207                         WARN_ON(1);
6208                         tracing_disabled = 1;
6209                 }
6210                 return ret;
6211         }
6212
6213         if (cpu == RING_BUFFER_ALL_CPUS)
6214                 set_buffer_entries(&tr->max_buffer, size);
6215         else
6216                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6217
6218  out:
6219 #endif /* CONFIG_TRACER_MAX_TRACE */
6220
6221         if (cpu == RING_BUFFER_ALL_CPUS)
6222                 set_buffer_entries(&tr->array_buffer, size);
6223         else
6224                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6225
6226         return ret;
6227 }
6228
6229 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6230                                   unsigned long size, int cpu_id)
6231 {
6232         int ret;
6233
6234         mutex_lock(&trace_types_lock);
6235
6236         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6237                 /* make sure, this cpu is enabled in the mask */
6238                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6239                         ret = -EINVAL;
6240                         goto out;
6241                 }
6242         }
6243
6244         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6245         if (ret < 0)
6246                 ret = -ENOMEM;
6247
6248 out:
6249         mutex_unlock(&trace_types_lock);
6250
6251         return ret;
6252 }
6253
6254
6255 /**
6256  * tracing_update_buffers - used by tracing facility to expand ring buffers
6257  *
6258  * To save on memory when the tracing is never used on a system with it
6259  * configured in. The ring buffers are set to a minimum size. But once
6260  * a user starts to use the tracing facility, then they need to grow
6261  * to their default size.
6262  *
6263  * This function is to be called when a tracer is about to be used.
6264  */
6265 int tracing_update_buffers(void)
6266 {
6267         int ret = 0;
6268
6269         mutex_lock(&trace_types_lock);
6270         if (!ring_buffer_expanded)
6271                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6272                                                 RING_BUFFER_ALL_CPUS);
6273         mutex_unlock(&trace_types_lock);
6274
6275         return ret;
6276 }
6277
6278 struct trace_option_dentry;
6279
6280 static void
6281 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6282
6283 /*
6284  * Used to clear out the tracer before deletion of an instance.
6285  * Must have trace_types_lock held.
6286  */
6287 static void tracing_set_nop(struct trace_array *tr)
6288 {
6289         if (tr->current_trace == &nop_trace)
6290                 return;
6291         
6292         tr->current_trace->enabled--;
6293
6294         if (tr->current_trace->reset)
6295                 tr->current_trace->reset(tr);
6296
6297         tr->current_trace = &nop_trace;
6298 }
6299
6300 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6301 {
6302         /* Only enable if the directory has been created already. */
6303         if (!tr->dir)
6304                 return;
6305
6306         create_trace_option_files(tr, t);
6307 }
6308
6309 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6310 {
6311         struct tracer *t;
6312 #ifdef CONFIG_TRACER_MAX_TRACE
6313         bool had_max_tr;
6314 #endif
6315         int ret = 0;
6316
6317         mutex_lock(&trace_types_lock);
6318
6319         if (!ring_buffer_expanded) {
6320                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6321                                                 RING_BUFFER_ALL_CPUS);
6322                 if (ret < 0)
6323                         goto out;
6324                 ret = 0;
6325         }
6326
6327         for (t = trace_types; t; t = t->next) {
6328                 if (strcmp(t->name, buf) == 0)
6329                         break;
6330         }
6331         if (!t) {
6332                 ret = -EINVAL;
6333                 goto out;
6334         }
6335         if (t == tr->current_trace)
6336                 goto out;
6337
6338 #ifdef CONFIG_TRACER_SNAPSHOT
6339         if (t->use_max_tr) {
6340                 arch_spin_lock(&tr->max_lock);
6341                 if (tr->cond_snapshot)
6342                         ret = -EBUSY;
6343                 arch_spin_unlock(&tr->max_lock);
6344                 if (ret)
6345                         goto out;
6346         }
6347 #endif
6348         /* Some tracers won't work on kernel command line */
6349         if (system_state < SYSTEM_RUNNING && t->noboot) {
6350                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6351                         t->name);
6352                 goto out;
6353         }
6354
6355         /* Some tracers are only allowed for the top level buffer */
6356         if (!trace_ok_for_array(t, tr)) {
6357                 ret = -EINVAL;
6358                 goto out;
6359         }
6360
6361         /* If trace pipe files are being read, we can't change the tracer */
6362         if (tr->trace_ref) {
6363                 ret = -EBUSY;
6364                 goto out;
6365         }
6366
6367         trace_branch_disable();
6368
6369         tr->current_trace->enabled--;
6370
6371         if (tr->current_trace->reset)
6372                 tr->current_trace->reset(tr);
6373
6374         /* Current trace needs to be nop_trace before synchronize_rcu */
6375         tr->current_trace = &nop_trace;
6376
6377 #ifdef CONFIG_TRACER_MAX_TRACE
6378         had_max_tr = tr->allocated_snapshot;
6379
6380         if (had_max_tr && !t->use_max_tr) {
6381                 /*
6382                  * We need to make sure that the update_max_tr sees that
6383                  * current_trace changed to nop_trace to keep it from
6384                  * swapping the buffers after we resize it.
6385                  * The update_max_tr is called from interrupts disabled
6386                  * so a synchronized_sched() is sufficient.
6387                  */
6388                 synchronize_rcu();
6389                 free_snapshot(tr);
6390         }
6391 #endif
6392
6393 #ifdef CONFIG_TRACER_MAX_TRACE
6394         if (t->use_max_tr && !had_max_tr) {
6395                 ret = tracing_alloc_snapshot_instance(tr);
6396                 if (ret < 0)
6397                         goto out;
6398         }
6399 #endif
6400
6401         if (t->init) {
6402                 ret = tracer_init(t, tr);
6403                 if (ret)
6404                         goto out;
6405         }
6406
6407         tr->current_trace = t;
6408         tr->current_trace->enabled++;
6409         trace_branch_enable(tr);
6410  out:
6411         mutex_unlock(&trace_types_lock);
6412
6413         return ret;
6414 }
6415
6416 static ssize_t
6417 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6418                         size_t cnt, loff_t *ppos)
6419 {
6420         struct trace_array *tr = filp->private_data;
6421         char buf[MAX_TRACER_SIZE+1];
6422         int i;
6423         size_t ret;
6424         int err;
6425
6426         ret = cnt;
6427
6428         if (cnt > MAX_TRACER_SIZE)
6429                 cnt = MAX_TRACER_SIZE;
6430
6431         if (copy_from_user(buf, ubuf, cnt))
6432                 return -EFAULT;
6433
6434         buf[cnt] = 0;
6435
6436         /* strip ending whitespace. */
6437         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6438                 buf[i] = 0;
6439
6440         err = tracing_set_tracer(tr, buf);
6441         if (err)
6442                 return err;
6443
6444         *ppos += ret;
6445
6446         return ret;
6447 }
6448
6449 static ssize_t
6450 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6451                    size_t cnt, loff_t *ppos)
6452 {
6453         char buf[64];
6454         int r;
6455
6456         r = snprintf(buf, sizeof(buf), "%ld\n",
6457                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6458         if (r > sizeof(buf))
6459                 r = sizeof(buf);
6460         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6461 }
6462
6463 static ssize_t
6464 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6465                     size_t cnt, loff_t *ppos)
6466 {
6467         unsigned long val;
6468         int ret;
6469
6470         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6471         if (ret)
6472                 return ret;
6473
6474         *ptr = val * 1000;
6475
6476         return cnt;
6477 }
6478
6479 static ssize_t
6480 tracing_thresh_read(struct file *filp, char __user *ubuf,
6481                     size_t cnt, loff_t *ppos)
6482 {
6483         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6484 }
6485
6486 static ssize_t
6487 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6488                      size_t cnt, loff_t *ppos)
6489 {
6490         struct trace_array *tr = filp->private_data;
6491         int ret;
6492
6493         mutex_lock(&trace_types_lock);
6494         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6495         if (ret < 0)
6496                 goto out;
6497
6498         if (tr->current_trace->update_thresh) {
6499                 ret = tr->current_trace->update_thresh(tr);
6500                 if (ret < 0)
6501                         goto out;
6502         }
6503
6504         ret = cnt;
6505 out:
6506         mutex_unlock(&trace_types_lock);
6507
6508         return ret;
6509 }
6510
6511 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6512
6513 static ssize_t
6514 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6515                      size_t cnt, loff_t *ppos)
6516 {
6517         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6518 }
6519
6520 static ssize_t
6521 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6522                       size_t cnt, loff_t *ppos)
6523 {
6524         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6525 }
6526
6527 #endif
6528
6529 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6530 {
6531         struct trace_array *tr = inode->i_private;
6532         struct trace_iterator *iter;
6533         int ret;
6534
6535         ret = tracing_check_open_get_tr(tr);
6536         if (ret)
6537                 return ret;
6538
6539         mutex_lock(&trace_types_lock);
6540
6541         /* create a buffer to store the information to pass to userspace */
6542         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6543         if (!iter) {
6544                 ret = -ENOMEM;
6545                 __trace_array_put(tr);
6546                 goto out;
6547         }
6548
6549         trace_seq_init(&iter->seq);
6550         iter->trace = tr->current_trace;
6551
6552         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6553                 ret = -ENOMEM;
6554                 goto fail;
6555         }
6556
6557         /* trace pipe does not show start of buffer */
6558         cpumask_setall(iter->started);
6559
6560         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6561                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6562
6563         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6564         if (trace_clocks[tr->clock_id].in_ns)
6565                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6566
6567         iter->tr = tr;
6568         iter->array_buffer = &tr->array_buffer;
6569         iter->cpu_file = tracing_get_cpu(inode);
6570         mutex_init(&iter->mutex);
6571         filp->private_data = iter;
6572
6573         if (iter->trace->pipe_open)
6574                 iter->trace->pipe_open(iter);
6575
6576         nonseekable_open(inode, filp);
6577
6578         tr->trace_ref++;
6579 out:
6580         mutex_unlock(&trace_types_lock);
6581         return ret;
6582
6583 fail:
6584         kfree(iter);
6585         __trace_array_put(tr);
6586         mutex_unlock(&trace_types_lock);
6587         return ret;
6588 }
6589
6590 static int tracing_release_pipe(struct inode *inode, struct file *file)
6591 {
6592         struct trace_iterator *iter = file->private_data;
6593         struct trace_array *tr = inode->i_private;
6594
6595         mutex_lock(&trace_types_lock);
6596
6597         tr->trace_ref--;
6598
6599         if (iter->trace->pipe_close)
6600                 iter->trace->pipe_close(iter);
6601
6602         mutex_unlock(&trace_types_lock);
6603
6604         free_cpumask_var(iter->started);
6605         mutex_destroy(&iter->mutex);
6606         kfree(iter);
6607
6608         trace_array_put(tr);
6609
6610         return 0;
6611 }
6612
6613 static __poll_t
6614 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6615 {
6616         struct trace_array *tr = iter->tr;
6617
6618         /* Iterators are static, they should be filled or empty */
6619         if (trace_buffer_iter(iter, iter->cpu_file))
6620                 return EPOLLIN | EPOLLRDNORM;
6621
6622         if (tr->trace_flags & TRACE_ITER_BLOCK)
6623                 /*
6624                  * Always select as readable when in blocking mode
6625                  */
6626                 return EPOLLIN | EPOLLRDNORM;
6627         else
6628                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6629                                              filp, poll_table);
6630 }
6631
6632 static __poll_t
6633 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6634 {
6635         struct trace_iterator *iter = filp->private_data;
6636
6637         return trace_poll(iter, filp, poll_table);
6638 }
6639
6640 /* Must be called with iter->mutex held. */
6641 static int tracing_wait_pipe(struct file *filp)
6642 {
6643         struct trace_iterator *iter = filp->private_data;
6644         int ret;
6645
6646         while (trace_empty(iter)) {
6647
6648                 if ((filp->f_flags & O_NONBLOCK)) {
6649                         return -EAGAIN;
6650                 }
6651
6652                 /*
6653                  * We block until we read something and tracing is disabled.
6654                  * We still block if tracing is disabled, but we have never
6655                  * read anything. This allows a user to cat this file, and
6656                  * then enable tracing. But after we have read something,
6657                  * we give an EOF when tracing is again disabled.
6658                  *
6659                  * iter->pos will be 0 if we haven't read anything.
6660                  */
6661                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6662                         break;
6663
6664                 mutex_unlock(&iter->mutex);
6665
6666                 ret = wait_on_pipe(iter, 0);
6667
6668                 mutex_lock(&iter->mutex);
6669
6670                 if (ret)
6671                         return ret;
6672         }
6673
6674         return 1;
6675 }
6676
6677 /*
6678  * Consumer reader.
6679  */
6680 static ssize_t
6681 tracing_read_pipe(struct file *filp, char __user *ubuf,
6682                   size_t cnt, loff_t *ppos)
6683 {
6684         struct trace_iterator *iter = filp->private_data;
6685         ssize_t sret;
6686
6687         /*
6688          * Avoid more than one consumer on a single file descriptor
6689          * This is just a matter of traces coherency, the ring buffer itself
6690          * is protected.
6691          */
6692         mutex_lock(&iter->mutex);
6693
6694         /* return any leftover data */
6695         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6696         if (sret != -EBUSY)
6697                 goto out;
6698
6699         trace_seq_init(&iter->seq);
6700
6701         if (iter->trace->read) {
6702                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6703                 if (sret)
6704                         goto out;
6705         }
6706
6707 waitagain:
6708         sret = tracing_wait_pipe(filp);
6709         if (sret <= 0)
6710                 goto out;
6711
6712         /* stop when tracing is finished */
6713         if (trace_empty(iter)) {
6714                 sret = 0;
6715                 goto out;
6716         }
6717
6718         if (cnt >= PAGE_SIZE)
6719                 cnt = PAGE_SIZE - 1;
6720
6721         /* reset all but tr, trace, and overruns */
6722         memset(&iter->seq, 0,
6723                sizeof(struct trace_iterator) -
6724                offsetof(struct trace_iterator, seq));
6725         cpumask_clear(iter->started);
6726         trace_seq_init(&iter->seq);
6727         iter->pos = -1;
6728
6729         trace_event_read_lock();
6730         trace_access_lock(iter->cpu_file);
6731         while (trace_find_next_entry_inc(iter) != NULL) {
6732                 enum print_line_t ret;
6733                 int save_len = iter->seq.seq.len;
6734
6735                 ret = print_trace_line(iter);
6736                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6737                         /* don't print partial lines */
6738                         iter->seq.seq.len = save_len;
6739                         break;
6740                 }
6741                 if (ret != TRACE_TYPE_NO_CONSUME)
6742                         trace_consume(iter);
6743
6744                 if (trace_seq_used(&iter->seq) >= cnt)
6745                         break;
6746
6747                 /*
6748                  * Setting the full flag means we reached the trace_seq buffer
6749                  * size and we should leave by partial output condition above.
6750                  * One of the trace_seq_* functions is not used properly.
6751                  */
6752                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6753                           iter->ent->type);
6754         }
6755         trace_access_unlock(iter->cpu_file);
6756         trace_event_read_unlock();
6757
6758         /* Now copy what we have to the user */
6759         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6760         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6761                 trace_seq_init(&iter->seq);
6762
6763         /*
6764          * If there was nothing to send to user, in spite of consuming trace
6765          * entries, go back to wait for more entries.
6766          */
6767         if (sret == -EBUSY)
6768                 goto waitagain;
6769
6770 out:
6771         mutex_unlock(&iter->mutex);
6772
6773         return sret;
6774 }
6775
6776 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6777                                      unsigned int idx)
6778 {
6779         __free_page(spd->pages[idx]);
6780 }
6781
6782 static size_t
6783 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6784 {
6785         size_t count;
6786         int save_len;
6787         int ret;
6788
6789         /* Seq buffer is page-sized, exactly what we need. */
6790         for (;;) {
6791                 save_len = iter->seq.seq.len;
6792                 ret = print_trace_line(iter);
6793
6794                 if (trace_seq_has_overflowed(&iter->seq)) {
6795                         iter->seq.seq.len = save_len;
6796                         break;
6797                 }
6798
6799                 /*
6800                  * This should not be hit, because it should only
6801                  * be set if the iter->seq overflowed. But check it
6802                  * anyway to be safe.
6803                  */
6804                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6805                         iter->seq.seq.len = save_len;
6806                         break;
6807                 }
6808
6809                 count = trace_seq_used(&iter->seq) - save_len;
6810                 if (rem < count) {
6811                         rem = 0;
6812                         iter->seq.seq.len = save_len;
6813                         break;
6814                 }
6815
6816                 if (ret != TRACE_TYPE_NO_CONSUME)
6817                         trace_consume(iter);
6818                 rem -= count;
6819                 if (!trace_find_next_entry_inc(iter))   {
6820                         rem = 0;
6821                         iter->ent = NULL;
6822                         break;
6823                 }
6824         }
6825
6826         return rem;
6827 }
6828
6829 static ssize_t tracing_splice_read_pipe(struct file *filp,
6830                                         loff_t *ppos,
6831                                         struct pipe_inode_info *pipe,
6832                                         size_t len,
6833                                         unsigned int flags)
6834 {
6835         struct page *pages_def[PIPE_DEF_BUFFERS];
6836         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6837         struct trace_iterator *iter = filp->private_data;
6838         struct splice_pipe_desc spd = {
6839                 .pages          = pages_def,
6840                 .partial        = partial_def,
6841                 .nr_pages       = 0, /* This gets updated below. */
6842                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6843                 .ops            = &default_pipe_buf_ops,
6844                 .spd_release    = tracing_spd_release_pipe,
6845         };
6846         ssize_t ret;
6847         size_t rem;
6848         unsigned int i;
6849
6850         if (splice_grow_spd(pipe, &spd))
6851                 return -ENOMEM;
6852
6853         mutex_lock(&iter->mutex);
6854
6855         if (iter->trace->splice_read) {
6856                 ret = iter->trace->splice_read(iter, filp,
6857                                                ppos, pipe, len, flags);
6858                 if (ret)
6859                         goto out_err;
6860         }
6861
6862         ret = tracing_wait_pipe(filp);
6863         if (ret <= 0)
6864                 goto out_err;
6865
6866         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6867                 ret = -EFAULT;
6868                 goto out_err;
6869         }
6870
6871         trace_event_read_lock();
6872         trace_access_lock(iter->cpu_file);
6873
6874         /* Fill as many pages as possible. */
6875         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6876                 spd.pages[i] = alloc_page(GFP_KERNEL);
6877                 if (!spd.pages[i])
6878                         break;
6879
6880                 rem = tracing_fill_pipe_page(rem, iter);
6881
6882                 /* Copy the data into the page, so we can start over. */
6883                 ret = trace_seq_to_buffer(&iter->seq,
6884                                           page_address(spd.pages[i]),
6885                                           trace_seq_used(&iter->seq));
6886                 if (ret < 0) {
6887                         __free_page(spd.pages[i]);
6888                         break;
6889                 }
6890                 spd.partial[i].offset = 0;
6891                 spd.partial[i].len = trace_seq_used(&iter->seq);
6892
6893                 trace_seq_init(&iter->seq);
6894         }
6895
6896         trace_access_unlock(iter->cpu_file);
6897         trace_event_read_unlock();
6898         mutex_unlock(&iter->mutex);
6899
6900         spd.nr_pages = i;
6901
6902         if (i)
6903                 ret = splice_to_pipe(pipe, &spd);
6904         else
6905                 ret = 0;
6906 out:
6907         splice_shrink_spd(&spd);
6908         return ret;
6909
6910 out_err:
6911         mutex_unlock(&iter->mutex);
6912         goto out;
6913 }
6914
6915 static ssize_t
6916 tracing_entries_read(struct file *filp, char __user *ubuf,
6917                      size_t cnt, loff_t *ppos)
6918 {
6919         struct inode *inode = file_inode(filp);
6920         struct trace_array *tr = inode->i_private;
6921         int cpu = tracing_get_cpu(inode);
6922         char buf[64];
6923         int r = 0;
6924         ssize_t ret;
6925
6926         mutex_lock(&trace_types_lock);
6927
6928         if (cpu == RING_BUFFER_ALL_CPUS) {
6929                 int cpu, buf_size_same;
6930                 unsigned long size;
6931
6932                 size = 0;
6933                 buf_size_same = 1;
6934                 /* check if all cpu sizes are same */
6935                 for_each_tracing_cpu(cpu) {
6936                         /* fill in the size from first enabled cpu */
6937                         if (size == 0)
6938                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6939                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6940                                 buf_size_same = 0;
6941                                 break;
6942                         }
6943                 }
6944
6945                 if (buf_size_same) {
6946                         if (!ring_buffer_expanded)
6947                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6948                                             size >> 10,
6949                                             trace_buf_size >> 10);
6950                         else
6951                                 r = sprintf(buf, "%lu\n", size >> 10);
6952                 } else
6953                         r = sprintf(buf, "X\n");
6954         } else
6955                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6956
6957         mutex_unlock(&trace_types_lock);
6958
6959         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6960         return ret;
6961 }
6962
6963 static ssize_t
6964 tracing_entries_write(struct file *filp, const char __user *ubuf,
6965                       size_t cnt, loff_t *ppos)
6966 {
6967         struct inode *inode = file_inode(filp);
6968         struct trace_array *tr = inode->i_private;
6969         unsigned long val;
6970         int ret;
6971
6972         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6973         if (ret)
6974                 return ret;
6975
6976         /* must have at least 1 entry */
6977         if (!val)
6978                 return -EINVAL;
6979
6980         /* value is in KB */
6981         val <<= 10;
6982         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6983         if (ret < 0)
6984                 return ret;
6985
6986         *ppos += cnt;
6987
6988         return cnt;
6989 }
6990
6991 static ssize_t
6992 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6993                                 size_t cnt, loff_t *ppos)
6994 {
6995         struct trace_array *tr = filp->private_data;
6996         char buf[64];
6997         int r, cpu;
6998         unsigned long size = 0, expanded_size = 0;
6999
7000         mutex_lock(&trace_types_lock);
7001         for_each_tracing_cpu(cpu) {
7002                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7003                 if (!ring_buffer_expanded)
7004                         expanded_size += trace_buf_size >> 10;
7005         }
7006         if (ring_buffer_expanded)
7007                 r = sprintf(buf, "%lu\n", size);
7008         else
7009                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7010         mutex_unlock(&trace_types_lock);
7011
7012         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7013 }
7014
7015 static ssize_t
7016 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7017                           size_t cnt, loff_t *ppos)
7018 {
7019         /*
7020          * There is no need to read what the user has written, this function
7021          * is just to make sure that there is no error when "echo" is used
7022          */
7023
7024         *ppos += cnt;
7025
7026         return cnt;
7027 }
7028
7029 static int
7030 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7031 {
7032         struct trace_array *tr = inode->i_private;
7033
7034         /* disable tracing ? */
7035         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7036                 tracer_tracing_off(tr);
7037         /* resize the ring buffer to 0 */
7038         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7039
7040         trace_array_put(tr);
7041
7042         return 0;
7043 }
7044
7045 static ssize_t
7046 tracing_mark_write(struct file *filp, const char __user *ubuf,
7047                                         size_t cnt, loff_t *fpos)
7048 {
7049         struct trace_array *tr = filp->private_data;
7050         struct ring_buffer_event *event;
7051         enum event_trigger_type tt = ETT_NONE;
7052         struct trace_buffer *buffer;
7053         struct print_entry *entry;
7054         ssize_t written;
7055         int size;
7056         int len;
7057
7058 /* Used in tracing_mark_raw_write() as well */
7059 #define FAULTED_STR "<faulted>"
7060 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7061
7062         if (tracing_disabled)
7063                 return -EINVAL;
7064
7065         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7066                 return -EINVAL;
7067
7068         if (cnt > TRACE_BUF_SIZE)
7069                 cnt = TRACE_BUF_SIZE;
7070
7071         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7072
7073         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7074
7075         /* If less than "<faulted>", then make sure we can still add that */
7076         if (cnt < FAULTED_SIZE)
7077                 size += FAULTED_SIZE - cnt;
7078
7079         buffer = tr->array_buffer.buffer;
7080         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7081                                             tracing_gen_ctx());
7082         if (unlikely(!event))
7083                 /* Ring buffer disabled, return as if not open for write */
7084                 return -EBADF;
7085
7086         entry = ring_buffer_event_data(event);
7087         entry->ip = _THIS_IP_;
7088
7089         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7090         if (len) {
7091                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7092                 cnt = FAULTED_SIZE;
7093                 written = -EFAULT;
7094         } else
7095                 written = cnt;
7096
7097         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7098                 /* do not add \n before testing triggers, but add \0 */
7099                 entry->buf[cnt] = '\0';
7100                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7101         }
7102
7103         if (entry->buf[cnt - 1] != '\n') {
7104                 entry->buf[cnt] = '\n';
7105                 entry->buf[cnt + 1] = '\0';
7106         } else
7107                 entry->buf[cnt] = '\0';
7108
7109         if (static_branch_unlikely(&trace_marker_exports_enabled))
7110                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7111         __buffer_unlock_commit(buffer, event);
7112
7113         if (tt)
7114                 event_triggers_post_call(tr->trace_marker_file, tt);
7115
7116         if (written > 0)
7117                 *fpos += written;
7118
7119         return written;
7120 }
7121
7122 /* Limit it for now to 3K (including tag) */
7123 #define RAW_DATA_MAX_SIZE (1024*3)
7124
7125 static ssize_t
7126 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7127                                         size_t cnt, loff_t *fpos)
7128 {
7129         struct trace_array *tr = filp->private_data;
7130         struct ring_buffer_event *event;
7131         struct trace_buffer *buffer;
7132         struct raw_data_entry *entry;
7133         ssize_t written;
7134         int size;
7135         int len;
7136
7137 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7138
7139         if (tracing_disabled)
7140                 return -EINVAL;
7141
7142         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7143                 return -EINVAL;
7144
7145         /* The marker must at least have a tag id */
7146         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7147                 return -EINVAL;
7148
7149         if (cnt > TRACE_BUF_SIZE)
7150                 cnt = TRACE_BUF_SIZE;
7151
7152         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7153
7154         size = sizeof(*entry) + cnt;
7155         if (cnt < FAULT_SIZE_ID)
7156                 size += FAULT_SIZE_ID - cnt;
7157
7158         buffer = tr->array_buffer.buffer;
7159         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7160                                             tracing_gen_ctx());
7161         if (!event)
7162                 /* Ring buffer disabled, return as if not open for write */
7163                 return -EBADF;
7164
7165         entry = ring_buffer_event_data(event);
7166
7167         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7168         if (len) {
7169                 entry->id = -1;
7170                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7171                 written = -EFAULT;
7172         } else
7173                 written = cnt;
7174
7175         __buffer_unlock_commit(buffer, event);
7176
7177         if (written > 0)
7178                 *fpos += written;
7179
7180         return written;
7181 }
7182
7183 static int tracing_clock_show(struct seq_file *m, void *v)
7184 {
7185         struct trace_array *tr = m->private;
7186         int i;
7187
7188         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7189                 seq_printf(m,
7190                         "%s%s%s%s", i ? " " : "",
7191                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7192                         i == tr->clock_id ? "]" : "");
7193         seq_putc(m, '\n');
7194
7195         return 0;
7196 }
7197
7198 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7199 {
7200         int i;
7201
7202         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7203                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7204                         break;
7205         }
7206         if (i == ARRAY_SIZE(trace_clocks))
7207                 return -EINVAL;
7208
7209         mutex_lock(&trace_types_lock);
7210
7211         tr->clock_id = i;
7212
7213         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7214
7215         /*
7216          * New clock may not be consistent with the previous clock.
7217          * Reset the buffer so that it doesn't have incomparable timestamps.
7218          */
7219         tracing_reset_online_cpus(&tr->array_buffer);
7220
7221 #ifdef CONFIG_TRACER_MAX_TRACE
7222         if (tr->max_buffer.buffer)
7223                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7224         tracing_reset_online_cpus(&tr->max_buffer);
7225 #endif
7226
7227         mutex_unlock(&trace_types_lock);
7228
7229         return 0;
7230 }
7231
7232 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7233                                    size_t cnt, loff_t *fpos)
7234 {
7235         struct seq_file *m = filp->private_data;
7236         struct trace_array *tr = m->private;
7237         char buf[64];
7238         const char *clockstr;
7239         int ret;
7240
7241         if (cnt >= sizeof(buf))
7242                 return -EINVAL;
7243
7244         if (copy_from_user(buf, ubuf, cnt))
7245                 return -EFAULT;
7246
7247         buf[cnt] = 0;
7248
7249         clockstr = strstrip(buf);
7250
7251         ret = tracing_set_clock(tr, clockstr);
7252         if (ret)
7253                 return ret;
7254
7255         *fpos += cnt;
7256
7257         return cnt;
7258 }
7259
7260 static int tracing_clock_open(struct inode *inode, struct file *file)
7261 {
7262         struct trace_array *tr = inode->i_private;
7263         int ret;
7264
7265         ret = tracing_check_open_get_tr(tr);
7266         if (ret)
7267                 return ret;
7268
7269         ret = single_open(file, tracing_clock_show, inode->i_private);
7270         if (ret < 0)
7271                 trace_array_put(tr);
7272
7273         return ret;
7274 }
7275
7276 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7277 {
7278         struct trace_array *tr = m->private;
7279
7280         mutex_lock(&trace_types_lock);
7281
7282         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7283                 seq_puts(m, "delta [absolute]\n");
7284         else
7285                 seq_puts(m, "[delta] absolute\n");
7286
7287         mutex_unlock(&trace_types_lock);
7288
7289         return 0;
7290 }
7291
7292 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7293 {
7294         struct trace_array *tr = inode->i_private;
7295         int ret;
7296
7297         ret = tracing_check_open_get_tr(tr);
7298         if (ret)
7299                 return ret;
7300
7301         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7302         if (ret < 0)
7303                 trace_array_put(tr);
7304
7305         return ret;
7306 }
7307
7308 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7309 {
7310         if (rbe == this_cpu_read(trace_buffered_event))
7311                 return ring_buffer_time_stamp(buffer);
7312
7313         return ring_buffer_event_time_stamp(buffer, rbe);
7314 }
7315
7316 /*
7317  * Set or disable using the per CPU trace_buffer_event when possible.
7318  */
7319 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7320 {
7321         int ret = 0;
7322
7323         mutex_lock(&trace_types_lock);
7324
7325         if (set && tr->no_filter_buffering_ref++)
7326                 goto out;
7327
7328         if (!set) {
7329                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7330                         ret = -EINVAL;
7331                         goto out;
7332                 }
7333
7334                 --tr->no_filter_buffering_ref;
7335         }
7336  out:
7337         mutex_unlock(&trace_types_lock);
7338
7339         return ret;
7340 }
7341
7342 struct ftrace_buffer_info {
7343         struct trace_iterator   iter;
7344         void                    *spare;
7345         unsigned int            spare_cpu;
7346         unsigned int            read;
7347 };
7348
7349 #ifdef CONFIG_TRACER_SNAPSHOT
7350 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7351 {
7352         struct trace_array *tr = inode->i_private;
7353         struct trace_iterator *iter;
7354         struct seq_file *m;
7355         int ret;
7356
7357         ret = tracing_check_open_get_tr(tr);
7358         if (ret)
7359                 return ret;
7360
7361         if (file->f_mode & FMODE_READ) {
7362                 iter = __tracing_open(inode, file, true);
7363                 if (IS_ERR(iter))
7364                         ret = PTR_ERR(iter);
7365         } else {
7366                 /* Writes still need the seq_file to hold the private data */
7367                 ret = -ENOMEM;
7368                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7369                 if (!m)
7370                         goto out;
7371                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7372                 if (!iter) {
7373                         kfree(m);
7374                         goto out;
7375                 }
7376                 ret = 0;
7377
7378                 iter->tr = tr;
7379                 iter->array_buffer = &tr->max_buffer;
7380                 iter->cpu_file = tracing_get_cpu(inode);
7381                 m->private = iter;
7382                 file->private_data = m;
7383         }
7384 out:
7385         if (ret < 0)
7386                 trace_array_put(tr);
7387
7388         return ret;
7389 }
7390
7391 static ssize_t
7392 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7393                        loff_t *ppos)
7394 {
7395         struct seq_file *m = filp->private_data;
7396         struct trace_iterator *iter = m->private;
7397         struct trace_array *tr = iter->tr;
7398         unsigned long val;
7399         int ret;
7400
7401         ret = tracing_update_buffers();
7402         if (ret < 0)
7403                 return ret;
7404
7405         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7406         if (ret)
7407                 return ret;
7408
7409         mutex_lock(&trace_types_lock);
7410
7411         if (tr->current_trace->use_max_tr) {
7412                 ret = -EBUSY;
7413                 goto out;
7414         }
7415
7416         arch_spin_lock(&tr->max_lock);
7417         if (tr->cond_snapshot)
7418                 ret = -EBUSY;
7419         arch_spin_unlock(&tr->max_lock);
7420         if (ret)
7421                 goto out;
7422
7423         switch (val) {
7424         case 0:
7425                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7426                         ret = -EINVAL;
7427                         break;
7428                 }
7429                 if (tr->allocated_snapshot)
7430                         free_snapshot(tr);
7431                 break;
7432         case 1:
7433 /* Only allow per-cpu swap if the ring buffer supports it */
7434 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7435                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7436                         ret = -EINVAL;
7437                         break;
7438                 }
7439 #endif
7440                 if (tr->allocated_snapshot)
7441                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7442                                         &tr->array_buffer, iter->cpu_file);
7443                 else
7444                         ret = tracing_alloc_snapshot_instance(tr);
7445                 if (ret < 0)
7446                         break;
7447                 local_irq_disable();
7448                 /* Now, we're going to swap */
7449                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7450                         update_max_tr(tr, current, smp_processor_id(), NULL);
7451                 else
7452                         update_max_tr_single(tr, current, iter->cpu_file);
7453                 local_irq_enable();
7454                 break;
7455         default:
7456                 if (tr->allocated_snapshot) {
7457                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7458                                 tracing_reset_online_cpus(&tr->max_buffer);
7459                         else
7460                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7461                 }
7462                 break;
7463         }
7464
7465         if (ret >= 0) {
7466                 *ppos += cnt;
7467                 ret = cnt;
7468         }
7469 out:
7470         mutex_unlock(&trace_types_lock);
7471         return ret;
7472 }
7473
7474 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7475 {
7476         struct seq_file *m = file->private_data;
7477         int ret;
7478
7479         ret = tracing_release(inode, file);
7480
7481         if (file->f_mode & FMODE_READ)
7482                 return ret;
7483
7484         /* If write only, the seq_file is just a stub */
7485         if (m)
7486                 kfree(m->private);
7487         kfree(m);
7488
7489         return 0;
7490 }
7491
7492 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7493 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7494                                     size_t count, loff_t *ppos);
7495 static int tracing_buffers_release(struct inode *inode, struct file *file);
7496 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7497                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7498
7499 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7500 {
7501         struct ftrace_buffer_info *info;
7502         int ret;
7503
7504         /* The following checks for tracefs lockdown */
7505         ret = tracing_buffers_open(inode, filp);
7506         if (ret < 0)
7507                 return ret;
7508
7509         info = filp->private_data;
7510
7511         if (info->iter.trace->use_max_tr) {
7512                 tracing_buffers_release(inode, filp);
7513                 return -EBUSY;
7514         }
7515
7516         info->iter.snapshot = true;
7517         info->iter.array_buffer = &info->iter.tr->max_buffer;
7518
7519         return ret;
7520 }
7521
7522 #endif /* CONFIG_TRACER_SNAPSHOT */
7523
7524
7525 static const struct file_operations tracing_thresh_fops = {
7526         .open           = tracing_open_generic,
7527         .read           = tracing_thresh_read,
7528         .write          = tracing_thresh_write,
7529         .llseek         = generic_file_llseek,
7530 };
7531
7532 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7533 static const struct file_operations tracing_max_lat_fops = {
7534         .open           = tracing_open_generic,
7535         .read           = tracing_max_lat_read,
7536         .write          = tracing_max_lat_write,
7537         .llseek         = generic_file_llseek,
7538 };
7539 #endif
7540
7541 static const struct file_operations set_tracer_fops = {
7542         .open           = tracing_open_generic,
7543         .read           = tracing_set_trace_read,
7544         .write          = tracing_set_trace_write,
7545         .llseek         = generic_file_llseek,
7546 };
7547
7548 static const struct file_operations tracing_pipe_fops = {
7549         .open           = tracing_open_pipe,
7550         .poll           = tracing_poll_pipe,
7551         .read           = tracing_read_pipe,
7552         .splice_read    = tracing_splice_read_pipe,
7553         .release        = tracing_release_pipe,
7554         .llseek         = no_llseek,
7555 };
7556
7557 static const struct file_operations tracing_entries_fops = {
7558         .open           = tracing_open_generic_tr,
7559         .read           = tracing_entries_read,
7560         .write          = tracing_entries_write,
7561         .llseek         = generic_file_llseek,
7562         .release        = tracing_release_generic_tr,
7563 };
7564
7565 static const struct file_operations tracing_total_entries_fops = {
7566         .open           = tracing_open_generic_tr,
7567         .read           = tracing_total_entries_read,
7568         .llseek         = generic_file_llseek,
7569         .release        = tracing_release_generic_tr,
7570 };
7571
7572 static const struct file_operations tracing_free_buffer_fops = {
7573         .open           = tracing_open_generic_tr,
7574         .write          = tracing_free_buffer_write,
7575         .release        = tracing_free_buffer_release,
7576 };
7577
7578 static const struct file_operations tracing_mark_fops = {
7579         .open           = tracing_open_generic_tr,
7580         .write          = tracing_mark_write,
7581         .llseek         = generic_file_llseek,
7582         .release        = tracing_release_generic_tr,
7583 };
7584
7585 static const struct file_operations tracing_mark_raw_fops = {
7586         .open           = tracing_open_generic_tr,
7587         .write          = tracing_mark_raw_write,
7588         .llseek         = generic_file_llseek,
7589         .release        = tracing_release_generic_tr,
7590 };
7591
7592 static const struct file_operations trace_clock_fops = {
7593         .open           = tracing_clock_open,
7594         .read           = seq_read,
7595         .llseek         = seq_lseek,
7596         .release        = tracing_single_release_tr,
7597         .write          = tracing_clock_write,
7598 };
7599
7600 static const struct file_operations trace_time_stamp_mode_fops = {
7601         .open           = tracing_time_stamp_mode_open,
7602         .read           = seq_read,
7603         .llseek         = seq_lseek,
7604         .release        = tracing_single_release_tr,
7605 };
7606
7607 #ifdef CONFIG_TRACER_SNAPSHOT
7608 static const struct file_operations snapshot_fops = {
7609         .open           = tracing_snapshot_open,
7610         .read           = seq_read,
7611         .write          = tracing_snapshot_write,
7612         .llseek         = tracing_lseek,
7613         .release        = tracing_snapshot_release,
7614 };
7615
7616 static const struct file_operations snapshot_raw_fops = {
7617         .open           = snapshot_raw_open,
7618         .read           = tracing_buffers_read,
7619         .release        = tracing_buffers_release,
7620         .splice_read    = tracing_buffers_splice_read,
7621         .llseek         = no_llseek,
7622 };
7623
7624 #endif /* CONFIG_TRACER_SNAPSHOT */
7625
7626 /*
7627  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7628  * @filp: The active open file structure
7629  * @ubuf: The userspace provided buffer to read value into
7630  * @cnt: The maximum number of bytes to read
7631  * @ppos: The current "file" position
7632  *
7633  * This function implements the write interface for a struct trace_min_max_param.
7634  * The filp->private_data must point to a trace_min_max_param structure that
7635  * defines where to write the value, the min and the max acceptable values,
7636  * and a lock to protect the write.
7637  */
7638 static ssize_t
7639 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7640 {
7641         struct trace_min_max_param *param = filp->private_data;
7642         u64 val;
7643         int err;
7644
7645         if (!param)
7646                 return -EFAULT;
7647
7648         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7649         if (err)
7650                 return err;
7651
7652         if (param->lock)
7653                 mutex_lock(param->lock);
7654
7655         if (param->min && val < *param->min)
7656                 err = -EINVAL;
7657
7658         if (param->max && val > *param->max)
7659                 err = -EINVAL;
7660
7661         if (!err)
7662                 *param->val = val;
7663
7664         if (param->lock)
7665                 mutex_unlock(param->lock);
7666
7667         if (err)
7668                 return err;
7669
7670         return cnt;
7671 }
7672
7673 /*
7674  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7675  * @filp: The active open file structure
7676  * @ubuf: The userspace provided buffer to read value into
7677  * @cnt: The maximum number of bytes to read
7678  * @ppos: The current "file" position
7679  *
7680  * This function implements the read interface for a struct trace_min_max_param.
7681  * The filp->private_data must point to a trace_min_max_param struct with valid
7682  * data.
7683  */
7684 static ssize_t
7685 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7686 {
7687         struct trace_min_max_param *param = filp->private_data;
7688         char buf[U64_STR_SIZE];
7689         int len;
7690         u64 val;
7691
7692         if (!param)
7693                 return -EFAULT;
7694
7695         val = *param->val;
7696
7697         if (cnt > sizeof(buf))
7698                 cnt = sizeof(buf);
7699
7700         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7701
7702         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7703 }
7704
7705 const struct file_operations trace_min_max_fops = {
7706         .open           = tracing_open_generic,
7707         .read           = trace_min_max_read,
7708         .write          = trace_min_max_write,
7709 };
7710
7711 #define TRACING_LOG_ERRS_MAX    8
7712 #define TRACING_LOG_LOC_MAX     128
7713
7714 #define CMD_PREFIX "  Command: "
7715
7716 struct err_info {
7717         const char      **errs; /* ptr to loc-specific array of err strings */
7718         u8              type;   /* index into errs -> specific err string */
7719         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7720         u64             ts;
7721 };
7722
7723 struct tracing_log_err {
7724         struct list_head        list;
7725         struct err_info         info;
7726         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7727         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7728 };
7729
7730 static DEFINE_MUTEX(tracing_err_log_lock);
7731
7732 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7733 {
7734         struct tracing_log_err *err;
7735
7736         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7737                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7738                 if (!err)
7739                         err = ERR_PTR(-ENOMEM);
7740                 tr->n_err_log_entries++;
7741
7742                 return err;
7743         }
7744
7745         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7746         list_del(&err->list);
7747
7748         return err;
7749 }
7750
7751 /**
7752  * err_pos - find the position of a string within a command for error careting
7753  * @cmd: The tracing command that caused the error
7754  * @str: The string to position the caret at within @cmd
7755  *
7756  * Finds the position of the first occurrence of @str within @cmd.  The
7757  * return value can be passed to tracing_log_err() for caret placement
7758  * within @cmd.
7759  *
7760  * Returns the index within @cmd of the first occurrence of @str or 0
7761  * if @str was not found.
7762  */
7763 unsigned int err_pos(char *cmd, const char *str)
7764 {
7765         char *found;
7766
7767         if (WARN_ON(!strlen(cmd)))
7768                 return 0;
7769
7770         found = strstr(cmd, str);
7771         if (found)
7772                 return found - cmd;
7773
7774         return 0;
7775 }
7776
7777 /**
7778  * tracing_log_err - write an error to the tracing error log
7779  * @tr: The associated trace array for the error (NULL for top level array)
7780  * @loc: A string describing where the error occurred
7781  * @cmd: The tracing command that caused the error
7782  * @errs: The array of loc-specific static error strings
7783  * @type: The index into errs[], which produces the specific static err string
7784  * @pos: The position the caret should be placed in the cmd
7785  *
7786  * Writes an error into tracing/error_log of the form:
7787  *
7788  * <loc>: error: <text>
7789  *   Command: <cmd>
7790  *              ^
7791  *
7792  * tracing/error_log is a small log file containing the last
7793  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7794  * unless there has been a tracing error, and the error log can be
7795  * cleared and have its memory freed by writing the empty string in
7796  * truncation mode to it i.e. echo > tracing/error_log.
7797  *
7798  * NOTE: the @errs array along with the @type param are used to
7799  * produce a static error string - this string is not copied and saved
7800  * when the error is logged - only a pointer to it is saved.  See
7801  * existing callers for examples of how static strings are typically
7802  * defined for use with tracing_log_err().
7803  */
7804 void tracing_log_err(struct trace_array *tr,
7805                      const char *loc, const char *cmd,
7806                      const char **errs, u8 type, u8 pos)
7807 {
7808         struct tracing_log_err *err;
7809
7810         if (!tr)
7811                 tr = &global_trace;
7812
7813         mutex_lock(&tracing_err_log_lock);
7814         err = get_tracing_log_err(tr);
7815         if (PTR_ERR(err) == -ENOMEM) {
7816                 mutex_unlock(&tracing_err_log_lock);
7817                 return;
7818         }
7819
7820         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7821         snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7822
7823         err->info.errs = errs;
7824         err->info.type = type;
7825         err->info.pos = pos;
7826         err->info.ts = local_clock();
7827
7828         list_add_tail(&err->list, &tr->err_log);
7829         mutex_unlock(&tracing_err_log_lock);
7830 }
7831
7832 static void clear_tracing_err_log(struct trace_array *tr)
7833 {
7834         struct tracing_log_err *err, *next;
7835
7836         mutex_lock(&tracing_err_log_lock);
7837         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7838                 list_del(&err->list);
7839                 kfree(err);
7840         }
7841
7842         tr->n_err_log_entries = 0;
7843         mutex_unlock(&tracing_err_log_lock);
7844 }
7845
7846 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7847 {
7848         struct trace_array *tr = m->private;
7849
7850         mutex_lock(&tracing_err_log_lock);
7851
7852         return seq_list_start(&tr->err_log, *pos);
7853 }
7854
7855 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7856 {
7857         struct trace_array *tr = m->private;
7858
7859         return seq_list_next(v, &tr->err_log, pos);
7860 }
7861
7862 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7863 {
7864         mutex_unlock(&tracing_err_log_lock);
7865 }
7866
7867 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7868 {
7869         u8 i;
7870
7871         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7872                 seq_putc(m, ' ');
7873         for (i = 0; i < pos; i++)
7874                 seq_putc(m, ' ');
7875         seq_puts(m, "^\n");
7876 }
7877
7878 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7879 {
7880         struct tracing_log_err *err = v;
7881
7882         if (err) {
7883                 const char *err_text = err->info.errs[err->info.type];
7884                 u64 sec = err->info.ts;
7885                 u32 nsec;
7886
7887                 nsec = do_div(sec, NSEC_PER_SEC);
7888                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7889                            err->loc, err_text);
7890                 seq_printf(m, "%s", err->cmd);
7891                 tracing_err_log_show_pos(m, err->info.pos);
7892         }
7893
7894         return 0;
7895 }
7896
7897 static const struct seq_operations tracing_err_log_seq_ops = {
7898         .start  = tracing_err_log_seq_start,
7899         .next   = tracing_err_log_seq_next,
7900         .stop   = tracing_err_log_seq_stop,
7901         .show   = tracing_err_log_seq_show
7902 };
7903
7904 static int tracing_err_log_open(struct inode *inode, struct file *file)
7905 {
7906         struct trace_array *tr = inode->i_private;
7907         int ret = 0;
7908
7909         ret = tracing_check_open_get_tr(tr);
7910         if (ret)
7911                 return ret;
7912
7913         /* If this file was opened for write, then erase contents */
7914         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7915                 clear_tracing_err_log(tr);
7916
7917         if (file->f_mode & FMODE_READ) {
7918                 ret = seq_open(file, &tracing_err_log_seq_ops);
7919                 if (!ret) {
7920                         struct seq_file *m = file->private_data;
7921                         m->private = tr;
7922                 } else {
7923                         trace_array_put(tr);
7924                 }
7925         }
7926         return ret;
7927 }
7928
7929 static ssize_t tracing_err_log_write(struct file *file,
7930                                      const char __user *buffer,
7931                                      size_t count, loff_t *ppos)
7932 {
7933         return count;
7934 }
7935
7936 static int tracing_err_log_release(struct inode *inode, struct file *file)
7937 {
7938         struct trace_array *tr = inode->i_private;
7939
7940         trace_array_put(tr);
7941
7942         if (file->f_mode & FMODE_READ)
7943                 seq_release(inode, file);
7944
7945         return 0;
7946 }
7947
7948 static const struct file_operations tracing_err_log_fops = {
7949         .open           = tracing_err_log_open,
7950         .write          = tracing_err_log_write,
7951         .read           = seq_read,
7952         .llseek         = seq_lseek,
7953         .release        = tracing_err_log_release,
7954 };
7955
7956 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7957 {
7958         struct trace_array *tr = inode->i_private;
7959         struct ftrace_buffer_info *info;
7960         int ret;
7961
7962         ret = tracing_check_open_get_tr(tr);
7963         if (ret)
7964                 return ret;
7965
7966         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7967         if (!info) {
7968                 trace_array_put(tr);
7969                 return -ENOMEM;
7970         }
7971
7972         mutex_lock(&trace_types_lock);
7973
7974         info->iter.tr           = tr;
7975         info->iter.cpu_file     = tracing_get_cpu(inode);
7976         info->iter.trace        = tr->current_trace;
7977         info->iter.array_buffer = &tr->array_buffer;
7978         info->spare             = NULL;
7979         /* Force reading ring buffer for first read */
7980         info->read              = (unsigned int)-1;
7981
7982         filp->private_data = info;
7983
7984         tr->trace_ref++;
7985
7986         mutex_unlock(&trace_types_lock);
7987
7988         ret = nonseekable_open(inode, filp);
7989         if (ret < 0)
7990                 trace_array_put(tr);
7991
7992         return ret;
7993 }
7994
7995 static __poll_t
7996 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7997 {
7998         struct ftrace_buffer_info *info = filp->private_data;
7999         struct trace_iterator *iter = &info->iter;
8000
8001         return trace_poll(iter, filp, poll_table);
8002 }
8003
8004 static ssize_t
8005 tracing_buffers_read(struct file *filp, char __user *ubuf,
8006                      size_t count, loff_t *ppos)
8007 {
8008         struct ftrace_buffer_info *info = filp->private_data;
8009         struct trace_iterator *iter = &info->iter;
8010         ssize_t ret = 0;
8011         ssize_t size;
8012
8013         if (!count)
8014                 return 0;
8015
8016 #ifdef CONFIG_TRACER_MAX_TRACE
8017         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8018                 return -EBUSY;
8019 #endif
8020
8021         if (!info->spare) {
8022                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8023                                                           iter->cpu_file);
8024                 if (IS_ERR(info->spare)) {
8025                         ret = PTR_ERR(info->spare);
8026                         info->spare = NULL;
8027                 } else {
8028                         info->spare_cpu = iter->cpu_file;
8029                 }
8030         }
8031         if (!info->spare)
8032                 return ret;
8033
8034         /* Do we have previous read data to read? */
8035         if (info->read < PAGE_SIZE)
8036                 goto read;
8037
8038  again:
8039         trace_access_lock(iter->cpu_file);
8040         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8041                                     &info->spare,
8042                                     count,
8043                                     iter->cpu_file, 0);
8044         trace_access_unlock(iter->cpu_file);
8045
8046         if (ret < 0) {
8047                 if (trace_empty(iter)) {
8048                         if ((filp->f_flags & O_NONBLOCK))
8049                                 return -EAGAIN;
8050
8051                         ret = wait_on_pipe(iter, 0);
8052                         if (ret)
8053                                 return ret;
8054
8055                         goto again;
8056                 }
8057                 return 0;
8058         }
8059
8060         info->read = 0;
8061  read:
8062         size = PAGE_SIZE - info->read;
8063         if (size > count)
8064                 size = count;
8065
8066         ret = copy_to_user(ubuf, info->spare + info->read, size);
8067         if (ret == size)
8068                 return -EFAULT;
8069
8070         size -= ret;
8071
8072         *ppos += size;
8073         info->read += size;
8074
8075         return size;
8076 }
8077
8078 static int tracing_buffers_release(struct inode *inode, struct file *file)
8079 {
8080         struct ftrace_buffer_info *info = file->private_data;
8081         struct trace_iterator *iter = &info->iter;
8082
8083         mutex_lock(&trace_types_lock);
8084
8085         iter->tr->trace_ref--;
8086
8087         __trace_array_put(iter->tr);
8088
8089         if (info->spare)
8090                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8091                                            info->spare_cpu, info->spare);
8092         kvfree(info);
8093
8094         mutex_unlock(&trace_types_lock);
8095
8096         return 0;
8097 }
8098
8099 struct buffer_ref {
8100         struct trace_buffer     *buffer;
8101         void                    *page;
8102         int                     cpu;
8103         refcount_t              refcount;
8104 };
8105
8106 static void buffer_ref_release(struct buffer_ref *ref)
8107 {
8108         if (!refcount_dec_and_test(&ref->refcount))
8109                 return;
8110         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8111         kfree(ref);
8112 }
8113
8114 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8115                                     struct pipe_buffer *buf)
8116 {
8117         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8118
8119         buffer_ref_release(ref);
8120         buf->private = 0;
8121 }
8122
8123 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8124                                 struct pipe_buffer *buf)
8125 {
8126         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8127
8128         if (refcount_read(&ref->refcount) > INT_MAX/2)
8129                 return false;
8130
8131         refcount_inc(&ref->refcount);
8132         return true;
8133 }
8134
8135 /* Pipe buffer operations for a buffer. */
8136 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8137         .release                = buffer_pipe_buf_release,
8138         .get                    = buffer_pipe_buf_get,
8139 };
8140
8141 /*
8142  * Callback from splice_to_pipe(), if we need to release some pages
8143  * at the end of the spd in case we error'ed out in filling the pipe.
8144  */
8145 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8146 {
8147         struct buffer_ref *ref =
8148                 (struct buffer_ref *)spd->partial[i].private;
8149
8150         buffer_ref_release(ref);
8151         spd->partial[i].private = 0;
8152 }
8153
8154 static ssize_t
8155 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8156                             struct pipe_inode_info *pipe, size_t len,
8157                             unsigned int flags)
8158 {
8159         struct ftrace_buffer_info *info = file->private_data;
8160         struct trace_iterator *iter = &info->iter;
8161         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8162         struct page *pages_def[PIPE_DEF_BUFFERS];
8163         struct splice_pipe_desc spd = {
8164                 .pages          = pages_def,
8165                 .partial        = partial_def,
8166                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8167                 .ops            = &buffer_pipe_buf_ops,
8168                 .spd_release    = buffer_spd_release,
8169         };
8170         struct buffer_ref *ref;
8171         int entries, i;
8172         ssize_t ret = 0;
8173
8174 #ifdef CONFIG_TRACER_MAX_TRACE
8175         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8176                 return -EBUSY;
8177 #endif
8178
8179         if (*ppos & (PAGE_SIZE - 1))
8180                 return -EINVAL;
8181
8182         if (len & (PAGE_SIZE - 1)) {
8183                 if (len < PAGE_SIZE)
8184                         return -EINVAL;
8185                 len &= PAGE_MASK;
8186         }
8187
8188         if (splice_grow_spd(pipe, &spd))
8189                 return -ENOMEM;
8190
8191  again:
8192         trace_access_lock(iter->cpu_file);
8193         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8194
8195         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8196                 struct page *page;
8197                 int r;
8198
8199                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8200                 if (!ref) {
8201                         ret = -ENOMEM;
8202                         break;
8203                 }
8204
8205                 refcount_set(&ref->refcount, 1);
8206                 ref->buffer = iter->array_buffer->buffer;
8207                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8208                 if (IS_ERR(ref->page)) {
8209                         ret = PTR_ERR(ref->page);
8210                         ref->page = NULL;
8211                         kfree(ref);
8212                         break;
8213                 }
8214                 ref->cpu = iter->cpu_file;
8215
8216                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8217                                           len, iter->cpu_file, 1);
8218                 if (r < 0) {
8219                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8220                                                    ref->page);
8221                         kfree(ref);
8222                         break;
8223                 }
8224
8225                 page = virt_to_page(ref->page);
8226
8227                 spd.pages[i] = page;
8228                 spd.partial[i].len = PAGE_SIZE;
8229                 spd.partial[i].offset = 0;
8230                 spd.partial[i].private = (unsigned long)ref;
8231                 spd.nr_pages++;
8232                 *ppos += PAGE_SIZE;
8233
8234                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8235         }
8236
8237         trace_access_unlock(iter->cpu_file);
8238         spd.nr_pages = i;
8239
8240         /* did we read anything? */
8241         if (!spd.nr_pages) {
8242                 if (ret)
8243                         goto out;
8244
8245                 ret = -EAGAIN;
8246                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8247                         goto out;
8248
8249                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8250                 if (ret)
8251                         goto out;
8252
8253                 goto again;
8254         }
8255
8256         ret = splice_to_pipe(pipe, &spd);
8257 out:
8258         splice_shrink_spd(&spd);
8259
8260         return ret;
8261 }
8262
8263 static const struct file_operations tracing_buffers_fops = {
8264         .open           = tracing_buffers_open,
8265         .read           = tracing_buffers_read,
8266         .poll           = tracing_buffers_poll,
8267         .release        = tracing_buffers_release,
8268         .splice_read    = tracing_buffers_splice_read,
8269         .llseek         = no_llseek,
8270 };
8271
8272 static ssize_t
8273 tracing_stats_read(struct file *filp, char __user *ubuf,
8274                    size_t count, loff_t *ppos)
8275 {
8276         struct inode *inode = file_inode(filp);
8277         struct trace_array *tr = inode->i_private;
8278         struct array_buffer *trace_buf = &tr->array_buffer;
8279         int cpu = tracing_get_cpu(inode);
8280         struct trace_seq *s;
8281         unsigned long cnt;
8282         unsigned long long t;
8283         unsigned long usec_rem;
8284
8285         s = kmalloc(sizeof(*s), GFP_KERNEL);
8286         if (!s)
8287                 return -ENOMEM;
8288
8289         trace_seq_init(s);
8290
8291         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8292         trace_seq_printf(s, "entries: %ld\n", cnt);
8293
8294         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8295         trace_seq_printf(s, "overrun: %ld\n", cnt);
8296
8297         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8298         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8299
8300         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8301         trace_seq_printf(s, "bytes: %ld\n", cnt);
8302
8303         if (trace_clocks[tr->clock_id].in_ns) {
8304                 /* local or global for trace_clock */
8305                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8306                 usec_rem = do_div(t, USEC_PER_SEC);
8307                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8308                                                                 t, usec_rem);
8309
8310                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8311                 usec_rem = do_div(t, USEC_PER_SEC);
8312                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8313         } else {
8314                 /* counter or tsc mode for trace_clock */
8315                 trace_seq_printf(s, "oldest event ts: %llu\n",
8316                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8317
8318                 trace_seq_printf(s, "now ts: %llu\n",
8319                                 ring_buffer_time_stamp(trace_buf->buffer));
8320         }
8321
8322         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8323         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8324
8325         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8326         trace_seq_printf(s, "read events: %ld\n", cnt);
8327
8328         count = simple_read_from_buffer(ubuf, count, ppos,
8329                                         s->buffer, trace_seq_used(s));
8330
8331         kfree(s);
8332
8333         return count;
8334 }
8335
8336 static const struct file_operations tracing_stats_fops = {
8337         .open           = tracing_open_generic_tr,
8338         .read           = tracing_stats_read,
8339         .llseek         = generic_file_llseek,
8340         .release        = tracing_release_generic_tr,
8341 };
8342
8343 #ifdef CONFIG_DYNAMIC_FTRACE
8344
8345 static ssize_t
8346 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8347                   size_t cnt, loff_t *ppos)
8348 {
8349         ssize_t ret;
8350         char *buf;
8351         int r;
8352
8353         /* 256 should be plenty to hold the amount needed */
8354         buf = kmalloc(256, GFP_KERNEL);
8355         if (!buf)
8356                 return -ENOMEM;
8357
8358         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8359                       ftrace_update_tot_cnt,
8360                       ftrace_number_of_pages,
8361                       ftrace_number_of_groups);
8362
8363         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8364         kfree(buf);
8365         return ret;
8366 }
8367
8368 static const struct file_operations tracing_dyn_info_fops = {
8369         .open           = tracing_open_generic,
8370         .read           = tracing_read_dyn_info,
8371         .llseek         = generic_file_llseek,
8372 };
8373 #endif /* CONFIG_DYNAMIC_FTRACE */
8374
8375 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8376 static void
8377 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8378                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8379                 void *data)
8380 {
8381         tracing_snapshot_instance(tr);
8382 }
8383
8384 static void
8385 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8386                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8387                       void *data)
8388 {
8389         struct ftrace_func_mapper *mapper = data;
8390         long *count = NULL;
8391
8392         if (mapper)
8393                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8394
8395         if (count) {
8396
8397                 if (*count <= 0)
8398                         return;
8399
8400                 (*count)--;
8401         }
8402
8403         tracing_snapshot_instance(tr);
8404 }
8405
8406 static int
8407 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8408                       struct ftrace_probe_ops *ops, void *data)
8409 {
8410         struct ftrace_func_mapper *mapper = data;
8411         long *count = NULL;
8412
8413         seq_printf(m, "%ps:", (void *)ip);
8414
8415         seq_puts(m, "snapshot");
8416
8417         if (mapper)
8418                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8419
8420         if (count)
8421                 seq_printf(m, ":count=%ld\n", *count);
8422         else
8423                 seq_puts(m, ":unlimited\n");
8424
8425         return 0;
8426 }
8427
8428 static int
8429 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8430                      unsigned long ip, void *init_data, void **data)
8431 {
8432         struct ftrace_func_mapper *mapper = *data;
8433
8434         if (!mapper) {
8435                 mapper = allocate_ftrace_func_mapper();
8436                 if (!mapper)
8437                         return -ENOMEM;
8438                 *data = mapper;
8439         }
8440
8441         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8442 }
8443
8444 static void
8445 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8446                      unsigned long ip, void *data)
8447 {
8448         struct ftrace_func_mapper *mapper = data;
8449
8450         if (!ip) {
8451                 if (!mapper)
8452                         return;
8453                 free_ftrace_func_mapper(mapper, NULL);
8454                 return;
8455         }
8456
8457         ftrace_func_mapper_remove_ip(mapper, ip);
8458 }
8459
8460 static struct ftrace_probe_ops snapshot_probe_ops = {
8461         .func                   = ftrace_snapshot,
8462         .print                  = ftrace_snapshot_print,
8463 };
8464
8465 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8466         .func                   = ftrace_count_snapshot,
8467         .print                  = ftrace_snapshot_print,
8468         .init                   = ftrace_snapshot_init,
8469         .free                   = ftrace_snapshot_free,
8470 };
8471
8472 static int
8473 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8474                                char *glob, char *cmd, char *param, int enable)
8475 {
8476         struct ftrace_probe_ops *ops;
8477         void *count = (void *)-1;
8478         char *number;
8479         int ret;
8480
8481         if (!tr)
8482                 return -ENODEV;
8483
8484         /* hash funcs only work with set_ftrace_filter */
8485         if (!enable)
8486                 return -EINVAL;
8487
8488         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8489
8490         if (glob[0] == '!')
8491                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8492
8493         if (!param)
8494                 goto out_reg;
8495
8496         number = strsep(&param, ":");
8497
8498         if (!strlen(number))
8499                 goto out_reg;
8500
8501         /*
8502          * We use the callback data field (which is a pointer)
8503          * as our counter.
8504          */
8505         ret = kstrtoul(number, 0, (unsigned long *)&count);
8506         if (ret)
8507                 return ret;
8508
8509  out_reg:
8510         ret = tracing_alloc_snapshot_instance(tr);
8511         if (ret < 0)
8512                 goto out;
8513
8514         ret = register_ftrace_function_probe(glob, tr, ops, count);
8515
8516  out:
8517         return ret < 0 ? ret : 0;
8518 }
8519
8520 static struct ftrace_func_command ftrace_snapshot_cmd = {
8521         .name                   = "snapshot",
8522         .func                   = ftrace_trace_snapshot_callback,
8523 };
8524
8525 static __init int register_snapshot_cmd(void)
8526 {
8527         return register_ftrace_command(&ftrace_snapshot_cmd);
8528 }
8529 #else
8530 static inline __init int register_snapshot_cmd(void) { return 0; }
8531 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8532
8533 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8534 {
8535         if (WARN_ON(!tr->dir))
8536                 return ERR_PTR(-ENODEV);
8537
8538         /* Top directory uses NULL as the parent */
8539         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8540                 return NULL;
8541
8542         /* All sub buffers have a descriptor */
8543         return tr->dir;
8544 }
8545
8546 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8547 {
8548         struct dentry *d_tracer;
8549
8550         if (tr->percpu_dir)
8551                 return tr->percpu_dir;
8552
8553         d_tracer = tracing_get_dentry(tr);
8554         if (IS_ERR(d_tracer))
8555                 return NULL;
8556
8557         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8558
8559         MEM_FAIL(!tr->percpu_dir,
8560                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8561
8562         return tr->percpu_dir;
8563 }
8564
8565 static struct dentry *
8566 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8567                       void *data, long cpu, const struct file_operations *fops)
8568 {
8569         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8570
8571         if (ret) /* See tracing_get_cpu() */
8572                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8573         return ret;
8574 }
8575
8576 static void
8577 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8578 {
8579         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8580         struct dentry *d_cpu;
8581         char cpu_dir[30]; /* 30 characters should be more than enough */
8582
8583         if (!d_percpu)
8584                 return;
8585
8586         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8587         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8588         if (!d_cpu) {
8589                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8590                 return;
8591         }
8592
8593         /* per cpu trace_pipe */
8594         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8595                                 tr, cpu, &tracing_pipe_fops);
8596
8597         /* per cpu trace */
8598         trace_create_cpu_file("trace", 0644, d_cpu,
8599                                 tr, cpu, &tracing_fops);
8600
8601         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8602                                 tr, cpu, &tracing_buffers_fops);
8603
8604         trace_create_cpu_file("stats", 0444, d_cpu,
8605                                 tr, cpu, &tracing_stats_fops);
8606
8607         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8608                                 tr, cpu, &tracing_entries_fops);
8609
8610 #ifdef CONFIG_TRACER_SNAPSHOT
8611         trace_create_cpu_file("snapshot", 0644, d_cpu,
8612                                 tr, cpu, &snapshot_fops);
8613
8614         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8615                                 tr, cpu, &snapshot_raw_fops);
8616 #endif
8617 }
8618
8619 #ifdef CONFIG_FTRACE_SELFTEST
8620 /* Let selftest have access to static functions in this file */
8621 #include "trace_selftest.c"
8622 #endif
8623
8624 static ssize_t
8625 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8626                         loff_t *ppos)
8627 {
8628         struct trace_option_dentry *topt = filp->private_data;
8629         char *buf;
8630
8631         if (topt->flags->val & topt->opt->bit)
8632                 buf = "1\n";
8633         else
8634                 buf = "0\n";
8635
8636         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8637 }
8638
8639 static ssize_t
8640 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8641                          loff_t *ppos)
8642 {
8643         struct trace_option_dentry *topt = filp->private_data;
8644         unsigned long val;
8645         int ret;
8646
8647         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8648         if (ret)
8649                 return ret;
8650
8651         if (val != 0 && val != 1)
8652                 return -EINVAL;
8653
8654         if (!!(topt->flags->val & topt->opt->bit) != val) {
8655                 mutex_lock(&trace_types_lock);
8656                 ret = __set_tracer_option(topt->tr, topt->flags,
8657                                           topt->opt, !val);
8658                 mutex_unlock(&trace_types_lock);
8659                 if (ret)
8660                         return ret;
8661         }
8662
8663         *ppos += cnt;
8664
8665         return cnt;
8666 }
8667
8668
8669 static const struct file_operations trace_options_fops = {
8670         .open = tracing_open_generic,
8671         .read = trace_options_read,
8672         .write = trace_options_write,
8673         .llseek = generic_file_llseek,
8674 };
8675
8676 /*
8677  * In order to pass in both the trace_array descriptor as well as the index
8678  * to the flag that the trace option file represents, the trace_array
8679  * has a character array of trace_flags_index[], which holds the index
8680  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8681  * The address of this character array is passed to the flag option file
8682  * read/write callbacks.
8683  *
8684  * In order to extract both the index and the trace_array descriptor,
8685  * get_tr_index() uses the following algorithm.
8686  *
8687  *   idx = *ptr;
8688  *
8689  * As the pointer itself contains the address of the index (remember
8690  * index[1] == 1).
8691  *
8692  * Then to get the trace_array descriptor, by subtracting that index
8693  * from the ptr, we get to the start of the index itself.
8694  *
8695  *   ptr - idx == &index[0]
8696  *
8697  * Then a simple container_of() from that pointer gets us to the
8698  * trace_array descriptor.
8699  */
8700 static void get_tr_index(void *data, struct trace_array **ptr,
8701                          unsigned int *pindex)
8702 {
8703         *pindex = *(unsigned char *)data;
8704
8705         *ptr = container_of(data - *pindex, struct trace_array,
8706                             trace_flags_index);
8707 }
8708
8709 static ssize_t
8710 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8711                         loff_t *ppos)
8712 {
8713         void *tr_index = filp->private_data;
8714         struct trace_array *tr;
8715         unsigned int index;
8716         char *buf;
8717
8718         get_tr_index(tr_index, &tr, &index);
8719
8720         if (tr->trace_flags & (1 << index))
8721                 buf = "1\n";
8722         else
8723                 buf = "0\n";
8724
8725         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8726 }
8727
8728 static ssize_t
8729 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8730                          loff_t *ppos)
8731 {
8732         void *tr_index = filp->private_data;
8733         struct trace_array *tr;
8734         unsigned int index;
8735         unsigned long val;
8736         int ret;
8737
8738         get_tr_index(tr_index, &tr, &index);
8739
8740         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8741         if (ret)
8742                 return ret;
8743
8744         if (val != 0 && val != 1)
8745                 return -EINVAL;
8746
8747         mutex_lock(&event_mutex);
8748         mutex_lock(&trace_types_lock);
8749         ret = set_tracer_flag(tr, 1 << index, val);
8750         mutex_unlock(&trace_types_lock);
8751         mutex_unlock(&event_mutex);
8752
8753         if (ret < 0)
8754                 return ret;
8755
8756         *ppos += cnt;
8757
8758         return cnt;
8759 }
8760
8761 static const struct file_operations trace_options_core_fops = {
8762         .open = tracing_open_generic,
8763         .read = trace_options_core_read,
8764         .write = trace_options_core_write,
8765         .llseek = generic_file_llseek,
8766 };
8767
8768 struct dentry *trace_create_file(const char *name,
8769                                  umode_t mode,
8770                                  struct dentry *parent,
8771                                  void *data,
8772                                  const struct file_operations *fops)
8773 {
8774         struct dentry *ret;
8775
8776         ret = tracefs_create_file(name, mode, parent, data, fops);
8777         if (!ret)
8778                 pr_warn("Could not create tracefs '%s' entry\n", name);
8779
8780         return ret;
8781 }
8782
8783
8784 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8785 {
8786         struct dentry *d_tracer;
8787
8788         if (tr->options)
8789                 return tr->options;
8790
8791         d_tracer = tracing_get_dentry(tr);
8792         if (IS_ERR(d_tracer))
8793                 return NULL;
8794
8795         tr->options = tracefs_create_dir("options", d_tracer);
8796         if (!tr->options) {
8797                 pr_warn("Could not create tracefs directory 'options'\n");
8798                 return NULL;
8799         }
8800
8801         return tr->options;
8802 }
8803
8804 static void
8805 create_trace_option_file(struct trace_array *tr,
8806                          struct trace_option_dentry *topt,
8807                          struct tracer_flags *flags,
8808                          struct tracer_opt *opt)
8809 {
8810         struct dentry *t_options;
8811
8812         t_options = trace_options_init_dentry(tr);
8813         if (!t_options)
8814                 return;
8815
8816         topt->flags = flags;
8817         topt->opt = opt;
8818         topt->tr = tr;
8819
8820         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8821                                     &trace_options_fops);
8822
8823 }
8824
8825 static void
8826 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8827 {
8828         struct trace_option_dentry *topts;
8829         struct trace_options *tr_topts;
8830         struct tracer_flags *flags;
8831         struct tracer_opt *opts;
8832         int cnt;
8833         int i;
8834
8835         if (!tracer)
8836                 return;
8837
8838         flags = tracer->flags;
8839
8840         if (!flags || !flags->opts)
8841                 return;
8842
8843         /*
8844          * If this is an instance, only create flags for tracers
8845          * the instance may have.
8846          */
8847         if (!trace_ok_for_array(tracer, tr))
8848                 return;
8849
8850         for (i = 0; i < tr->nr_topts; i++) {
8851                 /* Make sure there's no duplicate flags. */
8852                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8853                         return;
8854         }
8855
8856         opts = flags->opts;
8857
8858         for (cnt = 0; opts[cnt].name; cnt++)
8859                 ;
8860
8861         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8862         if (!topts)
8863                 return;
8864
8865         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8866                             GFP_KERNEL);
8867         if (!tr_topts) {
8868                 kfree(topts);
8869                 return;
8870         }
8871
8872         tr->topts = tr_topts;
8873         tr->topts[tr->nr_topts].tracer = tracer;
8874         tr->topts[tr->nr_topts].topts = topts;
8875         tr->nr_topts++;
8876
8877         for (cnt = 0; opts[cnt].name; cnt++) {
8878                 create_trace_option_file(tr, &topts[cnt], flags,
8879                                          &opts[cnt]);
8880                 MEM_FAIL(topts[cnt].entry == NULL,
8881                           "Failed to create trace option: %s",
8882                           opts[cnt].name);
8883         }
8884 }
8885
8886 static struct dentry *
8887 create_trace_option_core_file(struct trace_array *tr,
8888                               const char *option, long index)
8889 {
8890         struct dentry *t_options;
8891
8892         t_options = trace_options_init_dentry(tr);
8893         if (!t_options)
8894                 return NULL;
8895
8896         return trace_create_file(option, 0644, t_options,
8897                                  (void *)&tr->trace_flags_index[index],
8898                                  &trace_options_core_fops);
8899 }
8900
8901 static void create_trace_options_dir(struct trace_array *tr)
8902 {
8903         struct dentry *t_options;
8904         bool top_level = tr == &global_trace;
8905         int i;
8906
8907         t_options = trace_options_init_dentry(tr);
8908         if (!t_options)
8909                 return;
8910
8911         for (i = 0; trace_options[i]; i++) {
8912                 if (top_level ||
8913                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8914                         create_trace_option_core_file(tr, trace_options[i], i);
8915         }
8916 }
8917
8918 static ssize_t
8919 rb_simple_read(struct file *filp, char __user *ubuf,
8920                size_t cnt, loff_t *ppos)
8921 {
8922         struct trace_array *tr = filp->private_data;
8923         char buf[64];
8924         int r;
8925
8926         r = tracer_tracing_is_on(tr);
8927         r = sprintf(buf, "%d\n", r);
8928
8929         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8930 }
8931
8932 static ssize_t
8933 rb_simple_write(struct file *filp, const char __user *ubuf,
8934                 size_t cnt, loff_t *ppos)
8935 {
8936         struct trace_array *tr = filp->private_data;
8937         struct trace_buffer *buffer = tr->array_buffer.buffer;
8938         unsigned long val;
8939         int ret;
8940
8941         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8942         if (ret)
8943                 return ret;
8944
8945         if (buffer) {
8946                 mutex_lock(&trace_types_lock);
8947                 if (!!val == tracer_tracing_is_on(tr)) {
8948                         val = 0; /* do nothing */
8949                 } else if (val) {
8950                         tracer_tracing_on(tr);
8951                         if (tr->current_trace->start)
8952                                 tr->current_trace->start(tr);
8953                 } else {
8954                         tracer_tracing_off(tr);
8955                         if (tr->current_trace->stop)
8956                                 tr->current_trace->stop(tr);
8957                 }
8958                 mutex_unlock(&trace_types_lock);
8959         }
8960
8961         (*ppos)++;
8962
8963         return cnt;
8964 }
8965
8966 static const struct file_operations rb_simple_fops = {
8967         .open           = tracing_open_generic_tr,
8968         .read           = rb_simple_read,
8969         .write          = rb_simple_write,
8970         .release        = tracing_release_generic_tr,
8971         .llseek         = default_llseek,
8972 };
8973
8974 static ssize_t
8975 buffer_percent_read(struct file *filp, char __user *ubuf,
8976                     size_t cnt, loff_t *ppos)
8977 {
8978         struct trace_array *tr = filp->private_data;
8979         char buf[64];
8980         int r;
8981
8982         r = tr->buffer_percent;
8983         r = sprintf(buf, "%d\n", r);
8984
8985         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8986 }
8987
8988 static ssize_t
8989 buffer_percent_write(struct file *filp, const char __user *ubuf,
8990                      size_t cnt, loff_t *ppos)
8991 {
8992         struct trace_array *tr = filp->private_data;
8993         unsigned long val;
8994         int ret;
8995
8996         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8997         if (ret)
8998                 return ret;
8999
9000         if (val > 100)
9001                 return -EINVAL;
9002
9003         if (!val)
9004                 val = 1;
9005
9006         tr->buffer_percent = val;
9007
9008         (*ppos)++;
9009
9010         return cnt;
9011 }
9012
9013 static const struct file_operations buffer_percent_fops = {
9014         .open           = tracing_open_generic_tr,
9015         .read           = buffer_percent_read,
9016         .write          = buffer_percent_write,
9017         .release        = tracing_release_generic_tr,
9018         .llseek         = default_llseek,
9019 };
9020
9021 static struct dentry *trace_instance_dir;
9022
9023 static void
9024 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9025
9026 static int
9027 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9028 {
9029         enum ring_buffer_flags rb_flags;
9030
9031         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9032
9033         buf->tr = tr;
9034
9035         buf->buffer = ring_buffer_alloc(size, rb_flags);
9036         if (!buf->buffer)
9037                 return -ENOMEM;
9038
9039         buf->data = alloc_percpu(struct trace_array_cpu);
9040         if (!buf->data) {
9041                 ring_buffer_free(buf->buffer);
9042                 buf->buffer = NULL;
9043                 return -ENOMEM;
9044         }
9045
9046         /* Allocate the first page for all buffers */
9047         set_buffer_entries(&tr->array_buffer,
9048                            ring_buffer_size(tr->array_buffer.buffer, 0));
9049
9050         return 0;
9051 }
9052
9053 static int allocate_trace_buffers(struct trace_array *tr, int size)
9054 {
9055         int ret;
9056
9057         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9058         if (ret)
9059                 return ret;
9060
9061 #ifdef CONFIG_TRACER_MAX_TRACE
9062         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9063                                     allocate_snapshot ? size : 1);
9064         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9065                 ring_buffer_free(tr->array_buffer.buffer);
9066                 tr->array_buffer.buffer = NULL;
9067                 free_percpu(tr->array_buffer.data);
9068                 tr->array_buffer.data = NULL;
9069                 return -ENOMEM;
9070         }
9071         tr->allocated_snapshot = allocate_snapshot;
9072
9073         /*
9074          * Only the top level trace array gets its snapshot allocated
9075          * from the kernel command line.
9076          */
9077         allocate_snapshot = false;
9078 #endif
9079
9080         return 0;
9081 }
9082
9083 static void free_trace_buffer(struct array_buffer *buf)
9084 {
9085         if (buf->buffer) {
9086                 ring_buffer_free(buf->buffer);
9087                 buf->buffer = NULL;
9088                 free_percpu(buf->data);
9089                 buf->data = NULL;
9090         }
9091 }
9092
9093 static void free_trace_buffers(struct trace_array *tr)
9094 {
9095         if (!tr)
9096                 return;
9097
9098         free_trace_buffer(&tr->array_buffer);
9099
9100 #ifdef CONFIG_TRACER_MAX_TRACE
9101         free_trace_buffer(&tr->max_buffer);
9102 #endif
9103 }
9104
9105 static void init_trace_flags_index(struct trace_array *tr)
9106 {
9107         int i;
9108
9109         /* Used by the trace options files */
9110         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9111                 tr->trace_flags_index[i] = i;
9112 }
9113
9114 static void __update_tracer_options(struct trace_array *tr)
9115 {
9116         struct tracer *t;
9117
9118         for (t = trace_types; t; t = t->next)
9119                 add_tracer_options(tr, t);
9120 }
9121
9122 static void update_tracer_options(struct trace_array *tr)
9123 {
9124         mutex_lock(&trace_types_lock);
9125         __update_tracer_options(tr);
9126         mutex_unlock(&trace_types_lock);
9127 }
9128
9129 /* Must have trace_types_lock held */
9130 struct trace_array *trace_array_find(const char *instance)
9131 {
9132         struct trace_array *tr, *found = NULL;
9133
9134         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9135                 if (tr->name && strcmp(tr->name, instance) == 0) {
9136                         found = tr;
9137                         break;
9138                 }
9139         }
9140
9141         return found;
9142 }
9143
9144 struct trace_array *trace_array_find_get(const char *instance)
9145 {
9146         struct trace_array *tr;
9147
9148         mutex_lock(&trace_types_lock);
9149         tr = trace_array_find(instance);
9150         if (tr)
9151                 tr->ref++;
9152         mutex_unlock(&trace_types_lock);
9153
9154         return tr;
9155 }
9156
9157 static int trace_array_create_dir(struct trace_array *tr)
9158 {
9159         int ret;
9160
9161         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9162         if (!tr->dir)
9163                 return -EINVAL;
9164
9165         ret = event_trace_add_tracer(tr->dir, tr);
9166         if (ret) {
9167                 tracefs_remove(tr->dir);
9168                 return ret;
9169         }
9170
9171         init_tracer_tracefs(tr, tr->dir);
9172         __update_tracer_options(tr);
9173
9174         return ret;
9175 }
9176
9177 static struct trace_array *trace_array_create(const char *name)
9178 {
9179         struct trace_array *tr;
9180         int ret;
9181
9182         ret = -ENOMEM;
9183         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9184         if (!tr)
9185                 return ERR_PTR(ret);
9186
9187         tr->name = kstrdup(name, GFP_KERNEL);
9188         if (!tr->name)
9189                 goto out_free_tr;
9190
9191         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9192                 goto out_free_tr;
9193
9194         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9195
9196         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9197
9198         raw_spin_lock_init(&tr->start_lock);
9199
9200         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9201
9202         tr->current_trace = &nop_trace;
9203
9204         INIT_LIST_HEAD(&tr->systems);
9205         INIT_LIST_HEAD(&tr->events);
9206         INIT_LIST_HEAD(&tr->hist_vars);
9207         INIT_LIST_HEAD(&tr->err_log);
9208
9209         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9210                 goto out_free_tr;
9211
9212         if (ftrace_allocate_ftrace_ops(tr) < 0)
9213                 goto out_free_tr;
9214
9215         ftrace_init_trace_array(tr);
9216
9217         init_trace_flags_index(tr);
9218
9219         if (trace_instance_dir) {
9220                 ret = trace_array_create_dir(tr);
9221                 if (ret)
9222                         goto out_free_tr;
9223         } else
9224                 __trace_early_add_events(tr);
9225
9226         list_add(&tr->list, &ftrace_trace_arrays);
9227
9228         tr->ref++;
9229
9230         return tr;
9231
9232  out_free_tr:
9233         ftrace_free_ftrace_ops(tr);
9234         free_trace_buffers(tr);
9235         free_cpumask_var(tr->tracing_cpumask);
9236         kfree(tr->name);
9237         kfree(tr);
9238
9239         return ERR_PTR(ret);
9240 }
9241
9242 static int instance_mkdir(const char *name)
9243 {
9244         struct trace_array *tr;
9245         int ret;
9246
9247         mutex_lock(&event_mutex);
9248         mutex_lock(&trace_types_lock);
9249
9250         ret = -EEXIST;
9251         if (trace_array_find(name))
9252                 goto out_unlock;
9253
9254         tr = trace_array_create(name);
9255
9256         ret = PTR_ERR_OR_ZERO(tr);
9257
9258 out_unlock:
9259         mutex_unlock(&trace_types_lock);
9260         mutex_unlock(&event_mutex);
9261         return ret;
9262 }
9263
9264 /**
9265  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9266  * @name: The name of the trace array to be looked up/created.
9267  *
9268  * Returns pointer to trace array with given name.
9269  * NULL, if it cannot be created.
9270  *
9271  * NOTE: This function increments the reference counter associated with the
9272  * trace array returned. This makes sure it cannot be freed while in use.
9273  * Use trace_array_put() once the trace array is no longer needed.
9274  * If the trace_array is to be freed, trace_array_destroy() needs to
9275  * be called after the trace_array_put(), or simply let user space delete
9276  * it from the tracefs instances directory. But until the
9277  * trace_array_put() is called, user space can not delete it.
9278  *
9279  */
9280 struct trace_array *trace_array_get_by_name(const char *name)
9281 {
9282         struct trace_array *tr;
9283
9284         mutex_lock(&event_mutex);
9285         mutex_lock(&trace_types_lock);
9286
9287         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9288                 if (tr->name && strcmp(tr->name, name) == 0)
9289                         goto out_unlock;
9290         }
9291
9292         tr = trace_array_create(name);
9293
9294         if (IS_ERR(tr))
9295                 tr = NULL;
9296 out_unlock:
9297         if (tr)
9298                 tr->ref++;
9299
9300         mutex_unlock(&trace_types_lock);
9301         mutex_unlock(&event_mutex);
9302         return tr;
9303 }
9304 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9305
9306 static int __remove_instance(struct trace_array *tr)
9307 {
9308         int i;
9309
9310         /* Reference counter for a newly created trace array = 1. */
9311         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9312                 return -EBUSY;
9313
9314         list_del(&tr->list);
9315
9316         /* Disable all the flags that were enabled coming in */
9317         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9318                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9319                         set_tracer_flag(tr, 1 << i, 0);
9320         }
9321
9322         tracing_set_nop(tr);
9323         clear_ftrace_function_probes(tr);
9324         event_trace_del_tracer(tr);
9325         ftrace_clear_pids(tr);
9326         ftrace_destroy_function_files(tr);
9327         tracefs_remove(tr->dir);
9328         free_percpu(tr->last_func_repeats);
9329         free_trace_buffers(tr);
9330
9331         for (i = 0; i < tr->nr_topts; i++) {
9332                 kfree(tr->topts[i].topts);
9333         }
9334         kfree(tr->topts);
9335
9336         free_cpumask_var(tr->tracing_cpumask);
9337         kfree(tr->name);
9338         kfree(tr);
9339
9340         return 0;
9341 }
9342
9343 int trace_array_destroy(struct trace_array *this_tr)
9344 {
9345         struct trace_array *tr;
9346         int ret;
9347
9348         if (!this_tr)
9349                 return -EINVAL;
9350
9351         mutex_lock(&event_mutex);
9352         mutex_lock(&trace_types_lock);
9353
9354         ret = -ENODEV;
9355
9356         /* Making sure trace array exists before destroying it. */
9357         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9358                 if (tr == this_tr) {
9359                         ret = __remove_instance(tr);
9360                         break;
9361                 }
9362         }
9363
9364         mutex_unlock(&trace_types_lock);
9365         mutex_unlock(&event_mutex);
9366
9367         return ret;
9368 }
9369 EXPORT_SYMBOL_GPL(trace_array_destroy);
9370
9371 static int instance_rmdir(const char *name)
9372 {
9373         struct trace_array *tr;
9374         int ret;
9375
9376         mutex_lock(&event_mutex);
9377         mutex_lock(&trace_types_lock);
9378
9379         ret = -ENODEV;
9380         tr = trace_array_find(name);
9381         if (tr)
9382                 ret = __remove_instance(tr);
9383
9384         mutex_unlock(&trace_types_lock);
9385         mutex_unlock(&event_mutex);
9386
9387         return ret;
9388 }
9389
9390 static __init void create_trace_instances(struct dentry *d_tracer)
9391 {
9392         struct trace_array *tr;
9393
9394         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9395                                                          instance_mkdir,
9396                                                          instance_rmdir);
9397         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9398                 return;
9399
9400         mutex_lock(&event_mutex);
9401         mutex_lock(&trace_types_lock);
9402
9403         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9404                 if (!tr->name)
9405                         continue;
9406                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9407                              "Failed to create instance directory\n"))
9408                         break;
9409         }
9410
9411         mutex_unlock(&trace_types_lock);
9412         mutex_unlock(&event_mutex);
9413 }
9414
9415 static void
9416 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9417 {
9418         struct trace_event_file *file;
9419         int cpu;
9420
9421         trace_create_file("available_tracers", 0444, d_tracer,
9422                         tr, &show_traces_fops);
9423
9424         trace_create_file("current_tracer", 0644, d_tracer,
9425                         tr, &set_tracer_fops);
9426
9427         trace_create_file("tracing_cpumask", 0644, d_tracer,
9428                           tr, &tracing_cpumask_fops);
9429
9430         trace_create_file("trace_options", 0644, d_tracer,
9431                           tr, &tracing_iter_fops);
9432
9433         trace_create_file("trace", 0644, d_tracer,
9434                           tr, &tracing_fops);
9435
9436         trace_create_file("trace_pipe", 0444, d_tracer,
9437                           tr, &tracing_pipe_fops);
9438
9439         trace_create_file("buffer_size_kb", 0644, d_tracer,
9440                           tr, &tracing_entries_fops);
9441
9442         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9443                           tr, &tracing_total_entries_fops);
9444
9445         trace_create_file("free_buffer", 0200, d_tracer,
9446                           tr, &tracing_free_buffer_fops);
9447
9448         trace_create_file("trace_marker", 0220, d_tracer,
9449                           tr, &tracing_mark_fops);
9450
9451         file = __find_event_file(tr, "ftrace", "print");
9452         if (file && file->dir)
9453                 trace_create_file("trigger", 0644, file->dir, file,
9454                                   &event_trigger_fops);
9455         tr->trace_marker_file = file;
9456
9457         trace_create_file("trace_marker_raw", 0220, d_tracer,
9458                           tr, &tracing_mark_raw_fops);
9459
9460         trace_create_file("trace_clock", 0644, d_tracer, tr,
9461                           &trace_clock_fops);
9462
9463         trace_create_file("tracing_on", 0644, d_tracer,
9464                           tr, &rb_simple_fops);
9465
9466         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9467                           &trace_time_stamp_mode_fops);
9468
9469         tr->buffer_percent = 50;
9470
9471         trace_create_file("buffer_percent", 0444, d_tracer,
9472                         tr, &buffer_percent_fops);
9473
9474         create_trace_options_dir(tr);
9475
9476 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9477         trace_create_maxlat_file(tr, d_tracer);
9478 #endif
9479
9480         if (ftrace_create_function_files(tr, d_tracer))
9481                 MEM_FAIL(1, "Could not allocate function filter files");
9482
9483 #ifdef CONFIG_TRACER_SNAPSHOT
9484         trace_create_file("snapshot", 0644, d_tracer,
9485                           tr, &snapshot_fops);
9486 #endif
9487
9488         trace_create_file("error_log", 0644, d_tracer,
9489                           tr, &tracing_err_log_fops);
9490
9491         for_each_tracing_cpu(cpu)
9492                 tracing_init_tracefs_percpu(tr, cpu);
9493
9494         ftrace_init_tracefs(tr, d_tracer);
9495 }
9496
9497 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9498 {
9499         struct vfsmount *mnt;
9500         struct file_system_type *type;
9501
9502         /*
9503          * To maintain backward compatibility for tools that mount
9504          * debugfs to get to the tracing facility, tracefs is automatically
9505          * mounted to the debugfs/tracing directory.
9506          */
9507         type = get_fs_type("tracefs");
9508         if (!type)
9509                 return NULL;
9510         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9511         put_filesystem(type);
9512         if (IS_ERR(mnt))
9513                 return NULL;
9514         mntget(mnt);
9515
9516         return mnt;
9517 }
9518
9519 /**
9520  * tracing_init_dentry - initialize top level trace array
9521  *
9522  * This is called when creating files or directories in the tracing
9523  * directory. It is called via fs_initcall() by any of the boot up code
9524  * and expects to return the dentry of the top level tracing directory.
9525  */
9526 int tracing_init_dentry(void)
9527 {
9528         struct trace_array *tr = &global_trace;
9529
9530         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9531                 pr_warn("Tracing disabled due to lockdown\n");
9532                 return -EPERM;
9533         }
9534
9535         /* The top level trace array uses  NULL as parent */
9536         if (tr->dir)
9537                 return 0;
9538
9539         if (WARN_ON(!tracefs_initialized()))
9540                 return -ENODEV;
9541
9542         /*
9543          * As there may still be users that expect the tracing
9544          * files to exist in debugfs/tracing, we must automount
9545          * the tracefs file system there, so older tools still
9546          * work with the newer kernel.
9547          */
9548         tr->dir = debugfs_create_automount("tracing", NULL,
9549                                            trace_automount, NULL);
9550
9551         return 0;
9552 }
9553
9554 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9555 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9556
9557 static struct workqueue_struct *eval_map_wq __initdata;
9558 static struct work_struct eval_map_work __initdata;
9559
9560 static void __init eval_map_work_func(struct work_struct *work)
9561 {
9562         int len;
9563
9564         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9565         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9566 }
9567
9568 static int __init trace_eval_init(void)
9569 {
9570         INIT_WORK(&eval_map_work, eval_map_work_func);
9571
9572         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9573         if (!eval_map_wq) {
9574                 pr_err("Unable to allocate eval_map_wq\n");
9575                 /* Do work here */
9576                 eval_map_work_func(&eval_map_work);
9577                 return -ENOMEM;
9578         }
9579
9580         queue_work(eval_map_wq, &eval_map_work);
9581         return 0;
9582 }
9583
9584 static int __init trace_eval_sync(void)
9585 {
9586         /* Make sure the eval map updates are finished */
9587         if (eval_map_wq)
9588                 destroy_workqueue(eval_map_wq);
9589         return 0;
9590 }
9591
9592 late_initcall_sync(trace_eval_sync);
9593
9594
9595 #ifdef CONFIG_MODULES
9596 static void trace_module_add_evals(struct module *mod)
9597 {
9598         if (!mod->num_trace_evals)
9599                 return;
9600
9601         /*
9602          * Modules with bad taint do not have events created, do
9603          * not bother with enums either.
9604          */
9605         if (trace_module_has_bad_taint(mod))
9606                 return;
9607
9608         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9609 }
9610
9611 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9612 static void trace_module_remove_evals(struct module *mod)
9613 {
9614         union trace_eval_map_item *map;
9615         union trace_eval_map_item **last = &trace_eval_maps;
9616
9617         if (!mod->num_trace_evals)
9618                 return;
9619
9620         mutex_lock(&trace_eval_mutex);
9621
9622         map = trace_eval_maps;
9623
9624         while (map) {
9625                 if (map->head.mod == mod)
9626                         break;
9627                 map = trace_eval_jmp_to_tail(map);
9628                 last = &map->tail.next;
9629                 map = map->tail.next;
9630         }
9631         if (!map)
9632                 goto out;
9633
9634         *last = trace_eval_jmp_to_tail(map)->tail.next;
9635         kfree(map);
9636  out:
9637         mutex_unlock(&trace_eval_mutex);
9638 }
9639 #else
9640 static inline void trace_module_remove_evals(struct module *mod) { }
9641 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9642
9643 static int trace_module_notify(struct notifier_block *self,
9644                                unsigned long val, void *data)
9645 {
9646         struct module *mod = data;
9647
9648         switch (val) {
9649         case MODULE_STATE_COMING:
9650                 trace_module_add_evals(mod);
9651                 break;
9652         case MODULE_STATE_GOING:
9653                 trace_module_remove_evals(mod);
9654                 break;
9655         }
9656
9657         return NOTIFY_OK;
9658 }
9659
9660 static struct notifier_block trace_module_nb = {
9661         .notifier_call = trace_module_notify,
9662         .priority = 0,
9663 };
9664 #endif /* CONFIG_MODULES */
9665
9666 static __init int tracer_init_tracefs(void)
9667 {
9668         int ret;
9669
9670         trace_access_lock_init();
9671
9672         ret = tracing_init_dentry();
9673         if (ret)
9674                 return 0;
9675
9676         event_trace_init();
9677
9678         init_tracer_tracefs(&global_trace, NULL);
9679         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9680
9681         trace_create_file("tracing_thresh", 0644, NULL,
9682                         &global_trace, &tracing_thresh_fops);
9683
9684         trace_create_file("README", 0444, NULL,
9685                         NULL, &tracing_readme_fops);
9686
9687         trace_create_file("saved_cmdlines", 0444, NULL,
9688                         NULL, &tracing_saved_cmdlines_fops);
9689
9690         trace_create_file("saved_cmdlines_size", 0644, NULL,
9691                           NULL, &tracing_saved_cmdlines_size_fops);
9692
9693         trace_create_file("saved_tgids", 0444, NULL,
9694                         NULL, &tracing_saved_tgids_fops);
9695
9696         trace_eval_init();
9697
9698         trace_create_eval_file(NULL);
9699
9700 #ifdef CONFIG_MODULES
9701         register_module_notifier(&trace_module_nb);
9702 #endif
9703
9704 #ifdef CONFIG_DYNAMIC_FTRACE
9705         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9706                         NULL, &tracing_dyn_info_fops);
9707 #endif
9708
9709         create_trace_instances(NULL);
9710
9711         update_tracer_options(&global_trace);
9712
9713         return 0;
9714 }
9715
9716 fs_initcall(tracer_init_tracefs);
9717
9718 static int trace_panic_handler(struct notifier_block *this,
9719                                unsigned long event, void *unused)
9720 {
9721         if (ftrace_dump_on_oops)
9722                 ftrace_dump(ftrace_dump_on_oops);
9723         return NOTIFY_OK;
9724 }
9725
9726 static struct notifier_block trace_panic_notifier = {
9727         .notifier_call  = trace_panic_handler,
9728         .next           = NULL,
9729         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9730 };
9731
9732 static int trace_die_handler(struct notifier_block *self,
9733                              unsigned long val,
9734                              void *data)
9735 {
9736         switch (val) {
9737         case DIE_OOPS:
9738                 if (ftrace_dump_on_oops)
9739                         ftrace_dump(ftrace_dump_on_oops);
9740                 break;
9741         default:
9742                 break;
9743         }
9744         return NOTIFY_OK;
9745 }
9746
9747 static struct notifier_block trace_die_notifier = {
9748         .notifier_call = trace_die_handler,
9749         .priority = 200
9750 };
9751
9752 /*
9753  * printk is set to max of 1024, we really don't need it that big.
9754  * Nothing should be printing 1000 characters anyway.
9755  */
9756 #define TRACE_MAX_PRINT         1000
9757
9758 /*
9759  * Define here KERN_TRACE so that we have one place to modify
9760  * it if we decide to change what log level the ftrace dump
9761  * should be at.
9762  */
9763 #define KERN_TRACE              KERN_EMERG
9764
9765 void
9766 trace_printk_seq(struct trace_seq *s)
9767 {
9768         /* Probably should print a warning here. */
9769         if (s->seq.len >= TRACE_MAX_PRINT)
9770                 s->seq.len = TRACE_MAX_PRINT;
9771
9772         /*
9773          * More paranoid code. Although the buffer size is set to
9774          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9775          * an extra layer of protection.
9776          */
9777         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9778                 s->seq.len = s->seq.size - 1;
9779
9780         /* should be zero ended, but we are paranoid. */
9781         s->buffer[s->seq.len] = 0;
9782
9783         printk(KERN_TRACE "%s", s->buffer);
9784
9785         trace_seq_init(s);
9786 }
9787
9788 void trace_init_global_iter(struct trace_iterator *iter)
9789 {
9790         iter->tr = &global_trace;
9791         iter->trace = iter->tr->current_trace;
9792         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9793         iter->array_buffer = &global_trace.array_buffer;
9794
9795         if (iter->trace && iter->trace->open)
9796                 iter->trace->open(iter);
9797
9798         /* Annotate start of buffers if we had overruns */
9799         if (ring_buffer_overruns(iter->array_buffer->buffer))
9800                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9801
9802         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9803         if (trace_clocks[iter->tr->clock_id].in_ns)
9804                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9805 }
9806
9807 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9808 {
9809         /* use static because iter can be a bit big for the stack */
9810         static struct trace_iterator iter;
9811         static atomic_t dump_running;
9812         struct trace_array *tr = &global_trace;
9813         unsigned int old_userobj;
9814         unsigned long flags;
9815         int cnt = 0, cpu;
9816
9817         /* Only allow one dump user at a time. */
9818         if (atomic_inc_return(&dump_running) != 1) {
9819                 atomic_dec(&dump_running);
9820                 return;
9821         }
9822
9823         /*
9824          * Always turn off tracing when we dump.
9825          * We don't need to show trace output of what happens
9826          * between multiple crashes.
9827          *
9828          * If the user does a sysrq-z, then they can re-enable
9829          * tracing with echo 1 > tracing_on.
9830          */
9831         tracing_off();
9832
9833         local_irq_save(flags);
9834
9835         /* Simulate the iterator */
9836         trace_init_global_iter(&iter);
9837         /* Can not use kmalloc for iter.temp and iter.fmt */
9838         iter.temp = static_temp_buf;
9839         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9840         iter.fmt = static_fmt_buf;
9841         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9842
9843         for_each_tracing_cpu(cpu) {
9844                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9845         }
9846
9847         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9848
9849         /* don't look at user memory in panic mode */
9850         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9851
9852         switch (oops_dump_mode) {
9853         case DUMP_ALL:
9854                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9855                 break;
9856         case DUMP_ORIG:
9857                 iter.cpu_file = raw_smp_processor_id();
9858                 break;
9859         case DUMP_NONE:
9860                 goto out_enable;
9861         default:
9862                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9863                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9864         }
9865
9866         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9867
9868         /* Did function tracer already get disabled? */
9869         if (ftrace_is_dead()) {
9870                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9871                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9872         }
9873
9874         /*
9875          * We need to stop all tracing on all CPUS to read
9876          * the next buffer. This is a bit expensive, but is
9877          * not done often. We fill all what we can read,
9878          * and then release the locks again.
9879          */
9880
9881         while (!trace_empty(&iter)) {
9882
9883                 if (!cnt)
9884                         printk(KERN_TRACE "---------------------------------\n");
9885
9886                 cnt++;
9887
9888                 trace_iterator_reset(&iter);
9889                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9890
9891                 if (trace_find_next_entry_inc(&iter) != NULL) {
9892                         int ret;
9893
9894                         ret = print_trace_line(&iter);
9895                         if (ret != TRACE_TYPE_NO_CONSUME)
9896                                 trace_consume(&iter);
9897                 }
9898                 touch_nmi_watchdog();
9899
9900                 trace_printk_seq(&iter.seq);
9901         }
9902
9903         if (!cnt)
9904                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9905         else
9906                 printk(KERN_TRACE "---------------------------------\n");
9907
9908  out_enable:
9909         tr->trace_flags |= old_userobj;
9910
9911         for_each_tracing_cpu(cpu) {
9912                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9913         }
9914         atomic_dec(&dump_running);
9915         local_irq_restore(flags);
9916 }
9917 EXPORT_SYMBOL_GPL(ftrace_dump);
9918
9919 #define WRITE_BUFSIZE  4096
9920
9921 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9922                                 size_t count, loff_t *ppos,
9923                                 int (*createfn)(const char *))
9924 {
9925         char *kbuf, *buf, *tmp;
9926         int ret = 0;
9927         size_t done = 0;
9928         size_t size;
9929
9930         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9931         if (!kbuf)
9932                 return -ENOMEM;
9933
9934         while (done < count) {
9935                 size = count - done;
9936
9937                 if (size >= WRITE_BUFSIZE)
9938                         size = WRITE_BUFSIZE - 1;
9939
9940                 if (copy_from_user(kbuf, buffer + done, size)) {
9941                         ret = -EFAULT;
9942                         goto out;
9943                 }
9944                 kbuf[size] = '\0';
9945                 buf = kbuf;
9946                 do {
9947                         tmp = strchr(buf, '\n');
9948                         if (tmp) {
9949                                 *tmp = '\0';
9950                                 size = tmp - buf + 1;
9951                         } else {
9952                                 size = strlen(buf);
9953                                 if (done + size < count) {
9954                                         if (buf != kbuf)
9955                                                 break;
9956                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9957                                         pr_warn("Line length is too long: Should be less than %d\n",
9958                                                 WRITE_BUFSIZE - 2);
9959                                         ret = -EINVAL;
9960                                         goto out;
9961                                 }
9962                         }
9963                         done += size;
9964
9965                         /* Remove comments */
9966                         tmp = strchr(buf, '#');
9967
9968                         if (tmp)
9969                                 *tmp = '\0';
9970
9971                         ret = createfn(buf);
9972                         if (ret)
9973                                 goto out;
9974                         buf += size;
9975
9976                 } while (done < count);
9977         }
9978         ret = done;
9979
9980 out:
9981         kfree(kbuf);
9982
9983         return ret;
9984 }
9985
9986 __init static int tracer_alloc_buffers(void)
9987 {
9988         int ring_buf_size;
9989         int ret = -ENOMEM;
9990
9991
9992         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9993                 pr_warn("Tracing disabled due to lockdown\n");
9994                 return -EPERM;
9995         }
9996
9997         /*
9998          * Make sure we don't accidentally add more trace options
9999          * than we have bits for.
10000          */
10001         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10002
10003         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10004                 goto out;
10005
10006         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10007                 goto out_free_buffer_mask;
10008
10009         /* Only allocate trace_printk buffers if a trace_printk exists */
10010         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10011                 /* Must be called before global_trace.buffer is allocated */
10012                 trace_printk_init_buffers();
10013
10014         /* To save memory, keep the ring buffer size to its minimum */
10015         if (ring_buffer_expanded)
10016                 ring_buf_size = trace_buf_size;
10017         else
10018                 ring_buf_size = 1;
10019
10020         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10021         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10022
10023         raw_spin_lock_init(&global_trace.start_lock);
10024
10025         /*
10026          * The prepare callbacks allocates some memory for the ring buffer. We
10027          * don't free the buffer if the CPU goes down. If we were to free
10028          * the buffer, then the user would lose any trace that was in the
10029          * buffer. The memory will be removed once the "instance" is removed.
10030          */
10031         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10032                                       "trace/RB:preapre", trace_rb_cpu_prepare,
10033                                       NULL);
10034         if (ret < 0)
10035                 goto out_free_cpumask;
10036         /* Used for event triggers */
10037         ret = -ENOMEM;
10038         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10039         if (!temp_buffer)
10040                 goto out_rm_hp_state;
10041
10042         if (trace_create_savedcmd() < 0)
10043                 goto out_free_temp_buffer;
10044
10045         /* TODO: make the number of buffers hot pluggable with CPUS */
10046         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10047                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10048                 goto out_free_savedcmd;
10049         }
10050
10051         if (global_trace.buffer_disabled)
10052                 tracing_off();
10053
10054         if (trace_boot_clock) {
10055                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10056                 if (ret < 0)
10057                         pr_warn("Trace clock %s not defined, going back to default\n",
10058                                 trace_boot_clock);
10059         }
10060
10061         /*
10062          * register_tracer() might reference current_trace, so it
10063          * needs to be set before we register anything. This is
10064          * just a bootstrap of current_trace anyway.
10065          */
10066         global_trace.current_trace = &nop_trace;
10067
10068         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10069
10070         ftrace_init_global_array_ops(&global_trace);
10071
10072         init_trace_flags_index(&global_trace);
10073
10074         register_tracer(&nop_trace);
10075
10076         /* Function tracing may start here (via kernel command line) */
10077         init_function_trace();
10078
10079         /* All seems OK, enable tracing */
10080         tracing_disabled = 0;
10081
10082         atomic_notifier_chain_register(&panic_notifier_list,
10083                                        &trace_panic_notifier);
10084
10085         register_die_notifier(&trace_die_notifier);
10086
10087         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10088
10089         INIT_LIST_HEAD(&global_trace.systems);
10090         INIT_LIST_HEAD(&global_trace.events);
10091         INIT_LIST_HEAD(&global_trace.hist_vars);
10092         INIT_LIST_HEAD(&global_trace.err_log);
10093         list_add(&global_trace.list, &ftrace_trace_arrays);
10094
10095         apply_trace_boot_options();
10096
10097         register_snapshot_cmd();
10098
10099         test_can_verify();
10100
10101         return 0;
10102
10103 out_free_savedcmd:
10104         free_saved_cmdlines_buffer(savedcmd);
10105 out_free_temp_buffer:
10106         ring_buffer_free(temp_buffer);
10107 out_rm_hp_state:
10108         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10109 out_free_cpumask:
10110         free_cpumask_var(global_trace.tracing_cpumask);
10111 out_free_buffer_mask:
10112         free_cpumask_var(tracing_buffer_mask);
10113 out:
10114         return ret;
10115 }
10116
10117 void __init early_trace_init(void)
10118 {
10119         if (tracepoint_printk) {
10120                 tracepoint_print_iter =
10121                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10122                 if (MEM_FAIL(!tracepoint_print_iter,
10123                              "Failed to allocate trace iterator\n"))
10124                         tracepoint_printk = 0;
10125                 else
10126                         static_key_enable(&tracepoint_printk_key.key);
10127         }
10128         tracer_alloc_buffers();
10129 }
10130
10131 void __init trace_init(void)
10132 {
10133         trace_event_init();
10134 }
10135
10136 __init static void clear_boot_tracer(void)
10137 {
10138         /*
10139          * The default tracer at boot buffer is an init section.
10140          * This function is called in lateinit. If we did not
10141          * find the boot tracer, then clear it out, to prevent
10142          * later registration from accessing the buffer that is
10143          * about to be freed.
10144          */
10145         if (!default_bootup_tracer)
10146                 return;
10147
10148         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10149                default_bootup_tracer);
10150         default_bootup_tracer = NULL;
10151 }
10152
10153 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10154 __init static void tracing_set_default_clock(void)
10155 {
10156         /* sched_clock_stable() is determined in late_initcall */
10157         if (!trace_boot_clock && !sched_clock_stable()) {
10158                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10159                         pr_warn("Can not set tracing clock due to lockdown\n");
10160                         return;
10161                 }
10162
10163                 printk(KERN_WARNING
10164                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10165                        "If you want to keep using the local clock, then add:\n"
10166                        "  \"trace_clock=local\"\n"
10167                        "on the kernel command line\n");
10168                 tracing_set_clock(&global_trace, "global");
10169         }
10170 }
10171 #else
10172 static inline void tracing_set_default_clock(void) { }
10173 #endif
10174
10175 __init static int late_trace_init(void)
10176 {
10177         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10178                 static_key_disable(&tracepoint_printk_key.key);
10179                 tracepoint_printk = 0;
10180         }
10181
10182         tracing_set_default_clock();
10183         clear_boot_tracer();
10184         return 0;
10185 }
10186
10187 late_initcall_sync(late_trace_init);