kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring-buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring-buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops
120  * Set 1 if you want to dump buffers of all CPUs
121  * Set 2 if you want to dump the buffer of the CPU that triggered oops
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
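/*
 * Example (illustrative, derived from the description above): after a
 * module registers three eval maps, the saved array looks like:
 *
 *	[0] head: .mod = <owning module>, .length = 3
 *	[1] map:  first trace_eval_map
 *	[2] map:  second trace_eval_map
 *	[3] map:  third trace_eval_map
 *	[4] tail: .end = NULL, .next = <next saved array, or NULL>
 */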
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
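/*
 * Example (illustrative): the forms accepted by the parser above, plus the
 * run-time sysctl mentioned in the ftrace_dump_on_oops comment:
 *
 *	ftrace_dump_on_oops			(command line, dump all CPUs)
 *	ftrace_dump_on_oops=orig_cpu		(dump only the oopsing CPU)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(enable at run time)
 */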
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
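/*
 * Example: the +500 above rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */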
253
254 static void
255 trace_process_export(struct trace_export *export,
256                struct ring_buffer_event *event, int flag)
257 {
258         struct trace_entry *entry;
259         unsigned int size = 0;
260
261         if (export->flags & flag) {
262                 entry = ring_buffer_event_data(event);
263                 size = ring_buffer_event_length(event);
264                 export->write(export, entry, size);
265         }
266 }
267
268 static DEFINE_MUTEX(ftrace_export_lock);
269
270 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
271
272 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
273 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
275
276 static inline void ftrace_exports_enable(struct trace_export *export)
277 {
278         if (export->flags & TRACE_EXPORT_FUNCTION)
279                 static_branch_inc(&trace_function_exports_enabled);
280
281         if (export->flags & TRACE_EXPORT_EVENT)
282                 static_branch_inc(&trace_event_exports_enabled);
283
284         if (export->flags & TRACE_EXPORT_MARKER)
285                 static_branch_inc(&trace_marker_exports_enabled);
286 }
287
288 static inline void ftrace_exports_disable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_dec(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_dec(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_dec(&trace_marker_exports_enabled);
298 }
299
300 static void ftrace_exports(struct ring_buffer_event *event, int flag)
301 {
302         struct trace_export *export;
303
304         preempt_disable_notrace();
305
306         export = rcu_dereference_raw_check(ftrace_exports_list);
307         while (export) {
308                 trace_process_export(export, event, flag);
309                 export = rcu_dereference_raw_check(export->next);
310         }
311
312         preempt_enable_notrace();
313 }
314
315 static inline void
316 add_trace_export(struct trace_export **list, struct trace_export *export)
317 {
318         rcu_assign_pointer(export->next, *list);
319         /*
320          * We are adding the export to the list, but another
321          * CPU might be walking that list. We need to make sure
322          * the export->next pointer is valid before another CPU sees
323          * the export pointer added to the list.
324          */
325         rcu_assign_pointer(*list, export);
326 }
327
328 static inline int
329 rm_trace_export(struct trace_export **list, struct trace_export *export)
330 {
331         struct trace_export **p;
332
333         for (p = list; *p != NULL; p = &(*p)->next)
334                 if (*p == export)
335                         break;
336
337         if (*p != export)
338                 return -1;
339
340         rcu_assign_pointer(*p, (*p)->next);
341
342         return 0;
343 }
344
345 static inline void
346 add_ftrace_export(struct trace_export **list, struct trace_export *export)
347 {
348         ftrace_exports_enable(export);
349
350         add_trace_export(list, export);
351 }
352
353 static inline int
354 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
355 {
356         int ret;
357
358         ret = rm_trace_export(list, export);
359         ftrace_exports_disable(export);
360
361         return ret;
362 }
363
364 int register_ftrace_export(struct trace_export *export)
365 {
366         if (WARN_ON_ONCE(!export->write))
367                 return -1;
368
369         mutex_lock(&ftrace_export_lock);
370
371         add_ftrace_export(&ftrace_exports_list, export);
372
373         mutex_unlock(&ftrace_export_lock);
374
375         return 0;
376 }
377 EXPORT_SYMBOL_GPL(register_ftrace_export);
378
379 int unregister_ftrace_export(struct trace_export *export)
380 {
381         int ret;
382
383         mutex_lock(&ftrace_export_lock);
384
385         ret = rm_ftrace_export(&ftrace_exports_list, export);
386
387         mutex_unlock(&ftrace_export_lock);
388
389         return ret;
390 }
391 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
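/*
 * Example (illustrative sketch of a trace_export user; my_export_write()
 * and my_export are hypothetical names, and the callback signature is
 * assumed to match the (export, entry, size) call in trace_process_export()
 * above):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry, e.g. over a transport bus
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */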
392
393 /* trace_flags holds trace_options default values */
394 #define TRACE_DEFAULT_FLAGS                                             \
395         (FUNCTION_DEFAULT_FLAGS |                                       \
396          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
397          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
398          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
399          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
400
401 /* trace_options that are only supported by global_trace */
402 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
403                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
404
405 /* trace_flags that are default zero for instances */
406 #define ZEROED_TRACE_FLAGS \
407         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
408
409 /*
410  * The global_trace is the descriptor that holds the top-level tracing
411  * buffers for the live tracing.
412  */
413 static struct trace_array global_trace = {
414         .trace_flags = TRACE_DEFAULT_FLAGS,
415 };
416
417 LIST_HEAD(ftrace_trace_arrays);
418
419 int trace_array_get(struct trace_array *this_tr)
420 {
421         struct trace_array *tr;
422         int ret = -ENODEV;
423
424         mutex_lock(&trace_types_lock);
425         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
426                 if (tr == this_tr) {
427                         tr->ref++;
428                         ret = 0;
429                         break;
430                 }
431         }
432         mutex_unlock(&trace_types_lock);
433
434         return ret;
435 }
436
437 static void __trace_array_put(struct trace_array *this_tr)
438 {
439         WARN_ON(!this_tr->ref);
440         this_tr->ref--;
441 }
442
443 /**
444  * trace_array_put - Decrement the reference counter for this trace array.
445  *
446  * NOTE: Use this when we no longer need the trace array returned by
447  * trace_array_get_by_name(). This ensures the trace array can be later
448  * destroyed.
449  *
450  */
451 void trace_array_put(struct trace_array *this_tr)
452 {
453         if (!this_tr)
454                 return;
455
456         mutex_lock(&trace_types_lock);
457         __trace_array_put(this_tr);
458         mutex_unlock(&trace_types_lock);
459 }
460 EXPORT_SYMBOL_GPL(trace_array_put);
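/*
 * Example (illustrative): the usual pairing for an instance looked up by
 * name; "my_instance" is a hypothetical instance name:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		// ... use tr ...
 *		trace_array_put(tr);
 *	}
 */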
461
462 int tracing_check_open_get_tr(struct trace_array *tr)
463 {
464         int ret;
465
466         ret = security_locked_down(LOCKDOWN_TRACEFS);
467         if (ret)
468                 return ret;
469
470         if (tracing_disabled)
471                 return -ENODEV;
472
473         if (tr && trace_array_get(tr) < 0)
474                 return -ENODEV;
475
476         return 0;
477 }
478
479 int call_filter_check_discard(struct trace_event_call *call, void *rec,
480                               struct trace_buffer *buffer,
481                               struct ring_buffer_event *event)
482 {
483         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
484             !filter_match_preds(call->filter, rec)) {
485                 __trace_event_discard_commit(buffer, event);
486                 return 1;
487         }
488
489         return 0;
490 }
491
492 void trace_free_pid_list(struct trace_pid_list *pid_list)
493 {
494         vfree(pid_list->pids);
495         kfree(pid_list);
496 }
497
498 /**
499  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
500  * @filtered_pids: The list of pids to check
501  * @search_pid: The PID to find in @filtered_pids
502  *
503  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
504  */
505 bool
506 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
507 {
508         /*
509          * If pid_max changed after filtered_pids was created, we
510          * by default ignore all pids greater than the previous pid_max.
511          */
512         if (search_pid >= filtered_pids->pid_max)
513                 return false;
514
515         return test_bit(search_pid, filtered_pids->pids);
516 }
517
518 /**
519  * trace_ignore_this_task - should a task be ignored for tracing
520  * @filtered_pids: The list of pids to check
521  * @task: The task that should be ignored if not filtered
522  *
523  * Checks if @task should be traced or not from @filtered_pids.
524  * Returns true if @task should *NOT* be traced.
525  * Returns false if @task should be traced.
526  */
527 bool
528 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
529                        struct trace_pid_list *filtered_no_pids,
530                        struct task_struct *task)
531 {
532         /*
533          * If filtered_no_pids is not empty, and the task's pid is listed
534          * in filtered_no_pids, then return true.
535          * Otherwise, if filtered_pids is empty, that means we can
536          * trace all tasks. If it has content, then only trace pids
537          * within filtered_pids.
538          */
539
540         return (filtered_pids &&
541                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
542                 (filtered_no_pids &&
543                  trace_find_filtered_pid(filtered_no_pids, task->pid));
544 }
545
546 /**
547  * trace_filter_add_remove_task - Add or remove a task from a pid_list
548  * @pid_list: The list to modify
549  * @self: The current task for fork or NULL for exit
550  * @task: The task to add or remove
551  *
552  * If adding a task, if @self is defined, the task is only added if @self
553  * is also included in @pid_list. This happens on fork and tasks should
554  * only be added when the parent is listed. If @self is NULL, then the
555  * @task pid will be removed from the list, which would happen on exit
556  * of a task.
557  */
558 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
559                                   struct task_struct *self,
560                                   struct task_struct *task)
561 {
562         if (!pid_list)
563                 return;
564
565         /* For forks, we only add if the forking task is listed */
566         if (self) {
567                 if (!trace_find_filtered_pid(pid_list, self->pid))
568                         return;
569         }
570
571         /* Sorry, but we don't support pid_max changing after setting */
572         if (task->pid >= pid_list->pid_max)
573                 return;
574
575         /* "self" is set for forks, and NULL for exits */
576         if (self)
577                 set_bit(task->pid, pid_list->pids);
578         else
579                 clear_bit(task->pid, pid_list->pids);
580 }
581
582 /**
583  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
584  * @pid_list: The pid list to show
585  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
586  * @pos: The position of the file
587  *
588  * This is used by the seq_file "next" operation to iterate the pids
589  * listed in a trace_pid_list structure.
590  *
591  * Returns the pid+1 as we want to display pid of zero, but NULL would
592  * stop the iteration.
593  */
594 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
595 {
596         unsigned long pid = (unsigned long)v;
597
598         (*pos)++;
599
600         /* pid is already +1 of the actual previous bit */
601         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
602
603         /* Return pid + 1 to allow zero to be represented */
604         if (pid < pid_list->pid_max)
605                 return (void *)(pid + 1);
606
607         return NULL;
608 }
609
610 /**
611  * trace_pid_start - Used for seq_file to start reading pid lists
612  * @pid_list: The pid list to show
613  * @pos: The position of the file
614  *
615  * This is used by seq_file "start" operation to start the iteration
616  * of listing pids.
617  *
618  * Returns the pid+1 as we want to display pid of zero, but NULL would
619  * stop the iteration.
620  */
621 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
622 {
623         unsigned long pid;
624         loff_t l = 0;
625
626         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
627         if (pid >= pid_list->pid_max)
628                 return NULL;
629
630         /* Return pid + 1 so that zero can be the exit value */
631         for (pid++; pid && l < *pos;
632              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
633                 ;
634         return (void *)pid;
635 }
636
637 /**
638  * trace_pid_show - show the current pid in seq_file processing
639  * @m: The seq_file structure to write into
640  * @v: A void pointer of the pid (+1) value to display
641  *
642  * Can be directly used by seq_file operations to display the current
643  * pid value.
644  */
645 int trace_pid_show(struct seq_file *m, void *v)
646 {
647         unsigned long pid = (unsigned long)v - 1;
648
649         seq_printf(m, "%lu\n", pid);
650         return 0;
651 }
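/*
 * Example (illustrative sketch): wiring the three helpers above into a
 * seq_file; my_pid_list and the my_pid_seq_* wrappers are hypothetical, and
 * a real user would also take whatever locking protects its pid list:
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pid_seq_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */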
652
653 /* 128 should be much more than enough */
654 #define PID_BUF_SIZE            127
655
656 int trace_pid_write(struct trace_pid_list *filtered_pids,
657                     struct trace_pid_list **new_pid_list,
658                     const char __user *ubuf, size_t cnt)
659 {
660         struct trace_pid_list *pid_list;
661         struct trace_parser parser;
662         unsigned long val;
663         int nr_pids = 0;
664         ssize_t read = 0;
665         ssize_t ret = 0;
666         loff_t pos;
667         pid_t pid;
668
669         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
670                 return -ENOMEM;
671
672         /*
673          * Always recreate a new array. The write is an all or nothing
674          * operation. Always create a new array when adding new pids by
675          * the user. If the operation fails, then the current list is
676          * not modified.
677          */
678         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
679         if (!pid_list) {
680                 trace_parser_put(&parser);
681                 return -ENOMEM;
682         }
683
684         pid_list->pid_max = READ_ONCE(pid_max);
685
686         /* Only truncating will shrink pid_max */
687         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
688                 pid_list->pid_max = filtered_pids->pid_max;
689
690         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
691         if (!pid_list->pids) {
692                 trace_parser_put(&parser);
693                 kfree(pid_list);
694                 return -ENOMEM;
695         }
696
697         if (filtered_pids) {
698                 /* copy the current bits to the new max */
699                 for_each_set_bit(pid, filtered_pids->pids,
700                                  filtered_pids->pid_max) {
701                         set_bit(pid, pid_list->pids);
702                         nr_pids++;
703                 }
704         }
705
706         while (cnt > 0) {
707
708                 pos = 0;
709
710                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
711                 if (ret < 0 || !trace_parser_loaded(&parser))
712                         break;
713
714                 read += ret;
715                 ubuf += ret;
716                 cnt -= ret;
717
718                 ret = -EINVAL;
719                 if (kstrtoul(parser.buffer, 0, &val))
720                         break;
721                 if (val >= pid_list->pid_max)
722                         break;
723
724                 pid = (pid_t)val;
725
726                 set_bit(pid, pid_list->pids);
727                 nr_pids++;
728
729                 trace_parser_clear(&parser);
730                 ret = 0;
731         }
732         trace_parser_put(&parser);
733
734         if (ret < 0) {
735                 trace_free_pid_list(pid_list);
736                 return ret;
737         }
738
739         if (!nr_pids) {
740                 /* Cleared the list of pids */
741                 trace_free_pid_list(pid_list);
742                 read = ret;
743                 pid_list = NULL;
744         }
745
746         *new_pid_list = pid_list;
747
748         return read;
749 }
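/*
 * Example (illustrative): this helper backs tracefs pid filter files such
 * as set_event_pid. Writing "123 456" builds a fresh list with pids 123
 * and 456 set (plus anything copied from the existing list); on any parse
 * error the new list is freed and the current one is left untouched, as
 * described in the "all or nothing" comment above.
 */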
750
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753         u64 ts;
754
755         /* Early boot up does not have a buffer yet */
756         if (!buf->buffer)
757                 return trace_clock_local();
758
759         ts = ring_buffer_time_stamp(buf->buffer, cpu);
760         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761
762         return ts;
763 }
764
765 u64 ftrace_now(int cpu)
766 {
767         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769
770 /**
771  * tracing_is_enabled - Show if global_trace has been disabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" to be used in fast paths such as for
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781         /*
782          * For quick access (irqsoff uses this in fast path), just
783          * return the mirror variable of the state of the ring buffer.
784          * It's a little racy, but we don't really care.
785          */
786         smp_rmb();
787         return !global_trace.buffer_disabled;
788 }
789
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note, the number of bytes is always rounded
793  * to page size.
794  *
795  * This number is purposely set to a low number of 16384.
796  * If the dump on oops happens, it will be much appreciated
797  * to not have to wait for all that output. In any case, this is
798  * configurable at boot time and at run time.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
801
802 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer            *trace_types __read_mostly;
806
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811
812 /*
813  * serialize the access of the ring buffer
814  *
815  * The ring buffer serializes readers, but that is only low level protection.
816  * The validity of the events (returned by ring_buffer_peek(), etc.)
817  * is not protected by the ring buffer.
818  *
819  * The content of events may become garbage if we allow another process to
820  * consume these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not a reader page) in the ring buffer, and this page will be
823  *      rewritten by the events producer.
824  *   B) The page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to the system.
826  *
827  * These primitives allow multiple processes to access different per-CPU
828  * ring buffers concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multiple read-only accesses are also serialized.
832  */
833
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837
838 static inline void trace_access_lock(int cpu)
839 {
840         if (cpu == RING_BUFFER_ALL_CPUS) {
841                 /* gain it for accessing the whole ring buffer. */
842                 down_write(&all_cpu_access_lock);
843         } else {
844                 /* gain it for accessing a cpu ring buffer. */
845
846                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847                 down_read(&all_cpu_access_lock);
848
849                 /* Secondly block other access to this @cpu ring buffer. */
850                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
851         }
852 }
853
854 static inline void trace_access_unlock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 up_write(&all_cpu_access_lock);
858         } else {
859                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860                 up_read(&all_cpu_access_lock);
861         }
862 }
863
864 static inline void trace_access_lock_init(void)
865 {
866         int cpu;
867
868         for_each_possible_cpu(cpu)
869                 mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871
872 #else
873
874 static DEFINE_MUTEX(access_lock);
875
876 static inline void trace_access_lock(int cpu)
877 {
878         (void)cpu;
879         mutex_lock(&access_lock);
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884         (void)cpu;
885         mutex_unlock(&access_lock);
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890 }
891
892 #endif
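/*
 * Example (illustrative): a reader consuming one CPU's buffer brackets the
 * access with the per-cpu variant, while iterators that walk every CPU
 * pass RING_BUFFER_ALL_CPUS instead:
 *
 *	trace_access_lock(cpu);
 *	// e.g. ring_buffer_consume() / ring_buffer_read_page() for @cpu
 *	trace_access_unlock(cpu);
 */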
893
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896                                  unsigned long flags,
897                                  int skip, int pc, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899                                       struct trace_buffer *buffer,
900                                       unsigned long flags,
901                                       int skip, int pc, struct pt_regs *regs);
902
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905                                         unsigned long flags,
906                                         int skip, int pc, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910                                       struct trace_buffer *buffer,
911                                       unsigned long flags,
912                                       int skip, int pc, struct pt_regs *regs)
913 {
914 }
915
916 #endif
917
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920                   int type, unsigned long flags, int pc)
921 {
922         struct trace_entry *ent = ring_buffer_event_data(event);
923
924         tracing_generic_entry_update(ent, type, flags, pc);
925 }
926
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929                           int type,
930                           unsigned long len,
931                           unsigned long flags, int pc)
932 {
933         struct ring_buffer_event *event;
934
935         event = ring_buffer_lock_reserve(buffer, len);
936         if (event != NULL)
937                 trace_event_setup(event, type, flags, pc);
938
939         return event;
940 }
941
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944         if (tr->array_buffer.buffer)
945                 ring_buffer_record_on(tr->array_buffer.buffer);
946         /*
947          * This flag is looked at when buffers haven't been allocated
948          * yet, or by some tracers (like irqsoff), that just want to
949          * know if the ring buffer has been disabled, but it can handle
950          * races of where it gets disabled but we still do a record.
951          * As the check is in the fast path of the tracers, it is more
952          * important to be fast than accurate.
953          */
954         tr->buffer_disabled = 0;
955         /* Make the flag seen by readers */
956         smp_wmb();
957 }
958
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967         tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970
971
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975         __this_cpu_write(trace_taskinfo_save, true);
976
977         /* If this is the temp buffer, we need to commit fully */
978         if (this_cpu_read(trace_buffered_event) == event) {
979                 /* Length is in event->array[0] */
980                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
981                 /* Release the temp buffer */
982                 this_cpu_dec(trace_buffered_event_cnt);
983         } else
984                 ring_buffer_unlock_commit(buffer, event);
985 }
986
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:    The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995         struct ring_buffer_event *event;
996         struct trace_buffer *buffer;
997         struct print_entry *entry;
998         unsigned long irq_flags;
999         int alloc;
1000         int pc;
1001
1002         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1003                 return 0;
1004
1005         pc = preempt_count();
1006
1007         if (unlikely(tracing_selftest_running || tracing_disabled))
1008                 return 0;
1009
1010         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011
1012         local_save_flags(irq_flags);
1013         buffer = global_trace.array_buffer.buffer;
1014         ring_buffer_nest_start(buffer);
1015         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1016                                             irq_flags, pc);
1017         if (!event) {
1018                 size = 0;
1019                 goto out;
1020         }
1021
1022         entry = ring_buffer_event_data(event);
1023         entry->ip = ip;
1024
1025         memcpy(&entry->buf, str, size);
1026
1027         /* Add a newline if necessary */
1028         if (entry->buf[size - 1] != '\n') {
1029                 entry->buf[size] = '\n';
1030                 entry->buf[size + 1] = '\0';
1031         } else
1032                 entry->buf[size] = '\0';
1033
1034         __buffer_unlock_commit(buffer, event);
1035         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1036  out:
1037         ring_buffer_nest_end(buffer);
1038         return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_puts);
1041
1042 /**
1043  * __trace_bputs - write the pointer to a constant string into trace buffer
1044  * @ip:    The address of the caller
1045  * @str:   The constant string to write to the buffer to
1046  */
1047 int __trace_bputs(unsigned long ip, const char *str)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct bputs_entry *entry;
1052         unsigned long irq_flags;
1053         int size = sizeof(struct bputs_entry);
1054         int ret = 0;
1055         int pc;
1056
1057         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1058                 return 0;
1059
1060         pc = preempt_count();
1061
1062         if (unlikely(tracing_selftest_running || tracing_disabled))
1063                 return 0;
1064
1065         local_save_flags(irq_flags);
1066         buffer = global_trace.array_buffer.buffer;
1067
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070                                             irq_flags, pc);
1071         if (!event)
1072                 goto out;
1073
1074         entry = ring_buffer_event_data(event);
1075         entry->ip                       = ip;
1076         entry->str                      = str;
1077
1078         __buffer_unlock_commit(buffer, event);
1079         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1080
1081         ret = 1;
1082  out:
1083         ring_buffer_nest_end(buffer);
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
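/*
 * Example (illustrative): both helpers above normally sit behind the
 * trace_puts() macro, which is assumed to pick __trace_bputs() when the
 * string is a built-in constant (only the pointer is recorded) and fall
 * back to __trace_puts() otherwise:
 *
 *	trace_puts("hit the slow path\n");
 */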
1087
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090                                            void *cond_data)
1091 {
1092         struct tracer *tracer = tr->current_trace;
1093         unsigned long flags;
1094
1095         if (in_nmi()) {
1096                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1098                 return;
1099         }
1100
1101         if (!tr->allocated_snapshot) {
1102                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103                 internal_trace_puts("*** stopping trace here!   ***\n");
1104                 tracing_off();
1105                 return;
1106         }
1107
1108         /* Note, snapshot can not be used when the tracer uses it */
1109         if (tracer->use_max_tr) {
1110                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112                 return;
1113         }
1114
1115         local_irq_save(flags);
1116         update_max_tr(tr, current, smp_processor_id(), cond_data);
1117         local_irq_restore(flags);
1118 }
1119
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122         tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot either with
1133  * tracing_snapshot_alloc(), or by doing it manually
1134  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, it will stop tracing.
1137  * Basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141         struct trace_array *tr = &global_trace;
1142
1143         tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:         The tracing instance to snapshot
1150  * @cond_data:  The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162         tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:         The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already done.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182         void *cond_data = NULL;
1183
1184         arch_spin_lock(&tr->max_lock);
1185
1186         if (tr->cond_snapshot)
1187                 cond_data = tr->cond_snapshot->cond_data;
1188
1189         arch_spin_unlock(&tr->max_lock);
1190
1191         return cond_data;
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1194
1195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1196                                         struct array_buffer *size_buf, int cpu_id);
1197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1198
1199 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1200 {
1201         int ret;
1202
1203         if (!tr->allocated_snapshot) {
1204
1205                 /* allocate spare buffer */
1206                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1207                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1208                 if (ret < 0)
1209                         return ret;
1210
1211                 tr->allocated_snapshot = true;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static void free_snapshot(struct trace_array *tr)
1218 {
1219         /*
1220          * We don't free the ring buffer; instead, we resize it because
1221          * the max_tr ring buffer has some state (e.g. ring->clock) and
1222          * we want to preserve it.
1223          */
1224         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1225         set_buffer_entries(&tr->max_buffer, 1);
1226         tracing_reset_online_cpus(&tr->max_buffer);
1227         tr->allocated_snapshot = false;
1228 }
1229
1230 /**
1231  * tracing_alloc_snapshot - allocate snapshot buffer.
1232  *
1233  * This only allocates the snapshot buffer if it isn't already
1234  * allocated - it doesn't also take a snapshot.
1235  *
1236  * This is meant to be used in cases where the snapshot buffer needs
1237  * to be set up for events that can't sleep but need to be able to
1238  * trigger a snapshot.
1239  */
1240 int tracing_alloc_snapshot(void)
1241 {
1242         struct trace_array *tr = &global_trace;
1243         int ret;
1244
1245         ret = tracing_alloc_snapshot_instance(tr);
1246         WARN_ON(ret < 0);
1247
1248         return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1251
1252 /**
1253  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1254  *
1255  * This is similar to tracing_snapshot(), but it will allocate the
1256  * snapshot buffer if it isn't already allocated. Use this only
1257  * where it is safe to sleep, as the allocation may sleep.
1258  *
1259  * This causes a swap between the snapshot buffer and the current live
1260  * tracing buffer. You can use this to take snapshots of the live
1261  * trace when some condition is triggered, but continue to trace.
1262  */
1263 void tracing_snapshot_alloc(void)
1264 {
1265         int ret;
1266
1267         ret = tracing_alloc_snapshot();
1268         if (ret < 0)
1269                 return;
1270
1271         tracing_snapshot();
1272 }
1273 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
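/*
 * Example (illustrative): a common pattern is to allocate the spare buffer
 * once from a context that may sleep, then snapshot from the point of
 * interest:
 *
 *	tracing_alloc_snapshot();	// during setup, may sleep
 *	...
 *	tracing_snapshot();		// when the condition of interest hits
 */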
1274
1275 /**
1276  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1277  * @tr:         The tracing instance
1278  * @cond_data:  User data to associate with the snapshot
1279  * @update:     Implementation of the cond_snapshot update function
1280  *
1281  * Check whether the conditional snapshot for the given instance has
1282  * already been enabled, or if the current tracer is already using a
1283  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1284  * save the cond_data and update function inside.
1285  *
1286  * Returns 0 if successful, error otherwise.
1287  */
1288 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1289                                  cond_update_fn_t update)
1290 {
1291         struct cond_snapshot *cond_snapshot;
1292         int ret = 0;
1293
1294         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1295         if (!cond_snapshot)
1296                 return -ENOMEM;
1297
1298         cond_snapshot->cond_data = cond_data;
1299         cond_snapshot->update = update;
1300
1301         mutex_lock(&trace_types_lock);
1302
1303         ret = tracing_alloc_snapshot_instance(tr);
1304         if (ret)
1305                 goto fail_unlock;
1306
1307         if (tr->current_trace->use_max_tr) {
1308                 ret = -EBUSY;
1309                 goto fail_unlock;
1310         }
1311
1312         /*
1313          * The cond_snapshot can only change to NULL without the
1314          * trace_types_lock. We don't care if we race with it going
1315          * to NULL, but we want to make sure that it's not set to
1316          * something other than NULL when we get here, which we can
1317          * do safely with only holding the trace_types_lock and not
1318          * having to take the max_lock.
1319          */
1320         if (tr->cond_snapshot) {
1321                 ret = -EBUSY;
1322                 goto fail_unlock;
1323         }
1324
1325         arch_spin_lock(&tr->max_lock);
1326         tr->cond_snapshot = cond_snapshot;
1327         arch_spin_unlock(&tr->max_lock);
1328
1329         mutex_unlock(&trace_types_lock);
1330
1331         return ret;
1332
1333  fail_unlock:
1334         mutex_unlock(&trace_types_lock);
1335         kfree(cond_snapshot);
1336         return ret;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
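/*
 * Example (illustrative sketch; struct my_cond, my_cond_data and
 * my_update() are hypothetical, and cond_update_fn_t is assumed to take
 * (tr, cond_data) and return whether the snapshot swap should happen):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return c->hits > c->threshold;	// take the snapshot?
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond_data);
 */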
1339
1340 /**
1341  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  *
1344  * Check whether the conditional snapshot for the given instance is
1345  * enabled; if so, free the cond_snapshot associated with it,
1346  * otherwise return -EINVAL.
1347  *
1348  * Returns 0 if successful, error otherwise.
1349  */
1350 int tracing_snapshot_cond_disable(struct trace_array *tr)
1351 {
1352         int ret = 0;
1353
1354         arch_spin_lock(&tr->max_lock);
1355
1356         if (!tr->cond_snapshot)
1357                 ret = -EINVAL;
1358         else {
1359                 kfree(tr->cond_snapshot);
1360                 tr->cond_snapshot = NULL;
1361         }
1362
1363         arch_spin_unlock(&tr->max_lock);
1364
1365         return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1368 #else
1369 void tracing_snapshot(void)
1370 {
1371         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot);
1374 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1375 {
1376         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1379 int tracing_alloc_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1382         return -ENODEV;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1385 void tracing_snapshot_alloc(void)
1386 {
1387         /* Give warning */
1388         tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 void *tracing_cond_snapshot_data(struct trace_array *tr)
1392 {
1393         return NULL;
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1396 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1397 {
1398         return -ENODEV;
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1401 int tracing_snapshot_cond_disable(struct trace_array *tr)
1402 {
1403         return false;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1406 #endif /* CONFIG_TRACER_SNAPSHOT */
1407
1408 void tracer_tracing_off(struct trace_array *tr)
1409 {
1410         if (tr->array_buffer.buffer)
1411                 ring_buffer_record_off(tr->array_buffer.buffer);
1412         /*
1413          * This flag is looked at when buffers haven't been allocated
1414          * yet, or by some tracers (like irqsoff), that just want to
1415          * know if the ring buffer has been disabled, but it can handle
1416          * races of where it gets disabled but we still do a record.
1417          * As the check is in the fast path of the tracers, it is more
1418          * important to be fast than accurate.
1419          */
1420         tr->buffer_disabled = 1;
1421         /* Make the flag seen by readers */
1422         smp_wmb();
1423 }
1424
1425 /**
1426  * tracing_off - turn off tracing buffers
1427  *
1428  * This function stops the tracing buffers from recording data.
1429  * It does not disable any overhead the tracers themselves may
1430  * be causing. This function simply causes all recording to
1431  * the ring buffers to fail.
1432  */
1433 void tracing_off(void)
1434 {
1435         tracer_tracing_off(&global_trace);
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_off);
1438
1439 void disable_trace_on_warning(void)
1440 {
1441         if (__disable_trace_on_warning) {
1442                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1443                         "Disabling tracing due to warning\n");
1444                 tracing_off();
1445         }
1446 }
1447
1448 /**
1449  * tracer_tracing_is_on - show real state of ring buffer enabled
1450  * @tr : the trace array to know if ring buffer is enabled
1451  *
1452  * Shows real state of the ring buffer if it is enabled or not.
1453  */
1454 bool tracer_tracing_is_on(struct trace_array *tr)
1455 {
1456         if (tr->array_buffer.buffer)
1457                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1458         return !tr->buffer_disabled;
1459 }
1460
1461 /**
1462  * tracing_is_on - show state of ring buffers enabled
1463  */
1464 int tracing_is_on(void)
1465 {
1466         return tracer_tracing_is_on(&global_trace);
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_is_on);
1469
1470 static int __init set_buf_size(char *str)
1471 {
1472         unsigned long buf_size;
1473
1474         if (!str)
1475                 return 0;
1476         buf_size = memparse(str, &str);
1477         /* nr_entries can not be zero */
1478         if (buf_size == 0)
1479                 return 0;
1480         trace_buf_size = buf_size;
1481         return 1;
1482 }
1483 __setup("trace_buf_size=", set_buf_size);
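/*
 * Example (illustrative): memparse() accepts the usual size suffixes, so
 * either form below requests a 1 MiB per-cpu buffer on the command line:
 *
 *	trace_buf_size=1048576
 *	trace_buf_size=1M
 */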
1484
1485 static int __init set_tracing_thresh(char *str)
1486 {
1487         unsigned long threshold;
1488         int ret;
1489
1490         if (!str)
1491                 return 0;
1492         ret = kstrtoul(str, 0, &threshold);
1493         if (ret < 0)
1494                 return 0;
1495         tracing_thresh = threshold * 1000;
1496         return 1;
1497 }
1498 __setup("tracing_thresh=", set_tracing_thresh);
1499
1500 unsigned long nsecs_to_usecs(unsigned long nsecs)
1501 {
1502         return nsecs / 1000;
1503 }
1504
1505 /*
1506  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1507  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1508  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1509  * of strings in the order that the evals (enum) were defined.
1510  */
1511 #undef C
1512 #define C(a, b) b
1513
1514 /* These must match the bit positions in trace_iterator_flags */
1515 static const char *trace_options[] = {
1516         TRACE_FLAGS
1517         NULL
1518 };
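/*
 * Example (illustrative): with a hypothetical two-entry
 *	#define TRACE_FLAGS C(PRINTK, "trace_printk"), C(MARKERS, "markers"),
 * the "#define C(a, b) b" above turns trace_options[] into
 *	{ "trace_printk", "markers", NULL }
 * while trace.h redefines C() to build the matching TRACE_ITER_* bits, so
 * the strings and the bit masks stay in the same order.
 */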
1519
1520 static struct {
1521         u64 (*func)(void);
1522         const char *name;
1523         int in_ns;              /* is this clock in nanoseconds? */
1524 } trace_clocks[] = {
1525         { trace_clock_local,            "local",        1 },
1526         { trace_clock_global,           "global",       1 },
1527         { trace_clock_counter,          "counter",      0 },
1528         { trace_clock_jiffies,          "uptime",       0 },
1529         { trace_clock,                  "perf",         1 },
1530         { ktime_get_mono_fast_ns,       "mono",         1 },
1531         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1532         { ktime_get_boot_fast_ns,       "boot",         1 },
1533         ARCH_TRACE_CLOCKS
1534 };
1535
1536 bool trace_clock_in_ns(struct trace_array *tr)
1537 {
1538         if (trace_clocks[tr->clock_id].in_ns)
1539                 return true;
1540
1541         return false;
1542 }
1543
1544 /*
1545  * trace_parser_get_init - gets the buffer for trace parser
1546  */
1547 int trace_parser_get_init(struct trace_parser *parser, int size)
1548 {
1549         memset(parser, 0, sizeof(*parser));
1550
1551         parser->buffer = kmalloc(size, GFP_KERNEL);
1552         if (!parser->buffer)
1553                 return 1;
1554
1555         parser->size = size;
1556         return 0;
1557 }
1558
1559 /*
1560  * trace_parser_put - frees the buffer for trace parser
1561  */
1562 void trace_parser_put(struct trace_parser *parser)
1563 {
1564         kfree(parser->buffer);
1565         parser->buffer = NULL;
1566 }
1567
1568 /*
1569  * trace_get_user - reads the user input string separated by space
1570  * (matched by isspace(ch))
1571  *
1572  * For each string found the 'struct trace_parser' is updated,
1573  * and the function returns.
1574  *
1575  * Returns number of bytes read.
1576  *
1577  * See kernel/trace/trace.h for 'struct trace_parser' details.
1578  */
1579 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1580         size_t cnt, loff_t *ppos)
1581 {
1582         char ch;
1583         size_t read = 0;
1584         ssize_t ret;
1585
1586         if (!*ppos)
1587                 trace_parser_clear(parser);
1588
1589         ret = get_user(ch, ubuf++);
1590         if (ret)
1591                 goto out;
1592
1593         read++;
1594         cnt--;
1595
1596         /*
1597          * If the parser is not finished with the last write,
1598          * continue reading the user input without skipping spaces.
1599          */
1600         if (!parser->cont) {
1601                 /* skip white space */
1602                 while (cnt && isspace(ch)) {
1603                         ret = get_user(ch, ubuf++);
1604                         if (ret)
1605                                 goto out;
1606                         read++;
1607                         cnt--;
1608                 }
1609
1610                 parser->idx = 0;
1611
1612                 /* only spaces were written */
1613                 if (isspace(ch) || !ch) {
1614                         *ppos += read;
1615                         ret = read;
1616                         goto out;
1617                 }
1618         }
1619
1620         /* read the non-space input */
1621         while (cnt && !isspace(ch) && ch) {
1622                 if (parser->idx < parser->size - 1)
1623                         parser->buffer[parser->idx++] = ch;
1624                 else {
1625                         ret = -EINVAL;
1626                         goto out;
1627                 }
1628                 ret = get_user(ch, ubuf++);
1629                 if (ret)
1630                         goto out;
1631                 read++;
1632                 cnt--;
1633         }
1634
1635         /* We either got finished input or we have to wait for another call. */
1636         if (isspace(ch) || !ch) {
1637                 parser->buffer[parser->idx] = 0;
1638                 parser->cont = false;
1639         } else if (parser->idx < parser->size - 1) {
1640                 parser->cont = true;
1641                 parser->buffer[parser->idx++] = ch;
1642                 /* Make sure the parsed string always terminates with '\0'. */
1643                 parser->buffer[parser->idx] = 0;
1644         } else {
1645                 ret = -EINVAL;
1646                 goto out;
1647         }
1648
1649         *ppos += read;
1650         ret = read;
1651
1652 out:
1653         return ret;
1654 }
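/*
 * Example: for user input "abc def\n", the first call stores "abc" in
 * parser->buffer and returns 4 (the word plus the separating space); the
 * next call, with @ubuf advanced by the caller, yields "def". If a word is
 * split across two writes, parser->cont stays true and the following call
 * appends to the same buffer instead of skipping leading spaces.
 */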
1655
1656 /* TODO add a seq_buf_to_buffer() */
1657 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1658 {
1659         int len;
1660
1661         if (trace_seq_used(s) <= s->seq.readpos)
1662                 return -EBUSY;
1663
1664         len = trace_seq_used(s) - s->seq.readpos;
1665         if (cnt > len)
1666                 cnt = len;
1667         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1668
1669         s->seq.readpos += cnt;
1670         return cnt;
1671 }
1672
1673 unsigned long __read_mostly     tracing_thresh;
1674 static const struct file_operations tracing_max_lat_fops;
1675
1676 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1677         defined(CONFIG_FSNOTIFY)
1678
1679 static struct workqueue_struct *fsnotify_wq;
1680
1681 static void latency_fsnotify_workfn(struct work_struct *work)
1682 {
1683         struct trace_array *tr = container_of(work, struct trace_array,
1684                                               fsnotify_work);
1685         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1686 }
1687
1688 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1689 {
1690         struct trace_array *tr = container_of(iwork, struct trace_array,
1691                                               fsnotify_irqwork);
1692         queue_work(fsnotify_wq, &tr->fsnotify_work);
1693 }
1694
1695 static void trace_create_maxlat_file(struct trace_array *tr,
1696                                      struct dentry *d_tracer)
1697 {
1698         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1699         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1700         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1701                                               d_tracer, &tr->max_latency,
1702                                               &tracing_max_lat_fops);
1703 }
1704
1705 __init static int latency_fsnotify_init(void)
1706 {
1707         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1708                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1709         if (!fsnotify_wq) {
1710                 pr_err("Unable to allocate tr_max_lat_wq\n");
1711                 return -ENOMEM;
1712         }
1713         return 0;
1714 }
1715
1716 late_initcall_sync(latency_fsnotify_init);
1717
1718 void latency_fsnotify(struct trace_array *tr)
1719 {
1720         if (!fsnotify_wq)
1721                 return;
1722         /*
1723          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1724          * possible that we are called from __schedule() or do_idle(), which
1725          * could cause a deadlock.
1726          */
1727         irq_work_queue(&tr->fsnotify_irqwork);
1728 }
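
/*
 * Illustrative sketch (not part of this file) of the same two-stage
 * deferral used above: an irq_work, which is safe to queue from
 * scheduler/idle context, kicks a workqueue item that may sleep.
 * All names below are hypothetical.
 *
 *	static void example_workfn(struct work_struct *work)
 *	{
 *		// sleepable part (e.g. fsnotify/vfs work) goes here
 *	}
 *	static DECLARE_WORK(example_work, example_workfn);
 *
 *	static void example_irq_workfn(struct irq_work *iwork)
 *	{
 *		schedule_work(&example_work);
 *	}
 *	static struct irq_work example_irq_work;
 *
 *	// setup, e.g. in an initcall:
 *	//	init_irq_work(&example_irq_work, example_irq_workfn);
 *	// from a context where queue_work() could deadlock:
 *	//	irq_work_queue(&example_irq_work);
 */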
1729
1730 /*
1731  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1732  *  defined(CONFIG_FSNOTIFY)
1733  */
1734 #else
1735
1736 #define trace_create_maxlat_file(tr, d_tracer)                          \
1737         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1738                           &tr->max_latency, &tracing_max_lat_fops)
1739
1740 #endif
1741
1742 #ifdef CONFIG_TRACER_MAX_TRACE
1743 /*
1744  * Copy the new maximum trace into the separate maximum-trace
1745  * structure. (this way the maximum trace is permanently saved,
1746  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1747  */
1748 static void
1749 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1750 {
1751         struct array_buffer *trace_buf = &tr->array_buffer;
1752         struct array_buffer *max_buf = &tr->max_buffer;
1753         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1754         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1755
1756         max_buf->cpu = cpu;
1757         max_buf->time_start = data->preempt_timestamp;
1758
1759         max_data->saved_latency = tr->max_latency;
1760         max_data->critical_start = data->critical_start;
1761         max_data->critical_end = data->critical_end;
1762
1763         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1764         max_data->pid = tsk->pid;
1765         /*
1766          * If tsk == current, then use current_uid(), as that does not use
1767          * RCU. The irq tracer can be called out of RCU scope.
1768          */
1769         if (tsk == current)
1770                 max_data->uid = current_uid();
1771         else
1772                 max_data->uid = task_uid(tsk);
1773
1774         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1775         max_data->policy = tsk->policy;
1776         max_data->rt_priority = tsk->rt_priority;
1777
1778         /* record this task's comm */
1779         tracing_record_cmdline(tsk);
1780         latency_fsnotify(tr);
1781 }
1782
1783 /**
1784  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1785  * @tr: tracer
1786  * @tsk: the task with the latency
1787  * @cpu: The cpu that initiated the trace.
1788  * @cond_data: User data associated with a conditional snapshot
1789  *
1790  * Flip the buffers between the @tr and the max_tr and record information
1791  * about which task was the cause of this latency.
1792  */
1793 void
1794 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1795               void *cond_data)
1796 {
1797         if (tr->stop_count)
1798                 return;
1799
1800         WARN_ON_ONCE(!irqs_disabled());
1801
1802         if (!tr->allocated_snapshot) {
1803                 /* Only the nop tracer should hit this when disabling */
1804                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1805                 return;
1806         }
1807
1808         arch_spin_lock(&tr->max_lock);
1809
1810         /* Inherit the recordable setting from array_buffer */
1811         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1812                 ring_buffer_record_on(tr->max_buffer.buffer);
1813         else
1814                 ring_buffer_record_off(tr->max_buffer.buffer);
1815
1816 #ifdef CONFIG_TRACER_SNAPSHOT
1817         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1818                 goto out_unlock;
1819 #endif
1820         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1821
1822         __update_max_tr(tr, tsk, cpu);
1823
1824  out_unlock:
1825         arch_spin_unlock(&tr->max_lock);
1826 }
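
/*
 * Illustrative sketch (not part of this file): how a latency tracer
 * records a new maximum.  "delta" and the surrounding check are a
 * simplification of what the irqsoff and wakeup tracers do; interrupts
 * must already be disabled here, per the WARN_ON_ONCE() above.
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */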
1827
1828 /**
1829  * update_max_tr_single - only copy one trace over, and reset the rest
1830  * @tr: tracer
1831  * @tsk: task with the latency
1832  * @cpu: the cpu of the buffer to copy.
1833  *
1834  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1835  */
1836 void
1837 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1838 {
1839         int ret;
1840
1841         if (tr->stop_count)
1842                 return;
1843
1844         WARN_ON_ONCE(!irqs_disabled());
1845         if (!tr->allocated_snapshot) {
1846                 /* Only the nop tracer should hit this when disabling */
1847                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1848                 return;
1849         }
1850
1851         arch_spin_lock(&tr->max_lock);
1852
1853         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1854
1855         if (ret == -EBUSY) {
1856                 /*
1857                  * We failed to swap the buffer due to a commit taking
1858                  * place on this CPU. We fail to record, but we reset
1859                  * the max trace buffer (no one writes directly to it)
1860                  * and flag that it failed.
1861                  */
1862                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1863                         "Failed to swap buffers due to commit in progress\n");
1864         }
1865
1866         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1867
1868         __update_max_tr(tr, tsk, cpu);
1869         arch_spin_unlock(&tr->max_lock);
1870 }
1871 #endif /* CONFIG_TRACER_MAX_TRACE */
1872
1873 static int wait_on_pipe(struct trace_iterator *iter, int full)
1874 {
1875         /* Iterators are static, they should be filled or empty */
1876         if (trace_buffer_iter(iter, iter->cpu_file))
1877                 return 0;
1878
1879         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1880                                 full);
1881 }
1882
1883 #ifdef CONFIG_FTRACE_STARTUP_TEST
1884 static bool selftests_can_run;
1885
1886 struct trace_selftests {
1887         struct list_head                list;
1888         struct tracer                   *type;
1889 };
1890
1891 static LIST_HEAD(postponed_selftests);
1892
1893 static int save_selftest(struct tracer *type)
1894 {
1895         struct trace_selftests *selftest;
1896
1897         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1898         if (!selftest)
1899                 return -ENOMEM;
1900
1901         selftest->type = type;
1902         list_add(&selftest->list, &postponed_selftests);
1903         return 0;
1904 }
1905
1906 static int run_tracer_selftest(struct tracer *type)
1907 {
1908         struct trace_array *tr = &global_trace;
1909         struct tracer *saved_tracer = tr->current_trace;
1910         int ret;
1911
1912         if (!type->selftest || tracing_selftest_disabled)
1913                 return 0;
1914
1915         /*
1916          * If a tracer registers early in boot up (before scheduling is
1917          * initialized and such), then do not run its selftests yet.
1918          * Instead, run it a little later in the boot process.
1919          */
1920         if (!selftests_can_run)
1921                 return save_selftest(type);
1922
1923         /*
1924          * Run a selftest on this tracer.
1925          * Here we reset the trace buffer, and set the current
1926          * tracer to be this tracer. The tracer can then run some
1927          * internal tracing to verify that everything is in order.
1928          * If we fail, we do not register this tracer.
1929          */
1930         tracing_reset_online_cpus(&tr->array_buffer);
1931
1932         tr->current_trace = type;
1933
1934 #ifdef CONFIG_TRACER_MAX_TRACE
1935         if (type->use_max_tr) {
1936                 /* If we expanded the buffers, make sure the max is expanded too */
1937                 if (ring_buffer_expanded)
1938                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1939                                            RING_BUFFER_ALL_CPUS);
1940                 tr->allocated_snapshot = true;
1941         }
1942 #endif
1943
1944         /* the test is responsible for initializing and enabling */
1945         pr_info("Testing tracer %s: ", type->name);
1946         ret = type->selftest(type, tr);
1947         /* the test is responsible for resetting too */
1948         tr->current_trace = saved_tracer;
1949         if (ret) {
1950                 printk(KERN_CONT "FAILED!\n");
1951                 /* Add the warning after printing 'FAILED' */
1952                 WARN_ON(1);
1953                 return -1;
1954         }
1955         /* Only reset on passing, to avoid touching corrupted buffers */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959         if (type->use_max_tr) {
1960                 tr->allocated_snapshot = false;
1961
1962                 /* Shrink the max buffer again */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1965                                            RING_BUFFER_ALL_CPUS);
1966         }
1967 #endif
1968
1969         printk(KERN_CONT "PASSED\n");
1970         return 0;
1971 }
1972
1973 static __init int init_trace_selftests(void)
1974 {
1975         struct trace_selftests *p, *n;
1976         struct tracer *t, **last;
1977         int ret;
1978
1979         selftests_can_run = true;
1980
1981         mutex_lock(&trace_types_lock);
1982
1983         if (list_empty(&postponed_selftests))
1984                 goto out;
1985
1986         pr_info("Running postponed tracer tests:\n");
1987
1988         tracing_selftest_running = true;
1989         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1990                 /* This loop can take minutes when sanitizers are enabled, so
1991                  * let's make sure we allow RCU processing.
1992                  */
1993                 cond_resched();
1994                 ret = run_tracer_selftest(p->type);
1995                 /* If the test fails, then warn and remove from available_tracers */
1996                 if (ret < 0) {
1997                         WARN(1, "tracer: %s failed selftest, disabling\n",
1998                              p->type->name);
1999                         last = &trace_types;
2000                         for (t = trace_types; t; t = t->next) {
2001                                 if (t == p->type) {
2002                                         *last = t->next;
2003                                         break;
2004                                 }
2005                                 last = &t->next;
2006                         }
2007                 }
2008                 list_del(&p->list);
2009                 kfree(p);
2010         }
2011         tracing_selftest_running = false;
2012
2013  out:
2014         mutex_unlock(&trace_types_lock);
2015
2016         return 0;
2017 }
2018 core_initcall(init_trace_selftests);
2019 #else
2020 static inline int run_tracer_selftest(struct tracer *type)
2021 {
2022         return 0;
2023 }
2024 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2025
2026 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2027
2028 static void __init apply_trace_boot_options(void);
2029
2030 /**
2031  * register_tracer - register a tracer with the ftrace system.
2032  * @type: the plugin for the tracer
2033  *
2034  * Register a new plugin tracer.
2035  */
2036 int __init register_tracer(struct tracer *type)
2037 {
2038         struct tracer *t;
2039         int ret = 0;
2040
2041         if (!type->name) {
2042                 pr_info("Tracer must have a name\n");
2043                 return -1;
2044         }
2045
2046         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2047                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2048                 return -1;
2049         }
2050
2051         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2052                 pr_warn("Can not register tracer %s due to lockdown\n",
2053                            type->name);
2054                 return -EPERM;
2055         }
2056
2057         mutex_lock(&trace_types_lock);
2058
2059         tracing_selftest_running = true;
2060
2061         for (t = trace_types; t; t = t->next) {
2062                 if (strcmp(type->name, t->name) == 0) {
2063                         /* already found */
2064                         pr_info("Tracer %s already registered\n",
2065                                 type->name);
2066                         ret = -1;
2067                         goto out;
2068                 }
2069         }
2070
2071         if (!type->set_flag)
2072                 type->set_flag = &dummy_set_flag;
2073         if (!type->flags) {
2074                 /* allocate a dummy tracer_flags */
2075                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2076                 if (!type->flags) {
2077                         ret = -ENOMEM;
2078                         goto out;
2079                 }
2080                 type->flags->val = 0;
2081                 type->flags->opts = dummy_tracer_opt;
2082         } else
2083                 if (!type->flags->opts)
2084                         type->flags->opts = dummy_tracer_opt;
2085
2086         /* store the tracer for __set_tracer_option */
2087         type->flags->trace = type;
2088
2089         ret = run_tracer_selftest(type);
2090         if (ret < 0)
2091                 goto out;
2092
2093         type->next = trace_types;
2094         trace_types = type;
2095         add_tracer_options(&global_trace, type);
2096
2097  out:
2098         tracing_selftest_running = false;
2099         mutex_unlock(&trace_types_lock);
2100
2101         if (ret || !default_bootup_tracer)
2102                 goto out_unlock;
2103
2104         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2105                 goto out_unlock;
2106
2107         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2108         /* Do we want this tracer to start on bootup? */
2109         tracing_set_tracer(&global_trace, type->name);
2110         default_bootup_tracer = NULL;
2111
2112         apply_trace_boot_options();
2113
2114         /* Disable other selftests; running them now would break this tracer. */
2115         tracing_selftest_disabled = true;
2116 #ifdef CONFIG_FTRACE_STARTUP_TEST
2117         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2118                type->name);
2119 #endif
2120
2121  out_unlock:
2122         return ret;
2123 }
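
/*
 * Illustrative sketch (not part of this file): the smallest useful
 * tracer registration.  The tracer and function names are
 * hypothetical; see trace_nop.c for the real minimal tracer.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */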
2124
2125 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2126 {
2127         struct trace_buffer *buffer = buf->buffer;
2128
2129         if (!buffer)
2130                 return;
2131
2132         ring_buffer_record_disable(buffer);
2133
2134         /* Make sure all commits have finished */
2135         synchronize_rcu();
2136         ring_buffer_reset_cpu(buffer, cpu);
2137
2138         ring_buffer_record_enable(buffer);
2139 }
2140
2141 void tracing_reset_online_cpus(struct array_buffer *buf)
2142 {
2143         struct trace_buffer *buffer = buf->buffer;
2144
2145         if (!buffer)
2146                 return;
2147
2148         ring_buffer_record_disable(buffer);
2149
2150         /* Make sure all commits have finished */
2151         synchronize_rcu();
2152
2153         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2154
2155         ring_buffer_reset_online_cpus(buffer);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 /* Must have trace_types_lock held */
2161 void tracing_reset_all_online_cpus(void)
2162 {
2163         struct trace_array *tr;
2164
2165         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2166                 if (!tr->clear_trace)
2167                         continue;
2168                 tr->clear_trace = false;
2169                 tracing_reset_online_cpus(&tr->array_buffer);
2170 #ifdef CONFIG_TRACER_MAX_TRACE
2171                 tracing_reset_online_cpus(&tr->max_buffer);
2172 #endif
2173         }
2174 }
2175
2176 static int *tgid_map;
2177
2178 #define SAVED_CMDLINES_DEFAULT 128
2179 #define NO_CMDLINE_MAP UINT_MAX
2180 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2181 struct saved_cmdlines_buffer {
2182         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2183         unsigned *map_cmdline_to_pid;
2184         unsigned cmdline_num;
2185         int cmdline_idx;
2186         char *saved_cmdlines;
2187 };
2188 static struct saved_cmdlines_buffer *savedcmd;
2189
2190 /* temporary disable recording */
2191 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2192
2193 static inline char *get_saved_cmdlines(int idx)
2194 {
2195         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2196 }
2197
2198 static inline void set_cmdline(int idx, const char *cmdline)
2199 {
2200         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2201 }
2202
2203 static int allocate_cmdlines_buffer(unsigned int val,
2204                                     struct saved_cmdlines_buffer *s)
2205 {
2206         s->map_cmdline_to_pid = kmalloc_array(val,
2207                                               sizeof(*s->map_cmdline_to_pid),
2208                                               GFP_KERNEL);
2209         if (!s->map_cmdline_to_pid)
2210                 return -ENOMEM;
2211
2212         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2213         if (!s->saved_cmdlines) {
2214                 kfree(s->map_cmdline_to_pid);
2215                 return -ENOMEM;
2216         }
2217
2218         s->cmdline_idx = 0;
2219         s->cmdline_num = val;
2220         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2221                sizeof(s->map_pid_to_cmdline));
2222         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2223                val * sizeof(*s->map_cmdline_to_pid));
2224
2225         return 0;
2226 }
2227
2228 static int trace_create_savedcmd(void)
2229 {
2230         int ret;
2231
2232         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2233         if (!savedcmd)
2234                 return -ENOMEM;
2235
2236         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2237         if (ret < 0) {
2238                 kfree(savedcmd);
2239                 savedcmd = NULL;
2240                 return -ENOMEM;
2241         }
2242
2243         return 0;
2244 }
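
/*
 * Rough sizing of the default allocation above, assuming the usual
 * TASK_COMM_LEN == 16 and PID_MAX_DEFAULT == 0x8000 (32768):
 *
 *	saved_cmdlines:      128 * 16 bytes                  =    2 KiB
 *	map_cmdline_to_pid:  128 * sizeof(unsigned)          =  512 bytes
 *	map_pid_to_cmdline:  (32768 + 1) * sizeof(unsigned)  ~= 128 KiB
 *			     (embedded in struct saved_cmdlines_buffer itself)
 */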
2245
2246 int is_tracing_stopped(void)
2247 {
2248         return global_trace.stop_count;
2249 }
2250
2251 /**
2252  * tracing_start - quick start of the tracer
2253  *
2254  * If tracing is enabled but was stopped by tracing_stop,
2255  * this will start the tracer back up.
2256  */
2257 void tracing_start(void)
2258 {
2259         struct trace_buffer *buffer;
2260         unsigned long flags;
2261
2262         if (tracing_disabled)
2263                 return;
2264
2265         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2266         if (--global_trace.stop_count) {
2267                 if (global_trace.stop_count < 0) {
2268                         /* Someone screwed up their debugging */
2269                         WARN_ON_ONCE(1);
2270                         global_trace.stop_count = 0;
2271                 }
2272                 goto out;
2273         }
2274
2275         /* Prevent the buffers from switching */
2276         arch_spin_lock(&global_trace.max_lock);
2277
2278         buffer = global_trace.array_buffer.buffer;
2279         if (buffer)
2280                 ring_buffer_record_enable(buffer);
2281
2282 #ifdef CONFIG_TRACER_MAX_TRACE
2283         buffer = global_trace.max_buffer.buffer;
2284         if (buffer)
2285                 ring_buffer_record_enable(buffer);
2286 #endif
2287
2288         arch_spin_unlock(&global_trace.max_lock);
2289
2290  out:
2291         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2292 }
2293
2294 static void tracing_start_tr(struct trace_array *tr)
2295 {
2296         struct trace_buffer *buffer;
2297         unsigned long flags;
2298
2299         if (tracing_disabled)
2300                 return;
2301
2302         /* If global, we need to also start the max tracer */
2303         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2304                 return tracing_start();
2305
2306         raw_spin_lock_irqsave(&tr->start_lock, flags);
2307
2308         if (--tr->stop_count) {
2309                 if (tr->stop_count < 0) {
2310                         /* Someone screwed up their debugging */
2311                         WARN_ON_ONCE(1);
2312                         tr->stop_count = 0;
2313                 }
2314                 goto out;
2315         }
2316
2317         buffer = tr->array_buffer.buffer;
2318         if (buffer)
2319                 ring_buffer_record_enable(buffer);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2323 }
2324
2325 /**
2326  * tracing_stop - quick stop of the tracer
2327  *
2328  * Light weight way to stop tracing. Use in conjunction with
2329  * tracing_start.
2330  */
2331 void tracing_stop(void)
2332 {
2333         struct trace_buffer *buffer;
2334         unsigned long flags;
2335
2336         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2337         if (global_trace.stop_count++)
2338                 goto out;
2339
2340         /* Prevent the buffers from switching */
2341         arch_spin_lock(&global_trace.max_lock);
2342
2343         buffer = global_trace.array_buffer.buffer;
2344         if (buffer)
2345                 ring_buffer_record_disable(buffer);
2346
2347 #ifdef CONFIG_TRACER_MAX_TRACE
2348         buffer = global_trace.max_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_disable(buffer);
2351 #endif
2352
2353         arch_spin_unlock(&global_trace.max_lock);
2354
2355  out:
2356         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2357 }
2358
2359 static void tracing_stop_tr(struct trace_array *tr)
2360 {
2361         struct trace_buffer *buffer;
2362         unsigned long flags;
2363
2364         /* If global, we need to also stop the max tracer */
2365         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366                 return tracing_stop();
2367
2368         raw_spin_lock_irqsave(&tr->start_lock, flags);
2369         if (tr->stop_count++)
2370                 goto out;
2371
2372         buffer = tr->array_buffer.buffer;
2373         if (buffer)
2374                 ring_buffer_record_disable(buffer);
2375
2376  out:
2377         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2378 }
2379
2380 static int trace_save_cmdline(struct task_struct *tsk)
2381 {
2382         unsigned pid, idx;
2383
2384         /* treat recording of idle task as a success */
2385         if (!tsk->pid)
2386                 return 1;
2387
2388         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2389                 return 0;
2390
2391         /*
2392          * It's not the end of the world if we don't get
2393          * the lock, but we also don't want to spin
2394          * nor do we want to disable interrupts,
2395          * so if we miss here, then better luck next time.
2396          */
2397         if (!arch_spin_trylock(&trace_cmdline_lock))
2398                 return 0;
2399
2400         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2401         if (idx == NO_CMDLINE_MAP) {
2402                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2403
2404                 /*
2405                  * Check whether the cmdline buffer at idx has a pid
2406                  * mapped. We are going to overwrite that entry so we
2407                  * need to clear the map_pid_to_cmdline. Otherwise we
2408                  * would read the new comm for the old pid.
2409                  */
2410                 pid = savedcmd->map_cmdline_to_pid[idx];
2411                 if (pid != NO_CMDLINE_MAP)
2412                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2413
2414                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2415                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2416
2417                 savedcmd->cmdline_idx = idx;
2418         }
2419
2420         set_cmdline(idx, tsk->comm);
2421
2422         arch_spin_unlock(&trace_cmdline_lock);
2423
2424         return 1;
2425 }
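
/*
 * Worked example of the eviction above, with hypothetical numbers:
 * a new pid 4242 has no slot and cmdline_idx is 7, so it takes slot 8
 * of the 128-entry ring.  If slot 8 previously belonged to pid 1000,
 * map_pid_to_cmdline[1000] is reset to NO_CMDLINE_MAP first, so a
 * later lookup of pid 1000 reports "<...>" instead of pid 4242's comm.
 */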
2426
2427 static void __trace_find_cmdline(int pid, char comm[])
2428 {
2429         unsigned map;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         if (pid > PID_MAX_DEFAULT) {
2442                 strcpy(comm, "<...>");
2443                 return;
2444         }
2445
2446         map = savedcmd->map_pid_to_cmdline[pid];
2447         if (map != NO_CMDLINE_MAP)
2448                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2449         else
2450                 strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry again soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry again soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record a specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
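
/*
 * Illustrative sketch (not part of this file): a trace_event output
 * callback typically ends with trace_handle_return() so that an
 * overflowing trace_seq is reported as a partial line.  The callback
 * name and the printed fields are hypothetical.
 *
 *	static enum print_line_t
 *	example_output(struct trace_iterator *iter, int flags,
 *		       struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example: %d\n", 42);
 *		return trace_handle_return(s);
 *	}
 */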
2578
2579 void
2580 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2581                              unsigned long flags, int pc)
2582 {
2583         struct task_struct *tsk = current;
2584
2585         entry->preempt_count            = pc & 0xff;
2586         entry->pid                      = (tsk) ? tsk->pid : 0;
2587         entry->type                     = type;
2588         entry->flags =
2589 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2590                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2591 #else
2592                 TRACE_FLAG_IRQS_NOSUPPORT |
2593 #endif
2594                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2595                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2596                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2597                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2598                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2599 }
2600 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2601
2602 struct ring_buffer_event *
2603 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2604                           int type,
2605                           unsigned long len,
2606                           unsigned long flags, int pc)
2607 {
2608         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2609 }
2610
2611 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2612 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2613 static int trace_buffered_event_ref;
2614
2615 /**
2616  * trace_buffered_event_enable - enable buffering events
2617  *
2618  * When events are being filtered, it is quicker to use a temporary
2619  * buffer to write the event data into if there's a likely chance
2620  * that it will not be committed. The discard of the ring buffer
2621  * is not as fast as committing, and is much slower than copying
2622  * a commit.
2623  *
2624  * When an event is to be filtered, allocate per cpu buffers to
2625  * write the event data into, and if the event is filtered and discarded
2626  * it is simply dropped, otherwise, the entire data is to be committed
2627  * in one shot.
2628  */
2629 void trace_buffered_event_enable(void)
2630 {
2631         struct ring_buffer_event *event;
2632         struct page *page;
2633         int cpu;
2634
2635         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2636
2637         if (trace_buffered_event_ref++)
2638                 return;
2639
2640         for_each_tracing_cpu(cpu) {
2641                 page = alloc_pages_node(cpu_to_node(cpu),
2642                                         GFP_KERNEL | __GFP_NORETRY, 0);
2643                 if (!page)
2644                         goto failed;
2645
2646                 event = page_address(page);
2647                 memset(event, 0, sizeof(*event));
2648
2649                 per_cpu(trace_buffered_event, cpu) = event;
2650
2651                 preempt_disable();
2652                 if (cpu == smp_processor_id() &&
2653                     __this_cpu_read(trace_buffered_event) !=
2654                     per_cpu(trace_buffered_event, cpu))
2655                         WARN_ON_ONCE(1);
2656                 preempt_enable();
2657         }
2658
2659         return;
2660  failed:
2661         trace_buffered_event_disable();
2662 }
2663
2664 static void enable_trace_buffered_event(void *data)
2665 {
2666         /* Probably not needed, but do it anyway */
2667         smp_rmb();
2668         this_cpu_dec(trace_buffered_event_cnt);
2669 }
2670
2671 static void disable_trace_buffered_event(void *data)
2672 {
2673         this_cpu_inc(trace_buffered_event_cnt);
2674 }
2675
2676 /**
2677  * trace_buffered_event_disable - disable buffering events
2678  *
2679  * When a filter is removed, it is faster to not use the buffered
2680  * events, and to commit directly into the ring buffer. Free up
2681  * the temp buffers when there are no more users. This requires
2682  * special synchronization with current events.
2683  */
2684 void trace_buffered_event_disable(void)
2685 {
2686         int cpu;
2687
2688         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2689
2690         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2691                 return;
2692
2693         if (--trace_buffered_event_ref)
2694                 return;
2695
2696         preempt_disable();
2697         /* For each CPU, set the buffer as used. */
2698         smp_call_function_many(tracing_buffer_mask,
2699                                disable_trace_buffered_event, NULL, 1);
2700         preempt_enable();
2701
2702         /* Wait for all current users to finish */
2703         synchronize_rcu();
2704
2705         for_each_tracing_cpu(cpu) {
2706                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2707                 per_cpu(trace_buffered_event, cpu) = NULL;
2708         }
2709         /*
2710          * Make sure trace_buffered_event is NULL before clearing
2711          * trace_buffered_event_cnt.
2712          */
2713         smp_wmb();
2714
2715         preempt_disable();
2716         /* Do the work on each cpu */
2717         smp_call_function_many(tracing_buffer_mask,
2718                                enable_trace_buffered_event, NULL, 1);
2719         preempt_enable();
2720 }
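
/*
 * Illustrative sketch (not part of this file) of the intended pairing.
 * The real callers sit on the event enable/filter paths and hold
 * event_mutex, as the WARN_ON_ONCE() checks above require:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// first user appears
 *	...
 *	trace_buffered_event_disable();		// last user goes away
 *	mutex_unlock(&event_mutex);
 */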
2721
2722 static struct trace_buffer *temp_buffer;
2723
2724 struct ring_buffer_event *
2725 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2726                           struct trace_event_file *trace_file,
2727                           int type, unsigned long len,
2728                           unsigned long flags, int pc)
2729 {
2730         struct ring_buffer_event *entry;
2731         int val;
2732
2733         *current_rb = trace_file->tr->array_buffer.buffer;
2734
2735         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2736              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2737             (entry = this_cpu_read(trace_buffered_event))) {
2738                 /* Try to use the per cpu buffer first */
2739                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2740                 if (val == 1) {
2741                         trace_event_setup(entry, type, flags, pc);
2742                         entry->array[0] = len;
2743                         return entry;
2744                 }
2745                 this_cpu_dec(trace_buffered_event_cnt);
2746         }
2747
2748         entry = __trace_buffer_lock_reserve(*current_rb,
2749                                             type, len, flags, pc);
2750         /*
2751          * If tracing is off, but we have triggers enabled
2752          * we still need to look at the event data. Use the temp_buffer
2753          * to store the trace event for the trigger to use. It's recursion
2754          * safe and will not be recorded anywhere.
2755          */
2756         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757                 *current_rb = temp_buffer;
2758                 entry = __trace_buffer_lock_reserve(*current_rb,
2759                                                     type, len, flags, pc);
2760         }
2761         return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764
2765 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770         struct trace_event_call *event_call;
2771         struct trace_event_file *file;
2772         struct trace_event *event;
2773         unsigned long flags;
2774         struct trace_iterator *iter = tracepoint_print_iter;
2775
2776         /* We should never get here if iter is NULL */
2777         if (WARN_ON_ONCE(!iter))
2778                 return;
2779
2780         event_call = fbuffer->trace_file->event_call;
2781         if (!event_call || !event_call->event.funcs ||
2782             !event_call->event.funcs->trace)
2783                 return;
2784
2785         file = fbuffer->trace_file;
2786         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788              !filter_match_preds(file->filter, fbuffer->entry)))
2789                 return;
2790
2791         event = &fbuffer->trace_file->event_call->event;
2792
2793         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794         trace_seq_init(&iter->seq);
2795         iter->ent = fbuffer->entry;
2796         event_call->event.funcs->trace(iter, 0, event);
2797         trace_seq_putc(&iter->seq, 0);
2798         printk("%s", iter->seq.buffer);
2799
2800         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802
2803 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2804                              void *buffer, size_t *lenp,
2805                              loff_t *ppos)
2806 {
2807         int save_tracepoint_printk;
2808         int ret;
2809
2810         mutex_lock(&tracepoint_printk_mutex);
2811         save_tracepoint_printk = tracepoint_printk;
2812
2813         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814
2815         /*
2816          * This will force exiting early, as tracepoint_printk
2817          * is always zero when tracepoint_print_iter is not allocated.
2818          */
2819         if (!tracepoint_print_iter)
2820                 tracepoint_printk = 0;
2821
2822         if (save_tracepoint_printk == tracepoint_printk)
2823                 goto out;
2824
2825         if (tracepoint_printk)
2826                 static_key_enable(&tracepoint_printk_key.key);
2827         else
2828                 static_key_disable(&tracepoint_printk_key.key);
2829
2830  out:
2831         mutex_unlock(&tracepoint_printk_mutex);
2832
2833         return ret;
2834 }
2835
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838         if (static_key_false(&tracepoint_printk_key.key))
2839                 output_printk(fbuffer);
2840
2841         if (static_branch_unlikely(&trace_event_exports_enabled))
2842                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2843         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2844                                     fbuffer->event, fbuffer->entry,
2845                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2846 }
2847 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2848
2849 /*
2850  * Skip 3:
2851  *
2852  *   trace_buffer_unlock_commit_regs()
2853  *   trace_event_buffer_commit()
2854  *   trace_event_raw_event_xxx()
2855  */
2856 # define STACK_SKIP 3
2857
2858 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2859                                      struct trace_buffer *buffer,
2860                                      struct ring_buffer_event *event,
2861                                      unsigned long flags, int pc,
2862                                      struct pt_regs *regs)
2863 {
2864         __buffer_unlock_commit(buffer, event);
2865
2866         /*
2867          * If regs is not set, then skip the necessary functions.
2868          * Note, we can still get here via blktrace, wakeup tracer
2869          * and mmiotrace, but that's ok if they lose a function or
2870          * two. They are not that meaningful.
2871          */
2872         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2873         ftrace_trace_userstack(buffer, flags, pc);
2874 }
2875
2876 /*
2877  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2878  */
2879 void
2880 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2881                                    struct ring_buffer_event *event)
2882 {
2883         __buffer_unlock_commit(buffer, event);
2884 }
2885
2886 void
2887 trace_function(struct trace_array *tr,
2888                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2889                int pc)
2890 {
2891         struct trace_event_call *call = &event_function;
2892         struct trace_buffer *buffer = tr->array_buffer.buffer;
2893         struct ring_buffer_event *event;
2894         struct ftrace_entry *entry;
2895
2896         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2897                                             flags, pc);
2898         if (!event)
2899                 return;
2900         entry   = ring_buffer_event_data(event);
2901         entry->ip                       = ip;
2902         entry->parent_ip                = parent_ip;
2903
2904         if (!call_filter_check_discard(call, entry, buffer, event)) {
2905                 if (static_branch_unlikely(&trace_function_exports_enabled))
2906                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2907                 __buffer_unlock_commit(buffer, event);
2908         }
2909 }
2910
2911 #ifdef CONFIG_STACKTRACE
2912
2913 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2914 #define FTRACE_KSTACK_NESTING   4
2915
2916 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2917
2918 struct ftrace_stack {
2919         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2920 };
2921
2922
2923 struct ftrace_stacks {
2924         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2925 };
2926
2927 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2928 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2929
2930 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2931                                  unsigned long flags,
2932                                  int skip, int pc, struct pt_regs *regs)
2933 {
2934         struct trace_event_call *call = &event_kernel_stack;
2935         struct ring_buffer_event *event;
2936         unsigned int size, nr_entries;
2937         struct ftrace_stack *fstack;
2938         struct stack_entry *entry;
2939         int stackidx;
2940
2941         /*
2942          * Add one, for this function and the call to stack_trace_save().
2943          * If regs is set, then these functions will not be in the way.
2944          */
2945 #ifndef CONFIG_UNWINDER_ORC
2946         if (!regs)
2947                 skip++;
2948 #endif
2949
2950         preempt_disable_notrace();
2951
2952         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2953
2954         /* This should never happen. If it does, yell once and skip */
2955         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2956                 goto out;
2957
2958         /*
2959          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2960          * interrupt will either see the value pre increment or post
2961          * increment. If the interrupt happens pre increment it will have
2962          * restored the counter when it returns.  We just need a barrier to
2963          * keep gcc from moving things around.
2964          */
2965         barrier();
2966
2967         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2968         size = ARRAY_SIZE(fstack->calls);
2969
2970         if (regs) {
2971                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2972                                                    size, skip);
2973         } else {
2974                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2975         }
2976
2977         size = nr_entries * sizeof(unsigned long);
2978         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2979                                             sizeof(*entry) + size, flags, pc);
2980         if (!event)
2981                 goto out;
2982         entry = ring_buffer_event_data(event);
2983
2984         memcpy(&entry->caller, fstack->calls, size);
2985         entry->size = nr_entries;
2986
2987         if (!call_filter_check_discard(call, entry, buffer, event))
2988                 __buffer_unlock_commit(buffer, event);
2989
2990  out:
2991         /* Again, don't let gcc optimize things here */
2992         barrier();
2993         __this_cpu_dec(ftrace_stack_reserve);
2994         preempt_enable_notrace();
2995
2996 }
2997
2998 static inline void ftrace_trace_stack(struct trace_array *tr,
2999                                       struct trace_buffer *buffer,
3000                                       unsigned long flags,
3001                                       int skip, int pc, struct pt_regs *regs)
3002 {
3003         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3004                 return;
3005
3006         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3007 }
3008
3009 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3010                    int pc)
3011 {
3012         struct trace_buffer *buffer = tr->array_buffer.buffer;
3013
3014         if (rcu_is_watching()) {
3015                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3016                 return;
3017         }
3018
3019         /*
3020          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3021          * but if the above rcu_is_watching() failed, then the NMI
3022          * triggered someplace critical, and rcu_irq_enter() should
3023          * not be called from NMI.
3024          */
3025         if (unlikely(in_nmi()))
3026                 return;
3027
3028         rcu_irq_enter_irqson();
3029         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3030         rcu_irq_exit_irqson();
3031 }
3032
3033 /**
3034  * trace_dump_stack - record a stack back trace in the trace buffer
3035  * @skip: Number of functions to skip (helper handlers)
3036  */
3037 void trace_dump_stack(int skip)
3038 {
3039         unsigned long flags;
3040
3041         if (tracing_disabled || tracing_selftest_running)
3042                 return;
3043
3044         local_save_flags(flags);
3045
3046 #ifndef CONFIG_UNWINDER_ORC
3047         /* Skip 1 to skip this function. */
3048         skip++;
3049 #endif
3050         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3051                              flags, skip, preempt_count(), NULL);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_dump_stack);
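
/*
 * Illustrative sketch (not part of this file): dumping the current
 * stack into the ring buffer while debugging a driver; the condition
 * is hypothetical.  Read the result from /sys/kernel/tracing/trace.
 *
 *	if (unexpected_state)
 *		trace_dump_stack(0);
 */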
3054
3055 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3056 static DEFINE_PER_CPU(int, user_stack_count);
3057
3058 static void
3059 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3060 {
3061         struct trace_event_call *call = &event_user_stack;
3062         struct ring_buffer_event *event;
3063         struct userstack_entry *entry;
3064
3065         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3066                 return;
3067
3068         /*
3069          * NMIs cannot handle page faults, even with fixups.
3070          * The save user stack can (and often does) fault.
3071          */
3072         if (unlikely(in_nmi()))
3073                 return;
3074
3075         /*
3076          * prevent recursion, since the user stack tracing may
3077          * trigger other kernel events.
3078          */
3079         preempt_disable();
3080         if (__this_cpu_read(user_stack_count))
3081                 goto out;
3082
3083         __this_cpu_inc(user_stack_count);
3084
3085         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3086                                             sizeof(*entry), flags, pc);
3087         if (!event)
3088                 goto out_drop_count;
3089         entry   = ring_buffer_event_data(event);
3090
3091         entry->tgid             = current->tgid;
3092         memset(&entry->caller, 0, sizeof(entry->caller));
3093
3094         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3095         if (!call_filter_check_discard(call, entry, buffer, event))
3096                 __buffer_unlock_commit(buffer, event);
3097
3098  out_drop_count:
3099         __this_cpu_dec(user_stack_count);
3100  out:
3101         preempt_enable();
3102 }
3103 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3104 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3105                                    unsigned long flags, int pc)
3106 {
3107 }
3108 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3109
3110 #endif /* CONFIG_STACKTRACE */
3111
3112 /* created for use with alloc_percpu */
3113 struct trace_buffer_struct {
3114         int nesting;
3115         char buffer[4][TRACE_BUF_SIZE];
3116 };
3117
3118 static struct trace_buffer_struct *trace_percpu_buffer;
3119
3120 /*
3121  * This allows for lockless recording.  If we're nested too deeply, then
3122  * this returns NULL.
3123  */
3124 static char *get_trace_buf(void)
3125 {
3126         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3127
3128         if (!buffer || buffer->nesting >= 4)
3129                 return NULL;
3130
3131         buffer->nesting++;
3132
3133         /* Interrupts must see nesting incremented before we use the buffer */
3134         barrier();
3135         return &buffer->buffer[buffer->nesting][0];
3136 }
3137
3138 static void put_trace_buf(void)
3139 {
3140         /* Don't let the decrement of nesting leak before this */
3141         barrier();
3142         this_cpu_dec(trace_percpu_buffer->nesting);
3143 }
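
/*
 * Illustrative sketch (not part of this file) of the intended pairing,
 * which the trace_printk() paths below follow: a failed get must skip
 * the put, and a successful get must be matched by exactly one put.
 *
 *	char *buf = get_trace_buf();
 *
 *	if (!buf)
 *		goto out_nobuffer;
 *	len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *	...
 *	put_trace_buf();
 */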
3144
3145 static int alloc_percpu_trace_buffer(void)
3146 {
3147         struct trace_buffer_struct *buffers;
3148
3149         if (trace_percpu_buffer)
3150                 return 0;
3151
3152         buffers = alloc_percpu(struct trace_buffer_struct);
3153         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3154                 return -ENOMEM;
3155
3156         trace_percpu_buffer = buffers;
3157         return 0;
3158 }
3159
3160 static int buffers_allocated;
3161
3162 void trace_printk_init_buffers(void)
3163 {
3164         if (buffers_allocated)
3165                 return;
3166
3167         if (alloc_percpu_trace_buffer())
3168                 return;
3169
3170         /* trace_printk() is for debug use only. Don't use it in production. */
3171
3172         pr_warn("\n");
3173         pr_warn("**********************************************************\n");
3174         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3175         pr_warn("**                                                      **\n");
3176         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3177         pr_warn("**                                                      **\n");
3178         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3179         pr_warn("** unsafe for production use.                           **\n");
3180         pr_warn("**                                                      **\n");
3181         pr_warn("** If you see this message and you are not debugging    **\n");
3182         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3183         pr_warn("**                                                      **\n");
3184         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185         pr_warn("**********************************************************\n");
3186
3187         /* Expand the buffers to set size */
3188         tracing_update_buffers();
3189
3190         buffers_allocated = 1;
3191
3192         /*
3193          * trace_printk_init_buffers() can be called by modules.
3194          * If that happens, then we need to start cmdline recording
3195          * directly here. If the global_trace.buffer is already
3196          * allocated here, then this was called by module code.
3197          */
3198         if (global_trace.array_buffer.buffer)
3199                 tracing_start_cmdline_record();
3200 }
3201 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3202
3203 void trace_printk_start_comm(void)
3204 {
3205         /* Start tracing comms if trace printk is set */
3206         if (!buffers_allocated)
3207                 return;
3208         tracing_start_cmdline_record();
3209 }
3210
3211 static void trace_printk_start_stop_comm(int enabled)
3212 {
3213         if (!buffers_allocated)
3214                 return;
3215
3216         if (enabled)
3217                 tracing_start_cmdline_record();
3218         else
3219                 tracing_stop_cmdline_record();
3220 }
3221
3222 /**
3223  * trace_vbprintk - write binary msg to tracing buffer
3224  * @ip:    The address of the caller
3225  * @fmt:   The string format to write to the buffer
3226  * @args:  Arguments for @fmt
3227  */
3228 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3229 {
3230         struct trace_event_call *call = &event_bprint;
3231         struct ring_buffer_event *event;
3232         struct trace_buffer *buffer;
3233         struct trace_array *tr = &global_trace;
3234         struct bprint_entry *entry;
3235         unsigned long flags;
3236         char *tbuffer;
3237         int len = 0, size, pc;
3238
3239         if (unlikely(tracing_selftest_running || tracing_disabled))
3240                 return 0;
3241
3242         /* Don't pollute graph traces with trace_vprintk internals */
3243         pause_graph_tracing();
3244
3245         pc = preempt_count();
3246         preempt_disable_notrace();
3247
3248         tbuffer = get_trace_buf();
3249         if (!tbuffer) {
3250                 len = 0;
3251                 goto out_nobuffer;
3252         }
3253
3254         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3255
3256         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3257                 goto out_put;
3258
3259         local_save_flags(flags);
3260         size = sizeof(*entry) + sizeof(u32) * len;
3261         buffer = tr->array_buffer.buffer;
3262         ring_buffer_nest_start(buffer);
3263         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3264                                             flags, pc);
3265         if (!event)
3266                 goto out;
3267         entry = ring_buffer_event_data(event);
3268         entry->ip                       = ip;
3269         entry->fmt                      = fmt;
3270
3271         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3272         if (!call_filter_check_discard(call, entry, buffer, event)) {
3273                 __buffer_unlock_commit(buffer, event);
3274                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3275         }
3276
3277 out:
3278         ring_buffer_nest_end(buffer);
3279 out_put:
3280         put_trace_buf();
3281
3282 out_nobuffer:
3283         preempt_enable_notrace();
3284         unpause_graph_tracing();
3285
3286         return len;
3287 }
3288 EXPORT_SYMBOL_GPL(trace_vbprintk);
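
/*
 * Illustrative sketch, not compiled: trace_vbprintk() is the va_list
 * backend used by the trace_printk() machinery for binary-encodable
 * formats.  A hypothetical varargs wrapper around it would look like:
 */
#if 0
static __printf(2, 3)
int my_trace_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);
        return ret;
}
#endif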
3289
3290 __printf(3, 0)
3291 static int
3292 __trace_array_vprintk(struct trace_buffer *buffer,
3293                       unsigned long ip, const char *fmt, va_list args)
3294 {
3295         struct trace_event_call *call = &event_print;
3296         struct ring_buffer_event *event;
3297         int len = 0, size, pc;
3298         struct print_entry *entry;
3299         unsigned long flags;
3300         char *tbuffer;
3301
3302         if (tracing_disabled || tracing_selftest_running)
3303                 return 0;
3304
3305         /* Don't pollute graph traces with trace_vprintk internals */
3306         pause_graph_tracing();
3307
3308         pc = preempt_count();
3309         preempt_disable_notrace();
3310
3311
3312         tbuffer = get_trace_buf();
3313         if (!tbuffer) {
3314                 len = 0;
3315                 goto out_nobuffer;
3316         }
3317
3318         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3319
3320         local_save_flags(flags);
3321         size = sizeof(*entry) + len + 1;
3322         ring_buffer_nest_start(buffer);
3323         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3324                                             flags, pc);
3325         if (!event)
3326                 goto out;
3327         entry = ring_buffer_event_data(event);
3328         entry->ip = ip;
3329
3330         memcpy(&entry->buf, tbuffer, len + 1);
3331         if (!call_filter_check_discard(call, entry, buffer, event)) {
3332                 __buffer_unlock_commit(buffer, event);
3333                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3334         }
3335
3336 out:
3337         ring_buffer_nest_end(buffer);
3338         put_trace_buf();
3339
3340 out_nobuffer:
3341         preempt_enable_notrace();
3342         unpause_graph_tracing();
3343
3344         return len;
3345 }
3346
3347 __printf(3, 0)
3348 int trace_array_vprintk(struct trace_array *tr,
3349                         unsigned long ip, const char *fmt, va_list args)
3350 {
3351         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3352 }
3353
3354 /**
3355  * trace_array_printk - Print a message to a specific instance
3356  * @tr: The instance trace_array descriptor
3357  * @ip: The instruction pointer that this is called from.
3358  * @fmt: The format to print (printf format)
3359  *
3360  * If a subsystem sets up its own instance, it may printk strings into
3361  * its tracing instance buffer using this function. Note, this function
3362  * will not write into the top level buffer (use trace_printk() for
3363  * that), as the top level buffer should only contain events that can
3364  * be individually disabled.
3365  * trace_printk() is only meant for debugging a kernel, and should never
3366  * be incorporated into normal use.
3367  *
3368  * trace_array_printk() can be used, as it will not add noise to the
3369  * top level tracing buffer.
3370  *
3371  * Note, trace_array_init_printk() must be called on @tr before this
3372  * can be used.
3373  */
3374 __printf(3, 0)
3375 int trace_array_printk(struct trace_array *tr,
3376                        unsigned long ip, const char *fmt, ...)
3377 {
3378         int ret;
3379         va_list ap;
3380
3381         if (!tr)
3382                 return -ENOENT;
3383
3384         /* This is only allowed for created instances */
3385         if (tr == &global_trace)
3386                 return 0;
3387
3388         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3389                 return 0;
3390
3391         va_start(ap, fmt);
3392         ret = trace_array_vprintk(tr, ip, fmt, ap);
3393         va_end(ap);
3394         return ret;
3395 }
3396 EXPORT_SYMBOL_GPL(trace_array_printk);
3397
3398 /**
3399  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3400  * @tr: The trace array to initialize the buffers for
3401  *
3402  * As trace_array_printk() only writes into instances, they are OK to
3403  * have in the kernel (unlike trace_printk()). This needs to be called
3404  * before trace_array_printk() can be used on a trace_array.
3405  */
3406 int trace_array_init_printk(struct trace_array *tr)
3407 {
3408         if (!tr)
3409                 return -ENOENT;
3410
3411         /* This is only allowed for created instances */
3412         if (tr == &global_trace)
3413                 return -EINVAL;
3414
3415         return alloc_percpu_trace_buffer();
3416 }
3417 EXPORT_SYMBOL_GPL(trace_array_init_printk);
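
/*
 * Illustrative sketch, not compiled: the calling order described in the
 * kernel-doc above for a subsystem that logs into its own instance.  The
 * instance name "my_subsys" and the helper below are made up; the instance
 * is assumed to be obtained with trace_array_get_by_name().
 */
#if 0
static struct trace_array *my_tr;

static int my_subsys_trace_setup(void)
{
        /* Create the instance (or take a reference if it already exists). */
        my_tr = trace_array_get_by_name("my_subsys");
        if (!my_tr)
                return -ENOMEM;

        /* Must be called before trace_array_printk() can be used on my_tr. */
        if (trace_array_init_printk(my_tr))
                return -ENOMEM;

        trace_array_printk(my_tr, _THIS_IP_, "my_subsys tracing ready\n");
        return 0;
}
#endif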
3418
3419 __printf(3, 4)
3420 int trace_array_printk_buf(struct trace_buffer *buffer,
3421                            unsigned long ip, const char *fmt, ...)
3422 {
3423         int ret;
3424         va_list ap;
3425
3426         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3427                 return 0;
3428
3429         va_start(ap, fmt);
3430         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3431         va_end(ap);
3432         return ret;
3433 }
3434
3435 __printf(2, 0)
3436 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3437 {
3438         return trace_array_vprintk(&global_trace, ip, fmt, args);
3439 }
3440 EXPORT_SYMBOL_GPL(trace_vprintk);
3441
3442 static void trace_iterator_increment(struct trace_iterator *iter)
3443 {
3444         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3445
3446         iter->idx++;
3447         if (buf_iter)
3448                 ring_buffer_iter_advance(buf_iter);
3449 }
3450
3451 static struct trace_entry *
3452 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3453                 unsigned long *lost_events)
3454 {
3455         struct ring_buffer_event *event;
3456         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3457
3458         if (buf_iter) {
3459                 event = ring_buffer_iter_peek(buf_iter, ts);
3460                 if (lost_events)
3461                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3462                                 (unsigned long)-1 : 0;
3463         } else {
3464                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3465                                          lost_events);
3466         }
3467
3468         if (event) {
3469                 iter->ent_size = ring_buffer_event_length(event);
3470                 return ring_buffer_event_data(event);
3471         }
3472         iter->ent_size = 0;
3473         return NULL;
3474 }
3475
3476 static struct trace_entry *
3477 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3478                   unsigned long *missing_events, u64 *ent_ts)
3479 {
3480         struct trace_buffer *buffer = iter->array_buffer->buffer;
3481         struct trace_entry *ent, *next = NULL;
3482         unsigned long lost_events = 0, next_lost = 0;
3483         int cpu_file = iter->cpu_file;
3484         u64 next_ts = 0, ts;
3485         int next_cpu = -1;
3486         int next_size = 0;
3487         int cpu;
3488
3489         /*
3490          * If we are in a per_cpu trace file, don't bother by iterating over
3491          * all cpu and peek directly.
3492          */
3493         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3494                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3495                         return NULL;
3496                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3497                 if (ent_cpu)
3498                         *ent_cpu = cpu_file;
3499
3500                 return ent;
3501         }
3502
3503         for_each_tracing_cpu(cpu) {
3504
3505                 if (ring_buffer_empty_cpu(buffer, cpu))
3506                         continue;
3507
3508                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3509
3510                 /*
3511                  * Pick the entry with the smallest timestamp:
3512                  */
3513                 if (ent && (!next || ts < next_ts)) {
3514                         next = ent;
3515                         next_cpu = cpu;
3516                         next_ts = ts;
3517                         next_lost = lost_events;
3518                         next_size = iter->ent_size;
3519                 }
3520         }
3521
3522         iter->ent_size = next_size;
3523
3524         if (ent_cpu)
3525                 *ent_cpu = next_cpu;
3526
3527         if (ent_ts)
3528                 *ent_ts = next_ts;
3529
3530         if (missing_events)
3531                 *missing_events = next_lost;
3532
3533         return next;
3534 }
3535
3536 #define STATIC_TEMP_BUF_SIZE    128
3537 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3538
3539 /* Find the next real entry, without updating the iterator itself */
3540 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3541                                           int *ent_cpu, u64 *ent_ts)
3542 {
3543         /* __find_next_entry will reset ent_size */
3544         int ent_size = iter->ent_size;
3545         struct trace_entry *entry;
3546
3547         /*
3548          * If called from ftrace_dump(), then the iter->temp buffer
3549          * will be the static_temp_buf and not created from kmalloc.
3550          * If the entry size is greater than the buffer, we cannot
3551          * save it. Just return NULL in that case. This is only
3552          * used to add markers when two consecutive events' time
3553          * stamps have a large delta. See trace_print_lat_context().
3554          */
3555         if (iter->temp == static_temp_buf &&
3556             STATIC_TEMP_BUF_SIZE < ent_size)
3557                 return NULL;
3558
3559         /*
3560          * The __find_next_entry() may call peek_next_entry(), which may
3561          * call ring_buffer_peek() that may make the contents of iter->ent
3562          * undefined. Need to copy iter->ent now.
3563          */
3564         if (iter->ent && iter->ent != iter->temp) {
3565                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3566                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3567                         void *temp;
3568                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3569                         if (!temp)
3570                                 return NULL;
3571                         kfree(iter->temp);
3572                         iter->temp = temp;
3573                         iter->temp_size = iter->ent_size;
3574                 }
3575                 memcpy(iter->temp, iter->ent, iter->ent_size);
3576                 iter->ent = iter->temp;
3577         }
3578         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3579         /* Put back the original ent_size */
3580         iter->ent_size = ent_size;
3581
3582         return entry;
3583 }
3584
3585 /* Find the next real entry, and increment the iterator to the next entry */
3586 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3587 {
3588         iter->ent = __find_next_entry(iter, &iter->cpu,
3589                                       &iter->lost_events, &iter->ts);
3590
3591         if (iter->ent)
3592                 trace_iterator_increment(iter);
3593
3594         return iter->ent ? iter : NULL;
3595 }
3596
3597 static void trace_consume(struct trace_iterator *iter)
3598 {
3599         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3600                             &iter->lost_events);
3601 }
3602
3603 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3604 {
3605         struct trace_iterator *iter = m->private;
3606         int i = (int)*pos;
3607         void *ent;
3608
3609         WARN_ON_ONCE(iter->leftover);
3610
3611         (*pos)++;
3612
3613         /* can't go backwards */
3614         if (iter->idx > i)
3615                 return NULL;
3616
3617         if (iter->idx < 0)
3618                 ent = trace_find_next_entry_inc(iter);
3619         else
3620                 ent = iter;
3621
3622         while (ent && iter->idx < i)
3623                 ent = trace_find_next_entry_inc(iter);
3624
3625         iter->pos = *pos;
3626
3627         return ent;
3628 }
3629
3630 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3631 {
3632         struct ring_buffer_iter *buf_iter;
3633         unsigned long entries = 0;
3634         u64 ts;
3635
3636         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3637
3638         buf_iter = trace_buffer_iter(iter, cpu);
3639         if (!buf_iter)
3640                 return;
3641
3642         ring_buffer_iter_reset(buf_iter);
3643
3644         /*
3645          * We could have the case with the max latency tracers
3646          * that a reset never took place on a cpu. This is evident
3647          * by the timestamp being before the start of the buffer.
3648          */
3649         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3650                 if (ts >= iter->array_buffer->time_start)
3651                         break;
3652                 entries++;
3653                 ring_buffer_iter_advance(buf_iter);
3654         }
3655
3656         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3657 }
3658
3659 /*
3660  * The current tracer is copied to avoid holding a global
3661  * lock all around.
3662  */
3663 static void *s_start(struct seq_file *m, loff_t *pos)
3664 {
3665         struct trace_iterator *iter = m->private;
3666         struct trace_array *tr = iter->tr;
3667         int cpu_file = iter->cpu_file;
3668         void *p = NULL;
3669         loff_t l = 0;
3670         int cpu;
3671
3672         /*
3673          * copy the tracer to avoid using a global lock all around.
3674          * iter->trace is a copy of current_trace, the pointer to the
3675          * name may be used instead of a strcmp(), as iter->trace->name
3676          * will point to the same string as current_trace->name.
3677          */
3678         mutex_lock(&trace_types_lock);
3679         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3680                 *iter->trace = *tr->current_trace;
3681         mutex_unlock(&trace_types_lock);
3682
3683 #ifdef CONFIG_TRACER_MAX_TRACE
3684         if (iter->snapshot && iter->trace->use_max_tr)
3685                 return ERR_PTR(-EBUSY);
3686 #endif
3687
3688         if (!iter->snapshot)
3689                 atomic_inc(&trace_record_taskinfo_disabled);
3690
3691         if (*pos != iter->pos) {
3692                 iter->ent = NULL;
3693                 iter->cpu = 0;
3694                 iter->idx = -1;
3695
3696                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3697                         for_each_tracing_cpu(cpu)
3698                                 tracing_iter_reset(iter, cpu);
3699                 } else
3700                         tracing_iter_reset(iter, cpu_file);
3701
3702                 iter->leftover = 0;
3703                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3704                         ;
3705
3706         } else {
3707                 /*
3708                  * If we overflowed the seq_file before, then we want
3709                  * to just reuse the trace_seq buffer again.
3710                  */
3711                 if (iter->leftover)
3712                         p = iter;
3713                 else {
3714                         l = *pos - 1;
3715                         p = s_next(m, p, &l);
3716                 }
3717         }
3718
3719         trace_event_read_lock();
3720         trace_access_lock(cpu_file);
3721         return p;
3722 }
3723
3724 static void s_stop(struct seq_file *m, void *p)
3725 {
3726         struct trace_iterator *iter = m->private;
3727
3728 #ifdef CONFIG_TRACER_MAX_TRACE
3729         if (iter->snapshot && iter->trace->use_max_tr)
3730                 return;
3731 #endif
3732
3733         if (!iter->snapshot)
3734                 atomic_dec(&trace_record_taskinfo_disabled);
3735
3736         trace_access_unlock(iter->cpu_file);
3737         trace_event_read_unlock();
3738 }
3739
3740 static void
3741 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3742                       unsigned long *entries, int cpu)
3743 {
3744         unsigned long count;
3745
3746         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3747         /*
3748          * If this buffer has skipped entries, then we hold all
3749          * entries for the trace and we need to ignore the
3750          * ones before the time stamp.
3751          */
3752         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3753                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3754                 /* total is the same as the entries */
3755                 *total = count;
3756         } else
3757                 *total = count +
3758                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3759         *entries = count;
3760 }
3761
3762 static void
3763 get_total_entries(struct array_buffer *buf,
3764                   unsigned long *total, unsigned long *entries)
3765 {
3766         unsigned long t, e;
3767         int cpu;
3768
3769         *total = 0;
3770         *entries = 0;
3771
3772         for_each_tracing_cpu(cpu) {
3773                 get_total_entries_cpu(buf, &t, &e, cpu);
3774                 *total += t;
3775                 *entries += e;
3776         }
3777 }
3778
3779 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3780 {
3781         unsigned long total, entries;
3782
3783         if (!tr)
3784                 tr = &global_trace;
3785
3786         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3787
3788         return entries;
3789 }
3790
3791 unsigned long trace_total_entries(struct trace_array *tr)
3792 {
3793         unsigned long total, entries;
3794
3795         if (!tr)
3796                 tr = &global_trace;
3797
3798         get_total_entries(&tr->array_buffer, &total, &entries);
3799
3800         return entries;
3801 }
3802
3803 static void print_lat_help_header(struct seq_file *m)
3804 {
3805         seq_puts(m, "#                    _------=> CPU#            \n"
3806                     "#                   / _-----=> irqs-off        \n"
3807                     "#                  | / _----=> need-resched    \n"
3808                     "#                  || / _---=> hardirq/softirq \n"
3809                     "#                  ||| / _--=> preempt-depth   \n"
3810                     "#                  |||| /     delay            \n"
3811                     "#  cmd     pid     ||||| time  |   caller      \n"
3812                     "#     \\   /        |||||  \\    |   /         \n");
3813 }
3814
3815 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3816 {
3817         unsigned long total;
3818         unsigned long entries;
3819
3820         get_total_entries(buf, &total, &entries);
3821         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3822                    entries, total, num_online_cpus());
3823         seq_puts(m, "#\n");
3824 }
3825
3826 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3827                                    unsigned int flags)
3828 {
3829         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3830
3831         print_event_info(buf, m);
3832
3833         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3834         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3835 }
3836
3837 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3838                                        unsigned int flags)
3839 {
3840         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3841         const char *space = "            ";
3842         int prec = tgid ? 12 : 2;
3843
3844         print_event_info(buf, m);
3845
3846         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3847         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3848         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3849         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3850         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3851         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3852         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3853 }
3854
3855 void
3856 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3857 {
3858         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3859         struct array_buffer *buf = iter->array_buffer;
3860         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3861         struct tracer *type = iter->trace;
3862         unsigned long entries;
3863         unsigned long total;
3864         const char *name = type->name;
3867
3868         get_total_entries(buf, &total, &entries);
3869
3870         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3871                    name, UTS_RELEASE);
3872         seq_puts(m, "# -----------------------------------"
3873                  "---------------------------------\n");
3874         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3875                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3876                    nsecs_to_usecs(data->saved_latency),
3877                    entries,
3878                    total,
3879                    buf->cpu,
3880 #if defined(CONFIG_PREEMPT_NONE)
3881                    "server",
3882 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3883                    "desktop",
3884 #elif defined(CONFIG_PREEMPT)
3885                    "preempt",
3886 #elif defined(CONFIG_PREEMPT_RT)
3887                    "preempt_rt",
3888 #else
3889                    "unknown",
3890 #endif
3891                    /* These are reserved for later use */
3892                    0, 0, 0, 0);
3893 #ifdef CONFIG_SMP
3894         seq_printf(m, " #P:%d)\n", num_online_cpus());
3895 #else
3896         seq_puts(m, ")\n");
3897 #endif
3898         seq_puts(m, "#    -----------------\n");
3899         seq_printf(m, "#    | task: %.16s-%d "
3900                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3901                    data->comm, data->pid,
3902                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3903                    data->policy, data->rt_priority);
3904         seq_puts(m, "#    -----------------\n");
3905
3906         if (data->critical_start) {
3907                 seq_puts(m, "#  => started at: ");
3908                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3909                 trace_print_seq(m, &iter->seq);
3910                 seq_puts(m, "\n#  => ended at:   ");
3911                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3912                 trace_print_seq(m, &iter->seq);
3913                 seq_puts(m, "\n#\n");
3914         }
3915
3916         seq_puts(m, "#\n");
3917 }
3918
3919 static void test_cpu_buff_start(struct trace_iterator *iter)
3920 {
3921         struct trace_seq *s = &iter->seq;
3922         struct trace_array *tr = iter->tr;
3923
3924         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3925                 return;
3926
3927         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3928                 return;
3929
3930         if (cpumask_available(iter->started) &&
3931             cpumask_test_cpu(iter->cpu, iter->started))
3932                 return;
3933
3934         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3935                 return;
3936
3937         if (cpumask_available(iter->started))
3938                 cpumask_set_cpu(iter->cpu, iter->started);
3939
3940         /* Don't print started cpu buffer for the first entry of the trace */
3941         if (iter->idx > 1)
3942                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3943                                 iter->cpu);
3944 }
3945
3946 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3947 {
3948         struct trace_array *tr = iter->tr;
3949         struct trace_seq *s = &iter->seq;
3950         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3951         struct trace_entry *entry;
3952         struct trace_event *event;
3953
3954         entry = iter->ent;
3955
3956         test_cpu_buff_start(iter);
3957
3958         event = ftrace_find_event(entry->type);
3959
3960         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3961                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3962                         trace_print_lat_context(iter);
3963                 else
3964                         trace_print_context(iter);
3965         }
3966
3967         if (trace_seq_has_overflowed(s))
3968                 return TRACE_TYPE_PARTIAL_LINE;
3969
3970         if (event)
3971                 return event->funcs->trace(iter, sym_flags, event);
3972
3973         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3974
3975         return trace_handle_return(s);
3976 }
3977
3978 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3979 {
3980         struct trace_array *tr = iter->tr;
3981         struct trace_seq *s = &iter->seq;
3982         struct trace_entry *entry;
3983         struct trace_event *event;
3984
3985         entry = iter->ent;
3986
3987         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3988                 trace_seq_printf(s, "%d %d %llu ",
3989                                  entry->pid, iter->cpu, iter->ts);
3990
3991         if (trace_seq_has_overflowed(s))
3992                 return TRACE_TYPE_PARTIAL_LINE;
3993
3994         event = ftrace_find_event(entry->type);
3995         if (event)
3996                 return event->funcs->raw(iter, 0, event);
3997
3998         trace_seq_printf(s, "%d ?\n", entry->type);
3999
4000         return trace_handle_return(s);
4001 }
4002
4003 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4004 {
4005         struct trace_array *tr = iter->tr;
4006         struct trace_seq *s = &iter->seq;
4007         unsigned char newline = '\n';
4008         struct trace_entry *entry;
4009         struct trace_event *event;
4010
4011         entry = iter->ent;
4012
4013         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4014                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4015                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4016                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4017                 if (trace_seq_has_overflowed(s))
4018                         return TRACE_TYPE_PARTIAL_LINE;
4019         }
4020
4021         event = ftrace_find_event(entry->type);
4022         if (event) {
4023                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4024                 if (ret != TRACE_TYPE_HANDLED)
4025                         return ret;
4026         }
4027
4028         SEQ_PUT_FIELD(s, newline);
4029
4030         return trace_handle_return(s);
4031 }
4032
4033 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4034 {
4035         struct trace_array *tr = iter->tr;
4036         struct trace_seq *s = &iter->seq;
4037         struct trace_entry *entry;
4038         struct trace_event *event;
4039
4040         entry = iter->ent;
4041
4042         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4043                 SEQ_PUT_FIELD(s, entry->pid);
4044                 SEQ_PUT_FIELD(s, iter->cpu);
4045                 SEQ_PUT_FIELD(s, iter->ts);
4046                 if (trace_seq_has_overflowed(s))
4047                         return TRACE_TYPE_PARTIAL_LINE;
4048         }
4049
4050         event = ftrace_find_event(entry->type);
4051         return event ? event->funcs->binary(iter, 0, event) :
4052                 TRACE_TYPE_HANDLED;
4053 }
4054
4055 int trace_empty(struct trace_iterator *iter)
4056 {
4057         struct ring_buffer_iter *buf_iter;
4058         int cpu;
4059
4060         /* If we are looking at one CPU buffer, only check that one */
4061         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4062                 cpu = iter->cpu_file;
4063                 buf_iter = trace_buffer_iter(iter, cpu);
4064                 if (buf_iter) {
4065                         if (!ring_buffer_iter_empty(buf_iter))
4066                                 return 0;
4067                 } else {
4068                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4069                                 return 0;
4070                 }
4071                 return 1;
4072         }
4073
4074         for_each_tracing_cpu(cpu) {
4075                 buf_iter = trace_buffer_iter(iter, cpu);
4076                 if (buf_iter) {
4077                         if (!ring_buffer_iter_empty(buf_iter))
4078                                 return 0;
4079                 } else {
4080                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4081                                 return 0;
4082                 }
4083         }
4084
4085         return 1;
4086 }
4087
4088 /*  Called with trace_event_read_lock() held. */
4089 enum print_line_t print_trace_line(struct trace_iterator *iter)
4090 {
4091         struct trace_array *tr = iter->tr;
4092         unsigned long trace_flags = tr->trace_flags;
4093         enum print_line_t ret;
4094
4095         if (iter->lost_events) {
4096                 if (iter->lost_events == (unsigned long)-1)
4097                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4098                                          iter->cpu);
4099                 else
4100                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4101                                          iter->cpu, iter->lost_events);
4102                 if (trace_seq_has_overflowed(&iter->seq))
4103                         return TRACE_TYPE_PARTIAL_LINE;
4104         }
4105
4106         if (iter->trace && iter->trace->print_line) {
4107                 ret = iter->trace->print_line(iter);
4108                 if (ret != TRACE_TYPE_UNHANDLED)
4109                         return ret;
4110         }
4111
4112         if (iter->ent->type == TRACE_BPUTS &&
4113                         trace_flags & TRACE_ITER_PRINTK &&
4114                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4115                 return trace_print_bputs_msg_only(iter);
4116
4117         if (iter->ent->type == TRACE_BPRINT &&
4118                         trace_flags & TRACE_ITER_PRINTK &&
4119                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4120                 return trace_print_bprintk_msg_only(iter);
4121
4122         if (iter->ent->type == TRACE_PRINT &&
4123                         trace_flags & TRACE_ITER_PRINTK &&
4124                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4125                 return trace_print_printk_msg_only(iter);
4126
4127         if (trace_flags & TRACE_ITER_BIN)
4128                 return print_bin_fmt(iter);
4129
4130         if (trace_flags & TRACE_ITER_HEX)
4131                 return print_hex_fmt(iter);
4132
4133         if (trace_flags & TRACE_ITER_RAW)
4134                 return print_raw_fmt(iter);
4135
4136         return print_trace_fmt(iter);
4137 }
4138
4139 void trace_latency_header(struct seq_file *m)
4140 {
4141         struct trace_iterator *iter = m->private;
4142         struct trace_array *tr = iter->tr;
4143
4144         /* print nothing if the buffers are empty */
4145         if (trace_empty(iter))
4146                 return;
4147
4148         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4149                 print_trace_header(m, iter);
4150
4151         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4152                 print_lat_help_header(m);
4153 }
4154
4155 void trace_default_header(struct seq_file *m)
4156 {
4157         struct trace_iterator *iter = m->private;
4158         struct trace_array *tr = iter->tr;
4159         unsigned long trace_flags = tr->trace_flags;
4160
4161         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4162                 return;
4163
4164         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4165                 /* print nothing if the buffers are empty */
4166                 if (trace_empty(iter))
4167                         return;
4168                 print_trace_header(m, iter);
4169                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4170                         print_lat_help_header(m);
4171         } else {
4172                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4173                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4174                                 print_func_help_header_irq(iter->array_buffer,
4175                                                            m, trace_flags);
4176                         else
4177                                 print_func_help_header(iter->array_buffer, m,
4178                                                        trace_flags);
4179                 }
4180         }
4181 }
4182
4183 static void test_ftrace_alive(struct seq_file *m)
4184 {
4185         if (!ftrace_is_dead())
4186                 return;
4187         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4188                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4189 }
4190
4191 #ifdef CONFIG_TRACER_MAX_TRACE
4192 static void show_snapshot_main_help(struct seq_file *m)
4193 {
4194         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4195                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4196                     "#                      Takes a snapshot of the main buffer.\n"
4197                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4198                     "#                      (Doesn't have to be '2' works with any number that\n"
4199                     "#                       is not a '0' or '1')\n");
4200 }
4201
4202 static void show_snapshot_percpu_help(struct seq_file *m)
4203 {
4204         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4205 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4206         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4207                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4208 #else
4209         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4210                     "#                     Must use main snapshot file to allocate.\n");
4211 #endif
4212         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4213                     "#                      (Doesn't have to be '2'; works with any number that\n"
4214                     "#                       is not a '0' or '1')\n");
4215 }
4216
4217 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4218 {
4219         if (iter->tr->allocated_snapshot)
4220                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4221         else
4222                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4223
4224         seq_puts(m, "# Snapshot commands:\n");
4225         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4226                 show_snapshot_main_help(m);
4227         else
4228                 show_snapshot_percpu_help(m);
4229 }
4230 #else
4231 /* Should never be called */
4232 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4233 #endif
4234
4235 static int s_show(struct seq_file *m, void *v)
4236 {
4237         struct trace_iterator *iter = v;
4238         int ret;
4239
4240         if (iter->ent == NULL) {
4241                 if (iter->tr) {
4242                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4243                         seq_puts(m, "#\n");
4244                         test_ftrace_alive(m);
4245                 }
4246                 if (iter->snapshot && trace_empty(iter))
4247                         print_snapshot_help(m, iter);
4248                 else if (iter->trace && iter->trace->print_header)
4249                         iter->trace->print_header(m);
4250                 else
4251                         trace_default_header(m);
4252
4253         } else if (iter->leftover) {
4254                 /*
4255                  * If we filled the seq_file buffer earlier, we
4256                  * want to just show it now.
4257                  */
4258                 ret = trace_print_seq(m, &iter->seq);
4259
4260                 /* ret should this time be zero, but you never know */
4261                 iter->leftover = ret;
4262
4263         } else {
4264                 print_trace_line(iter);
4265                 ret = trace_print_seq(m, &iter->seq);
4266                 /*
4267                  * If we overflow the seq_file buffer, then it will
4268                  * ask us for this data again at start up.
4269                  * Use that instead.
4270                  *  ret is 0 if seq_file write succeeded.
4271                  *        -1 otherwise.
4272                  */
4273                 iter->leftover = ret;
4274         }
4275
4276         return 0;
4277 }
4278
4279 /*
4280  * Should be used after trace_array_get(), trace_types_lock
4281  * ensures that i_cdev was already initialized.
4282  */
4283 static inline int tracing_get_cpu(struct inode *inode)
4284 {
4285         if (inode->i_cdev) /* See trace_create_cpu_file() */
4286                 return (long)inode->i_cdev - 1;
4287         return RING_BUFFER_ALL_CPUS;
4288 }
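
/*
 * The matching encoding is done when the per-cpu files are created (see
 * trace_create_cpu_file() later in this file), which is assumed to store
 * the CPU number biased by one, so that an untouched i_cdev of zero still
 * maps to RING_BUFFER_ALL_CPUS:
 *
 *      d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 */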
4289
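/*
 * seq_file glue for the "trace" file: the seq_file core calls s_start()
 * and s_next() to walk the entries, s_show() to render each one, and
 * s_stop() to release the locks taken in s_start().
 */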
4290 static const struct seq_operations tracer_seq_ops = {
4291         .start          = s_start,
4292         .next           = s_next,
4293         .stop           = s_stop,
4294         .show           = s_show,
4295 };
4296
4297 static struct trace_iterator *
4298 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4299 {
4300         struct trace_array *tr = inode->i_private;
4301         struct trace_iterator *iter;
4302         int cpu;
4303
4304         if (tracing_disabled)
4305                 return ERR_PTR(-ENODEV);
4306
4307         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4308         if (!iter)
4309                 return ERR_PTR(-ENOMEM);
4310
4311         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4312                                     GFP_KERNEL);
4313         if (!iter->buffer_iter)
4314                 goto release;
4315
4316         /*
4317          * trace_find_next_entry() may need to save off iter->ent.
4318          * It will place it into the iter->temp buffer. As most
4319          * events are less than 128 bytes, allocate a buffer of that size.
4320          * If one is greater, then trace_find_next_entry() will
4321          * allocate a new buffer to adjust for the bigger iter->ent.
4322          * It's not critical if it fails to get allocated here.
4323          */
4324         iter->temp = kmalloc(128, GFP_KERNEL);
4325         if (iter->temp)
4326                 iter->temp_size = 128;
4327
4328         /*
4329          * We make a copy of the current tracer to avoid concurrent
4330          * changes on it while we are reading.
4331          */
4332         mutex_lock(&trace_types_lock);
4333         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4334         if (!iter->trace)
4335                 goto fail;
4336
4337         *iter->trace = *tr->current_trace;
4338
4339         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4340                 goto fail;
4341
4342         iter->tr = tr;
4343
4344 #ifdef CONFIG_TRACER_MAX_TRACE
4345         /* Currently only the top directory has a snapshot */
4346         if (tr->current_trace->print_max || snapshot)
4347                 iter->array_buffer = &tr->max_buffer;
4348         else
4349 #endif
4350                 iter->array_buffer = &tr->array_buffer;
4351         iter->snapshot = snapshot;
4352         iter->pos = -1;
4353         iter->cpu_file = tracing_get_cpu(inode);
4354         mutex_init(&iter->mutex);
4355
4356         /* Notify the tracer early; before we stop tracing. */
4357         if (iter->trace->open)
4358                 iter->trace->open(iter);
4359
4360         /* Annotate start of buffers if we had overruns */
4361         if (ring_buffer_overruns(iter->array_buffer->buffer))
4362                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4363
4364         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4365         if (trace_clocks[tr->clock_id].in_ns)
4366                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4367
4368         /*
4369          * If pause-on-trace is enabled, then stop the trace while
4370          * dumping, unless this is the "snapshot" file.
4371          */
4372         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4373                 tracing_stop_tr(tr);
4374
4375         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4376                 for_each_tracing_cpu(cpu) {
4377                         iter->buffer_iter[cpu] =
4378                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4379                                                          cpu, GFP_KERNEL);
4380                 }
4381                 ring_buffer_read_prepare_sync();
4382                 for_each_tracing_cpu(cpu) {
4383                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4384                         tracing_iter_reset(iter, cpu);
4385                 }
4386         } else {
4387                 cpu = iter->cpu_file;
4388                 iter->buffer_iter[cpu] =
4389                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4390                                                  cpu, GFP_KERNEL);
4391                 ring_buffer_read_prepare_sync();
4392                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4393                 tracing_iter_reset(iter, cpu);
4394         }
4395
4396         mutex_unlock(&trace_types_lock);
4397
4398         return iter;
4399
4400  fail:
4401         mutex_unlock(&trace_types_lock);
4402         kfree(iter->trace);
4403         kfree(iter->temp);
4404         kfree(iter->buffer_iter);
4405 release:
4406         seq_release_private(inode, file);
4407         return ERR_PTR(-ENOMEM);
4408 }
4409
4410 int tracing_open_generic(struct inode *inode, struct file *filp)
4411 {
4412         int ret;
4413
4414         ret = tracing_check_open_get_tr(NULL);
4415         if (ret)
4416                 return ret;
4417
4418         filp->private_data = inode->i_private;
4419         return 0;
4420 }
4421
4422 bool tracing_is_disabled(void)
4423 {
4424         return tracing_disabled ? true : false;
4425 }
4426
4427 /*
4428  * Open and update trace_array ref count.
4429  * Must have the current trace_array passed to it.
4430  */
4431 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4432 {
4433         struct trace_array *tr = inode->i_private;
4434         int ret;
4435
4436         ret = tracing_check_open_get_tr(tr);
4437         if (ret)
4438                 return ret;
4439
4440         filp->private_data = inode->i_private;
4441
4442         return 0;
4443 }
4444
4445 static int tracing_release(struct inode *inode, struct file *file)
4446 {
4447         struct trace_array *tr = inode->i_private;
4448         struct seq_file *m = file->private_data;
4449         struct trace_iterator *iter;
4450         int cpu;
4451
4452         if (!(file->f_mode & FMODE_READ)) {
4453                 trace_array_put(tr);
4454                 return 0;
4455         }
4456
4457         /* Writes do not use seq_file */
4458         iter = m->private;
4459         mutex_lock(&trace_types_lock);
4460
4461         for_each_tracing_cpu(cpu) {
4462                 if (iter->buffer_iter[cpu])
4463                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4464         }
4465
4466         if (iter->trace && iter->trace->close)
4467                 iter->trace->close(iter);
4468
4469         if (!iter->snapshot && tr->stop_count)
4470                 /* reenable tracing if it was previously enabled */
4471                 tracing_start_tr(tr);
4472
4473         __trace_array_put(tr);
4474
4475         mutex_unlock(&trace_types_lock);
4476
4477         mutex_destroy(&iter->mutex);
4478         free_cpumask_var(iter->started);
4479         kfree(iter->temp);
4480         kfree(iter->trace);
4481         kfree(iter->buffer_iter);
4482         seq_release_private(inode, file);
4483
4484         return 0;
4485 }
4486
4487 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4488 {
4489         struct trace_array *tr = inode->i_private;
4490
4491         trace_array_put(tr);
4492         return 0;
4493 }
4494
4495 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4496 {
4497         struct trace_array *tr = inode->i_private;
4498
4499         trace_array_put(tr);
4500
4501         return single_release(inode, file);
4502 }
4503
4504 static int tracing_open(struct inode *inode, struct file *file)
4505 {
4506         struct trace_array *tr = inode->i_private;
4507         struct trace_iterator *iter;
4508         int ret;
4509
4510         ret = tracing_check_open_get_tr(tr);
4511         if (ret)
4512                 return ret;
4513
4514         /* If this file was open for write, then erase contents */
4515         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4516                 int cpu = tracing_get_cpu(inode);
4517                 struct array_buffer *trace_buf = &tr->array_buffer;
4518
4519 #ifdef CONFIG_TRACER_MAX_TRACE
4520                 if (tr->current_trace->print_max)
4521                         trace_buf = &tr->max_buffer;
4522 #endif
4523
4524                 if (cpu == RING_BUFFER_ALL_CPUS)
4525                         tracing_reset_online_cpus(trace_buf);
4526                 else
4527                         tracing_reset_cpu(trace_buf, cpu);
4528         }
4529
4530         if (file->f_mode & FMODE_READ) {
4531                 iter = __tracing_open(inode, file, false);
4532                 if (IS_ERR(iter))
4533                         ret = PTR_ERR(iter);
4534                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4535                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4536         }
4537
4538         if (ret < 0)
4539                 trace_array_put(tr);
4540
4541         return ret;
4542 }
4543
4544 /*
4545  * Some tracers are not suitable for instance buffers.
4546  * A tracer is always available for the global array (toplevel)
4547  * or if it explicitly states that it is.
4548  */
4549 static bool
4550 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4551 {
4552         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4553 }
4554
4555 /* Find the next tracer that this trace array may use */
4556 static struct tracer *
4557 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4558 {
4559         while (t && !trace_ok_for_array(t, tr))
4560                 t = t->next;
4561
4562         return t;
4563 }
4564
4565 static void *
4566 t_next(struct seq_file *m, void *v, loff_t *pos)
4567 {
4568         struct trace_array *tr = m->private;
4569         struct tracer *t = v;
4570
4571         (*pos)++;
4572
4573         if (t)
4574                 t = get_tracer_for_array(tr, t->next);
4575
4576         return t;
4577 }
4578
4579 static void *t_start(struct seq_file *m, loff_t *pos)
4580 {
4581         struct trace_array *tr = m->private;
4582         struct tracer *t;
4583         loff_t l = 0;
4584
4585         mutex_lock(&trace_types_lock);
4586
4587         t = get_tracer_for_array(tr, trace_types);
4588         for (; t && l < *pos; t = t_next(m, t, &l))
4589                 ;
4590
4591         return t;
4592 }
4593
4594 static void t_stop(struct seq_file *m, void *p)
4595 {
4596         mutex_unlock(&trace_types_lock);
4597 }
4598
4599 static int t_show(struct seq_file *m, void *v)
4600 {
4601         struct tracer *t = v;
4602
4603         if (!t)
4604                 return 0;
4605
4606         seq_puts(m, t->name);
4607         if (t->next)
4608                 seq_putc(m, ' ');
4609         else
4610                 seq_putc(m, '\n');
4611
4612         return 0;
4613 }
4614
4615 static const struct seq_operations show_traces_seq_ops = {
4616         .start          = t_start,
4617         .next           = t_next,
4618         .stop           = t_stop,
4619         .show           = t_show,
4620 };
4621
4622 static int show_traces_open(struct inode *inode, struct file *file)
4623 {
4624         struct trace_array *tr = inode->i_private;
4625         struct seq_file *m;
4626         int ret;
4627
4628         ret = tracing_check_open_get_tr(tr);
4629         if (ret)
4630                 return ret;
4631
4632         ret = seq_open(file, &show_traces_seq_ops);
4633         if (ret) {
4634                 trace_array_put(tr);
4635                 return ret;
4636         }
4637
4638         m = file->private_data;
4639         m->private = tr;
4640
4641         return 0;
4642 }
4643
4644 static int show_traces_release(struct inode *inode, struct file *file)
4645 {
4646         struct trace_array *tr = inode->i_private;
4647
4648         trace_array_put(tr);
4649         return seq_release(inode, file);
4650 }
4651
4652 static ssize_t
4653 tracing_write_stub(struct file *filp, const char __user *ubuf,
4654                    size_t count, loff_t *ppos)
4655 {
4656         return count;
4657 }
4658
4659 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4660 {
4661         int ret;
4662
4663         if (file->f_mode & FMODE_READ)
4664                 ret = seq_lseek(file, offset, whence);
4665         else
4666                 file->f_pos = ret = 0;
4667
4668         return ret;
4669 }
4670
4671 static const struct file_operations tracing_fops = {
4672         .open           = tracing_open,
4673         .read           = seq_read,
4674         .write          = tracing_write_stub,
4675         .llseek         = tracing_lseek,
4676         .release        = tracing_release,
4677 };
4678
4679 static const struct file_operations show_traces_fops = {
4680         .open           = show_traces_open,
4681         .read           = seq_read,
4682         .llseek         = seq_lseek,
4683         .release        = show_traces_release,
4684 };
4685
4686 static ssize_t
4687 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4688                      size_t count, loff_t *ppos)
4689 {
4690         struct trace_array *tr = file_inode(filp)->i_private;
4691         char *mask_str;
4692         int len;
4693
4694         len = snprintf(NULL, 0, "%*pb\n",
4695                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4696         mask_str = kmalloc(len, GFP_KERNEL);
4697         if (!mask_str)
4698                 return -ENOMEM;
4699
4700         len = snprintf(mask_str, len, "%*pb\n",
4701                        cpumask_pr_args(tr->tracing_cpumask));
4702         if (len >= count) {
4703                 count = -EINVAL;
4704                 goto out_err;
4705         }
4706         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4707
4708 out_err:
4709         kfree(mask_str);
4710
4711         return count;
4712 }
4713
4714 int tracing_set_cpumask(struct trace_array *tr,
4715                         cpumask_var_t tracing_cpumask_new)
4716 {
4717         int cpu;
4718
4719         if (!tr)
4720                 return -EINVAL;
4721
4722         local_irq_disable();
4723         arch_spin_lock(&tr->max_lock);
4724         for_each_tracing_cpu(cpu) {
4725                 /*
4726                  * Increase/decrease the disabled counter if we are
4727                  * about to flip a bit in the cpumask:
4728                  */
4729                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4730                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4731                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4732                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4733                 }
4734                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4735                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4736                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4737                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4738                 }
4739         }
4740         arch_spin_unlock(&tr->max_lock);
4741         local_irq_enable();
4742
4743         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4744
4745         return 0;
4746 }
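
/*
 * Illustrative sketch, not compiled: a hypothetical in-kernel caller that
 * restricts an instance to CPU 0.  tracing_set_cpumask() copies the mask
 * into the trace_array, so the temporary cpumask is freed afterwards.
 */
#if 0
static int my_restrict_to_cpu0(struct trace_array *tr)
{
        cpumask_var_t new_mask;
        int err;

        if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_clear(new_mask);
        cpumask_set_cpu(0, new_mask);

        err = tracing_set_cpumask(tr, new_mask);

        free_cpumask_var(new_mask);
        return err;
}
#endif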
4747
4748 static ssize_t
4749 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4750                       size_t count, loff_t *ppos)
4751 {
4752         struct trace_array *tr = file_inode(filp)->i_private;
4753         cpumask_var_t tracing_cpumask_new;
4754         int err;
4755
4756         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4757                 return -ENOMEM;
4758
4759         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4760         if (err)
4761                 goto err_free;
4762
4763         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4764         if (err)
4765                 goto err_free;
4766
4767         free_cpumask_var(tracing_cpumask_new);
4768
4769         return count;
4770
4771 err_free:
4772         free_cpumask_var(tracing_cpumask_new);
4773
4774         return err;
4775 }
4776
4777 static const struct file_operations tracing_cpumask_fops = {
4778         .open           = tracing_open_generic_tr,
4779         .read           = tracing_cpumask_read,
4780         .write          = tracing_cpumask_write,
4781         .release        = tracing_release_generic_tr,
4782         .llseek         = generic_file_llseek,
4783 };
4784
4785 static int tracing_trace_options_show(struct seq_file *m, void *v)
4786 {
4787         struct tracer_opt *trace_opts;
4788         struct trace_array *tr = m->private;
4789         u32 tracer_flags;
4790         int i;
4791
4792         mutex_lock(&trace_types_lock);
4793         tracer_flags = tr->current_trace->flags->val;
4794         trace_opts = tr->current_trace->flags->opts;
4795
4796         for (i = 0; trace_options[i]; i++) {
4797                 if (tr->trace_flags & (1 << i))
4798                         seq_printf(m, "%s\n", trace_options[i]);
4799                 else
4800                         seq_printf(m, "no%s\n", trace_options[i]);
4801         }
4802
4803         for (i = 0; trace_opts[i].name; i++) {
4804                 if (tracer_flags & trace_opts[i].bit)
4805                         seq_printf(m, "%s\n", trace_opts[i].name);
4806                 else
4807                         seq_printf(m, "no%s\n", trace_opts[i].name);
4808         }
4809         mutex_unlock(&trace_types_lock);
4810
4811         return 0;
4812 }
4813
4814 static int __set_tracer_option(struct trace_array *tr,
4815                                struct tracer_flags *tracer_flags,
4816                                struct tracer_opt *opts, int neg)
4817 {
4818         struct tracer *trace = tracer_flags->trace;
4819         int ret;
4820
4821         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4822         if (ret)
4823                 return ret;
4824
4825         if (neg)
4826                 tracer_flags->val &= ~opts->bit;
4827         else
4828                 tracer_flags->val |= opts->bit;
4829         return 0;
4830 }
4831
4832 /* Try to assign a tracer specific option */
4833 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4834 {
4835         struct tracer *trace = tr->current_trace;
4836         struct tracer_flags *tracer_flags = trace->flags;
4837         struct tracer_opt *opts = NULL;
4838         int i;
4839
4840         for (i = 0; tracer_flags->opts[i].name; i++) {
4841                 opts = &tracer_flags->opts[i];
4842
4843                 if (strcmp(cmp, opts->name) == 0)
4844                         return __set_tracer_option(tr, trace->flags, opts, neg);
4845         }
4846
4847         return -EINVAL;
4848 }
4849
4850 /* Some tracers require overwrite to stay enabled */
4851 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4852 {
4853         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4854                 return -1;
4855
4856         return 0;
4857 }
4858
4859 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4860 {
4861         if ((mask == TRACE_ITER_RECORD_TGID) ||
4862             (mask == TRACE_ITER_RECORD_CMD))
4863                 lockdep_assert_held(&event_mutex);
4864
4865         /* do nothing if flag is already set */
4866         if (!!(tr->trace_flags & mask) == !!enabled)
4867                 return 0;
4868
4869         /* Give the tracer a chance to approve the change */
4870         if (tr->current_trace->flag_changed)
4871                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4872                         return -EINVAL;
4873
4874         if (enabled)
4875                 tr->trace_flags |= mask;
4876         else
4877                 tr->trace_flags &= ~mask;
4878
4879         if (mask == TRACE_ITER_RECORD_CMD)
4880                 trace_event_enable_cmd_record(enabled);
4881
4882         if (mask == TRACE_ITER_RECORD_TGID) {
4883                 if (!tgid_map)
4884                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4885                                            sizeof(*tgid_map),
4886                                            GFP_KERNEL);
4887                 if (!tgid_map) {
4888                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4889                         return -ENOMEM;
4890                 }
4891
4892                 trace_event_enable_tgid_record(enabled);
4893         }
4894
4895         if (mask == TRACE_ITER_EVENT_FORK)
4896                 trace_event_follow_fork(tr, enabled);
4897
4898         if (mask == TRACE_ITER_FUNC_FORK)
4899                 ftrace_pid_follow_fork(tr, enabled);
4900
4901         if (mask == TRACE_ITER_OVERWRITE) {
4902                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4903 #ifdef CONFIG_TRACER_MAX_TRACE
4904                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4905 #endif
4906         }
4907
4908         if (mask == TRACE_ITER_PRINTK) {
4909                 trace_printk_start_stop_comm(enabled);
4910                 trace_printk_control(enabled);
4911         }
4912
4913         return 0;
4914 }
4915
4916 int trace_set_options(struct trace_array *tr, char *option)
4917 {
4918         char *cmp;
4919         int neg = 0;
4920         int ret;
4921         size_t orig_len = strlen(option);
4922         int len;
4923
4924         cmp = strstrip(option);
4925
4926         len = str_has_prefix(cmp, "no");
4927         if (len)
4928                 neg = 1;
4929
4930         cmp += len;
4931
4932         mutex_lock(&event_mutex);
4933         mutex_lock(&trace_types_lock);
4934
4935         ret = match_string(trace_options, -1, cmp);
4936         /* If no option could be set, test the specific tracer options */
4937         if (ret < 0)
4938                 ret = set_tracer_option(tr, cmp, neg);
4939         else
4940                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4941
4942         mutex_unlock(&trace_types_lock);
4943         mutex_unlock(&event_mutex);
4944
4945         /*
4946          * If the first trailing whitespace is replaced with '\0' by strstrip,
4947          * turn it back into a space.
4948          */
4949         if (orig_len > strlen(option))
4950                 option[strlen(option)] = ' ';
4951
4952         return ret;
4953 }
4954
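/*
 * Editor's note (not in the original source): trace_set_options() takes the
 * same keywords that are listed in the trace_options file, optionally
 * prefixed with "no" to clear a flag, e.g.:
 *
 *	# echo noprint-parent > trace_options
 *
 * The helper below replays a comma-separated list of such options that was
 * saved from the trace_options= boot parameter.
 */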
4955 static void __init apply_trace_boot_options(void)
4956 {
4957         char *buf = trace_boot_options_buf;
4958         char *option;
4959
4960         while (true) {
4961                 option = strsep(&buf, ",");
4962
4963                 if (!option)
4964                         break;
4965
4966                 if (*option)
4967                         trace_set_options(&global_trace, option);
4968
4969                 /* Put back the comma to allow this to be called again */
4970                 if (buf)
4971                         *(buf - 1) = ',';
4972         }
4973 }
4974
4975 static ssize_t
4976 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4977                         size_t cnt, loff_t *ppos)
4978 {
4979         struct seq_file *m = filp->private_data;
4980         struct trace_array *tr = m->private;
4981         char buf[64];
4982         int ret;
4983
4984         if (cnt >= sizeof(buf))
4985                 return -EINVAL;
4986
4987         if (copy_from_user(buf, ubuf, cnt))
4988                 return -EFAULT;
4989
4990         buf[cnt] = 0;
4991
4992         ret = trace_set_options(tr, buf);
4993         if (ret < 0)
4994                 return ret;
4995
4996         *ppos += cnt;
4997
4998         return cnt;
4999 }
5000
5001 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5002 {
5003         struct trace_array *tr = inode->i_private;
5004         int ret;
5005
5006         ret = tracing_check_open_get_tr(tr);
5007         if (ret)
5008                 return ret;
5009
5010         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5011         if (ret < 0)
5012                 trace_array_put(tr);
5013
5014         return ret;
5015 }
5016
5017 static const struct file_operations tracing_iter_fops = {
5018         .open           = tracing_trace_options_open,
5019         .read           = seq_read,
5020         .llseek         = seq_lseek,
5021         .release        = tracing_single_release_tr,
5022         .write          = tracing_trace_options_write,
5023 };
5024
5025 static const char readme_msg[] =
5026         "tracing mini-HOWTO:\n\n"
5027         "# echo 0 > tracing_on : quick way to disable tracing\n"
5028         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5029         " Important files:\n"
5030         "  trace\t\t\t- The static contents of the buffer\n"
5031         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5032         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5033         "  current_tracer\t- function and latency tracers\n"
5034         "  available_tracers\t- list of configured tracers for current_tracer\n"
5035         "  error_log\t- error log for failed commands (that support it)\n"
5036         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5037         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5038         "  trace_clock\t\t- change the clock used to order events\n"
5039         "       local:   Per cpu clock but may not be synced across CPUs\n"
5040         "      global:   Synced across CPUs but slows tracing down.\n"
5041         "     counter:   Not a clock, but just an increment\n"
5042         "      uptime:   Jiffy counter from time of boot\n"
5043         "        perf:   Same clock that perf events use\n"
5044 #ifdef CONFIG_X86_64
5045         "     x86-tsc:   TSC cycle counter\n"
5046 #endif
5047         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5048         "       delta:   Delta difference against a buffer-wide timestamp\n"
5049         "    absolute:   Absolute (standalone) timestamp\n"
5050         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5051         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
5052         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5053         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5054         "\t\t\t  Remove sub-buffer with rmdir\n"
5055         "  trace_options\t\t- Set format or modify how tracing happens\n"
5056         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5057         "\t\t\t  option name\n"
5058         "  saved_cmdlines_size\t- echo the number of comm-pid entries to cache in here\n"
5059 #ifdef CONFIG_DYNAMIC_FTRACE
5060         "\n  available_filter_functions - list of functions that can be filtered on\n"
5061         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5062         "\t\t\t  functions\n"
5063         "\t     accepts: func_full_name or glob-matching-pattern\n"
5064         "\t     modules: Can select a group via module\n"
5065         "\t      Format: :mod:<module-name>\n"
5066         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5067         "\t    triggers: a command to perform when function is hit\n"
5068         "\t      Format: <function>:<trigger>[:count]\n"
5069         "\t     trigger: traceon, traceoff\n"
5070         "\t\t      enable_event:<system>:<event>\n"
5071         "\t\t      disable_event:<system>:<event>\n"
5072 #ifdef CONFIG_STACKTRACE
5073         "\t\t      stacktrace\n"
5074 #endif
5075 #ifdef CONFIG_TRACER_SNAPSHOT
5076         "\t\t      snapshot\n"
5077 #endif
5078         "\t\t      dump\n"
5079         "\t\t      cpudump\n"
5080         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5081         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5082         "\t     The first one will disable tracing every time do_fault is hit\n"
5083         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5084         "\t       The first time do_trap is hit and it disables tracing, the\n"
5085         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5086         "\t       the counter will not decrement. It only decrements when the\n"
5087         "\t       trigger did work\n"
5088         "\t     To remove trigger without count:\n"
5089         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5090         "\t     To remove trigger with a count:\n"
5091         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5092         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5093         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5094         "\t    modules: Can select a group via module command :mod:\n"
5095         "\t    Does not accept triggers\n"
5096 #endif /* CONFIG_DYNAMIC_FTRACE */
5097 #ifdef CONFIG_FUNCTION_TRACER
5098         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5099         "\t\t    (function)\n"
5100         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5101         "\t\t    (function)\n"
5102 #endif
5103 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5104         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5105         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5106         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5107 #endif
5108 #ifdef CONFIG_TRACER_SNAPSHOT
5109         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5110         "\t\t\t  snapshot buffer. Read the contents for more\n"
5111         "\t\t\t  information\n"
5112 #endif
5113 #ifdef CONFIG_STACK_TRACER
5114         "  stack_trace\t\t- Shows the max stack trace when active\n"
5115         "  stack_max_size\t- Shows current max stack size that was traced\n"
5116         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5117         "\t\t\t  new trace)\n"
5118 #ifdef CONFIG_DYNAMIC_FTRACE
5119         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5120         "\t\t\t  traces\n"
5121 #endif
5122 #endif /* CONFIG_STACK_TRACER */
5123 #ifdef CONFIG_DYNAMIC_EVENTS
5124         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5125         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5126 #endif
5127 #ifdef CONFIG_KPROBE_EVENTS
5128         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5129         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5130 #endif
5131 #ifdef CONFIG_UPROBE_EVENTS
5132         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5133         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5134 #endif
5135 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5136         "\t  accepts: event-definitions (one definition per line)\n"
5137         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5138         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5139 #ifdef CONFIG_HIST_TRIGGERS
5140         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5141 #endif
5142         "\t           -:[<group>/]<event>\n"
5143 #ifdef CONFIG_KPROBE_EVENTS
5144         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5145   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5146 #endif
5147 #ifdef CONFIG_UPROBE_EVENTS
5148   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5149 #endif
5150         "\t     args: <name>=fetcharg[:type]\n"
5151         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5152 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5153         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5154 #else
5155         "\t           $stack<index>, $stack, $retval, $comm,\n"
5156 #endif
5157         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5158         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5159         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5160         "\t           <type>\\[<array-size>\\]\n"
5161 #ifdef CONFIG_HIST_TRIGGERS
5162         "\t    field: <stype> <name>;\n"
5163         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5164         "\t           [unsigned] char/int/long\n"
5165 #endif
5166 #endif
5167         "  events/\t\t- Directory containing all trace event subsystems:\n"
5168         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5169         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5170         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5171         "\t\t\t  events\n"
5172         "      filter\t\t- If set, only events passing filter are traced\n"
5173         "  events/<system>/<event>/\t- Directory containing control files for\n"
5174         "\t\t\t  <event>:\n"
5175         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5176         "      filter\t\t- If set, only events passing filter are traced\n"
5177         "      trigger\t\t- If set, a command to perform when event is hit\n"
5178         "\t    Format: <trigger>[:count][if <filter>]\n"
5179         "\t   trigger: traceon, traceoff\n"
5180         "\t            enable_event:<system>:<event>\n"
5181         "\t            disable_event:<system>:<event>\n"
5182 #ifdef CONFIG_HIST_TRIGGERS
5183         "\t            enable_hist:<system>:<event>\n"
5184         "\t            disable_hist:<system>:<event>\n"
5185 #endif
5186 #ifdef CONFIG_STACKTRACE
5187         "\t\t    stacktrace\n"
5188 #endif
5189 #ifdef CONFIG_TRACER_SNAPSHOT
5190         "\t\t    snapshot\n"
5191 #endif
5192 #ifdef CONFIG_HIST_TRIGGERS
5193         "\t\t    hist (see below)\n"
5194 #endif
5195         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5196         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5197         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5198         "\t                  events/block/block_unplug/trigger\n"
5199         "\t   The first disables tracing every time block_unplug is hit.\n"
5200         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5201         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5202         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5203         "\t   Like function triggers, the counter is only decremented if it\n"
5204         "\t    enabled or disabled tracing.\n"
5205         "\t   To remove a trigger without a count:\n"
5206         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5207         "\t   To remove a trigger with a count:\n"
5208         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5209         "\t   Filters can be ignored when removing a trigger.\n"
5210 #ifdef CONFIG_HIST_TRIGGERS
5211         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5212         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5213         "\t            [:values=<field1[,field2,...]>]\n"
5214         "\t            [:sort=<field1[,field2,...]>]\n"
5215         "\t            [:size=#entries]\n"
5216         "\t            [:pause][:continue][:clear]\n"
5217         "\t            [:name=histname1]\n"
5218         "\t            [:<handler>.<action>]\n"
5219         "\t            [if <filter>]\n\n"
5220         "\t    When a matching event is hit, an entry is added to a hash\n"
5221         "\t    table using the key(s) and value(s) named, and the value of a\n"
5222         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5223         "\t    correspond to fields in the event's format description.  Keys\n"
5224         "\t    can be any field, or the special string 'stacktrace'.\n"
5225         "\t    Compound keys consisting of up to two fields can be specified\n"
5226         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5227         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5228         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5229         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5230         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5231         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5232         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5233         "\t    its histogram data will be shared with other triggers of the\n"
5234         "\t    same name, and trigger hits will update this common data.\n\n"
5235         "\t    Reading the 'hist' file for the event will dump the hash\n"
5236         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5237         "\t    triggers attached to an event, there will be a table for each\n"
5238         "\t    trigger in the output.  The table displayed for a named\n"
5239         "\t    trigger will be the same as any other instance having the\n"
5240         "\t    same name.  The default format used to display a given field\n"
5241         "\t    can be modified by appending any of the following modifiers\n"
5242         "\t    to the field name, as applicable:\n\n"
5243         "\t            .hex        display a number as a hex value\n"
5244         "\t            .sym        display an address as a symbol\n"
5245         "\t            .sym-offset display an address as a symbol and offset\n"
5246         "\t            .execname   display a common_pid as a program name\n"
5247         "\t            .syscall    display a syscall id as a syscall name\n"
5248         "\t            .log2       display log2 value rather than raw number\n"
5249         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5250         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5251         "\t    trigger or to start a hist trigger but not log any events\n"
5252         "\t    until told to do so.  'continue' can be used to start or\n"
5253         "\t    restart a paused hist trigger.\n\n"
5254         "\t    The 'clear' parameter will clear the contents of a running\n"
5255         "\t    hist trigger and leave its current paused/active state\n"
5256         "\t    unchanged.\n\n"
5257         "\t    The enable_hist and disable_hist triggers can be used to\n"
5258         "\t    have one event conditionally start and stop another event's\n"
5259         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5260         "\t    the enable_event and disable_event triggers.\n\n"
5261         "\t    Hist trigger handlers and actions are executed whenever a\n"
5262         "\t    histogram entry is added or updated.  They take the form:\n\n"
5263         "\t        <handler>.<action>\n\n"
5264         "\t    The available handlers are:\n\n"
5265         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5266         "\t        onmax(var)               - invoke if var exceeds current max\n"
5267         "\t        onchange(var)            - invoke action if var changes\n\n"
5268         "\t    The available actions are:\n\n"
5269         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5270         "\t        save(field,...)                      - save current event fields\n"
5271 #ifdef CONFIG_TRACER_SNAPSHOT
5272         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5273 #endif
5274 #ifdef CONFIG_SYNTH_EVENTS
5275         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5276         "\t  Write into this file to define/undefine new synthetic events.\n"
5277         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5278 #endif
5279 #endif
5280 ;
5281
5282 static ssize_t
5283 tracing_readme_read(struct file *filp, char __user *ubuf,
5284                        size_t cnt, loff_t *ppos)
5285 {
5286         return simple_read_from_buffer(ubuf, cnt, ppos,
5287                                         readme_msg, strlen(readme_msg));
5288 }
5289
5290 static const struct file_operations tracing_readme_fops = {
5291         .open           = tracing_open_generic,
5292         .read           = tracing_readme_read,
5293         .llseek         = generic_file_llseek,
5294 };
5295
5296 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5297 {
5298         int *ptr = v;
5299
5300         if (*pos || m->count)
5301                 ptr++;
5302
5303         (*pos)++;
5304
5305         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5306                 if (trace_find_tgid(*ptr))
5307                         return ptr;
5308         }
5309
5310         return NULL;
5311 }
5312
5313 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5314 {
5315         void *v;
5316         loff_t l = 0;
5317
5318         if (!tgid_map)
5319                 return NULL;
5320
5321         v = &tgid_map[0];
5322         while (l <= *pos) {
5323                 v = saved_tgids_next(m, v, &l);
5324                 if (!v)
5325                         return NULL;
5326         }
5327
5328         return v;
5329 }
5330
5331 static void saved_tgids_stop(struct seq_file *m, void *v)
5332 {
5333 }
5334
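/*
 * Editor's note (not in the original source): each line of the saved_tgids
 * file, produced by saved_tgids_show() below, is a "<pid> <tgid>" pair. The
 * map is filled while the record-tgid option is enabled.
 */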
5335 static int saved_tgids_show(struct seq_file *m, void *v)
5336 {
5337         int pid = (int *)v - tgid_map;
5338
5339         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5340         return 0;
5341 }
5342
5343 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5344         .start          = saved_tgids_start,
5345         .stop           = saved_tgids_stop,
5346         .next           = saved_tgids_next,
5347         .show           = saved_tgids_show,
5348 };
5349
5350 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5351 {
5352         int ret;
5353
5354         ret = tracing_check_open_get_tr(NULL);
5355         if (ret)
5356                 return ret;
5357
5358         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5359 }
5360
5361
5362 static const struct file_operations tracing_saved_tgids_fops = {
5363         .open           = tracing_saved_tgids_open,
5364         .read           = seq_read,
5365         .llseek         = seq_lseek,
5366         .release        = seq_release,
5367 };
5368
5369 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5370 {
5371         unsigned int *ptr = v;
5372
5373         if (*pos || m->count)
5374                 ptr++;
5375
5376         (*pos)++;
5377
5378         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5379              ptr++) {
5380                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5381                         continue;
5382
5383                 return ptr;
5384         }
5385
5386         return NULL;
5387 }
5388
5389 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5390 {
5391         void *v;
5392         loff_t l = 0;
5393
5394         preempt_disable();
5395         arch_spin_lock(&trace_cmdline_lock);
5396
5397         v = &savedcmd->map_cmdline_to_pid[0];
5398         while (l <= *pos) {
5399                 v = saved_cmdlines_next(m, v, &l);
5400                 if (!v)
5401                         return NULL;
5402         }
5403
5404         return v;
5405 }
5406
5407 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5408 {
5409         arch_spin_unlock(&trace_cmdline_lock);
5410         preempt_enable();
5411 }
5412
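/*
 * Editor's note (not in the original source): saved_cmdlines_show() below
 * emits one "<pid> <comm>" line per cached entry, which is what a read of
 * the saved_cmdlines file returns.
 */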
5413 static int saved_cmdlines_show(struct seq_file *m, void *v)
5414 {
5415         char buf[TASK_COMM_LEN];
5416         unsigned int *pid = v;
5417
5418         __trace_find_cmdline(*pid, buf);
5419         seq_printf(m, "%d %s\n", *pid, buf);
5420         return 0;
5421 }
5422
5423 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5424         .start          = saved_cmdlines_start,
5425         .next           = saved_cmdlines_next,
5426         .stop           = saved_cmdlines_stop,
5427         .show           = saved_cmdlines_show,
5428 };
5429
5430 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5431 {
5432         int ret;
5433
5434         ret = tracing_check_open_get_tr(NULL);
5435         if (ret)
5436                 return ret;
5437
5438         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5439 }
5440
5441 static const struct file_operations tracing_saved_cmdlines_fops = {
5442         .open           = tracing_saved_cmdlines_open,
5443         .read           = seq_read,
5444         .llseek         = seq_lseek,
5445         .release        = seq_release,
5446 };
5447
5448 static ssize_t
5449 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5450                                  size_t cnt, loff_t *ppos)
5451 {
5452         char buf[64];
5453         int r;
5454
5455         arch_spin_lock(&trace_cmdline_lock);
5456         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5457         arch_spin_unlock(&trace_cmdline_lock);
5458
5459         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5460 }
5461
5462 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5463 {
5464         kfree(s->saved_cmdlines);
5465         kfree(s->map_cmdline_to_pid);
5466         kfree(s);
5467 }
5468
5469 static int tracing_resize_saved_cmdlines(unsigned int val)
5470 {
5471         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5472
5473         s = kmalloc(sizeof(*s), GFP_KERNEL);
5474         if (!s)
5475                 return -ENOMEM;
5476
5477         if (allocate_cmdlines_buffer(val, s) < 0) {
5478                 kfree(s);
5479                 return -ENOMEM;
5480         }
5481
5482         arch_spin_lock(&trace_cmdline_lock);
5483         savedcmd_temp = savedcmd;
5484         savedcmd = s;
5485         arch_spin_unlock(&trace_cmdline_lock);
5486         free_saved_cmdlines_buffer(savedcmd_temp);
5487
5488         return 0;
5489 }
5490
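/*
 * Editor's note (not in the original source): writes to the
 * saved_cmdlines_size file end up here, e.g.:
 *
 *	# echo 1024 > saved_cmdlines_size
 *
 * which reallocates the pid<->comm cache to hold 1024 entries.
 */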
5491 static ssize_t
5492 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5493                                   size_t cnt, loff_t *ppos)
5494 {
5495         unsigned long val;
5496         int ret;
5497
5498         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5499         if (ret)
5500                 return ret;
5501
5502         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5503         if (!val || val > PID_MAX_DEFAULT)
5504                 return -EINVAL;
5505
5506         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5507         if (ret < 0)
5508                 return ret;
5509
5510         *ppos += cnt;
5511
5512         return cnt;
5513 }
5514
5515 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5516         .open           = tracing_open_generic,
5517         .read           = tracing_saved_cmdlines_size_read,
5518         .write          = tracing_saved_cmdlines_size_write,
5519 };
5520
5521 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5522 static union trace_eval_map_item *
5523 update_eval_map(union trace_eval_map_item *ptr)
5524 {
5525         if (!ptr->map.eval_string) {
5526                 if (ptr->tail.next) {
5527                         ptr = ptr->tail.next;
5528                         /* Set ptr to the next real item (skip head) */
5529                         ptr++;
5530                 } else
5531                         return NULL;
5532         }
5533         return ptr;
5534 }
5535
5536 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5537 {
5538         union trace_eval_map_item *ptr = v;
5539
5540         /*
5541          * Paranoid! If ptr points to end, we don't want to increment past it.
5542          * This really should never happen.
5543          */
5544         (*pos)++;
5545         ptr = update_eval_map(ptr);
5546         if (WARN_ON_ONCE(!ptr))
5547                 return NULL;
5548
5549         ptr++;
5550         ptr = update_eval_map(ptr);
5551
5552         return ptr;
5553 }
5554
5555 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5556 {
5557         union trace_eval_map_item *v;
5558         loff_t l = 0;
5559
5560         mutex_lock(&trace_eval_mutex);
5561
5562         v = trace_eval_maps;
5563         if (v)
5564                 v++;
5565
5566         while (v && l < *pos) {
5567                 v = eval_map_next(m, v, &l);
5568         }
5569
5570         return v;
5571 }
5572
5573 static void eval_map_stop(struct seq_file *m, void *v)
5574 {
5575         mutex_unlock(&trace_eval_mutex);
5576 }
5577
5578 static int eval_map_show(struct seq_file *m, void *v)
5579 {
5580         union trace_eval_map_item *ptr = v;
5581
5582         seq_printf(m, "%s %ld (%s)\n",
5583                    ptr->map.eval_string, ptr->map.eval_value,
5584                    ptr->map.system);
5585
5586         return 0;
5587 }
5588
5589 static const struct seq_operations tracing_eval_map_seq_ops = {
5590         .start          = eval_map_start,
5591         .next           = eval_map_next,
5592         .stop           = eval_map_stop,
5593         .show           = eval_map_show,
5594 };
5595
5596 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5597 {
5598         int ret;
5599
5600         ret = tracing_check_open_get_tr(NULL);
5601         if (ret)
5602                 return ret;
5603
5604         return seq_open(filp, &tracing_eval_map_seq_ops);
5605 }
5606
5607 static const struct file_operations tracing_eval_map_fops = {
5608         .open           = tracing_eval_map_open,
5609         .read           = seq_read,
5610         .llseek         = seq_lseek,
5611         .release        = seq_release,
5612 };
5613
5614 static inline union trace_eval_map_item *
5615 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5616 {
5617         /* Return tail of array given the head */
5618         return ptr + ptr->head.length + 1;
5619 }
5620
5621 static void
5622 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5623                            int len)
5624 {
5625         struct trace_eval_map **stop;
5626         struct trace_eval_map **map;
5627         union trace_eval_map_item *map_array;
5628         union trace_eval_map_item *ptr;
5629
5630         stop = start + len;
5631
5632         /*
5633          * The trace_eval_maps contains the map plus a head and tail item,
5634          * where the head holds the module and length of array, and the
5635          * tail holds a pointer to the next list.
5636          */
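        /*
         * Editor's sketch (not in the original source) of the layout of one
         * such allocation:
         *
         *   [ head | map[0] | map[1] | ... | map[len-1] | tail ]
         *     head.mod, head.length                      tail.next --> next block
         */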
5637         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5638         if (!map_array) {
5639                 pr_warn("Unable to allocate trace eval mapping\n");
5640                 return;
5641         }
5642
5643         mutex_lock(&trace_eval_mutex);
5644
5645         if (!trace_eval_maps)
5646                 trace_eval_maps = map_array;
5647         else {
5648                 ptr = trace_eval_maps;
5649                 for (;;) {
5650                         ptr = trace_eval_jmp_to_tail(ptr);
5651                         if (!ptr->tail.next)
5652                                 break;
5653                         ptr = ptr->tail.next;
5654
5655                 }
5656                 ptr->tail.next = map_array;
5657         }
5658         map_array->head.mod = mod;
5659         map_array->head.length = len;
5660         map_array++;
5661
5662         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5663                 map_array->map = **map;
5664                 map_array++;
5665         }
5666         memset(map_array, 0, sizeof(*map_array));
5667
5668         mutex_unlock(&trace_eval_mutex);
5669 }
5670
5671 static void trace_create_eval_file(struct dentry *d_tracer)
5672 {
5673         trace_create_file("eval_map", 0444, d_tracer,
5674                           NULL, &tracing_eval_map_fops);
5675 }
5676
5677 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5678 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5679 static inline void trace_insert_eval_map_file(struct module *mod,
5680                               struct trace_eval_map **start, int len) { }
5681 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5682
5683 static void trace_insert_eval_map(struct module *mod,
5684                                   struct trace_eval_map **start, int len)
5685 {
5686         struct trace_eval_map **map;
5687
5688         if (len <= 0)
5689                 return;
5690
5691         map = start;
5692
5693         trace_event_eval_update(map, len);
5694
5695         trace_insert_eval_map_file(mod, start, len);
5696 }
5697
5698 static ssize_t
5699 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5700                        size_t cnt, loff_t *ppos)
5701 {
5702         struct trace_array *tr = filp->private_data;
5703         char buf[MAX_TRACER_SIZE+2];
5704         int r;
5705
5706         mutex_lock(&trace_types_lock);
5707         r = sprintf(buf, "%s\n", tr->current_trace->name);
5708         mutex_unlock(&trace_types_lock);
5709
5710         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5711 }
5712
5713 int tracer_init(struct tracer *t, struct trace_array *tr)
5714 {
5715         tracing_reset_online_cpus(&tr->array_buffer);
5716         return t->init(tr);
5717 }
5718
5719 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5720 {
5721         int cpu;
5722
5723         for_each_tracing_cpu(cpu)
5724                 per_cpu_ptr(buf->data, cpu)->entries = val;
5725 }
5726
5727 #ifdef CONFIG_TRACER_MAX_TRACE
5728 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5729 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5730                                         struct array_buffer *size_buf, int cpu_id)
5731 {
5732         int cpu, ret = 0;
5733
5734         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5735                 for_each_tracing_cpu(cpu) {
5736                         ret = ring_buffer_resize(trace_buf->buffer,
5737                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5738                         if (ret < 0)
5739                                 break;
5740                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5741                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5742                 }
5743         } else {
5744                 ret = ring_buffer_resize(trace_buf->buffer,
5745                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5746                 if (ret == 0)
5747                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5748                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5749         }
5750
5751         return ret;
5752 }
5753 #endif /* CONFIG_TRACER_MAX_TRACE */
5754
5755 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5756                                         unsigned long size, int cpu)
5757 {
5758         int ret;
5759
5760         /*
5761          * If kernel or user changes the size of the ring buffer
5762          * we use the size that was given, and we can forget about
5763          * expanding it later.
5764          */
5765         ring_buffer_expanded = true;
5766
5767         /* May be called before buffers are initialized */
5768         if (!tr->array_buffer.buffer)
5769                 return 0;
5770
5771         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5772         if (ret < 0)
5773                 return ret;
5774
5775 #ifdef CONFIG_TRACER_MAX_TRACE
5776         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5777             !tr->current_trace->use_max_tr)
5778                 goto out;
5779
5780         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5781         if (ret < 0) {
5782                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5783                                                      &tr->array_buffer, cpu);
5784                 if (r < 0) {
5785                         /*
5786                          * AARGH! We are left with different
5787                          * size max buffer!!!!
5788                          * The max buffer is our "snapshot" buffer.
5789                          * When a tracer needs a snapshot (one of the
5790                          * latency tracers), it swaps the max buffer
5791                          * with the saved snapshot. We succeeded in updating
5792                          * the size of the main buffer, but failed to update
5793                          * the size of the max buffer. But when we tried
5794                          * to reset the main buffer to the original size, we
5795                          * failed there too. This is very unlikely to
5796                          * happen, but if it does, warn and kill all
5797                          * tracing.
5798                          */
5799                         WARN_ON(1);
5800                         tracing_disabled = 1;
5801                 }
5802                 return ret;
5803         }
5804
5805         if (cpu == RING_BUFFER_ALL_CPUS)
5806                 set_buffer_entries(&tr->max_buffer, size);
5807         else
5808                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5809
5810  out:
5811 #endif /* CONFIG_TRACER_MAX_TRACE */
5812
5813         if (cpu == RING_BUFFER_ALL_CPUS)
5814                 set_buffer_entries(&tr->array_buffer, size);
5815         else
5816                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5817
5818         return ret;
5819 }
5820
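/*
 * Editor's note (not in the original source): this is the path taken when
 * user space writes to buffer_size_kb (all CPUs) or to a per_cpu/cpuN/
 * buffer_size_kb file (a single CPU); @size is the requested size of one
 * per-CPU buffer in bytes.
 */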
5821 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5822                                   unsigned long size, int cpu_id)
5823 {
5824         int ret = size;
5825
5826         mutex_lock(&trace_types_lock);
5827
5828         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5829                 /* make sure, this cpu is enabled in the mask */
5830                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5831                         ret = -EINVAL;
5832                         goto out;
5833                 }
5834         }
5835
5836         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5837         if (ret < 0)
5838                 ret = -ENOMEM;
5839
5840 out:
5841         mutex_unlock(&trace_types_lock);
5842
5843         return ret;
5844 }
5845
5846
5847 /**
5848  * tracing_update_buffers - used by tracing facility to expand ring buffers
5849  *
5850  * To save memory when tracing is never used on a system that has it
5851  * configured in, the ring buffers are initially set to a minimum size.
5852  * Once a user starts to use the tracing facility, they need to grow
5853  * to their default size.
5854  *
5855  * This function is to be called when a tracer is about to be used.
5856  */
5857 int tracing_update_buffers(void)
5858 {
5859         int ret = 0;
5860
5861         mutex_lock(&trace_types_lock);
5862         if (!ring_buffer_expanded)
5863                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5864                                                 RING_BUFFER_ALL_CPUS);
5865         mutex_unlock(&trace_types_lock);
5866
5867         return ret;
5868 }
5869
5870 struct trace_option_dentry;
5871
5872 static void
5873 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5874
5875 /*
5876  * Used to clear out the tracer before deletion of an instance.
5877  * Must have trace_types_lock held.
5878  */
5879 static void tracing_set_nop(struct trace_array *tr)
5880 {
5881         if (tr->current_trace == &nop_trace)
5882                 return;
5883
5884         tr->current_trace->enabled--;
5885
5886         if (tr->current_trace->reset)
5887                 tr->current_trace->reset(tr);
5888
5889         tr->current_trace = &nop_trace;
5890 }
5891
5892 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5893 {
5894         /* Only enable if the directory has been created already. */
5895         if (!tr->dir)
5896                 return;
5897
5898         create_trace_option_files(tr, t);
5899 }
5900
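/*
 * Editor's note (not in the original source): tracing_set_tracer() backs the
 * current_tracer file, e.g.:
 *
 *	# echo function_graph > current_tracer
 *	# echo nop > current_tracer
 *
 * The second write selects the no-op tracer, effectively turning the
 * previous tracer off.
 */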
5901 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5902 {
5903         struct tracer *t;
5904 #ifdef CONFIG_TRACER_MAX_TRACE
5905         bool had_max_tr;
5906 #endif
5907         int ret = 0;
5908
5909         mutex_lock(&trace_types_lock);
5910
5911         if (!ring_buffer_expanded) {
5912                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5913                                                 RING_BUFFER_ALL_CPUS);
5914                 if (ret < 0)
5915                         goto out;
5916                 ret = 0;
5917         }
5918
5919         for (t = trace_types; t; t = t->next) {
5920                 if (strcmp(t->name, buf) == 0)
5921                         break;
5922         }
5923         if (!t) {
5924                 ret = -EINVAL;
5925                 goto out;
5926         }
5927         if (t == tr->current_trace)
5928                 goto out;
5929
5930 #ifdef CONFIG_TRACER_SNAPSHOT
5931         if (t->use_max_tr) {
5932                 arch_spin_lock(&tr->max_lock);
5933                 if (tr->cond_snapshot)
5934                         ret = -EBUSY;
5935                 arch_spin_unlock(&tr->max_lock);
5936                 if (ret)
5937                         goto out;
5938         }
5939 #endif
5940         /* Some tracers won't work on kernel command line */
5941         if (system_state < SYSTEM_RUNNING && t->noboot) {
5942                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5943                         t->name);
5944                 goto out;
5945         }
5946
5947         /* Some tracers are only allowed for the top level buffer */
5948         if (!trace_ok_for_array(t, tr)) {
5949                 ret = -EINVAL;
5950                 goto out;
5951         }
5952
5953         /* If trace pipe files are being read, we can't change the tracer */
5954         if (tr->trace_ref) {
5955                 ret = -EBUSY;
5956                 goto out;
5957         }
5958
5959         trace_branch_disable();
5960
5961         tr->current_trace->enabled--;
5962
5963         if (tr->current_trace->reset)
5964                 tr->current_trace->reset(tr);
5965
5966         /* Current trace needs to be nop_trace before synchronize_rcu */
5967         tr->current_trace = &nop_trace;
5968
5969 #ifdef CONFIG_TRACER_MAX_TRACE
5970         had_max_tr = tr->allocated_snapshot;
5971
5972         if (had_max_tr && !t->use_max_tr) {
5973                 /*
5974                  * We need to make sure that the update_max_tr sees that
5975                  * current_trace changed to nop_trace to keep it from
5976                  * swapping the buffers after we resize it.
5977                  * The update_max_tr is called with interrupts disabled,
5978                  * so a synchronize_rcu() is sufficient.
5979                  */
5980                 synchronize_rcu();
5981                 free_snapshot(tr);
5982         }
5983 #endif
5984
5985 #ifdef CONFIG_TRACER_MAX_TRACE
5986         if (t->use_max_tr && !had_max_tr) {
5987                 ret = tracing_alloc_snapshot_instance(tr);
5988                 if (ret < 0)
5989                         goto out;
5990         }
5991 #endif
5992
5993         if (t->init) {
5994                 ret = tracer_init(t, tr);
5995                 if (ret)
5996                         goto out;
5997         }
5998
5999         tr->current_trace = t;
6000         tr->current_trace->enabled++;
6001         trace_branch_enable(tr);
6002  out:
6003         mutex_unlock(&trace_types_lock);
6004
6005         return ret;
6006 }
6007
6008 static ssize_t
6009 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6010                         size_t cnt, loff_t *ppos)
6011 {
6012         struct trace_array *tr = filp->private_data;
6013         char buf[MAX_TRACER_SIZE+1];
6014         int i;
6015         size_t ret;
6016         int err;
6017
6018         ret = cnt;
6019
6020         if (cnt > MAX_TRACER_SIZE)
6021                 cnt = MAX_TRACER_SIZE;
6022
6023         if (copy_from_user(buf, ubuf, cnt))
6024                 return -EFAULT;
6025
6026         buf[cnt] = 0;
6027
6028         /* strip ending whitespace. */
6029         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6030                 buf[i] = 0;
6031
6032         err = tracing_set_tracer(tr, buf);
6033         if (err)
6034                 return err;
6035
6036         *ppos += ret;
6037
6038         return ret;
6039 }
6040
6041 static ssize_t
6042 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6043                    size_t cnt, loff_t *ppos)
6044 {
6045         char buf[64];
6046         int r;
6047
6048         r = snprintf(buf, sizeof(buf), "%ld\n",
6049                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6050         if (r > sizeof(buf))
6051                 r = sizeof(buf);
6052         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6053 }
6054
6055 static ssize_t
6056 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6057                     size_t cnt, loff_t *ppos)
6058 {
6059         unsigned long val;
6060         int ret;
6061
6062         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6063         if (ret)
6064                 return ret;
6065
6066         *ptr = val * 1000;
6067
6068         return cnt;
6069 }
6070
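/*
 * Editor's note (not in the original source): the tracing_thresh and
 * tracing_max_latency files built on the helpers above are read and written
 * in microseconds, while the values are stored internally in nanoseconds
 * (hence the "* 1000" on write and nsecs_to_usecs() on read).
 */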
6071 static ssize_t
6072 tracing_thresh_read(struct file *filp, char __user *ubuf,
6073                     size_t cnt, loff_t *ppos)
6074 {
6075         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6076 }
6077
6078 static ssize_t
6079 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6080                      size_t cnt, loff_t *ppos)
6081 {
6082         struct trace_array *tr = filp->private_data;
6083         int ret;
6084
6085         mutex_lock(&trace_types_lock);
6086         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6087         if (ret < 0)
6088                 goto out;
6089
6090         if (tr->current_trace->update_thresh) {
6091                 ret = tr->current_trace->update_thresh(tr);
6092                 if (ret < 0)
6093                         goto out;
6094         }
6095
6096         ret = cnt;
6097 out:
6098         mutex_unlock(&trace_types_lock);
6099
6100         return ret;
6101 }
6102
6103 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6104
6105 static ssize_t
6106 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6107                      size_t cnt, loff_t *ppos)
6108 {
6109         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6110 }
6111
6112 static ssize_t
6113 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6114                       size_t cnt, loff_t *ppos)
6115 {
6116         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6117 }
6118
6119 #endif
6120
6121 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6122 {
6123         struct trace_array *tr = inode->i_private;
6124         struct trace_iterator *iter;
6125         int ret;
6126
6127         ret = tracing_check_open_get_tr(tr);
6128         if (ret)
6129                 return ret;
6130
6131         mutex_lock(&trace_types_lock);
6132
6133         /* create a buffer to store the information to pass to userspace */
6134         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6135         if (!iter) {
6136                 ret = -ENOMEM;
6137                 __trace_array_put(tr);
6138                 goto out;
6139         }
6140
6141         trace_seq_init(&iter->seq);
6142         iter->trace = tr->current_trace;
6143
6144         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6145                 ret = -ENOMEM;
6146                 goto fail;
6147         }
6148
6149         /* trace pipe does not show start of buffer */
6150         cpumask_setall(iter->started);
6151
6152         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6153                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6154
6155         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6156         if (trace_clocks[tr->clock_id].in_ns)
6157                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6158
6159         iter->tr = tr;
6160         iter->array_buffer = &tr->array_buffer;
6161         iter->cpu_file = tracing_get_cpu(inode);
6162         mutex_init(&iter->mutex);
6163         filp->private_data = iter;
6164
6165         if (iter->trace->pipe_open)
6166                 iter->trace->pipe_open(iter);
6167
6168         nonseekable_open(inode, filp);
6169
6170         tr->trace_ref++;
6171 out:
6172         mutex_unlock(&trace_types_lock);
6173         return ret;
6174
6175 fail:
6176         kfree(iter);
6177         __trace_array_put(tr);
6178         mutex_unlock(&trace_types_lock);
6179         return ret;
6180 }
6181
6182 static int tracing_release_pipe(struct inode *inode, struct file *file)
6183 {
6184         struct trace_iterator *iter = file->private_data;
6185         struct trace_array *tr = inode->i_private;
6186
6187         mutex_lock(&trace_types_lock);
6188
6189         tr->trace_ref--;
6190
6191         if (iter->trace->pipe_close)
6192                 iter->trace->pipe_close(iter);
6193
6194         mutex_unlock(&trace_types_lock);
6195
6196         free_cpumask_var(iter->started);
6197         mutex_destroy(&iter->mutex);
6198         kfree(iter);
6199
6200         trace_array_put(tr);
6201
6202         return 0;
6203 }
6204
6205 static __poll_t
6206 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6207 {
6208         struct trace_array *tr = iter->tr;
6209
6210         /* Iterators are static, they should be filled or empty */
6211         if (trace_buffer_iter(iter, iter->cpu_file))
6212                 return EPOLLIN | EPOLLRDNORM;
6213
6214         if (tr->trace_flags & TRACE_ITER_BLOCK)
6215                 /*
6216                  * Always select as readable when in blocking mode
6217                  */
6218                 return EPOLLIN | EPOLLRDNORM;
6219         else
6220                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6221                                              filp, poll_table);
6222 }
6223
6224 static __poll_t
6225 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6226 {
6227         struct trace_iterator *iter = filp->private_data;
6228
6229         return trace_poll(iter, filp, poll_table);
6230 }
6231
6232 /* Must be called with iter->mutex held. */
6233 static int tracing_wait_pipe(struct file *filp)
6234 {
6235         struct trace_iterator *iter = filp->private_data;
6236         int ret;
6237
6238         while (trace_empty(iter)) {
6239
6240                 if ((filp->f_flags & O_NONBLOCK)) {
6241                         return -EAGAIN;
6242                 }
6243
6244                 /*
6245                  * We block while the buffer is empty. Tracing being
6246                  * disabled only ends the wait once something has already
6247                  * been read. This allows a user to cat this file, and
6248                  * then enable tracing. But after we have read something,
6249                  * we give an EOF when tracing is again disabled.
6250                  *
6251                  * iter->pos will be 0 if we haven't read anything.
6252                  */
6253                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6254                         break;
6255
6256                 mutex_unlock(&iter->mutex);
6257
6258                 ret = wait_on_pipe(iter, 0);
6259
6260                 mutex_lock(&iter->mutex);
6261
6262                 if (ret)
6263                         return ret;
6264         }
6265
6266         return 1;
6267 }
6268
6269 /*
6270  * Consumer reader.
6271  */
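/*
 * Editor's note (not in the original source): this implements reads of the
 * trace_pipe file, e.g.:
 *
 *	# cat trace_pipe
 *
 * Unlike the trace file, the read consumes the events it returns and blocks
 * until more data arrives (unless the file was opened O_NONBLOCK).
 */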
6272 static ssize_t
6273 tracing_read_pipe(struct file *filp, char __user *ubuf,
6274                   size_t cnt, loff_t *ppos)
6275 {
6276         struct trace_iterator *iter = filp->private_data;
6277         ssize_t sret;
6278
6279         /*
6280          * Avoid more than one consumer on a single file descriptor.
6281          * This is just a matter of trace coherency; the ring buffer itself
6282          * is protected.
6283          */
6284         mutex_lock(&iter->mutex);
6285
6286         /* return any leftover data */
6287         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6288         if (sret != -EBUSY)
6289                 goto out;
6290
6291         trace_seq_init(&iter->seq);
6292
6293         if (iter->trace->read) {
6294                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6295                 if (sret)
6296                         goto out;
6297         }
6298
6299 waitagain:
6300         sret = tracing_wait_pipe(filp);
6301         if (sret <= 0)
6302                 goto out;
6303
6304         /* stop when tracing is finished */
6305         if (trace_empty(iter)) {
6306                 sret = 0;
6307                 goto out;
6308         }
6309
6310         if (cnt >= PAGE_SIZE)
6311                 cnt = PAGE_SIZE - 1;
6312
6313         /* reset all but tr, trace, and overruns */
6314         memset(&iter->seq, 0,
6315                sizeof(struct trace_iterator) -
6316                offsetof(struct trace_iterator, seq));
6317         cpumask_clear(iter->started);
6318         trace_seq_init(&iter->seq);
6319         iter->pos = -1;
6320
6321         trace_event_read_lock();
6322         trace_access_lock(iter->cpu_file);
6323         while (trace_find_next_entry_inc(iter) != NULL) {
6324                 enum print_line_t ret;
6325                 int save_len = iter->seq.seq.len;
6326
6327                 ret = print_trace_line(iter);
6328                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6329                         /* don't print partial lines */
6330                         iter->seq.seq.len = save_len;
6331                         break;
6332                 }
6333                 if (ret != TRACE_TYPE_NO_CONSUME)
6334                         trace_consume(iter);
6335
6336                 if (trace_seq_used(&iter->seq) >= cnt)
6337                         break;
6338
6339                 /*
6340                  * The full flag being set means we hit the trace_seq buffer
6341                  * size and should have left via the partial-output check above.
6342                  * One of the trace_seq_*() functions is not being used properly.
6343                  */
6344                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6345                           iter->ent->type);
6346         }
6347         trace_access_unlock(iter->cpu_file);
6348         trace_event_read_unlock();
6349
6350         /* Now copy what we have to the user */
6351         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6352         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6353                 trace_seq_init(&iter->seq);
6354
6355         /*
6356          * If there was nothing to send to the user, despite consuming trace
6357          * entries, go back and wait for more entries.
6358          */
6359         if (sret == -EBUSY)
6360                 goto waitagain;
6361
6362 out:
6363         mutex_unlock(&iter->mutex);
6364
6365         return sret;
6366 }
6367
6368 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6369                                      unsigned int idx)
6370 {
6371         __free_page(spd->pages[idx]);
6372 }
6373
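/*
 * Format consumed events into iter->seq until either the page-sized seq
 * buffer fills up or the remaining splice budget (rem) is used up.
 * Returns the number of bytes still left in the budget.
 */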
6374 static size_t
6375 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6376 {
6377         size_t count;
6378         int save_len;
6379         int ret;
6380
6381         /* Seq buffer is page-sized, exactly what we need. */
6382         for (;;) {
6383                 save_len = iter->seq.seq.len;
6384                 ret = print_trace_line(iter);
6385
6386                 if (trace_seq_has_overflowed(&iter->seq)) {
6387                         iter->seq.seq.len = save_len;
6388                         break;
6389                 }
6390
6391                 /*
6392                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6393                  * only be returned if iter->seq overflowed, which was checked
6394                  * above. But check it anyway to be safe.
6395                  */
6396                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6397                         iter->seq.seq.len = save_len;
6398                         break;
6399                 }
6400
6401                 count = trace_seq_used(&iter->seq) - save_len;
6402                 if (rem < count) {
6403                         rem = 0;
6404                         iter->seq.seq.len = save_len;
6405                         break;
6406                 }
6407
6408                 if (ret != TRACE_TYPE_NO_CONSUME)
6409                         trace_consume(iter);
6410                 rem -= count;
6411                 if (!trace_find_next_entry_inc(iter))   {
6412                         rem = 0;
6413                         iter->ent = NULL;
6414                         break;
6415                 }
6416         }
6417
6418         return rem;
6419 }
6420
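/*
 * splice() path for trace_pipe: fill up to PIPE_DEF_BUFFERS freshly
 * allocated pages with formatted trace text and hand them to the pipe.
 * Illustrative user-space driver (fd numbers and sizes are examples only):
 *
 *   splice(trace_pipe_fd, NULL, pipe_write_fd, NULL, 4096, 0);
 */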
6421 static ssize_t tracing_splice_read_pipe(struct file *filp,
6422                                         loff_t *ppos,
6423                                         struct pipe_inode_info *pipe,
6424                                         size_t len,
6425                                         unsigned int flags)
6426 {
6427         struct page *pages_def[PIPE_DEF_BUFFERS];
6428         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6429         struct trace_iterator *iter = filp->private_data;
6430         struct splice_pipe_desc spd = {
6431                 .pages          = pages_def,
6432                 .partial        = partial_def,
6433                 .nr_pages       = 0, /* This gets updated below. */
6434                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6435                 .ops            = &default_pipe_buf_ops,
6436                 .spd_release    = tracing_spd_release_pipe,
6437         };
6438         ssize_t ret;
6439         size_t rem;
6440         unsigned int i;
6441
6442         if (splice_grow_spd(pipe, &spd))
6443                 return -ENOMEM;
6444
6445         mutex_lock(&iter->mutex);
6446
6447         if (iter->trace->splice_read) {
6448                 ret = iter->trace->splice_read(iter, filp,
6449                                                ppos, pipe, len, flags);
6450                 if (ret)
6451                         goto out_err;
6452         }
6453
6454         ret = tracing_wait_pipe(filp);
6455         if (ret <= 0)
6456                 goto out_err;
6457
6458         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6459                 ret = -EFAULT;
6460                 goto out_err;
6461         }
6462
6463         trace_event_read_lock();
6464         trace_access_lock(iter->cpu_file);
6465
6466         /* Fill as many pages as possible. */
6467         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6468                 spd.pages[i] = alloc_page(GFP_KERNEL);
6469                 if (!spd.pages[i])
6470                         break;
6471
6472                 rem = tracing_fill_pipe_page(rem, iter);
6473
6474                 /* Copy the data into the page, so we can start over. */
6475                 ret = trace_seq_to_buffer(&iter->seq,
6476                                           page_address(spd.pages[i]),
6477                                           trace_seq_used(&iter->seq));
6478                 if (ret < 0) {
6479                         __free_page(spd.pages[i]);
6480                         break;
6481                 }
6482                 spd.partial[i].offset = 0;
6483                 spd.partial[i].len = trace_seq_used(&iter->seq);
6484
6485                 trace_seq_init(&iter->seq);
6486         }
6487
6488         trace_access_unlock(iter->cpu_file);
6489         trace_event_read_unlock();
6490         mutex_unlock(&iter->mutex);
6491
6492         spd.nr_pages = i;
6493
6494         if (i)
6495                 ret = splice_to_pipe(pipe, &spd);
6496         else
6497                 ret = 0;
6498 out:
6499         splice_shrink_spd(&spd);
6500         return ret;
6501
6502 out_err:
6503         mutex_unlock(&iter->mutex);
6504         goto out;
6505 }
6506
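/*
 * buffer_size_kb read side: prints the per-CPU buffer size in KB. For the
 * top-level file it prints "X" when the per-CPU sizes differ, and appends
 * "(expanded: ...)" while the ring buffer still has its small boot-time size.
 */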
6507 static ssize_t
6508 tracing_entries_read(struct file *filp, char __user *ubuf,
6509                      size_t cnt, loff_t *ppos)
6510 {
6511         struct inode *inode = file_inode(filp);
6512         struct trace_array *tr = inode->i_private;
6513         int cpu = tracing_get_cpu(inode);
6514         char buf[64];
6515         int r = 0;
6516         ssize_t ret;
6517
6518         mutex_lock(&trace_types_lock);
6519
6520         if (cpu == RING_BUFFER_ALL_CPUS) {
6521                 int cpu, buf_size_same;
6522                 unsigned long size;
6523
6524                 size = 0;
6525                 buf_size_same = 1;
6526                 /* check if all per-CPU buffer sizes are the same */
6527                 for_each_tracing_cpu(cpu) {
6528                         /* fill in the size from first enabled cpu */
6529                         if (size == 0)
6530                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6531                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6532                                 buf_size_same = 0;
6533                                 break;
6534                         }
6535                 }
6536
6537                 if (buf_size_same) {
6538                         if (!ring_buffer_expanded)
6539                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6540                                             size >> 10,
6541                                             trace_buf_size >> 10);
6542                         else
6543                                 r = sprintf(buf, "%lu\n", size >> 10);
6544                 } else
6545                         r = sprintf(buf, "X\n");
6546         } else
6547                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6548
6549         mutex_unlock(&trace_types_lock);
6550
6551         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6552         return ret;
6553 }
6554
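/*
 * buffer_size_kb write side: the value is taken in kilobytes and each
 * selected CPU buffer is resized to it. For example (illustrative path,
 * assuming the usual tracefs mount point):
 *
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-CPU buffer to 4 MB.
 */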
6555 static ssize_t
6556 tracing_entries_write(struct file *filp, const char __user *ubuf,
6557                       size_t cnt, loff_t *ppos)
6558 {
6559         struct inode *inode = file_inode(filp);
6560         struct trace_array *tr = inode->i_private;
6561         unsigned long val;
6562         int ret;
6563
6564         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6565         if (ret)
6566                 return ret;
6567
6568         /* must have at least 1 entry */
6569         if (!val)
6570                 return -EINVAL;
6571
6572         /* value is in KB */
6573         val <<= 10;
6574         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6575         if (ret < 0)
6576                 return ret;
6577
6578         *ppos += cnt;
6579
6580         return cnt;
6581 }
6582
6583 static ssize_t
6584 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6585                                 size_t cnt, loff_t *ppos)
6586 {
6587         struct trace_array *tr = filp->private_data;
6588         char buf[64];
6589         int r, cpu;
6590         unsigned long size = 0, expanded_size = 0;
6591
6592         mutex_lock(&trace_types_lock);
6593         for_each_tracing_cpu(cpu) {
6594                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6595                 if (!ring_buffer_expanded)
6596                         expanded_size += trace_buf_size >> 10;
6597         }
6598         if (ring_buffer_expanded)
6599                 r = sprintf(buf, "%lu\n", size);
6600         else
6601                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6602         mutex_unlock(&trace_types_lock);
6603
6604         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6605 }
6606
6607 static ssize_t
6608 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6609                           size_t cnt, loff_t *ppos)
6610 {
6611         /*
6612          * There is no need to read what the user has written; this function
6613          * only exists so that an "echo" into this file does not fail.
6614          */
6615
6616         *ppos += cnt;
6617
6618         return cnt;
6619 }
6620
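/*
 * free_buffer: writes are ignored (see above); the real work happens on
 * release, where the ring buffer is shrunk to zero and, if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off.
 * Typical use is something like "echo > free_buffer" from a shell.
 */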
6621 static int
6622 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6623 {
6624         struct trace_array *tr = inode->i_private;
6625
6626         /* Disable tracing? */
6627         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6628                 tracer_tracing_off(tr);
6629         /* resize the ring buffer to 0 */
6630         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6631
6632         trace_array_put(tr);
6633
6634         return 0;
6635 }
6636
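/*
 * trace_marker: lets user space inject a text message into the trace,
 * e.g. (illustrative):
 *
 *   echo "frame start" > /sys/kernel/tracing/trace_marker
 *
 * Writes are truncated to TRACE_BUF_SIZE and a trailing newline is added
 * if the message does not already end with one.
 */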
6637 static ssize_t
6638 tracing_mark_write(struct file *filp, const char __user *ubuf,
6639                                         size_t cnt, loff_t *fpos)
6640 {
6641         struct trace_array *tr = filp->private_data;
6642         struct ring_buffer_event *event;
6643         enum event_trigger_type tt = ETT_NONE;
6644         struct trace_buffer *buffer;
6645         struct print_entry *entry;
6646         unsigned long irq_flags;
6647         ssize_t written;
6648         int size;
6649         int len;
6650
6651 /* Used in tracing_mark_raw_write() as well */
6652 #define FAULTED_STR "<faulted>"
6653 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6654
6655         if (tracing_disabled)
6656                 return -EINVAL;
6657
6658         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6659                 return -EINVAL;
6660
6661         if (cnt > TRACE_BUF_SIZE)
6662                 cnt = TRACE_BUF_SIZE;
6663
6664         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6665
6666         local_save_flags(irq_flags);
6667         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6668
6669         /* If less than "<faulted>", then make sure we can still add that */
6670         if (cnt < FAULTED_SIZE)
6671                 size += FAULTED_SIZE - cnt;
6672
6673         buffer = tr->array_buffer.buffer;
6674         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6675                                             irq_flags, preempt_count());
6676         if (unlikely(!event))
6677                 /* Ring buffer disabled, return as if not open for write */
6678                 return -EBADF;
6679
6680         entry = ring_buffer_event_data(event);
6681         entry->ip = _THIS_IP_;
6682
6683         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6684         if (len) {
6685                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6686                 cnt = FAULTED_SIZE;
6687                 written = -EFAULT;
6688         } else
6689                 written = cnt;
6690
6691         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6692                 /* do not add \n before testing triggers, but add \0 */
6693                 entry->buf[cnt] = '\0';
6694                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6695         }
6696
6697         if (entry->buf[cnt - 1] != '\n') {
6698                 entry->buf[cnt] = '\n';
6699                 entry->buf[cnt + 1] = '\0';
6700         } else
6701                 entry->buf[cnt] = '\0';
6702
6703         if (static_branch_unlikely(&trace_marker_exports_enabled))
6704                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6705         __buffer_unlock_commit(buffer, event);
6706
6707         if (tt)
6708                 event_triggers_post_call(tr->trace_marker_file, tt);
6709
6710         if (written > 0)
6711                 *fpos += written;
6712
6713         return written;
6714 }
6715
6716 /* Limit it for now to 3K (including tag) */
6717 #define RAW_DATA_MAX_SIZE (1024*3)
6718
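/*
 * trace_marker_raw: binary variant of trace_marker. The payload must start
 * with a 4-byte tag id (stored in entry->id) followed by raw data, and the
 * whole write is limited to RAW_DATA_MAX_SIZE.
 */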
6719 static ssize_t
6720 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6721                                         size_t cnt, loff_t *fpos)
6722 {
6723         struct trace_array *tr = filp->private_data;
6724         struct ring_buffer_event *event;
6725         struct trace_buffer *buffer;
6726         struct raw_data_entry *entry;
6727         unsigned long irq_flags;
6728         ssize_t written;
6729         int size;
6730         int len;
6731
6732 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6733
6734         if (tracing_disabled)
6735                 return -EINVAL;
6736
6737         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6738                 return -EINVAL;
6739
6740         /* The marker must at least have a tag id */
6741         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6742                 return -EINVAL;
6743
6744         if (cnt > TRACE_BUF_SIZE)
6745                 cnt = TRACE_BUF_SIZE;
6746
6747         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6748
6749         local_save_flags(irq_flags);
6750         size = sizeof(*entry) + cnt;
6751         if (cnt < FAULT_SIZE_ID)
6752                 size += FAULT_SIZE_ID - cnt;
6753
6754         buffer = tr->array_buffer.buffer;
6755         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6756                                             irq_flags, preempt_count());
6757         if (!event)
6758                 /* Ring buffer disabled, return as if not open for write */
6759                 return -EBADF;
6760
6761         entry = ring_buffer_event_data(event);
6762
6763         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6764         if (len) {
6765                 entry->id = -1;
6766                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6767                 written = -EFAULT;
6768         } else
6769                 written = cnt;
6770
6771         __buffer_unlock_commit(buffer, event);
6772
6773         if (written > 0)
6774                 *fpos += written;
6775
6776         return written;
6777 }
6778
6779 static int tracing_clock_show(struct seq_file *m, void *v)
6780 {
6781         struct trace_array *tr = m->private;
6782         int i;
6783
6784         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6785                 seq_printf(m,
6786                         "%s%s%s%s", i ? " " : "",
6787                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6788                         i == tr->clock_id ? "]" : "");
6789         seq_putc(m, '\n');
6790
6791         return 0;
6792 }
6793
6794 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6795 {
6796         int i;
6797
6798         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6799                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6800                         break;
6801         }
6802         if (i == ARRAY_SIZE(trace_clocks))
6803                 return -EINVAL;
6804
6805         mutex_lock(&trace_types_lock);
6806
6807         tr->clock_id = i;
6808
6809         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6810
6811         /*
6812          * New clock may not be consistent with the previous clock.
6813          * Reset the buffer so that it doesn't have incomparable timestamps.
6814          */
6815         tracing_reset_online_cpus(&tr->array_buffer);
6816
6817 #ifdef CONFIG_TRACER_MAX_TRACE
6818         if (tr->max_buffer.buffer)
6819                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6820         tracing_reset_online_cpus(&tr->max_buffer);
6821 #endif
6822
6823         mutex_unlock(&trace_types_lock);
6824
6825         return 0;
6826 }
6827
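/*
 * trace_clock write side: selects one of the clocks listed by
 * tracing_clock_show(), e.g. (illustrative):
 *
 *   echo global > /sys/kernel/tracing/trace_clock
 *
 * Switching clocks resets the buffers, since timestamps taken with
 * different clocks are not comparable.
 */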
6828 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6829                                    size_t cnt, loff_t *fpos)
6830 {
6831         struct seq_file *m = filp->private_data;
6832         struct trace_array *tr = m->private;
6833         char buf[64];
6834         const char *clockstr;
6835         int ret;
6836
6837         if (cnt >= sizeof(buf))
6838                 return -EINVAL;
6839
6840         if (copy_from_user(buf, ubuf, cnt))
6841                 return -EFAULT;
6842
6843         buf[cnt] = 0;
6844
6845         clockstr = strstrip(buf);
6846
6847         ret = tracing_set_clock(tr, clockstr);
6848         if (ret)
6849                 return ret;
6850
6851         *fpos += cnt;
6852
6853         return cnt;
6854 }
6855
6856 static int tracing_clock_open(struct inode *inode, struct file *file)
6857 {
6858         struct trace_array *tr = inode->i_private;
6859         int ret;
6860
6861         ret = tracing_check_open_get_tr(tr);
6862         if (ret)
6863                 return ret;
6864
6865         ret = single_open(file, tracing_clock_show, inode->i_private);
6866         if (ret < 0)
6867                 trace_array_put(tr);
6868
6869         return ret;
6870 }
6871
6872 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6873 {
6874         struct trace_array *tr = m->private;
6875
6876         mutex_lock(&trace_types_lock);
6877
6878         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6879                 seq_puts(m, "delta [absolute]\n");
6880         else
6881                 seq_puts(m, "[delta] absolute\n");
6882
6883         mutex_unlock(&trace_types_lock);
6884
6885         return 0;
6886 }
6887
6888 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6889 {
6890         struct trace_array *tr = inode->i_private;
6891         int ret;
6892
6893         ret = tracing_check_open_get_tr(tr);
6894         if (ret)
6895                 return ret;
6896
6897         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6898         if (ret < 0)
6899                 trace_array_put(tr);
6900
6901         return ret;
6902 }
6903
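/*
 * Switch the ring buffer between delta and absolute timestamps. The request
 * is reference counted: absolute mode stays in effect until the last user
 * that asked for it drops its reference.
 */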
6904 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6905 {
6906         int ret = 0;
6907
6908         mutex_lock(&trace_types_lock);
6909
6910         if (abs && tr->time_stamp_abs_ref++)
6911                 goto out;
6912
6913         if (!abs) {
6914                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6915                         ret = -EINVAL;
6916                         goto out;
6917                 }
6918
6919                 if (--tr->time_stamp_abs_ref)
6920                         goto out;
6921         }
6922
6923         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6924
6925 #ifdef CONFIG_TRACER_MAX_TRACE
6926         if (tr->max_buffer.buffer)
6927                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6928 #endif
6929  out:
6930         mutex_unlock(&trace_types_lock);
6931
6932         return ret;
6933 }
6934
6935 struct ftrace_buffer_info {
6936         struct trace_iterator   iter;
6937         void                    *spare;
6938         unsigned int            spare_cpu;
6939         unsigned int            read;
6940 };
6941
6942 #ifdef CONFIG_TRACER_SNAPSHOT
6943 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6944 {
6945         struct trace_array *tr = inode->i_private;
6946         struct trace_iterator *iter;
6947         struct seq_file *m;
6948         int ret;
6949
6950         ret = tracing_check_open_get_tr(tr);
6951         if (ret)
6952                 return ret;
6953
6954         if (file->f_mode & FMODE_READ) {
6955                 iter = __tracing_open(inode, file, true);
6956                 if (IS_ERR(iter))
6957                         ret = PTR_ERR(iter);
6958         } else {
6959                 /* Writes still need the seq_file to hold the private data */
6960                 ret = -ENOMEM;
6961                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6962                 if (!m)
6963                         goto out;
6964                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6965                 if (!iter) {
6966                         kfree(m);
6967                         goto out;
6968                 }
6969                 ret = 0;
6970
6971                 iter->tr = tr;
6972                 iter->array_buffer = &tr->max_buffer;
6973                 iter->cpu_file = tracing_get_cpu(inode);
6974                 m->private = iter;
6975                 file->private_data = m;
6976         }
6977 out:
6978         if (ret < 0)
6979                 trace_array_put(tr);
6980
6981         return ret;
6982 }
6983
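/*
 * Write handler for the snapshot file. The accepted values mirror the
 * interface implemented below: "0" frees the snapshot buffer (all-CPUs file
 * only), "1" allocates it if needed and swaps it with the live buffer, and
 * any other number clears the snapshot contents without freeing the buffer.
 * For example, "echo 1 > snapshot" takes a snapshot.
 */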
6984 static ssize_t
6985 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6986                        loff_t *ppos)
6987 {
6988         struct seq_file *m = filp->private_data;
6989         struct trace_iterator *iter = m->private;
6990         struct trace_array *tr = iter->tr;
6991         unsigned long val;
6992         int ret;
6993
6994         ret = tracing_update_buffers();
6995         if (ret < 0)
6996                 return ret;
6997
6998         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6999         if (ret)
7000                 return ret;
7001
7002         mutex_lock(&trace_types_lock);
7003
7004         if (tr->current_trace->use_max_tr) {
7005                 ret = -EBUSY;
7006                 goto out;
7007         }
7008
7009         arch_spin_lock(&tr->max_lock);
7010         if (tr->cond_snapshot)
7011                 ret = -EBUSY;
7012         arch_spin_unlock(&tr->max_lock);
7013         if (ret)
7014                 goto out;
7015
7016         switch (val) {
7017         case 0:
7018                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7019                         ret = -EINVAL;
7020                         break;
7021                 }
7022                 if (tr->allocated_snapshot)
7023                         free_snapshot(tr);
7024                 break;
7025         case 1:
7026 /* Only allow per-cpu swap if the ring buffer supports it */
7027 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7028                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7029                         ret = -EINVAL;
7030                         break;
7031                 }
7032 #endif
7033                 if (tr->allocated_snapshot)
7034                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7035                                         &tr->array_buffer, iter->cpu_file);
7036                 else
7037                         ret = tracing_alloc_snapshot_instance(tr);
7038                 if (ret < 0)
7039                         break;
7040                 local_irq_disable();
7041                 /* Now, we're going to swap */
7042                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7043                         update_max_tr(tr, current, smp_processor_id(), NULL);
7044                 else
7045                         update_max_tr_single(tr, current, iter->cpu_file);
7046                 local_irq_enable();
7047                 break;
7048         default:
7049                 if (tr->allocated_snapshot) {
7050                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7051                                 tracing_reset_online_cpus(&tr->max_buffer);
7052                         else
7053                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7054                 }
7055                 break;
7056         }
7057
7058         if (ret >= 0) {
7059                 *ppos += cnt;
7060                 ret = cnt;
7061         }
7062 out:
7063         mutex_unlock(&trace_types_lock);
7064         return ret;
7065 }
7066
7067 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7068 {
7069         struct seq_file *m = file->private_data;
7070         int ret;
7071
7072         ret = tracing_release(inode, file);
7073
7074         if (file->f_mode & FMODE_READ)
7075                 return ret;
7076
7077         /* If write only, the seq_file is just a stub */
7078         if (m)
7079                 kfree(m->private);
7080         kfree(m);
7081
7082         return 0;
7083 }
7084
7085 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7086 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7087                                     size_t count, loff_t *ppos);
7088 static int tracing_buffers_release(struct inode *inode, struct file *file);
7089 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7090                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7091
7092 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7093 {
7094         struct ftrace_buffer_info *info;
7095         int ret;
7096
7097         /* The following checks for tracefs lockdown */
7098         ret = tracing_buffers_open(inode, filp);
7099         if (ret < 0)
7100                 return ret;
7101
7102         info = filp->private_data;
7103
7104         if (info->iter.trace->use_max_tr) {
7105                 tracing_buffers_release(inode, filp);
7106                 return -EBUSY;
7107         }
7108
7109         info->iter.snapshot = true;
7110         info->iter.array_buffer = &info->iter.tr->max_buffer;
7111
7112         return ret;
7113 }
7114
7115 #endif /* CONFIG_TRACER_SNAPSHOT */
7116
7117
7118 static const struct file_operations tracing_thresh_fops = {
7119         .open           = tracing_open_generic,
7120         .read           = tracing_thresh_read,
7121         .write          = tracing_thresh_write,
7122         .llseek         = generic_file_llseek,
7123 };
7124
7125 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7126 static const struct file_operations tracing_max_lat_fops = {
7127         .open           = tracing_open_generic,
7128         .read           = tracing_max_lat_read,
7129         .write          = tracing_max_lat_write,
7130         .llseek         = generic_file_llseek,
7131 };
7132 #endif
7133
7134 static const struct file_operations set_tracer_fops = {
7135         .open           = tracing_open_generic,
7136         .read           = tracing_set_trace_read,
7137         .write          = tracing_set_trace_write,
7138         .llseek         = generic_file_llseek,
7139 };
7140
7141 static const struct file_operations tracing_pipe_fops = {
7142         .open           = tracing_open_pipe,
7143         .poll           = tracing_poll_pipe,
7144         .read           = tracing_read_pipe,
7145         .splice_read    = tracing_splice_read_pipe,
7146         .release        = tracing_release_pipe,
7147         .llseek         = no_llseek,
7148 };
7149
7150 static const struct file_operations tracing_entries_fops = {
7151         .open           = tracing_open_generic_tr,
7152         .read           = tracing_entries_read,
7153         .write          = tracing_entries_write,
7154         .llseek         = generic_file_llseek,
7155         .release        = tracing_release_generic_tr,
7156 };
7157
7158 static const struct file_operations tracing_total_entries_fops = {
7159         .open           = tracing_open_generic_tr,
7160         .read           = tracing_total_entries_read,
7161         .llseek         = generic_file_llseek,
7162         .release        = tracing_release_generic_tr,
7163 };
7164
7165 static const struct file_operations tracing_free_buffer_fops = {
7166         .open           = tracing_open_generic_tr,
7167         .write          = tracing_free_buffer_write,
7168         .release        = tracing_free_buffer_release,
7169 };
7170
7171 static const struct file_operations tracing_mark_fops = {
7172         .open           = tracing_open_generic_tr,
7173         .write          = tracing_mark_write,
7174         .llseek         = generic_file_llseek,
7175         .release        = tracing_release_generic_tr,
7176 };
7177
7178 static const struct file_operations tracing_mark_raw_fops = {
7179         .open           = tracing_open_generic_tr,
7180         .write          = tracing_mark_raw_write,
7181         .llseek         = generic_file_llseek,
7182         .release        = tracing_release_generic_tr,
7183 };
7184
7185 static const struct file_operations trace_clock_fops = {
7186         .open           = tracing_clock_open,
7187         .read           = seq_read,
7188         .llseek         = seq_lseek,
7189         .release        = tracing_single_release_tr,
7190         .write          = tracing_clock_write,
7191 };
7192
7193 static const struct file_operations trace_time_stamp_mode_fops = {
7194         .open           = tracing_time_stamp_mode_open,
7195         .read           = seq_read,
7196         .llseek         = seq_lseek,
7197         .release        = tracing_single_release_tr,
7198 };
7199
7200 #ifdef CONFIG_TRACER_SNAPSHOT
7201 static const struct file_operations snapshot_fops = {
7202         .open           = tracing_snapshot_open,
7203         .read           = seq_read,
7204         .write          = tracing_snapshot_write,
7205         .llseek         = tracing_lseek,
7206         .release        = tracing_snapshot_release,
7207 };
7208
7209 static const struct file_operations snapshot_raw_fops = {
7210         .open           = snapshot_raw_open,
7211         .read           = tracing_buffers_read,
7212         .release        = tracing_buffers_release,
7213         .splice_read    = tracing_buffers_splice_read,
7214         .llseek         = no_llseek,
7215 };
7216
7217 #endif /* CONFIG_TRACER_SNAPSHOT */
7218
7219 #define TRACING_LOG_ERRS_MAX    8
7220 #define TRACING_LOG_LOC_MAX     128
7221
7222 #define CMD_PREFIX "  Command: "
7223
7224 struct err_info {
7225         const char      **errs; /* ptr to loc-specific array of err strings */
7226         u8              type;   /* index into errs -> specific err string */
7227         u8              pos;    /* caret position in cmd (u8 fits: MAX_FILTER_STR_VAL = 256) */
7228         u64             ts;
7229 };
7230
7231 struct tracing_log_err {
7232         struct list_head        list;
7233         struct err_info         info;
7234         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7235         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7236 };
7237
7238 static DEFINE_MUTEX(tracing_err_log_lock);
7239
7240 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7241 {
7242         struct tracing_log_err *err;
7243
7244         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7245                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7246                 if (!err)
7247                         err = ERR_PTR(-ENOMEM);
7248                 tr->n_err_log_entries++;
7249
7250                 return err;
7251         }
7252
7253         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7254         list_del(&err->list);
7255
7256         return err;
7257 }
7258
7259 /**
7260  * err_pos - find the position of a string within a command for error careting
7261  * @cmd: The tracing command that caused the error
7262  * @str: The string to position the caret at within @cmd
7263  *
7264  * Finds the position of the first occurrence of @str within @cmd.  The
7265  * return value can be passed to tracing_log_err() for caret placement
7266  * within @cmd.
7267  *
7268  * Returns the index within @cmd of the first occurrence of @str or 0
7269  * if @str was not found.
7270  */
7271 unsigned int err_pos(char *cmd, const char *str)
7272 {
7273         char *found;
7274
7275         if (WARN_ON(!strlen(cmd)))
7276                 return 0;
7277
7278         found = strstr(cmd, str);
7279         if (found)
7280                 return found - cmd;
7281
7282         return 0;
7283 }
7284
7285 /**
7286  * tracing_log_err - write an error to the tracing error log
7287  * @tr: The associated trace array for the error (NULL for top level array)
7288  * @loc: A string describing where the error occurred
7289  * @cmd: The tracing command that caused the error
7290  * @errs: The array of loc-specific static error strings
7291  * @type: The index into errs[], which produces the specific static err string
7292  * @pos: The position the caret should be placed in the cmd
7293  *
7294  * Writes an error into tracing/error_log of the form:
7295  *
7296  * <loc>: error: <text>
7297  *   Command: <cmd>
7298  *              ^
7299  *
7300  * tracing/error_log is a small log file containing the last
7301  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7302  * unless there has been a tracing error, and the error log can be
7303  * cleared and have its memory freed by writing the empty string in
7304  * truncation mode to it, i.e. echo > tracing/error_log.
7305  *
7306  * NOTE: the @errs array along with the @type param are used to
7307  * produce a static error string - this string is not copied and saved
7308  * when the error is logged - only a pointer to it is saved.  See
7309  * existing callers for examples of how static strings are typically
7310  * defined for use with tracing_log_err().
7311  */
7312 void tracing_log_err(struct trace_array *tr,
7313                      const char *loc, const char *cmd,
7314                      const char **errs, u8 type, u8 pos)
7315 {
7316         struct tracing_log_err *err;
7317
7318         if (!tr)
7319                 tr = &global_trace;
7320
7321         mutex_lock(&tracing_err_log_lock);
7322         err = get_tracing_log_err(tr);
7323         if (PTR_ERR(err) == -ENOMEM) {
7324                 mutex_unlock(&tracing_err_log_lock);
7325                 return;
7326         }
7327
7328         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7329         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7330
7331         err->info.errs = errs;
7332         err->info.type = type;
7333         err->info.pos = pos;
7334         err->info.ts = local_clock();
7335
7336         list_add_tail(&err->list, &tr->err_log);
7337         mutex_unlock(&tracing_err_log_lock);
7338 }
7339
7340 static void clear_tracing_err_log(struct trace_array *tr)
7341 {
7342         struct tracing_log_err *err, *next;
7343
7344         mutex_lock(&tracing_err_log_lock);
7345         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7346                 list_del(&err->list);
7347                 kfree(err);
7348         }
7349
7350         tr->n_err_log_entries = 0;
7351         mutex_unlock(&tracing_err_log_lock);
7352 }
7353
7354 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7355 {
7356         struct trace_array *tr = m->private;
7357
7358         mutex_lock(&tracing_err_log_lock);
7359
7360         return seq_list_start(&tr->err_log, *pos);
7361 }
7362
7363 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7364 {
7365         struct trace_array *tr = m->private;
7366
7367         return seq_list_next(v, &tr->err_log, pos);
7368 }
7369
7370 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7371 {
7372         mutex_unlock(&tracing_err_log_lock);
7373 }
7374
7375 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7376 {
7377         u8 i;
7378
7379         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7380                 seq_putc(m, ' ');
7381         for (i = 0; i < pos; i++)
7382                 seq_putc(m, ' ');
7383         seq_puts(m, "^\n");
7384 }
7385
7386 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7387 {
7388         struct tracing_log_err *err = v;
7389
7390         if (err) {
7391                 const char *err_text = err->info.errs[err->info.type];
7392                 u64 sec = err->info.ts;
7393                 u32 nsec;
7394
7395                 nsec = do_div(sec, NSEC_PER_SEC);
7396                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7397                            err->loc, err_text);
7398                 seq_printf(m, "%s", err->cmd);
7399                 tracing_err_log_show_pos(m, err->info.pos);
7400         }
7401
7402         return 0;
7403 }
7404
7405 static const struct seq_operations tracing_err_log_seq_ops = {
7406         .start  = tracing_err_log_seq_start,
7407         .next   = tracing_err_log_seq_next,
7408         .stop   = tracing_err_log_seq_stop,
7409         .show   = tracing_err_log_seq_show
7410 };
7411
7412 static int tracing_err_log_open(struct inode *inode, struct file *file)
7413 {
7414         struct trace_array *tr = inode->i_private;
7415         int ret = 0;
7416
7417         ret = tracing_check_open_get_tr(tr);
7418         if (ret)
7419                 return ret;
7420
7421         /* If this file was opened for write, then erase contents */
7422         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7423                 clear_tracing_err_log(tr);
7424
7425         if (file->f_mode & FMODE_READ) {
7426                 ret = seq_open(file, &tracing_err_log_seq_ops);
7427                 if (!ret) {
7428                         struct seq_file *m = file->private_data;
7429                         m->private = tr;
7430                 } else {
7431                         trace_array_put(tr);
7432                 }
7433         }
7434         return ret;
7435 }
7436
7437 static ssize_t tracing_err_log_write(struct file *file,
7438                                      const char __user *buffer,
7439                                      size_t count, loff_t *ppos)
7440 {
7441         return count;
7442 }
7443
7444 static int tracing_err_log_release(struct inode *inode, struct file *file)
7445 {
7446         struct trace_array *tr = inode->i_private;
7447
7448         trace_array_put(tr);
7449
7450         if (file->f_mode & FMODE_READ)
7451                 seq_release(inode, file);
7452
7453         return 0;
7454 }
7455
7456 static const struct file_operations tracing_err_log_fops = {
7457         .open           = tracing_err_log_open,
7458         .write          = tracing_err_log_write,
7459         .read           = seq_read,
7460         .llseek         = seq_lseek,
7461         .release        = tracing_err_log_release,
7462 };
7463
7464 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7465 {
7466         struct trace_array *tr = inode->i_private;
7467         struct ftrace_buffer_info *info;
7468         int ret;
7469
7470         ret = tracing_check_open_get_tr(tr);
7471         if (ret)
7472                 return ret;
7473
7474         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7475         if (!info) {
7476                 trace_array_put(tr);
7477                 return -ENOMEM;
7478         }
7479
7480         mutex_lock(&trace_types_lock);
7481
7482         info->iter.tr           = tr;
7483         info->iter.cpu_file     = tracing_get_cpu(inode);
7484         info->iter.trace        = tr->current_trace;
7485         info->iter.array_buffer = &tr->array_buffer;
7486         info->spare             = NULL;
7487         /* Force reading ring buffer for first read */
7488         info->read              = (unsigned int)-1;
7489
7490         filp->private_data = info;
7491
7492         tr->trace_ref++;
7493
7494         mutex_unlock(&trace_types_lock);
7495
7496         ret = nonseekable_open(inode, filp);
7497         if (ret < 0)
7498                 trace_array_put(tr);
7499
7500         return ret;
7501 }
7502
7503 static __poll_t
7504 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7505 {
7506         struct ftrace_buffer_info *info = filp->private_data;
7507         struct trace_iterator *iter = &info->iter;
7508
7509         return trace_poll(iter, filp, poll_table);
7510 }
7511
7512 static ssize_t
7513 tracing_buffers_read(struct file *filp, char __user *ubuf,
7514                      size_t count, loff_t *ppos)
7515 {
7516         struct ftrace_buffer_info *info = filp->private_data;
7517         struct trace_iterator *iter = &info->iter;
7518         ssize_t ret = 0;
7519         ssize_t size;
7520
7521         if (!count)
7522                 return 0;
7523
7524 #ifdef CONFIG_TRACER_MAX_TRACE
7525         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7526                 return -EBUSY;
7527 #endif
7528
7529         if (!info->spare) {
7530                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7531                                                           iter->cpu_file);
7532                 if (IS_ERR(info->spare)) {
7533                         ret = PTR_ERR(info->spare);
7534                         info->spare = NULL;
7535                 } else {
7536                         info->spare_cpu = iter->cpu_file;
7537                 }
7538         }
7539         if (!info->spare)
7540                 return ret;
7541
7542         /* Do we have previous read data to read? */
7543         if (info->read < PAGE_SIZE)
7544                 goto read;
7545
7546  again:
7547         trace_access_lock(iter->cpu_file);
7548         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7549                                     &info->spare,
7550                                     count,
7551                                     iter->cpu_file, 0);
7552         trace_access_unlock(iter->cpu_file);
7553
7554         if (ret < 0) {
7555                 if (trace_empty(iter)) {
7556                         if ((filp->f_flags & O_NONBLOCK))
7557                                 return -EAGAIN;
7558
7559                         ret = wait_on_pipe(iter, 0);
7560                         if (ret)
7561                                 return ret;
7562
7563                         goto again;
7564                 }
7565                 return 0;
7566         }
7567
7568         info->read = 0;
7569  read:
7570         size = PAGE_SIZE - info->read;
7571         if (size > count)
7572                 size = count;
7573
7574         ret = copy_to_user(ubuf, info->spare + info->read, size);
7575         if (ret == size)
7576                 return -EFAULT;
7577
7578         size -= ret;
7579
7580         *ppos += size;
7581         info->read += size;
7582
7583         return size;
7584 }
7585
7586 static int tracing_buffers_release(struct inode *inode, struct file *file)
7587 {
7588         struct ftrace_buffer_info *info = file->private_data;
7589         struct trace_iterator *iter = &info->iter;
7590
7591         mutex_lock(&trace_types_lock);
7592
7593         iter->tr->trace_ref--;
7594
7595         __trace_array_put(iter->tr);
7596
7597         if (info->spare)
7598                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7599                                            info->spare_cpu, info->spare);
7600         kvfree(info);
7601
7602         mutex_unlock(&trace_types_lock);
7603
7604         return 0;
7605 }
7606
7607 struct buffer_ref {
7608         struct trace_buffer     *buffer;
7609         void                    *page;
7610         int                     cpu;
7611         refcount_t              refcount;
7612 };
7613
7614 static void buffer_ref_release(struct buffer_ref *ref)
7615 {
7616         if (!refcount_dec_and_test(&ref->refcount))
7617                 return;
7618         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7619         kfree(ref);
7620 }
7621
7622 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7623                                     struct pipe_buffer *buf)
7624 {
7625         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7626
7627         buffer_ref_release(ref);
7628         buf->private = 0;
7629 }
7630
7631 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7632                                 struct pipe_buffer *buf)
7633 {
7634         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7635
7636         if (refcount_read(&ref->refcount) > INT_MAX/2)
7637                 return false;
7638
7639         refcount_inc(&ref->refcount);
7640         return true;
7641 }
7642
7643 /* Pipe buffer operations for a buffer. */
7644 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7645         .release                = buffer_pipe_buf_release,
7646         .get                    = buffer_pipe_buf_get,
7647 };
7648
7649 /*
7650  * Callback from splice_to_pipe(): releases pages left in the spd if we
7651  * errored out while filling the pipe.
7652  */
7653 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7654 {
7655         struct buffer_ref *ref =
7656                 (struct buffer_ref *)spd->partial[i].private;
7657
7658         buffer_ref_release(ref);
7659         spd->partial[i].private = 0;
7660 }
7661
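/*
 * splice() path for per_cpu/cpuN/trace_pipe_raw: hands whole ring buffer
 * pages to the pipe without copying. Both the file offset and the requested
 * length must be page aligned (and at least a page).
 */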
7662 static ssize_t
7663 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7664                             struct pipe_inode_info *pipe, size_t len,
7665                             unsigned int flags)
7666 {
7667         struct ftrace_buffer_info *info = file->private_data;
7668         struct trace_iterator *iter = &info->iter;
7669         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7670         struct page *pages_def[PIPE_DEF_BUFFERS];
7671         struct splice_pipe_desc spd = {
7672                 .pages          = pages_def,
7673                 .partial        = partial_def,
7674                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7675                 .ops            = &buffer_pipe_buf_ops,
7676                 .spd_release    = buffer_spd_release,
7677         };
7678         struct buffer_ref *ref;
7679         int entries, i;
7680         ssize_t ret = 0;
7681
7682 #ifdef CONFIG_TRACER_MAX_TRACE
7683         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7684                 return -EBUSY;
7685 #endif
7686
7687         if (*ppos & (PAGE_SIZE - 1))
7688                 return -EINVAL;
7689
7690         if (len & (PAGE_SIZE - 1)) {
7691                 if (len < PAGE_SIZE)
7692                         return -EINVAL;
7693                 len &= PAGE_MASK;
7694         }
7695
7696         if (splice_grow_spd(pipe, &spd))
7697                 return -ENOMEM;
7698
7699  again:
7700         trace_access_lock(iter->cpu_file);
7701         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7702
7703         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7704                 struct page *page;
7705                 int r;
7706
7707                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7708                 if (!ref) {
7709                         ret = -ENOMEM;
7710                         break;
7711                 }
7712
7713                 refcount_set(&ref->refcount, 1);
7714                 ref->buffer = iter->array_buffer->buffer;
7715                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7716                 if (IS_ERR(ref->page)) {
7717                         ret = PTR_ERR(ref->page);
7718                         ref->page = NULL;
7719                         kfree(ref);
7720                         break;
7721                 }
7722                 ref->cpu = iter->cpu_file;
7723
7724                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7725                                           len, iter->cpu_file, 1);
7726                 if (r < 0) {
7727                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7728                                                    ref->page);
7729                         kfree(ref);
7730                         break;
7731                 }
7732
7733                 page = virt_to_page(ref->page);
7734
7735                 spd.pages[i] = page;
7736                 spd.partial[i].len = PAGE_SIZE;
7737                 spd.partial[i].offset = 0;
7738                 spd.partial[i].private = (unsigned long)ref;
7739                 spd.nr_pages++;
7740                 *ppos += PAGE_SIZE;
7741
7742                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7743         }
7744
7745         trace_access_unlock(iter->cpu_file);
7746         spd.nr_pages = i;
7747
7748         /* did we read anything? */
7749         if (!spd.nr_pages) {
7750                 if (ret)
7751                         goto out;
7752
7753                 ret = -EAGAIN;
7754                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7755                         goto out;
7756
7757                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7758                 if (ret)
7759                         goto out;
7760
7761                 goto again;
7762         }
7763
7764         ret = splice_to_pipe(pipe, &spd);
7765 out:
7766         splice_shrink_spd(&spd);
7767
7768         return ret;
7769 }
7770
7771 static const struct file_operations tracing_buffers_fops = {
7772         .open           = tracing_buffers_open,
7773         .read           = tracing_buffers_read,
7774         .poll           = tracing_buffers_poll,
7775         .release        = tracing_buffers_release,
7776         .splice_read    = tracing_buffers_splice_read,
7777         .llseek         = no_llseek,
7778 };
7779
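/*
 * per_cpu/cpuN/stats: reports per-CPU ring buffer statistics as
 * "name: value" lines (entries, overrun, commit overrun, bytes,
 * oldest event ts, now ts, dropped events, read events).
 */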
7780 static ssize_t
7781 tracing_stats_read(struct file *filp, char __user *ubuf,
7782                    size_t count, loff_t *ppos)
7783 {
7784         struct inode *inode = file_inode(filp);
7785         struct trace_array *tr = inode->i_private;
7786         struct array_buffer *trace_buf = &tr->array_buffer;
7787         int cpu = tracing_get_cpu(inode);
7788         struct trace_seq *s;
7789         unsigned long cnt;
7790         unsigned long long t;
7791         unsigned long usec_rem;
7792
7793         s = kmalloc(sizeof(*s), GFP_KERNEL);
7794         if (!s)
7795                 return -ENOMEM;
7796
7797         trace_seq_init(s);
7798
7799         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7800         trace_seq_printf(s, "entries: %ld\n", cnt);
7801
7802         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7803         trace_seq_printf(s, "overrun: %ld\n", cnt);
7804
7805         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7806         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7807
7808         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7809         trace_seq_printf(s, "bytes: %ld\n", cnt);
7810
7811         if (trace_clocks[tr->clock_id].in_ns) {
7812                 /* local or global for trace_clock */
7813                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7814                 usec_rem = do_div(t, USEC_PER_SEC);
7815                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7816                                                                 t, usec_rem);
7817
7818                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7819                 usec_rem = do_div(t, USEC_PER_SEC);
7820                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7821         } else {
7822                 /* counter or tsc mode for trace_clock */
7823                 trace_seq_printf(s, "oldest event ts: %llu\n",
7824                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7825
7826                 trace_seq_printf(s, "now ts: %llu\n",
7827                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7828         }
7829
7830         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7831         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7832
7833         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7834         trace_seq_printf(s, "read events: %ld\n", cnt);
7835
7836         count = simple_read_from_buffer(ubuf, count, ppos,
7837                                         s->buffer, trace_seq_used(s));
7838
7839         kfree(s);
7840
7841         return count;
7842 }
7843
7844 static const struct file_operations tracing_stats_fops = {
7845         .open           = tracing_open_generic_tr,
7846         .read           = tracing_stats_read,
7847         .llseek         = generic_file_llseek,
7848         .release        = tracing_release_generic_tr,
7849 };
7850
7851 #ifdef CONFIG_DYNAMIC_FTRACE
7852
7853 static ssize_t
7854 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7855                   size_t cnt, loff_t *ppos)
7856 {
7857         ssize_t ret;
7858         char *buf;
7859         int r;
7860
7861         /* 256 should be plenty to hold the amount needed */
7862         buf = kmalloc(256, GFP_KERNEL);
7863         if (!buf)
7864                 return -ENOMEM;
7865
7866         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7867                       ftrace_update_tot_cnt,
7868                       ftrace_number_of_pages,
7869                       ftrace_number_of_groups);
7870
7871         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7872         kfree(buf);
7873         return ret;
7874 }
7875
7876 static const struct file_operations tracing_dyn_info_fops = {
7877         .open           = tracing_open_generic,
7878         .read           = tracing_read_dyn_info,
7879         .llseek         = generic_file_llseek,
7880 };
7881 #endif /* CONFIG_DYNAMIC_FTRACE */
7882
7883 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7884 static void
7885 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7886                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7887                 void *data)
7888 {
7889         tracing_snapshot_instance(tr);
7890 }
7891
7892 static void
7893 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7894                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7895                       void *data)
7896 {
7897         struct ftrace_func_mapper *mapper = data;
7898         long *count = NULL;
7899
7900         if (mapper)
7901                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7902
7903         if (count) {
7904
7905                 if (*count <= 0)
7906                         return;
7907
7908                 (*count)--;
7909         }
7910
7911         tracing_snapshot_instance(tr);
7912 }
7913
7914 static int
7915 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7916                       struct ftrace_probe_ops *ops, void *data)
7917 {
7918         struct ftrace_func_mapper *mapper = data;
7919         long *count = NULL;
7920
7921         seq_printf(m, "%ps:", (void *)ip);
7922
7923         seq_puts(m, "snapshot");
7924
7925         if (mapper)
7926                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7927
7928         if (count)
7929                 seq_printf(m, ":count=%ld\n", *count);
7930         else
7931                 seq_puts(m, ":unlimited\n");
7932
7933         return 0;
7934 }
7935
7936 static int
7937 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7938                      unsigned long ip, void *init_data, void **data)
7939 {
7940         struct ftrace_func_mapper *mapper = *data;
7941
7942         if (!mapper) {
7943                 mapper = allocate_ftrace_func_mapper();
7944                 if (!mapper)
7945                         return -ENOMEM;
7946                 *data = mapper;
7947         }
7948
7949         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7950 }
7951
7952 static void
7953 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7954                      unsigned long ip, void *data)
7955 {
7956         struct ftrace_func_mapper *mapper = data;
7957
7958         if (!ip) {
7959                 if (!mapper)
7960                         return;
7961                 free_ftrace_func_mapper(mapper, NULL);
7962                 return;
7963         }
7964
7965         ftrace_func_mapper_remove_ip(mapper, ip);
7966 }
7967
7968 static struct ftrace_probe_ops snapshot_probe_ops = {
7969         .func                   = ftrace_snapshot,
7970         .print                  = ftrace_snapshot_print,
7971 };
7972
7973 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7974         .func                   = ftrace_count_snapshot,
7975         .print                  = ftrace_snapshot_print,
7976         .init                   = ftrace_snapshot_init,
7977         .free                   = ftrace_snapshot_free,
7978 };
7979
7980 static int
7981 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7982                                char *glob, char *cmd, char *param, int enable)
7983 {
7984         struct ftrace_probe_ops *ops;
7985         void *count = (void *)-1;
7986         char *number;
7987         int ret;
7988
7989         if (!tr)
7990                 return -ENODEV;
7991
7992         /* hash funcs only work with set_ftrace_filter */
7993         if (!enable)
7994                 return -EINVAL;
7995
7996         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7997
7998         if (glob[0] == '!')
7999                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8000
8001         if (!param)
8002                 goto out_reg;
8003
8004         number = strsep(&param, ":");
8005
8006         if (!strlen(number))
8007                 goto out_reg;
8008
8009         /*
8010          * We use the callback data field (which is a pointer)
8011          * as our counter.
8012          */
8013         ret = kstrtoul(number, 0, (unsigned long *)&count);
8014         if (ret)
8015                 return ret;
8016
8017  out_reg:
8018         ret = tracing_alloc_snapshot_instance(tr);
8019         if (ret < 0)
8020                 goto out;
8021
8022         ret = register_ftrace_function_probe(glob, tr, ops, count);
8023
8024  out:
8025         return ret < 0 ? ret : 0;
8026 }
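
/*
 * A user-space usage sketch for the command parsed above (paths assume
 * tracefs is mounted at /sys/kernel/tracing; the function name is just
 * an example):
 *
 *   echo 'kfree:snapshot'   > set_ftrace_filter    snapshot on every hit
 *   echo 'kfree:snapshot:3' > set_ftrace_filter    snapshot on the first 3 hits
 *   echo '!kfree:snapshot'  > set_ftrace_filter    remove the probe again
 */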
8027
8028 static struct ftrace_func_command ftrace_snapshot_cmd = {
8029         .name                   = "snapshot",
8030         .func                   = ftrace_trace_snapshot_callback,
8031 };
8032
8033 static __init int register_snapshot_cmd(void)
8034 {
8035         return register_ftrace_command(&ftrace_snapshot_cmd);
8036 }
8037 #else
8038 static inline __init int register_snapshot_cmd(void) { return 0; }
8039 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8040
8041 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8042 {
8043         if (WARN_ON(!tr->dir))
8044                 return ERR_PTR(-ENODEV);
8045
8046         /* Top directory uses NULL as the parent */
8047         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8048                 return NULL;
8049
8050         /* All sub buffers have a descriptor */
8051         return tr->dir;
8052 }
8053
8054 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8055 {
8056         struct dentry *d_tracer;
8057
8058         if (tr->percpu_dir)
8059                 return tr->percpu_dir;
8060
8061         d_tracer = tracing_get_dentry(tr);
8062         if (IS_ERR(d_tracer))
8063                 return NULL;
8064
8065         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8066
8067         MEM_FAIL(!tr->percpu_dir,
8068                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8069
8070         return tr->percpu_dir;
8071 }
8072
8073 static struct dentry *
8074 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8075                       void *data, long cpu, const struct file_operations *fops)
8076 {
8077         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8078
8079         if (ret) /* See tracing_get_cpu(): cpu + 1 so that CPU 0 is not NULL */
8080                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
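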
8081         return ret;
8082 }
8083
8084 static void
8085 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8086 {
8087         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8088         struct dentry *d_cpu;
8089         char cpu_dir[30]; /* 30 characters should be more than enough */
8090
8091         if (!d_percpu)
8092                 return;
8093
8094         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8095         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8096         if (!d_cpu) {
8097                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8098                 return;
8099         }
8100
8101         /* per cpu trace_pipe */
8102         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8103                                 tr, cpu, &tracing_pipe_fops);
8104
8105         /* per cpu trace */
8106         trace_create_cpu_file("trace", 0644, d_cpu,
8107                                 tr, cpu, &tracing_fops);
8108
8109         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8110                                 tr, cpu, &tracing_buffers_fops);
8111
8112         trace_create_cpu_file("stats", 0444, d_cpu,
8113                                 tr, cpu, &tracing_stats_fops);
8114
8115         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8116                                 tr, cpu, &tracing_entries_fops);
8117
8118 #ifdef CONFIG_TRACER_SNAPSHOT
8119         trace_create_cpu_file("snapshot", 0644, d_cpu,
8120                                 tr, cpu, &snapshot_fops);
8121
8122         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8123                                 tr, cpu, &snapshot_raw_fops);
8124 #endif
8125 }
8126
8127 #ifdef CONFIG_FTRACE_SELFTEST
8128 /* Let selftest have access to static functions in this file */
8129 #include "trace_selftest.c"
8130 #endif
8131
8132 static ssize_t
8133 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8134                         loff_t *ppos)
8135 {
8136         struct trace_option_dentry *topt = filp->private_data;
8137         char *buf;
8138
8139         if (topt->flags->val & topt->opt->bit)
8140                 buf = "1\n";
8141         else
8142                 buf = "0\n";
8143
8144         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8145 }
8146
8147 static ssize_t
8148 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8149                          loff_t *ppos)
8150 {
8151         struct trace_option_dentry *topt = filp->private_data;
8152         unsigned long val;
8153         int ret;
8154
8155         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8156         if (ret)
8157                 return ret;
8158
8159         if (val != 0 && val != 1)
8160                 return -EINVAL;
8161
8162         if (!!(topt->flags->val & topt->opt->bit) != val) {
8163                 mutex_lock(&trace_types_lock);
8164                 ret = __set_tracer_option(topt->tr, topt->flags,
8165                                           topt->opt, !val);
8166                 mutex_unlock(&trace_types_lock);
8167                 if (ret)
8168                         return ret;
8169         }
8170
8171         *ppos += cnt;
8172
8173         return cnt;
8174 }
8175
8176
8177 static const struct file_operations trace_options_fops = {
8178         .open = tracing_open_generic,
8179         .read = trace_options_read,
8180         .write = trace_options_write,
8181         .llseek = generic_file_llseek,
8182 };
8183
8184 /*
8185  * In order to pass in both the trace_array descriptor as well as the index
8186  * to the flag that the trace option file represents, the trace_array
8187  * has a character array of trace_flags_index[], which holds the index
8188  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8189  * The address of this character array is passed to the flag option file
8190  * read/write callbacks.
8191  *
8192  * In order to extract both the index and the trace_array descriptor,
8193  * get_tr_index() uses the following algorithm.
8194  *
8195  *   idx = *ptr;
8196  *
8197  * Since the pointer points at index[idx] and index[i] == i,
8198  * dereferencing it yields the flag's bit index directly.
8199  *
8200  * Then, to get the trace_array descriptor, subtract that index from
8201  * the pointer; this lands at the start of the trace_flags_index array:
8202  *
8203  *   ptr - idx == &index[0]
8204  *
8205  * Then a simple container_of() from that pointer gets us to the
8206  * trace_array descriptor.
8207  */
8208 static void get_tr_index(void *data, struct trace_array **ptr,
8209                          unsigned int *pindex)
8210 {
8211         *pindex = *(unsigned char *)data;
8212
8213         *ptr = container_of(data - *pindex, struct trace_array,
8214                             trace_flags_index);
8215 }
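
/*
 * A worked example of the arithmetic above: if an option file was
 * created with data == &tr->trace_flags_index[3] (so *data == 3), then:
 *
 *   idx = *(unsigned char *)data;          idx == 3
 *   data - idx                             == &tr->trace_flags_index[0]
 *   container_of(data - idx, struct trace_array, trace_flags_index) == tr
 */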
8216
8217 static ssize_t
8218 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8219                         loff_t *ppos)
8220 {
8221         void *tr_index = filp->private_data;
8222         struct trace_array *tr;
8223         unsigned int index;
8224         char *buf;
8225
8226         get_tr_index(tr_index, &tr, &index);
8227
8228         if (tr->trace_flags & (1 << index))
8229                 buf = "1\n";
8230         else
8231                 buf = "0\n";
8232
8233         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8234 }
8235
8236 static ssize_t
8237 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8238                          loff_t *ppos)
8239 {
8240         void *tr_index = filp->private_data;
8241         struct trace_array *tr;
8242         unsigned int index;
8243         unsigned long val;
8244         int ret;
8245
8246         get_tr_index(tr_index, &tr, &index);
8247
8248         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8249         if (ret)
8250                 return ret;
8251
8252         if (val != 0 && val != 1)
8253                 return -EINVAL;
8254
8255         mutex_lock(&event_mutex);
8256         mutex_lock(&trace_types_lock);
8257         ret = set_tracer_flag(tr, 1 << index, val);
8258         mutex_unlock(&trace_types_lock);
8259         mutex_unlock(&event_mutex);
8260
8261         if (ret < 0)
8262                 return ret;
8263
8264         *ppos += cnt;
8265
8266         return cnt;
8267 }
8268
8269 static const struct file_operations trace_options_core_fops = {
8270         .open = tracing_open_generic,
8271         .read = trace_options_core_read,
8272         .write = trace_options_core_write,
8273         .llseek = generic_file_llseek,
8274 };
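
/*
 * The fops above back the per-flag files under the tracefs "options"
 * directory, which take a plain 0 or 1, e.g. (path assumes the usual
 * tracefs mount point; "sym-offset" is just one example flag):
 *
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *   echo 0 > /sys/kernel/tracing/options/sym-offset
 */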
8275
8276 struct dentry *trace_create_file(const char *name,
8277                                  umode_t mode,
8278                                  struct dentry *parent,
8279                                  void *data,
8280                                  const struct file_operations *fops)
8281 {
8282         struct dentry *ret;
8283
8284         ret = tracefs_create_file(name, mode, parent, data, fops);
8285         if (!ret)
8286                 pr_warn("Could not create tracefs '%s' entry\n", name);
8287
8288         return ret;
8289 }
8290
8291
8292 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8293 {
8294         struct dentry *d_tracer;
8295
8296         if (tr->options)
8297                 return tr->options;
8298
8299         d_tracer = tracing_get_dentry(tr);
8300         if (IS_ERR(d_tracer))
8301                 return NULL;
8302
8303         tr->options = tracefs_create_dir("options", d_tracer);
8304         if (!tr->options) {
8305                 pr_warn("Could not create tracefs directory 'options'\n");
8306                 return NULL;
8307         }
8308
8309         return tr->options;
8310 }
8311
8312 static void
8313 create_trace_option_file(struct trace_array *tr,
8314                          struct trace_option_dentry *topt,
8315                          struct tracer_flags *flags,
8316                          struct tracer_opt *opt)
8317 {
8318         struct dentry *t_options;
8319
8320         t_options = trace_options_init_dentry(tr);
8321         if (!t_options)
8322                 return;
8323
8324         topt->flags = flags;
8325         topt->opt = opt;
8326         topt->tr = tr;
8327
8328         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8329                                     &trace_options_fops);
8330
8331 }
8332
8333 static void
8334 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8335 {
8336         struct trace_option_dentry *topts;
8337         struct trace_options *tr_topts;
8338         struct tracer_flags *flags;
8339         struct tracer_opt *opts;
8340         int cnt;
8341         int i;
8342
8343         if (!tracer)
8344                 return;
8345
8346         flags = tracer->flags;
8347
8348         if (!flags || !flags->opts)
8349                 return;
8350
8351         /*
8352          * If this is an instance, only create flags for tracers
8353          * the instance may have.
8354          */
8355         if (!trace_ok_for_array(tracer, tr))
8356                 return;
8357
8358         for (i = 0; i < tr->nr_topts; i++) {
8359                 /* Make sure there are no duplicate flags. */
8360                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8361                         return;
8362         }
8363
8364         opts = flags->opts;
8365
8366         for (cnt = 0; opts[cnt].name; cnt++)
8367                 ;
8368
8369         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8370         if (!topts)
8371                 return;
8372
8373         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8374                             GFP_KERNEL);
8375         if (!tr_topts) {
8376                 kfree(topts);
8377                 return;
8378         }
8379
8380         tr->topts = tr_topts;
8381         tr->topts[tr->nr_topts].tracer = tracer;
8382         tr->topts[tr->nr_topts].topts = topts;
8383         tr->nr_topts++;
8384
8385         for (cnt = 0; opts[cnt].name; cnt++) {
8386                 create_trace_option_file(tr, &topts[cnt], flags,
8387                                          &opts[cnt]);
8388                 MEM_FAIL(topts[cnt].entry == NULL,
8389                           "Failed to create trace option: %s",
8390                           opts[cnt].name);
8391         }
8392 }
8393
8394 static struct dentry *
8395 create_trace_option_core_file(struct trace_array *tr,
8396                               const char *option, long index)
8397 {
8398         struct dentry *t_options;
8399
8400         t_options = trace_options_init_dentry(tr);
8401         if (!t_options)
8402                 return NULL;
8403
8404         return trace_create_file(option, 0644, t_options,
8405                                  (void *)&tr->trace_flags_index[index],
8406                                  &trace_options_core_fops);
8407 }
8408
8409 static void create_trace_options_dir(struct trace_array *tr)
8410 {
8411         struct dentry *t_options;
8412         bool top_level = tr == &global_trace;
8413         int i;
8414
8415         t_options = trace_options_init_dentry(tr);
8416         if (!t_options)
8417                 return;
8418
8419         for (i = 0; trace_options[i]; i++) {
8420                 if (top_level ||
8421                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8422                         create_trace_option_core_file(tr, trace_options[i], i);
8423         }
8424 }
8425
8426 static ssize_t
8427 rb_simple_read(struct file *filp, char __user *ubuf,
8428                size_t cnt, loff_t *ppos)
8429 {
8430         struct trace_array *tr = filp->private_data;
8431         char buf[64];
8432         int r;
8433
8434         r = tracer_tracing_is_on(tr);
8435         r = sprintf(buf, "%d\n", r);
8436
8437         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8438 }
8439
8440 static ssize_t
8441 rb_simple_write(struct file *filp, const char __user *ubuf,
8442                 size_t cnt, loff_t *ppos)
8443 {
8444         struct trace_array *tr = filp->private_data;
8445         struct trace_buffer *buffer = tr->array_buffer.buffer;
8446         unsigned long val;
8447         int ret;
8448
8449         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8450         if (ret)
8451                 return ret;
8452
8453         if (buffer) {
8454                 mutex_lock(&trace_types_lock);
8455                 if (!!val == tracer_tracing_is_on(tr)) {
8456                         val = 0; /* do nothing */
8457                 } else if (val) {
8458                         tracer_tracing_on(tr);
8459                         if (tr->current_trace->start)
8460                                 tr->current_trace->start(tr);
8461                 } else {
8462                         tracer_tracing_off(tr);
8463                         if (tr->current_trace->stop)
8464                                 tr->current_trace->stop(tr);
8465                 }
8466                 mutex_unlock(&trace_types_lock);
8467         }
8468
8469         (*ppos)++;
8470
8471         return cnt;
8472 }
8473
8474 static const struct file_operations rb_simple_fops = {
8475         .open           = tracing_open_generic_tr,
8476         .read           = rb_simple_read,
8477         .write          = rb_simple_write,
8478         .release        = tracing_release_generic_tr,
8479         .llseek         = default_llseek,
8480 };
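
/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on     stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on     resume writing
 */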
8481
8482 static ssize_t
8483 buffer_percent_read(struct file *filp, char __user *ubuf,
8484                     size_t cnt, loff_t *ppos)
8485 {
8486         struct trace_array *tr = filp->private_data;
8487         char buf[64];
8488         int r;
8489
8490         r = tr->buffer_percent;
8491         r = sprintf(buf, "%d\n", r);
8492
8493         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8494 }
8495
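/*
 * Writes to "buffer_percent" larger than 100 are rejected and a write of
 * 0 is treated as 1. The value is stored in tr->buffer_percent, which is
 * used as the watermark for how full the ring buffer must be before
 * blocked readers are woken up.
 */
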
8496 static ssize_t
8497 buffer_percent_write(struct file *filp, const char __user *ubuf,
8498                      size_t cnt, loff_t *ppos)
8499 {
8500         struct trace_array *tr = filp->private_data;
8501         unsigned long val;
8502         int ret;
8503
8504         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8505         if (ret)
8506                 return ret;
8507
8508         if (val > 100)
8509                 return -EINVAL;
8510
8511         if (!val)
8512                 val = 1;
8513
8514         tr->buffer_percent = val;
8515
8516         (*ppos)++;
8517
8518         return cnt;
8519 }
8520
8521 static const struct file_operations buffer_percent_fops = {
8522         .open           = tracing_open_generic_tr,
8523         .read           = buffer_percent_read,
8524         .write          = buffer_percent_write,
8525         .release        = tracing_release_generic_tr,
8526         .llseek         = default_llseek,
8527 };
8528
8529 static struct dentry *trace_instance_dir;
8530
8531 static void
8532 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8533
8534 static int
8535 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8536 {
8537         enum ring_buffer_flags rb_flags;
8538
8539         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8540
8541         buf->tr = tr;
8542
8543         buf->buffer = ring_buffer_alloc(size, rb_flags);
8544         if (!buf->buffer)
8545                 return -ENOMEM;
8546
8547         buf->data = alloc_percpu(struct trace_array_cpu);
8548         if (!buf->data) {
8549                 ring_buffer_free(buf->buffer);
8550                 buf->buffer = NULL;
8551                 return -ENOMEM;
8552         }
8553
8554         /* Allocate the first page for all buffers */
8555         set_buffer_entries(&tr->array_buffer,
8556                            ring_buffer_size(tr->array_buffer.buffer, 0));
8557
8558         return 0;
8559 }
8560
8561 static int allocate_trace_buffers(struct trace_array *tr, int size)
8562 {
8563         int ret;
8564
8565         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8566         if (ret)
8567                 return ret;
8568
8569 #ifdef CONFIG_TRACER_MAX_TRACE
8570         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8571                                     allocate_snapshot ? size : 1);
8572         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8573                 ring_buffer_free(tr->array_buffer.buffer);
8574                 tr->array_buffer.buffer = NULL;
8575                 free_percpu(tr->array_buffer.data);
8576                 tr->array_buffer.data = NULL;
8577                 return -ENOMEM;
8578         }
8579         tr->allocated_snapshot = allocate_snapshot;
8580
8581         /*
8582          * Only the top level trace array gets its snapshot allocated
8583          * from the kernel command line.
8584          */
8585         allocate_snapshot = false;
8586 #endif
8587
8588         return 0;
8589 }
8590
8591 static void free_trace_buffer(struct array_buffer *buf)
8592 {
8593         if (buf->buffer) {
8594                 ring_buffer_free(buf->buffer);
8595                 buf->buffer = NULL;
8596                 free_percpu(buf->data);
8597                 buf->data = NULL;
8598         }
8599 }
8600
8601 static void free_trace_buffers(struct trace_array *tr)
8602 {
8603         if (!tr)
8604                 return;
8605
8606         free_trace_buffer(&tr->array_buffer);
8607
8608 #ifdef CONFIG_TRACER_MAX_TRACE
8609         free_trace_buffer(&tr->max_buffer);
8610 #endif
8611 }
8612
8613 static void init_trace_flags_index(struct trace_array *tr)
8614 {
8615         int i;
8616
8617         /* Used by the trace options files */
8618         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8619                 tr->trace_flags_index[i] = i;
8620 }
8621
8622 static void __update_tracer_options(struct trace_array *tr)
8623 {
8624         struct tracer *t;
8625
8626         for (t = trace_types; t; t = t->next)
8627                 add_tracer_options(tr, t);
8628 }
8629
8630 static void update_tracer_options(struct trace_array *tr)
8631 {
8632         mutex_lock(&trace_types_lock);
8633         __update_tracer_options(tr);
8634         mutex_unlock(&trace_types_lock);
8635 }
8636
8637 /* Must have trace_types_lock held */
8638 struct trace_array *trace_array_find(const char *instance)
8639 {
8640         struct trace_array *tr, *found = NULL;
8641
8642         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8643                 if (tr->name && strcmp(tr->name, instance) == 0) {
8644                         found = tr;
8645                         break;
8646                 }
8647         }
8648
8649         return found;
8650 }
8651
8652 struct trace_array *trace_array_find_get(const char *instance)
8653 {
8654         struct trace_array *tr;
8655
8656         mutex_lock(&trace_types_lock);
8657         tr = trace_array_find(instance);
8658         if (tr)
8659                 tr->ref++;
8660         mutex_unlock(&trace_types_lock);
8661
8662         return tr;
8663 }
8664
8665 static int trace_array_create_dir(struct trace_array *tr)
8666 {
8667         int ret;
8668
8669         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8670         if (!tr->dir)
8671                 return -EINVAL;
8672
8673         ret = event_trace_add_tracer(tr->dir, tr);
8674         if (ret) {
8675                 tracefs_remove(tr->dir);
8676                 return ret;
8677         }
8678
8679         init_tracer_tracefs(tr, tr->dir);
8678         __update_tracer_options(tr);
8679
8680         return ret;
8681 }
8682
8683 static struct trace_array *trace_array_create(const char *name)
8684 {
8685         struct trace_array *tr;
8686         int ret;
8687
8688         ret = -ENOMEM;
8689         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8690         if (!tr)
8691                 return ERR_PTR(ret);
8692
8693         tr->name = kstrdup(name, GFP_KERNEL);
8694         if (!tr->name)
8695                 goto out_free_tr;
8696
8697         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8698                 goto out_free_tr;
8699
8700         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8701
8702         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8703
8704         raw_spin_lock_init(&tr->start_lock);
8705
8706         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8707
8708         tr->current_trace = &nop_trace;
8709
8710         INIT_LIST_HEAD(&tr->systems);
8711         INIT_LIST_HEAD(&tr->events);
8712         INIT_LIST_HEAD(&tr->hist_vars);
8713         INIT_LIST_HEAD(&tr->err_log);
8714
8715         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8716                 goto out_free_tr;
8717
8718         if (ftrace_allocate_ftrace_ops(tr) < 0)
8719                 goto out_free_tr;
8720
8721         ftrace_init_trace_array(tr);
8722
8723         init_trace_flags_index(tr);
8724
8725         if (trace_instance_dir) {
8726                 ret = trace_array_create_dir(tr);
8727                 if (ret)
8728                         goto out_free_tr;
8729         } else
8730                 __trace_early_add_events(tr);
8731
8732         list_add(&tr->list, &ftrace_trace_arrays);
8733
8734         tr->ref++;
8735
8736         return tr;
8737
8738  out_free_tr:
8739         ftrace_free_ftrace_ops(tr);
8740         free_trace_buffers(tr);
8741         free_cpumask_var(tr->tracing_cpumask);
8742         kfree(tr->name);
8743         kfree(tr);
8744
8745         return ERR_PTR(ret);
8746 }
8747
8748 static int instance_mkdir(const char *name)
8749 {
8750         struct trace_array *tr;
8751         int ret;
8752
8753         mutex_lock(&event_mutex);
8754         mutex_lock(&trace_types_lock);
8755
8756         ret = -EEXIST;
8757         if (trace_array_find(name))
8758                 goto out_unlock;
8759
8760         tr = trace_array_create(name);
8761
8762         ret = PTR_ERR_OR_ZERO(tr);
8763
8764 out_unlock:
8765         mutex_unlock(&trace_types_lock);
8766         mutex_unlock(&event_mutex);
8767         return ret;
8768 }
8769
8770 /**
8771  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8772  * @name: The name of the trace array to be looked up/created.
8773  *
8774  * Returns a pointer to the trace array with the given name, or NULL
8775  * if it cannot be created.
8776  *
8777  * NOTE: This function increments the reference counter associated with the
8778  * trace array returned. This makes sure it cannot be freed while in use.
8779  * Use trace_array_put() once the trace array is no longer needed.
8780  * If the trace_array is to be freed, trace_array_destroy() needs to
8781  * be called after the trace_array_put(), or simply let user space delete
8782  * it from the tracefs instances directory. But until the
8783  * trace_array_put() is called, user space cannot delete it.
8784  *
8785  */
8786 struct trace_array *trace_array_get_by_name(const char *name)
8787 {
8788         struct trace_array *tr;
8789
8790         mutex_lock(&event_mutex);
8791         mutex_lock(&trace_types_lock);
8792
8793         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8794                 if (tr->name && strcmp(tr->name, name) == 0)
8795                         goto out_unlock;
8796         }
8797
8798         tr = trace_array_create(name);
8799
8800         if (IS_ERR(tr))
8801                 tr = NULL;
8802 out_unlock:
8803         if (tr)
8804                 tr->ref++;
8805
8806         mutex_unlock(&trace_types_lock);
8807         mutex_unlock(&event_mutex);
8808         return tr;
8809 }
8810 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
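
/*
 * A minimal in-kernel usage sketch (hypothetical module code, shown for
 * illustration only; "my_instance" is an arbitrary name):
 *
 *     struct trace_array *tr;
 *
 *     tr = trace_array_get_by_name("my_instance");
 *     if (!tr)
 *             return -ENOMEM;
 *     trace_array_printk(tr, _THIS_IP_, "hello\n");
 *     trace_array_put(tr);
 *     trace_array_destroy(tr);    (only if the instance should go away)
 */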
8811
8812 static int __remove_instance(struct trace_array *tr)
8813 {
8814         int i;
8815
8816         /* Reference counter for a newly created trace array = 1. */
8817         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8818                 return -EBUSY;
8819
8820         list_del(&tr->list);
8821
8822         /* Disable all the flags that were enabled coming in */
8823         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8824                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8825                         set_tracer_flag(tr, 1 << i, 0);
8826         }
8827
8828         tracing_set_nop(tr);
8829         clear_ftrace_function_probes(tr);
8830         event_trace_del_tracer(tr);
8831         ftrace_clear_pids(tr);
8832         ftrace_destroy_function_files(tr);
8833         tracefs_remove(tr->dir);
8834         free_trace_buffers(tr);
8835
8836         for (i = 0; i < tr->nr_topts; i++) {
8837                 kfree(tr->topts[i].topts);
8838         }
8839         kfree(tr->topts);
8840
8841         free_cpumask_var(tr->tracing_cpumask);
8842         kfree(tr->name);
8843         kfree(tr);
8844
8845         return 0;
8846 }
8847
8848 int trace_array_destroy(struct trace_array *this_tr)
8849 {
8850         struct trace_array *tr;
8851         int ret;
8852
8853         if (!this_tr)
8854                 return -EINVAL;
8855
8856         mutex_lock(&event_mutex);
8857         mutex_lock(&trace_types_lock);
8858
8859         ret = -ENODEV;
8860
8861         /* Make sure the trace array exists before destroying it. */
8862         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8863                 if (tr == this_tr) {
8864                         ret = __remove_instance(tr);
8865                         break;
8866                 }
8867         }
8868
8869         mutex_unlock(&trace_types_lock);
8870         mutex_unlock(&event_mutex);
8871
8872         return ret;
8873 }
8874 EXPORT_SYMBOL_GPL(trace_array_destroy);
8875
8876 static int instance_rmdir(const char *name)
8877 {
8878         struct trace_array *tr;
8879         int ret;
8880
8881         mutex_lock(&event_mutex);
8882         mutex_lock(&trace_types_lock);
8883
8884         ret = -ENODEV;
8885         tr = trace_array_find(name);
8886         if (tr)
8887                 ret = __remove_instance(tr);
8888
8889         mutex_unlock(&trace_types_lock);
8890         mutex_unlock(&event_mutex);
8891
8892         return ret;
8893 }
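
/*
 * From user space the same paths are exercised with plain mkdir/rmdir
 * on the tracefs "instances" directory, e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * rmdir fails with -EBUSY while the instance is still referenced.
 */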
8894
8895 static __init void create_trace_instances(struct dentry *d_tracer)
8896 {
8897         struct trace_array *tr;
8898
8899         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8900                                                          instance_mkdir,
8901                                                          instance_rmdir);
8902         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8903                 return;
8904
8905         mutex_lock(&event_mutex);
8906         mutex_lock(&trace_types_lock);
8907
8908         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8909                 if (!tr->name)
8910                         continue;
8911                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8912                              "Failed to create instance directory\n"))
8913                         break;
8914         }
8915
8916         mutex_unlock(&trace_types_lock);
8917         mutex_unlock(&event_mutex);
8918 }
8919
8920 static void
8921 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8922 {
8923         struct trace_event_file *file;
8924         int cpu;
8925
8926         trace_create_file("available_tracers", 0444, d_tracer,
8927                         tr, &show_traces_fops);
8928
8929         trace_create_file("current_tracer", 0644, d_tracer,
8930                         tr, &set_tracer_fops);
8931
8932         trace_create_file("tracing_cpumask", 0644, d_tracer,
8933                           tr, &tracing_cpumask_fops);
8934
8935         trace_create_file("trace_options", 0644, d_tracer,
8936                           tr, &tracing_iter_fops);
8937
8938         trace_create_file("trace", 0644, d_tracer,
8939                           tr, &tracing_fops);
8940
8941         trace_create_file("trace_pipe", 0444, d_tracer,
8942                           tr, &tracing_pipe_fops);
8943
8944         trace_create_file("buffer_size_kb", 0644, d_tracer,
8945                           tr, &tracing_entries_fops);
8946
8947         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8948                           tr, &tracing_total_entries_fops);
8949
8950         trace_create_file("free_buffer", 0200, d_tracer,
8951                           tr, &tracing_free_buffer_fops);
8952
8953         trace_create_file("trace_marker", 0220, d_tracer,
8954                           tr, &tracing_mark_fops);
8955
8956         file = __find_event_file(tr, "ftrace", "print");
8957         if (file && file->dir)
8958                 trace_create_file("trigger", 0644, file->dir, file,
8959                                   &event_trigger_fops);
8960         tr->trace_marker_file = file;
8961
8962         trace_create_file("trace_marker_raw", 0220, d_tracer,
8963                           tr, &tracing_mark_raw_fops);
8964
8965         trace_create_file("trace_clock", 0644, d_tracer, tr,
8966                           &trace_clock_fops);
8967
8968         trace_create_file("tracing_on", 0644, d_tracer,
8969                           tr, &rb_simple_fops);
8970
8971         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8972                           &trace_time_stamp_mode_fops);
8973
8974         tr->buffer_percent = 50;
8975
8976         trace_create_file("buffer_percent", 0444, d_tracer,
8977                         tr, &buffer_percent_fops);
8978
8979         create_trace_options_dir(tr);
8980
8981 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8982         trace_create_maxlat_file(tr, d_tracer);
8983 #endif
8984
8985         if (ftrace_create_function_files(tr, d_tracer))
8986                 MEM_FAIL(1, "Could not allocate function filter files");
8987
8988 #ifdef CONFIG_TRACER_SNAPSHOT
8989         trace_create_file("snapshot", 0644, d_tracer,
8990                           tr, &snapshot_fops);
8991 #endif
8992
8993         trace_create_file("error_log", 0644, d_tracer,
8994                           tr, &tracing_err_log_fops);
8995
8996         for_each_tracing_cpu(cpu)
8997                 tracing_init_tracefs_percpu(tr, cpu);
8998
8999         ftrace_init_tracefs(tr, d_tracer);
9000 }
9001
9002 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9003 {
9004         struct vfsmount *mnt;
9005         struct file_system_type *type;
9006
9007         /*
9008          * To maintain backward compatibility for tools that mount
9009          * debugfs to get to the tracing facility, tracefs is automatically
9010          * mounted to the debugfs/tracing directory.
9011          */
9012         type = get_fs_type("tracefs");
9013         if (!type)
9014                 return NULL;
9015         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9016         put_filesystem(type);
9017         if (IS_ERR(mnt))
9018                 return NULL;
9019         mntget(mnt);
9020
9021         return mnt;
9022 }
9023
9024 /**
9025  * tracing_init_dentry - initialize top level trace array
9026  *
9027  * This is called when creating files or directories in the tracing
9028  * directory. It is called via fs_initcall() by any of the boot up code,
9029  * and returns 0 on success or a negative error code on failure.
9030  */
9031 int tracing_init_dentry(void)
9032 {
9033         struct trace_array *tr = &global_trace;
9034
9035         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9036                 pr_warn("Tracing disabled due to lockdown\n");
9037                 return -EPERM;
9038         }
9039
9040         /* The top level trace array uses NULL as parent */
9041         if (tr->dir)
9042                 return 0;
9043
9044         if (WARN_ON(!tracefs_initialized()))
9045                 return -ENODEV;
9046
9047         /*
9048          * As there may still be users that expect the tracing
9049          * files to exist in debugfs/tracing, we must automount
9050          * the tracefs file system there, so older tools still
9051          * work with the newer kernel.
9052          */
9053         tr->dir = debugfs_create_automount("tracing", NULL,
9054                                            trace_automount, NULL);
9055
9056         return 0;
9057 }
9058
9059 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9060 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9061
9062 static void __init trace_eval_init(void)
9063 {
9064         int len;
9065
9066         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9067         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9068 }
9069
9070 #ifdef CONFIG_MODULES
9071 static void trace_module_add_evals(struct module *mod)
9072 {
9073         if (!mod->num_trace_evals)
9074                 return;
9075
9076         /*
9077          * Modules with bad taint do not have events created, so do
9078          * not bother with their enums (eval maps) either.
9079          */
9080         if (trace_module_has_bad_taint(mod))
9081                 return;
9082
9083         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9084 }
9085
9086 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9087 static void trace_module_remove_evals(struct module *mod)
9088 {
9089         union trace_eval_map_item *map;
9090         union trace_eval_map_item **last = &trace_eval_maps;
9091
9092         if (!mod->num_trace_evals)
9093                 return;
9094
9095         mutex_lock(&trace_eval_mutex);
9096
9097         map = trace_eval_maps;
9098
9099         while (map) {
9100                 if (map->head.mod == mod)
9101                         break;
9102                 map = trace_eval_jmp_to_tail(map);
9103                 last = &map->tail.next;
9104                 map = map->tail.next;
9105         }
9106         if (!map)
9107                 goto out;
9108
9109         *last = trace_eval_jmp_to_tail(map)->tail.next;
9110         kfree(map);
9111  out:
9112         mutex_unlock(&trace_eval_mutex);
9113 }
9114 #else
9115 static inline void trace_module_remove_evals(struct module *mod) { }
9116 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9117
9118 static int trace_module_notify(struct notifier_block *self,
9119                                unsigned long val, void *data)
9120 {
9121         struct module *mod = data;
9122
9123         switch (val) {
9124         case MODULE_STATE_COMING:
9125                 trace_module_add_evals(mod);
9126                 break;
9127         case MODULE_STATE_GOING:
9128                 trace_module_remove_evals(mod);
9129                 break;
9130         }
9131
9132         return NOTIFY_OK;
9133 }
9134
9135 static struct notifier_block trace_module_nb = {
9136         .notifier_call = trace_module_notify,
9137         .priority = 0,
9138 };
9139 #endif /* CONFIG_MODULES */
9140
9141 static __init int tracer_init_tracefs(void)
9142 {
9143         int ret;
9144
9145         trace_access_lock_init();
9146
9147         ret = tracing_init_dentry();
9148         if (ret)
9149                 return 0;
9150
9151         event_trace_init();
9152
9153         init_tracer_tracefs(&global_trace, NULL);
9154         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9155
9156         trace_create_file("tracing_thresh", 0644, NULL,
9157                         &global_trace, &tracing_thresh_fops);
9158
9159         trace_create_file("README", 0444, NULL,
9160                         NULL, &tracing_readme_fops);
9161
9162         trace_create_file("saved_cmdlines", 0444, NULL,
9163                         NULL, &tracing_saved_cmdlines_fops);
9164
9165         trace_create_file("saved_cmdlines_size", 0644, NULL,
9166                           NULL, &tracing_saved_cmdlines_size_fops);
9167
9168         trace_create_file("saved_tgids", 0444, NULL,
9169                         NULL, &tracing_saved_tgids_fops);
9170
9171         trace_eval_init();
9172
9173         trace_create_eval_file(NULL);
9174
9175 #ifdef CONFIG_MODULES
9176         register_module_notifier(&trace_module_nb);
9177 #endif
9178
9179 #ifdef CONFIG_DYNAMIC_FTRACE
9180         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9181                         NULL, &tracing_dyn_info_fops);
9182 #endif
9183
9184         create_trace_instances(NULL);
9185
9186         update_tracer_options(&global_trace);
9187
9188         return 0;
9189 }
9190
9191 static int trace_panic_handler(struct notifier_block *this,
9192                                unsigned long event, void *unused)
9193 {
9194         if (ftrace_dump_on_oops)
9195                 ftrace_dump(ftrace_dump_on_oops);
9196         return NOTIFY_OK;
9197 }
9198
9199 static struct notifier_block trace_panic_notifier = {
9200         .notifier_call  = trace_panic_handler,
9201         .next           = NULL,
9202         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9203 };
9204
9205 static int trace_die_handler(struct notifier_block *self,
9206                              unsigned long val,
9207                              void *data)
9208 {
9209         switch (val) {
9210         case DIE_OOPS:
9211                 if (ftrace_dump_on_oops)
9212                         ftrace_dump(ftrace_dump_on_oops);
9213                 break;
9214         default:
9215                 break;
9216         }
9217         return NOTIFY_OK;
9218 }
9219
9220 static struct notifier_block trace_die_notifier = {
9221         .notifier_call = trace_die_handler,
9222         .priority = 200
9223 };
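
/*
 * ftrace_dump_on_oops itself is usually enabled either with the
 * "ftrace_dump_on_oops" kernel command line parameter or at run time
 * via the kernel.ftrace_dump_on_oops sysctl.
 */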
9224
9225 /*
9226  * printk is set to a max of 1024; we really don't need it that big.
9227  * Nothing should be printing 1000 characters anyway.
9228  */
9229 #define TRACE_MAX_PRINT         1000
9230
9231 /*
9232  * Define here KERN_TRACE so that we have one place to modify
9233  * it if we decide to change what log level the ftrace dump
9234  * should be at.
9235  */
9236 #define KERN_TRACE              KERN_EMERG
9237
9238 void
9239 trace_printk_seq(struct trace_seq *s)
9240 {
9241         /* Probably should print a warning here. */
9242         if (s->seq.len >= TRACE_MAX_PRINT)
9243                 s->seq.len = TRACE_MAX_PRINT;
9244
9245         /*
9246          * More paranoid code. Although the buffer size is set to
9247          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9248          * an extra layer of protection.
9249          */
9250         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9251                 s->seq.len = s->seq.size - 1;
9252
9253         /* Should already be NUL terminated, but we are paranoid. */
9254         s->buffer[s->seq.len] = 0;
9255
9256         printk(KERN_TRACE "%s", s->buffer);
9257
9258         trace_seq_init(s);
9259 }
9260
9261 void trace_init_global_iter(struct trace_iterator *iter)
9262 {
9263         iter->tr = &global_trace;
9264         iter->trace = iter->tr->current_trace;
9265         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9266         iter->array_buffer = &global_trace.array_buffer;
9267
9268         if (iter->trace && iter->trace->open)
9269                 iter->trace->open(iter);
9270
9271         /* Annotate start of buffers if we had overruns */
9272         if (ring_buffer_overruns(iter->array_buffer->buffer))
9273                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9274
9275         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9276         if (trace_clocks[iter->tr->clock_id].in_ns)
9277                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9278 }
9279
9280 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9281 {
9282         /* use static because iter can be a bit big for the stack */
9283         static struct trace_iterator iter;
9284         static atomic_t dump_running;
9285         struct trace_array *tr = &global_trace;
9286         unsigned int old_userobj;
9287         unsigned long flags;
9288         int cnt = 0, cpu;
9289
9290         /* Only allow one dump user at a time. */
9291         if (atomic_inc_return(&dump_running) != 1) {
9292                 atomic_dec(&dump_running);
9293                 return;
9294         }
9295
9296         /*
9297          * Always turn off tracing when we dump.
9298          * We don't need to show trace output of what happens
9299          * between multiple crashes.
9300          *
9301          * If the user does a sysrq-z, then they can re-enable
9302          * tracing with echo 1 > tracing_on.
9303          */
9304         tracing_off();
9305
9306         local_irq_save(flags);
9307         printk_nmi_direct_enter();
9308
9309         /* Simulate the iterator */
9310         trace_init_global_iter(&iter);
9311         /* Can not use kmalloc for iter.temp */
9312         iter.temp = static_temp_buf;
9313         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9314
9315         for_each_tracing_cpu(cpu) {
9316                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9317         }
9318
9319         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9320
9321         /* don't look at user memory in panic mode */
9322         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9323
9324         switch (oops_dump_mode) {
9325         case DUMP_ALL:
9326                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9327                 break;
9328         case DUMP_ORIG:
9329                 iter.cpu_file = raw_smp_processor_id();
9330                 break;
9331         case DUMP_NONE:
9332                 goto out_enable;
9333         default:
9334                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9335                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9336         }
9337
9338         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9339
9340         /* Did function tracer already get disabled? */
9341         if (ftrace_is_dead()) {
9342                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9343                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9344         }
9345
9346         /*
9347          * We need to stop all tracing on all CPUs to read
9348          * the next buffer. This is a bit expensive, but is
9349          * not done often. We print everything we can read,
9350          * and then release the locks again.
9351          */
9352
9353         while (!trace_empty(&iter)) {
9354
9355                 if (!cnt)
9356                         printk(KERN_TRACE "---------------------------------\n");
9357
9358                 cnt++;
9359
9360                 trace_iterator_reset(&iter);
9361                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9362
9363                 if (trace_find_next_entry_inc(&iter) != NULL) {
9364                         int ret;
9365
9366                         ret = print_trace_line(&iter);
9367                         if (ret != TRACE_TYPE_NO_CONSUME)
9368                                 trace_consume(&iter);
9369                 }
9370                 touch_nmi_watchdog();
9371
9372                 trace_printk_seq(&iter.seq);
9373         }
9374
9375         if (!cnt)
9376                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9377         else
9378                 printk(KERN_TRACE "---------------------------------\n");
9379
9380  out_enable:
9381         tr->trace_flags |= old_userobj;
9382
9383         for_each_tracing_cpu(cpu) {
9384                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9385         }
9386         atomic_dec(&dump_running);
9387         printk_nmi_direct_exit();
9388         local_irq_restore(flags);
9389 }
9390 EXPORT_SYMBOL_GPL(ftrace_dump);
9391
9392 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9393 {
9394         char **argv;
9395         int argc, ret;
9396
9397         argc = 0;
9398         ret = 0;
9399         argv = argv_split(GFP_KERNEL, buf, &argc);
9400         if (!argv)
9401                 return -ENOMEM;
9402
9403         if (argc)
9404                 ret = createfn(argc, argv);
9405
9406         argv_free(argv);
9407
9408         return ret;
9409 }
9410
9411 #define WRITE_BUFSIZE  4096
9412
9413 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9414                                 size_t count, loff_t *ppos,
9415                                 int (*createfn)(int, char **))
9416 {
9417         char *kbuf, *buf, *tmp;
9418         int ret = 0;
9419         size_t done = 0;
9420         size_t size;
9421
9422         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9423         if (!kbuf)
9424                 return -ENOMEM;
9425
9426         while (done < count) {
9427                 size = count - done;
9428
9429                 if (size >= WRITE_BUFSIZE)
9430                         size = WRITE_BUFSIZE - 1;
9431
9432                 if (copy_from_user(kbuf, buffer + done, size)) {
9433                         ret = -EFAULT;
9434                         goto out;
9435                 }
9436                 kbuf[size] = '\0';
9437                 buf = kbuf;
9438                 do {
9439                         tmp = strchr(buf, '\n');
9440                         if (tmp) {
9441                                 *tmp = '\0';
9442                                 size = tmp - buf + 1;
9443                         } else {
9444                                 size = strlen(buf);
9445                                 if (done + size < count) {
9446                                         if (buf != kbuf)
9447                                                 break;
9448                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9449                                         pr_warn("Line length is too long: Should be less than %d\n",
9450                                                 WRITE_BUFSIZE - 2);
9451                                         ret = -EINVAL;
9452                                         goto out;
9453                                 }
9454                         }
9455                         done += size;
9456
9457                         /* Remove comments */
9458                         tmp = strchr(buf, '#');
9459
9460                         if (tmp)
9461                                 *tmp = '\0';
9462
9463                         ret = trace_run_command(buf, createfn);
9464                         if (ret)
9465                                 goto out;
9466                         buf += size;
9467
9468                 } while (done < count);
9469         }
9470         ret = done;
9471
9472 out:
9473         kfree(kbuf);
9474
9475         return ret;
9476 }
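
/*
 * trace_parse_run_command() is the common write path for command style
 * files such as kprobe_events. For example, a single write of
 *
 *   "p:myprobe do_sys_open\n# a comment\n-:myprobe\n"
 *
 * is split on newlines, anything after a '#' is dropped, and createfn()
 * is invoked once for each remaining line.
 */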
9477
9478 __init static int tracer_alloc_buffers(void)
9479 {
9480         int ring_buf_size;
9481         int ret = -ENOMEM;
9482
9483
9484         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9485                 pr_warn("Tracing disabled due to lockdown\n");
9486                 return -EPERM;
9487         }
9488
9489         /*
9490          * Make sure we don't accidentally add more trace options
9491          * than we have bits for.
9492          */
9493         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9494
9495         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9496                 goto out;
9497
9498         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9499                 goto out_free_buffer_mask;
9500
9501         /* Only allocate trace_printk buffers if a trace_printk exists */
9502         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9503                 /* Must be called before global_trace.buffer is allocated */
9504                 trace_printk_init_buffers();
9505
9506         /* To save memory, keep the ring buffer size to its minimum */
9507         if (ring_buffer_expanded)
9508                 ring_buf_size = trace_buf_size;
9509         else
9510                 ring_buf_size = 1;
9511
9512         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9513         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9514
9515         raw_spin_lock_init(&global_trace.start_lock);
9516
9517         /*
9518          * The prepare callback allocates some memory for the ring buffer. We
9519          * don't free the buffer if the CPU goes down. If we were to free
9520          * the buffer, then the user would lose any trace that was in the
9521          * buffer. The memory will be removed once the "instance" is removed.
9522          */
9523         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9524                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9525                                       NULL);
9526         if (ret < 0)
9527                 goto out_free_cpumask;
9528         /* Used for event triggers */
9529         ret = -ENOMEM;
9530         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9531         if (!temp_buffer)
9532                 goto out_rm_hp_state;
9533
9534         if (trace_create_savedcmd() < 0)
9535                 goto out_free_temp_buffer;
9536
9537         /* TODO: make the number of buffers hot pluggable with CPUs */
9538         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9539                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9540                 goto out_free_savedcmd;
9541         }
9542
9543         if (global_trace.buffer_disabled)
9544                 tracing_off();
9545
9546         if (trace_boot_clock) {
9547                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9548                 if (ret < 0)
9549                         pr_warn("Trace clock %s not defined, going back to default\n",
9550                                 trace_boot_clock);
9551         }
9552
9553         /*
9554          * register_tracer() might reference current_trace, so it
9555          * needs to be set before we register anything. This is
9556          * just a bootstrap of current_trace anyway.
9557          */
9558         global_trace.current_trace = &nop_trace;
9559
9560         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9561
9562         ftrace_init_global_array_ops(&global_trace);
9563
9564         init_trace_flags_index(&global_trace);
9565
9566         register_tracer(&nop_trace);
9567
9568         /* Function tracing may start here (via kernel command line) */
9569         init_function_trace();
9570
9571         /* All seems OK, enable tracing */
9572         tracing_disabled = 0;
9573
9574         atomic_notifier_chain_register(&panic_notifier_list,
9575                                        &trace_panic_notifier);
9576
9577         register_die_notifier(&trace_die_notifier);
9578
9579         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9580
9581         INIT_LIST_HEAD(&global_trace.systems);
9582         INIT_LIST_HEAD(&global_trace.events);
9583         INIT_LIST_HEAD(&global_trace.hist_vars);
9584         INIT_LIST_HEAD(&global_trace.err_log);
9585         list_add(&global_trace.list, &ftrace_trace_arrays);
9586
9587         apply_trace_boot_options();
9588
9589         register_snapshot_cmd();
9590
9591         return 0;
9592
9593 out_free_savedcmd:
9594         free_saved_cmdlines_buffer(savedcmd);
9595 out_free_temp_buffer:
9596         ring_buffer_free(temp_buffer);
9597 out_rm_hp_state:
9598         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9599 out_free_cpumask:
9600         free_cpumask_var(global_trace.tracing_cpumask);
9601 out_free_buffer_mask:
9602         free_cpumask_var(tracing_buffer_mask);
9603 out:
9604         return ret;
9605 }
9606
9607 void __init early_trace_init(void)
9608 {
9609         if (tracepoint_printk) {
9610                 tracepoint_print_iter =
9611                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9612                 if (MEM_FAIL(!tracepoint_print_iter,
9613                              "Failed to allocate trace iterator\n"))
9614                         tracepoint_printk = 0;
9615                 else
9616                         static_key_enable(&tracepoint_printk_key.key);
9617         }
9618         tracer_alloc_buffers();
9619 }
9620
9621 void __init trace_init(void)
9622 {
9623         trace_event_init();
9624 }
9625
9626 __init static int clear_boot_tracer(void)
9627 {
9628         /*
9629          * The default bootup tracer name is kept in an init section.
9630          * This function is called at late_initcall time. If we did not
9631          * find the boot tracer, then clear it out, to prevent
9632          * later registration from accessing the buffer that is
9633          * about to be freed.
9634          */
9635         if (!default_bootup_tracer)
9636                 return 0;
9637
9638         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9639                default_bootup_tracer);
9640         default_bootup_tracer = NULL;
9641
9642         return 0;
9643 }
9644
9645 fs_initcall(tracer_init_tracefs);
9646 late_initcall_sync(clear_boot_tracer);
9647
9648 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9649 __init static int tracing_set_default_clock(void)
9650 {
9651         /* sched_clock_stable() is determined in late_initcall */
9652         if (!trace_boot_clock && !sched_clock_stable()) {
9653                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9654                         pr_warn("Can not set tracing clock due to lockdown\n");
9655                         return -EPERM;
9656                 }
9657
9658                 printk(KERN_WARNING
9659                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9660                        "If you want to keep using the local clock, then add:\n"
9661                        "  \"trace_clock=local\"\n"
9662                        "on the kernel command line\n");
9663                 tracing_set_clock(&global_trace, "global");
9664         }
9665
9666         return 0;
9667 }
9668 late_initcall_sync(tracing_set_default_clock);
9669 #endif