1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
 63  * A selftest will look into the ring-buffer to count the
 64  * entries inserted during the selftest, although concurrent
 65  * insertions into the ring-buffer, such as trace_printk(), could
 66  * occur at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #endif
85
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
 105  * occurs.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
 125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
 128  * It is off by default, but you can enable it either by specifying
 129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
 130  * /proc/sys/kernel/ftrace_dump_on_oops:
 131  * Set it to 1 to dump the buffers of all CPUs
 132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
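
/*
 * For example, the two knobs described above:
 *
 *	On the kernel command line:
 *	    ftrace_dump_on_oops              dump the buffers of all CPUs
 *	    ftrace_dump_on_oops=orig_cpu     dump only the CPU that oopsed
 *
 *	At run time:
 *	    echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *	    echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */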
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
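
/*
 * Roughly, for one saved block of N maps:
 *
 *	trace_eval_maps --> [0]      head  (head.length = N, head.mod)
 *	                    [1..N]   the N saved trace_eval_map entries
 *	                    [N+1]    tail  (tail.next --> the next saved block,
 *	                                    or NULL at the end of the chain)
 */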
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned long flags, int pc);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186
187 static int __init set_cmdline_ftrace(char *str)
188 {
189         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190         default_bootup_tracer = bootup_tracer_buf;
191         /* We are using ftrace early, expand it */
192         ring_buffer_expanded = true;
193         return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199         if (*str++ != '=' || !*str) {
200                 ftrace_dump_on_oops = DUMP_ALL;
201                 return 1;
202         }
203
204         if (!strcmp("orig_cpu", str)) {
205                 ftrace_dump_on_oops = DUMP_ORIG;
206                 return 1;
207         }
208
209         return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213 static int __init stop_trace_on_warning(char *str)
214 {
215         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216                 __disable_trace_on_warning = 1;
217         return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220
221 static int __init boot_alloc_snapshot(char *str)
222 {
223         allocate_snapshot = true;
224         /* We also need the main ring buffer expanded */
225         ring_buffer_expanded = true;
226         return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233 static int __init set_trace_boot_options(char *str)
234 {
235         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236         return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242
243 static int __init set_trace_boot_clock(char *str)
244 {
245         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246         trace_boot_clock = trace_boot_clock_buf;
247         return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250
251 static int __init set_tracepoint_printk(char *str)
252 {
253         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254                 tracepoint_printk = 1;
255         return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
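
/*
 * Taken together, the __setup() handlers above let the boot command line
 * configure tracing early, e.g. (an illustrative combination):
 *
 *	ftrace=function_graph trace_options=sym-addr trace_clock=global
 *	alloc_snapshot traceoff_on_warning tp_printk
 *
 * which selects a boot-up tracer, sets default trace options and the trace
 * clock, pre-allocates the snapshot buffer, stops tracing on the first
 * WARN*(), and mirrors tracepoints to printk.
 */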
258
259 unsigned long long ns2usecs(u64 nsec)
260 {
261         nsec += 500;
262         do_div(nsec, 1000);
263         return nsec;
264 }
265
266 static void
267 trace_process_export(struct trace_export *export,
268                struct ring_buffer_event *event, int flag)
269 {
270         struct trace_entry *entry;
271         unsigned int size = 0;
272
273         if (export->flags & flag) {
274                 entry = ring_buffer_event_data(event);
275                 size = ring_buffer_event_length(event);
276                 export->write(export, entry, size);
277         }
278 }
279
280 static DEFINE_MUTEX(ftrace_export_lock);
281
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_inc(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_inc(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_inc(&trace_marker_exports_enabled);
298 }
299
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302         if (export->flags & TRACE_EXPORT_FUNCTION)
303                 static_branch_dec(&trace_function_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_EVENT)
306                 static_branch_dec(&trace_event_exports_enabled);
307
308         if (export->flags & TRACE_EXPORT_MARKER)
309                 static_branch_dec(&trace_marker_exports_enabled);
310 }
311
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314         struct trace_export *export;
315
316         preempt_disable_notrace();
317
318         export = rcu_dereference_raw_check(ftrace_exports_list);
319         while (export) {
320                 trace_process_export(export, event, flag);
321                 export = rcu_dereference_raw_check(export->next);
322         }
323
324         preempt_enable_notrace();
325 }
326
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330         rcu_assign_pointer(export->next, *list);
331         /*
332          * We are entering export into the list but another
333          * CPU might be walking that list. We need to make sure
334          * the export->next pointer is valid before another CPU sees
335          * the export pointer included into the list.
336          */
337         rcu_assign_pointer(*list, export);
338 }
339
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         struct trace_export **p;
344
345         for (p = list; *p != NULL; p = &(*p)->next)
346                 if (*p == export)
347                         break;
348
349         if (*p != export)
350                 return -1;
351
352         rcu_assign_pointer(*p, (*p)->next);
353
354         return 0;
355 }
356
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360         ftrace_exports_enable(export);
361
362         add_trace_export(list, export);
363 }
364
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368         int ret;
369
370         ret = rm_trace_export(list, export);
371         ftrace_exports_disable(export);
372
373         return ret;
374 }
375
376 int register_ftrace_export(struct trace_export *export)
377 {
378         if (WARN_ON_ONCE(!export->write))
379                 return -1;
380
381         mutex_lock(&ftrace_export_lock);
382
383         add_ftrace_export(&ftrace_exports_list, export);
384
385         mutex_unlock(&ftrace_export_lock);
386
387         return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393         int ret;
394
395         mutex_lock(&ftrace_export_lock);
396
397         ret = rm_ftrace_export(&ftrace_exports_list, export);
398
399         mutex_unlock(&ftrace_export_lock);
400
401         return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
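
/*
 * A minimal trace_export user might look like this (hypothetical names;
 * see struct trace_export in <linux/trace.h> for the callback signature):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the raw entry (@size bytes) out of band ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */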
404
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS                                             \
407         (FUNCTION_DEFAULT_FLAGS |                                       \
408          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
409          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
410          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
411          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
412
413 /* trace_options that are only supported by global_trace */
414 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
415                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
416
417 /* trace_flags that are default zero for instances */
418 #define ZEROED_TRACE_FLAGS \
419         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
420
421 /*
422  * The global_trace is the descriptor that holds the top-level tracing
423  * buffers for the live tracing.
424  */
425 static struct trace_array global_trace = {
426         .trace_flags = TRACE_DEFAULT_FLAGS,
427 };
428
429 LIST_HEAD(ftrace_trace_arrays);
430
431 int trace_array_get(struct trace_array *this_tr)
432 {
433         struct trace_array *tr;
434         int ret = -ENODEV;
435
436         mutex_lock(&trace_types_lock);
437         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
438                 if (tr == this_tr) {
439                         tr->ref++;
440                         ret = 0;
441                         break;
442                 }
443         }
444         mutex_unlock(&trace_types_lock);
445
446         return ret;
447 }
448
449 static void __trace_array_put(struct trace_array *this_tr)
450 {
451         WARN_ON(!this_tr->ref);
452         this_tr->ref--;
453 }
454
455 /**
456  * trace_array_put - Decrement the reference counter for this trace array.
457  *
458  * NOTE: Use this when we no longer need the trace array returned by
459  * trace_array_get_by_name(). This ensures the trace array can be later
460  * destroyed.
461  *
462  */
463 void trace_array_put(struct trace_array *this_tr)
464 {
465         if (!this_tr)
466                 return;
467
468         mutex_lock(&trace_types_lock);
469         __trace_array_put(this_tr);
470         mutex_unlock(&trace_types_lock);
471 }
472 EXPORT_SYMBOL_GPL(trace_array_put);
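
/*
 * Typical pairing for users outside this file (illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	... use the instance ...
 *	trace_array_put(tr);
 */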
473
474 int tracing_check_open_get_tr(struct trace_array *tr)
475 {
476         int ret;
477
478         ret = security_locked_down(LOCKDOWN_TRACEFS);
479         if (ret)
480                 return ret;
481
482         if (tracing_disabled)
483                 return -ENODEV;
484
485         if (tr && trace_array_get(tr) < 0)
486                 return -ENODEV;
487
488         return 0;
489 }
490
491 int call_filter_check_discard(struct trace_event_call *call, void *rec,
492                               struct trace_buffer *buffer,
493                               struct ring_buffer_event *event)
494 {
495         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
496             !filter_match_preds(call->filter, rec)) {
497                 __trace_event_discard_commit(buffer, event);
498                 return 1;
499         }
500
501         return 0;
502 }
503
504 void trace_free_pid_list(struct trace_pid_list *pid_list)
505 {
506         vfree(pid_list->pids);
507         kfree(pid_list);
508 }
509
510 /**
511  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
512  * @filtered_pids: The list of pids to check
513  * @search_pid: The PID to find in @filtered_pids
514  *
 515  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
516  */
517 bool
518 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
519 {
520         /*
521          * If pid_max changed after filtered_pids was created, we
522          * by default ignore all pids greater than the previous pid_max.
523          */
524         if (search_pid >= filtered_pids->pid_max)
525                 return false;
526
527         return test_bit(search_pid, filtered_pids->pids);
528 }
529
530 /**
531  * trace_ignore_this_task - should a task be ignored for tracing
532  * @filtered_pids: The list of pids to check
533  * @task: The task that should be ignored if not filtered
534  *
535  * Checks if @task should be traced or not from @filtered_pids.
536  * Returns true if @task should *NOT* be traced.
537  * Returns false if @task should be traced.
538  */
539 bool
540 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
541                        struct trace_pid_list *filtered_no_pids,
542                        struct task_struct *task)
543 {
544         /*
 545  * If filtered_no_pids is not empty, and the task's pid is listed
546          * in filtered_no_pids, then return true.
547          * Otherwise, if filtered_pids is empty, that means we can
548          * trace all tasks. If it has content, then only trace pids
549          * within filtered_pids.
550          */
551
552         return (filtered_pids &&
553                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
554                 (filtered_no_pids &&
555                  trace_find_filtered_pid(filtered_no_pids, task->pid));
556 }
557
558 /**
559  * trace_filter_add_remove_task - Add or remove a task from a pid_list
560  * @pid_list: The list to modify
561  * @self: The current task for fork or NULL for exit
562  * @task: The task to add or remove
563  *
564  * If adding a task, if @self is defined, the task is only added if @self
565  * is also included in @pid_list. This happens on fork and tasks should
566  * only be added when the parent is listed. If @self is NULL, then the
567  * @task pid will be removed from the list, which would happen on exit
568  * of a task.
569  */
570 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
571                                   struct task_struct *self,
572                                   struct task_struct *task)
573 {
574         if (!pid_list)
575                 return;
576
577         /* For forks, we only add if the forking task is listed */
578         if (self) {
579                 if (!trace_find_filtered_pid(pid_list, self->pid))
580                         return;
581         }
582
583         /* Sorry, but we don't support pid_max changing after setting */
584         if (task->pid >= pid_list->pid_max)
585                 return;
586
587         /* "self" is set for forks, and NULL for exits */
588         if (self)
589                 set_bit(task->pid, pid_list->pids);
590         else
591                 clear_bit(task->pid, pid_list->pids);
592 }
593
594 /**
595  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
596  * @pid_list: The pid list to show
 597  * @v: The last pid that was shown (+1 of the actual pid, to let zero be displayed)
598  * @pos: The position of the file
599  *
600  * This is used by the seq_file "next" operation to iterate the pids
601  * listed in a trace_pid_list structure.
602  *
603  * Returns the pid+1 as we want to display pid of zero, but NULL would
604  * stop the iteration.
605  */
606 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
607 {
608         unsigned long pid = (unsigned long)v;
609
610         (*pos)++;
611
 612         /* pid is already +1 of the actual previous bit */
613         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
614
615         /* Return pid + 1 to allow zero to be represented */
616         if (pid < pid_list->pid_max)
617                 return (void *)(pid + 1);
618
619         return NULL;
620 }
621
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635         unsigned long pid;
636         loff_t l = 0;
637
638         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
639         if (pid >= pid_list->pid_max)
640                 return NULL;
641
642         /* Return pid + 1 so that zero can be the exit value */
643         for (pid++; pid && l < *pos;
644              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
645                 ;
646         return (void *)pid;
647 }
648
649 /**
650  * trace_pid_show - show the current pid in seq_file processing
651  * @m: The seq_file structure to write into
652  * @v: A void pointer of the pid (+1) value to display
653  *
654  * Can be directly used by seq_file operations to display the current
655  * pid value.
656  */
657 int trace_pid_show(struct seq_file *m, void *v)
658 {
659         unsigned long pid = (unsigned long)v - 1;
660
661         seq_printf(m, "%lu\n", pid);
662         return 0;
663 }
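
/*
 * The three helpers above are meant to back a seq_file. A sketch of the
 * wiring, with a hypothetical pid list (real users look the list up from
 * their own trace_array under the proper locking):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */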
664
665 /* 128 should be much more than enough */
666 #define PID_BUF_SIZE            127
667
668 int trace_pid_write(struct trace_pid_list *filtered_pids,
669                     struct trace_pid_list **new_pid_list,
670                     const char __user *ubuf, size_t cnt)
671 {
672         struct trace_pid_list *pid_list;
673         struct trace_parser parser;
674         unsigned long val;
675         int nr_pids = 0;
676         ssize_t read = 0;
677         ssize_t ret = 0;
678         loff_t pos;
679         pid_t pid;
680
681         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
682                 return -ENOMEM;
683
684         /*
685          * Always recreate a new array. The write is an all or nothing
686          * operation. Always create a new array when adding new pids by
687          * the user. If the operation fails, then the current list is
688          * not modified.
689          */
690         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
691         if (!pid_list) {
692                 trace_parser_put(&parser);
693                 return -ENOMEM;
694         }
695
696         pid_list->pid_max = READ_ONCE(pid_max);
697
698         /* Only truncating will shrink pid_max */
699         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
700                 pid_list->pid_max = filtered_pids->pid_max;
701
702         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
703         if (!pid_list->pids) {
704                 trace_parser_put(&parser);
705                 kfree(pid_list);
706                 return -ENOMEM;
707         }
708
709         if (filtered_pids) {
710                 /* copy the current bits to the new max */
711                 for_each_set_bit(pid, filtered_pids->pids,
712                                  filtered_pids->pid_max) {
713                         set_bit(pid, pid_list->pids);
714                         nr_pids++;
715                 }
716         }
717
718         while (cnt > 0) {
719
720                 pos = 0;
721
722                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
723                 if (ret < 0 || !trace_parser_loaded(&parser))
724                         break;
725
726                 read += ret;
727                 ubuf += ret;
728                 cnt -= ret;
729
730                 ret = -EINVAL;
731                 if (kstrtoul(parser.buffer, 0, &val))
732                         break;
733                 if (val >= pid_list->pid_max)
734                         break;
735
736                 pid = (pid_t)val;
737
738                 set_bit(pid, pid_list->pids);
739                 nr_pids++;
740
741                 trace_parser_clear(&parser);
742                 ret = 0;
743         }
744         trace_parser_put(&parser);
745
746         if (ret < 0) {
747                 trace_free_pid_list(pid_list);
748                 return ret;
749         }
750
751         if (!nr_pids) {
752                 /* Cleared the list of pids */
753                 trace_free_pid_list(pid_list);
754                 read = ret;
755                 pid_list = NULL;
756         }
757
758         *new_pid_list = pid_list;
759
760         return read;
761 }
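
/*
 * trace_pid_write() backs the pid-filter files (such as set_event_pid):
 * e.g. writing "123 456" builds a new list with those pids (plus whatever
 * was already in @filtered_pids, if the caller passed the old list in),
 * while a write that yields no pids clears the filter.
 */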
762
763 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
764 {
765         u64 ts;
766
767         /* Early boot up does not have a buffer yet */
768         if (!buf->buffer)
769                 return trace_clock_local();
770
771         ts = ring_buffer_time_stamp(buf->buffer, cpu);
772         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
773
774         return ts;
775 }
776
777 u64 ftrace_now(int cpu)
778 {
779         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
780 }
781
782 /**
783  * tracing_is_enabled - Show if global_trace has been disabled
784  *
785  * Shows if the global trace has been enabled or not. It uses the
786  * mirror flag "buffer_disabled" to be used in fast paths such as for
787  * the irqsoff tracer. But it may be inaccurate due to races. If you
788  * need to know the accurate state, use tracing_is_on() which is a little
789  * slower, but accurate.
790  */
791 int tracing_is_enabled(void)
792 {
793         /*
794          * For quick access (irqsoff uses this in fast path), just
795          * return the mirror variable of the state of the ring buffer.
796          * It's a little racy, but we don't really care.
797          */
798         smp_rmb();
799         return !global_trace.buffer_disabled;
800 }
801
802 /*
803  * trace_buf_size is the size in bytes that is allocated
804  * for a buffer. Note, the number of bytes is always rounded
805  * to page size.
806  *
807  * This number is purposely set to a low number of 16384.
808  * If the dump on oops happens, it will be much appreciated
 809  * to not have to wait for all that output. Anyway, this is
 810  * configurable at both boot time and run time.
811  */
812 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
813
814 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
815
816 /* trace_types holds a link list of available tracers. */
817 static struct tracer            *trace_types __read_mostly;
818
819 /*
820  * trace_types_lock is used to protect the trace_types list.
821  */
822 DEFINE_MUTEX(trace_types_lock);
823
824 /*
825  * serialize the access of the ring buffer
826  *
 827  * The ring buffer serializes readers, but that is only low-level protection.
 828  * The validity of the events (returned by ring_buffer_peek(), etc.)
 829  * is not protected by the ring buffer.
 830  *
 831  * The content of events may become garbage if we allow other processes to
 832  * consume these events concurrently:
 833  *   A) the page of the consumed events may become a normal page
 834  *      (not a reader page) in the ring buffer, and this page will be
 835  *      rewritten by the events producer.
 836  *   B) The page of the consumed events may become a page for splice_read,
 837  *      and this page will be returned to the system.
 838  *
 839  *  These primitives allow multiple processes to access different per-CPU
 840  *  ring buffers concurrently.
 841  *
 842  *  These primitives don't distinguish read-only from read-consume access.
 843  *  Multiple read-only accesses are also serialized.
844  */
845
846 #ifdef CONFIG_SMP
847 static DECLARE_RWSEM(all_cpu_access_lock);
848 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
849
850 static inline void trace_access_lock(int cpu)
851 {
852         if (cpu == RING_BUFFER_ALL_CPUS) {
853                 /* gain it for accessing the whole ring buffer. */
854                 down_write(&all_cpu_access_lock);
855         } else {
856                 /* gain it for accessing a cpu ring buffer. */
857
858                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
859                 down_read(&all_cpu_access_lock);
860
861                 /* Secondly block other access to this @cpu ring buffer. */
862                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
863         }
864 }
865
866 static inline void trace_access_unlock(int cpu)
867 {
868         if (cpu == RING_BUFFER_ALL_CPUS) {
869                 up_write(&all_cpu_access_lock);
870         } else {
871                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
872                 up_read(&all_cpu_access_lock);
873         }
874 }
875
876 static inline void trace_access_lock_init(void)
877 {
878         int cpu;
879
880         for_each_possible_cpu(cpu)
881                 mutex_init(&per_cpu(cpu_access_lock, cpu));
882 }
883
884 #else
885
886 static DEFINE_MUTEX(access_lock);
887
888 static inline void trace_access_lock(int cpu)
889 {
890         (void)cpu;
891         mutex_lock(&access_lock);
892 }
893
894 static inline void trace_access_unlock(int cpu)
895 {
896         (void)cpu;
897         mutex_unlock(&access_lock);
898 }
899
900 static inline void trace_access_lock_init(void)
901 {
902 }
903
904 #endif
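
/*
 * Readers bracket their ring-buffer accesses with the pair above, e.g.
 * (as the read paths later in this file do):
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events from that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */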
905
906 #ifdef CONFIG_STACKTRACE
907 static void __ftrace_trace_stack(struct trace_buffer *buffer,
908                                  unsigned long flags,
909                                  int skip, int pc, struct pt_regs *regs);
910 static inline void ftrace_trace_stack(struct trace_array *tr,
911                                       struct trace_buffer *buffer,
912                                       unsigned long flags,
913                                       int skip, int pc, struct pt_regs *regs);
914
915 #else
916 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
917                                         unsigned long flags,
918                                         int skip, int pc, struct pt_regs *regs)
919 {
920 }
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922                                       struct trace_buffer *buffer,
923                                       unsigned long flags,
924                                       int skip, int pc, struct pt_regs *regs)
925 {
926 }
927
928 #endif
929
930 static __always_inline void
931 trace_event_setup(struct ring_buffer_event *event,
932                   int type, unsigned long flags, int pc)
933 {
934         struct trace_entry *ent = ring_buffer_event_data(event);
935
936         tracing_generic_entry_update(ent, type, flags, pc);
937 }
938
939 static __always_inline struct ring_buffer_event *
940 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
941                           int type,
942                           unsigned long len,
943                           unsigned long flags, int pc)
944 {
945         struct ring_buffer_event *event;
946
947         event = ring_buffer_lock_reserve(buffer, len);
948         if (event != NULL)
949                 trace_event_setup(event, type, flags, pc);
950
951         return event;
952 }
953
954 void tracer_tracing_on(struct trace_array *tr)
955 {
956         if (tr->array_buffer.buffer)
957                 ring_buffer_record_on(tr->array_buffer.buffer);
958         /*
959          * This flag is looked at when buffers haven't been allocated
960          * yet, or by some tracers (like irqsoff), that just want to
961          * know if the ring buffer has been disabled, but it can handle
962          * races of where it gets disabled but we still do a record.
963          * As the check is in the fast path of the tracers, it is more
964          * important to be fast than accurate.
965          */
966         tr->buffer_disabled = 0;
967         /* Make the flag seen by readers */
968         smp_wmb();
969 }
970
971 /**
972  * tracing_on - enable tracing buffers
973  *
974  * This function enables tracing buffers that may have been
975  * disabled with tracing_off.
976  */
977 void tracing_on(void)
978 {
979         tracer_tracing_on(&global_trace);
980 }
981 EXPORT_SYMBOL_GPL(tracing_on);
982
983
984 static __always_inline void
985 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
986 {
987         __this_cpu_write(trace_taskinfo_save, true);
988
989         /* If this is the temp buffer, we need to commit fully */
990         if (this_cpu_read(trace_buffered_event) == event) {
991                 /* Length is in event->array[0] */
992                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
993                 /* Release the temp buffer */
994                 this_cpu_dec(trace_buffered_event_cnt);
995         } else
996                 ring_buffer_unlock_commit(buffer, event);
997 }
998
999 /**
1000  * __trace_puts - write a constant string into the trace buffer.
1001  * @ip:    The address of the caller
1002  * @str:   The constant string to write
1003  * @size:  The size of the string.
1004  */
1005 int __trace_puts(unsigned long ip, const char *str, int size)
1006 {
1007         struct ring_buffer_event *event;
1008         struct trace_buffer *buffer;
1009         struct print_entry *entry;
1010         unsigned long irq_flags;
1011         int alloc;
1012         int pc;
1013
1014         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1015                 return 0;
1016
1017         pc = preempt_count();
1018
1019         if (unlikely(tracing_selftest_running || tracing_disabled))
1020                 return 0;
1021
1022         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023
1024         local_save_flags(irq_flags);
1025         buffer = global_trace.array_buffer.buffer;
1026         ring_buffer_nest_start(buffer);
1027         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028                                             irq_flags, pc);
1029         if (!event) {
1030                 size = 0;
1031                 goto out;
1032         }
1033
1034         entry = ring_buffer_event_data(event);
1035         entry->ip = ip;
1036
1037         memcpy(&entry->buf, str, size);
1038
1039         /* Add a newline if necessary */
1040         if (entry->buf[size - 1] != '\n') {
1041                 entry->buf[size] = '\n';
1042                 entry->buf[size + 1] = '\0';
1043         } else
1044                 entry->buf[size] = '\0';
1045
1046         __buffer_unlock_commit(buffer, event);
1047         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1048  out:
1049         ring_buffer_nest_end(buffer);
1050         return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
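
/*
 * Callers normally reach this through the trace_puts() macro (declared next
 * to trace_printk()) rather than calling it directly, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 *
 * which records the string, along with the caller's instruction pointer,
 * into the top-level ring buffer when the "printk" trace option is set.
 */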
1053
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:    The address of the caller
1057  * @str:   The constant string to write to the buffer
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061         struct ring_buffer_event *event;
1062         struct trace_buffer *buffer;
1063         struct bputs_entry *entry;
1064         unsigned long irq_flags;
1065         int size = sizeof(struct bputs_entry);
1066         int ret = 0;
1067         int pc;
1068
1069         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070                 return 0;
1071
1072         pc = preempt_count();
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         local_save_flags(irq_flags);
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             irq_flags, pc);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot, either with
1145  * tracing_snapshot_alloc() or manually with:
1146  *     echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, this will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         arch_spin_lock(&tr->max_lock);
1197
1198         if (tr->cond_snapshot)
1199                 cond_data = tr->cond_snapshot->cond_data;
1200
1201         arch_spin_unlock(&tr->max_lock);
1202
1203         return cond_data;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206
1207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208                                         struct array_buffer *size_buf, int cpu_id);
1209 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210
1211 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 {
1213         int ret;
1214
1215         if (!tr->allocated_snapshot) {
1216
1217                 /* allocate spare buffer */
1218                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220                 if (ret < 0)
1221                         return ret;
1222
1223                 tr->allocated_snapshot = true;
1224         }
1225
1226         return 0;
1227 }
1228
1229 static void free_snapshot(struct trace_array *tr)
1230 {
1231         /*
1232          * We don't free the ring buffer. Instead, we resize it because
1233          * the max_tr ring buffer has some state (e.g. ring->clock) and
1234          * we want to preserve it.
1235          */
1236         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237         set_buffer_entries(&tr->max_buffer, 1);
1238         tracing_reset_online_cpus(&tr->max_buffer);
1239         tr->allocated_snapshot = false;
1240 }
1241
1242 /**
1243  * tracing_alloc_snapshot - allocate snapshot buffer.
1244  *
1245  * This only allocates the snapshot buffer if it isn't already
1246  * allocated - it doesn't also take a snapshot.
1247  *
1248  * This is meant to be used in cases where the snapshot buffer needs
1249  * to be set up for events that can't sleep but need to be able to
1250  * trigger a snapshot.
1251  */
1252 int tracing_alloc_snapshot(void)
1253 {
1254         struct trace_array *tr = &global_trace;
1255         int ret;
1256
1257         ret = tracing_alloc_snapshot_instance(tr);
1258         WARN_ON(ret < 0);
1259
1260         return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263
1264 /**
1265  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266  *
1267  * This is similar to tracing_snapshot(), but it will allocate the
1268  * snapshot buffer if it isn't already allocated. Use this only
1269  * where it is safe to sleep, as the allocation may sleep.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  */
1275 void tracing_snapshot_alloc(void)
1276 {
1277         int ret;
1278
1279         ret = tracing_alloc_snapshot();
1280         if (ret < 0)
1281                 return;
1282
1283         tracing_snapshot();
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
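
/*
 * The usual split between the two calls above (illustrative):
 *
 *	tracing_alloc_snapshot();	// once, from sleepable context
 *	...
 *	tracing_snapshot();		// in the hot path, when the condition hits
 */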
1286
1287 /**
1288  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289  * @tr:         The tracing instance
1290  * @cond_data:  User data to associate with the snapshot
1291  * @update:     Implementation of the cond_snapshot update function
1292  *
1293  * Check whether the conditional snapshot for the given instance has
1294  * already been enabled, or if the current tracer is already using a
1295  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296  * save the cond_data and update function inside.
1297  *
1298  * Returns 0 if successful, error otherwise.
1299  */
1300 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301                                  cond_update_fn_t update)
1302 {
1303         struct cond_snapshot *cond_snapshot;
1304         int ret = 0;
1305
1306         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307         if (!cond_snapshot)
1308                 return -ENOMEM;
1309
1310         cond_snapshot->cond_data = cond_data;
1311         cond_snapshot->update = update;
1312
1313         mutex_lock(&trace_types_lock);
1314
1315         ret = tracing_alloc_snapshot_instance(tr);
1316         if (ret)
1317                 goto fail_unlock;
1318
1319         if (tr->current_trace->use_max_tr) {
1320                 ret = -EBUSY;
1321                 goto fail_unlock;
1322         }
1323
1324         /*
1325          * The cond_snapshot can only change to NULL without the
1326          * trace_types_lock. We don't care if we race with it going
1327          * to NULL, but we want to make sure that it's not set to
1328          * something other than NULL when we get here, which we can
1329          * do safely with only holding the trace_types_lock and not
1330          * having to take the max_lock.
1331          */
1332         if (tr->cond_snapshot) {
1333                 ret = -EBUSY;
1334                 goto fail_unlock;
1335         }
1336
1337         arch_spin_lock(&tr->max_lock);
1338         tr->cond_snapshot = cond_snapshot;
1339         arch_spin_unlock(&tr->max_lock);
1340
1341         mutex_unlock(&trace_types_lock);
1342
1343         return ret;
1344
1345  fail_unlock:
1346         mutex_unlock(&trace_types_lock);
1347         kfree(cond_snapshot);
1348         return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
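
/*
 * Sketch of a conditional-snapshot user (hypothetical names; the update
 * callback receives the trace array and the cond_data that was passed to
 * tracing_snapshot_cond()):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;	// only snapshot when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */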
1351
1352 /**
1353  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354  * @tr:         The tracing instance
1355  *
1356  * Check whether the conditional snapshot for the given instance is
1357  * enabled; if so, free the cond_snapshot associated with it,
1358  * otherwise return -EINVAL.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 {
1364         int ret = 0;
1365
1366         arch_spin_lock(&tr->max_lock);
1367
1368         if (!tr->cond_snapshot)
1369                 ret = -EINVAL;
1370         else {
1371                 kfree(tr->cond_snapshot);
1372                 tr->cond_snapshot = NULL;
1373         }
1374
1375         arch_spin_unlock(&tr->max_lock);
1376
1377         return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380 #else
1381 void tracing_snapshot(void)
1382 {
1383         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot);
1386 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387 {
1388         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391 int tracing_alloc_snapshot(void)
1392 {
1393         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394         return -ENODEV;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397 void tracing_snapshot_alloc(void)
1398 {
1399         /* Give warning */
1400         tracing_snapshot();
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 {
1405         return NULL;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 {
1410         return -ENODEV;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415         return false;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418 #endif /* CONFIG_TRACER_SNAPSHOT */
1419
1420 void tracer_tracing_off(struct trace_array *tr)
1421 {
1422         if (tr->array_buffer.buffer)
1423                 ring_buffer_record_off(tr->array_buffer.buffer);
1424         /*
1425          * This flag is looked at when buffers haven't been allocated
1426          * yet, or by some tracers (like irqsoff), that just want to
1427          * know if the ring buffer has been disabled, but it can handle
1428          * races of where it gets disabled but we still do a record.
1429          * As the check is in the fast path of the tracers, it is more
1430          * important to be fast than accurate.
1431          */
1432         tr->buffer_disabled = 1;
1433         /* Make the flag seen by readers */
1434         smp_wmb();
1435 }
1436
1437 /**
1438  * tracing_off - turn off tracing buffers
1439  *
1440  * This function stops the tracing buffers from recording data.
1441  * It does not disable any overhead the tracers themselves may
1442  * be causing. This function simply causes all recording to
1443  * the ring buffers to fail.
1444  */
1445 void tracing_off(void)
1446 {
1447         tracer_tracing_off(&global_trace);
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_off);
1450
1451 void disable_trace_on_warning(void)
1452 {
1453         if (__disable_trace_on_warning) {
1454                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455                         "Disabling tracing due to warning\n");
1456                 tracing_off();
1457         }
1458 }
1459
1460 /**
1461  * tracer_tracing_is_on - show the real state of the ring buffer
1462  * @tr: the trace array whose ring buffer state is queried
1463  *
1464  * Shows the real state of the ring buffer: whether it is enabled or not.
1465  */
1466 bool tracer_tracing_is_on(struct trace_array *tr)
1467 {
1468         if (tr->array_buffer.buffer)
1469                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470         return !tr->buffer_disabled;
1471 }
1472
1473 /**
1474  * tracing_is_on - show state of ring buffers enabled
1475  */
1476 int tracing_is_on(void)
1477 {
1478         return tracer_tracing_is_on(&global_trace);
1479 }
1480 EXPORT_SYMBOL_GPL(tracing_is_on);
1481
1482 static int __init set_buf_size(char *str)
1483 {
1484         unsigned long buf_size;
1485
1486         if (!str)
1487                 return 0;
1488         buf_size = memparse(str, &str);
1489         /* nr_entries can not be zero */
1490         if (buf_size == 0)
1491                 return 0;
1492         trace_buf_size = buf_size;
1493         return 1;
1494 }
1495 __setup("trace_buf_size=", set_buf_size);
1496
1497 static int __init set_tracing_thresh(char *str)
1498 {
1499         unsigned long threshold;
1500         int ret;
1501
1502         if (!str)
1503                 return 0;
1504         ret = kstrtoul(str, 0, &threshold);
1505         if (ret < 0)
1506                 return 0;
1507         tracing_thresh = threshold * 1000;
1508         return 1;
1509 }
1510 __setup("tracing_thresh=", set_tracing_thresh);
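
/*
 * E.g. (illustrative command line): "trace_buf_size=4096k" sizes each
 * per-CPU ring buffer, and "tracing_thresh=100" only records latencies
 * above 100 usecs (the value is kept in nanoseconds internally, hence
 * the multiplication by 1000 above).
 */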
1511
1512 unsigned long nsecs_to_usecs(unsigned long nsecs)
1513 {
1514         return nsecs / 1000;
1515 }
1516
1517 /*
1518  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1519  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1520  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1521  * of strings in the order that the evals (enum) were defined.
1522  */
1523 #undef C
1524 #define C(a, b) b
1525
1526 /* These must match the bit positions in trace_iterator_flags */
1527 static const char *trace_options[] = {
1528         TRACE_FLAGS
1529         NULL
1530 };
1531
1532 static struct {
1533         u64 (*func)(void);
1534         const char *name;
1535         int in_ns;              /* is this clock in nanoseconds? */
1536 } trace_clocks[] = {
1537         { trace_clock_local,            "local",        1 },
1538         { trace_clock_global,           "global",       1 },
1539         { trace_clock_counter,          "counter",      0 },
1540         { trace_clock_jiffies,          "uptime",       0 },
1541         { trace_clock,                  "perf",         1 },
1542         { ktime_get_mono_fast_ns,       "mono",         1 },
1543         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1544         { ktime_get_boot_fast_ns,       "boot",         1 },
1545         ARCH_TRACE_CLOCKS
1546 };
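
/*
 * The clock used by an instance can be picked with "trace_clock=" on the
 * command line (see set_trace_boot_clock() above) or at run time, e.g.:
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 *
 * Clocks with in_ns == 0 ("counter", "uptime") report raw counts rather
 * than nanosecond timestamps.
 */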
1547
1548 bool trace_clock_in_ns(struct trace_array *tr)
1549 {
1550         if (trace_clocks[tr->clock_id].in_ns)
1551                 return true;
1552
1553         return false;
1554 }
1555
1556 /*
1557  * trace_parser_get_init - gets the buffer for trace parser
1558  */
1559 int trace_parser_get_init(struct trace_parser *parser, int size)
1560 {
1561         memset(parser, 0, sizeof(*parser));
1562
1563         parser->buffer = kmalloc(size, GFP_KERNEL);
1564         if (!parser->buffer)
1565                 return 1;
1566
1567         parser->size = size;
1568         return 0;
1569 }
1570
1571 /*
1572  * trace_parser_put - frees the buffer for trace parser
1573  */
1574 void trace_parser_put(struct trace_parser *parser)
1575 {
1576         kfree(parser->buffer);
1577         parser->buffer = NULL;
1578 }
1579
1580 /*
1581  * trace_get_user - reads the user input string separated by space
1582  * (matched by isspace(ch))
1583  *
1584  * For each string found the 'struct trace_parser' is updated,
1585  * and the function returns.
1586  *
1587  * Returns number of bytes read.
1588  *
1589  * See kernel/trace/trace.h for 'struct trace_parser' details.
1590  */
1591 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1592         size_t cnt, loff_t *ppos)
1593 {
1594         char ch;
1595         size_t read = 0;
1596         ssize_t ret;
1597
1598         if (!*ppos)
1599                 trace_parser_clear(parser);
1600
1601         ret = get_user(ch, ubuf++);
1602         if (ret)
1603                 goto out;
1604
1605         read++;
1606         cnt--;
1607
1608         /*
1609          * The parser is not finished with the last write,
1610          * continue reading the user input without skipping spaces.
1611          */
1612         if (!parser->cont) {
1613                 /* skip white space */
1614                 while (cnt && isspace(ch)) {
1615                         ret = get_user(ch, ubuf++);
1616                         if (ret)
1617                                 goto out;
1618                         read++;
1619                         cnt--;
1620                 }
1621
1622                 parser->idx = 0;
1623
1624                 /* only spaces were written */
1625                 if (isspace(ch) || !ch) {
1626                         *ppos += read;
1627                         ret = read;
1628                         goto out;
1629                 }
1630         }
1631
1632         /* read the non-space input */
1633         while (cnt && !isspace(ch) && ch) {
1634                 if (parser->idx < parser->size - 1)
1635                         parser->buffer[parser->idx++] = ch;
1636                 else {
1637                         ret = -EINVAL;
1638                         goto out;
1639                 }
1640                 ret = get_user(ch, ubuf++);
1641                 if (ret)
1642                         goto out;
1643                 read++;
1644                 cnt--;
1645         }
1646
1647         /* We either got finished input or we have to wait for another call. */
1648         if (isspace(ch) || !ch) {
1649                 parser->buffer[parser->idx] = 0;
1650                 parser->cont = false;
1651         } else if (parser->idx < parser->size - 1) {
1652                 parser->cont = true;
1653                 parser->buffer[parser->idx++] = ch;
1654                 /* Make sure the parsed string always terminates with '\0'. */
1655                 parser->buffer[parser->idx] = 0;
1656         } else {
1657                 ret = -EINVAL;
1658                 goto out;
1659         }
1660
1661         *ppos += read;
1662         ret = read;
1663
1664 out:
1665         return ret;
1666 }
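
/*
 * Illustrative sketch (not part of this file): how a write() handler might
 * use the trace_parser helpers above to pull one whitespace-separated token
 * out of a user buffer.  "example_write" and the buffer size are made up.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			pr_info("token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */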
1667
1668 /* TODO add a seq_buf_to_buffer() */
1669 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 {
1671         int len;
1672
1673         if (trace_seq_used(s) <= s->seq.readpos)
1674                 return -EBUSY;
1675
1676         len = trace_seq_used(s) - s->seq.readpos;
1677         if (cnt > len)
1678                 cnt = len;
1679         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1680
1681         s->seq.readpos += cnt;
1682         return cnt;
1683 }
1684
1685 unsigned long __read_mostly     tracing_thresh;
1686 static const struct file_operations tracing_max_lat_fops;
1687
1688 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1689         defined(CONFIG_FSNOTIFY)
1690
1691 static struct workqueue_struct *fsnotify_wq;
1692
1693 static void latency_fsnotify_workfn(struct work_struct *work)
1694 {
1695         struct trace_array *tr = container_of(work, struct trace_array,
1696                                               fsnotify_work);
1697         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1698 }
1699
1700 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1701 {
1702         struct trace_array *tr = container_of(iwork, struct trace_array,
1703                                               fsnotify_irqwork);
1704         queue_work(fsnotify_wq, &tr->fsnotify_work);
1705 }
1706
1707 static void trace_create_maxlat_file(struct trace_array *tr,
1708                                      struct dentry *d_tracer)
1709 {
1710         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1711         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1712         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1713                                               d_tracer, &tr->max_latency,
1714                                               &tracing_max_lat_fops);
1715 }
1716
1717 __init static int latency_fsnotify_init(void)
1718 {
1719         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1720                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1721         if (!fsnotify_wq) {
1722                 pr_err("Unable to allocate tr_max_lat_wq\n");
1723                 return -ENOMEM;
1724         }
1725         return 0;
1726 }
1727
1728 late_initcall_sync(latency_fsnotify_init);
1729
1730 void latency_fsnotify(struct trace_array *tr)
1731 {
1732         if (!fsnotify_wq)
1733                 return;
1734         /*
1735          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1736          * possible that we are called from __schedule() or do_idle(), which
1737          * could cause a deadlock.
1738          */
1739         irq_work_queue(&tr->fsnotify_irqwork);
1740 }
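
/*
 * Illustrative sketch (not part of this file): the deferral pattern used by
 * latency_fsnotify() above, reduced to its essentials.  Work is bounced from
 * a context where queue_work() is unsafe to an irq_work handler, which then
 * queues onto a workqueue where sleeping is allowed.  All "example_*" names
 * are made up.
 *
 *	struct example_ctx {
 *		struct irq_work		iwork;
 *		struct work_struct	work;
 *	};
 *
 *	static void example_workfn(struct work_struct *work)
 *	{
 *		struct example_ctx *ctx = container_of(work, struct example_ctx, work);
 *
 *		pr_info("running in process context for %p\n", ctx);
 *	}
 *
 *	static void example_irq_workfn(struct irq_work *iwork)
 *	{
 *		struct example_ctx *ctx = container_of(iwork, struct example_ctx, iwork);
 *
 *		queue_work(system_wq, &ctx->work);
 *	}
 *
 * From the restricted context, only irq_work_queue(&ctx->iwork) is called.
 */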
1741
1742 /*
1743  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1744  *  defined(CONFIG_FSNOTIFY)
1745  */
1746 #else
1747
1748 #define trace_create_maxlat_file(tr, d_tracer)                          \
1749         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1750                           &tr->max_latency, &tracing_max_lat_fops)
1751
1752 #endif
1753
1754 #ifdef CONFIG_TRACER_MAX_TRACE
1755 /*
1756  * Copy the new maximum trace into the separate maximum-trace
1757  * structure. (this way the maximum trace is permanently saved,
1758  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1759  */
1760 static void
1761 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1762 {
1763         struct array_buffer *trace_buf = &tr->array_buffer;
1764         struct array_buffer *max_buf = &tr->max_buffer;
1765         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1766         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1767
1768         max_buf->cpu = cpu;
1769         max_buf->time_start = data->preempt_timestamp;
1770
1771         max_data->saved_latency = tr->max_latency;
1772         max_data->critical_start = data->critical_start;
1773         max_data->critical_end = data->critical_end;
1774
1775         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1776         max_data->pid = tsk->pid;
1777         /*
1778          * If tsk == current, then use current_uid(), as that does not use
1779          * RCU. The irq tracer can be called out of RCU scope.
1780          */
1781         if (tsk == current)
1782                 max_data->uid = current_uid();
1783         else
1784                 max_data->uid = task_uid(tsk);
1785
1786         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1787         max_data->policy = tsk->policy;
1788         max_data->rt_priority = tsk->rt_priority;
1789
1790         /* record this task's comm */
1791         tracing_record_cmdline(tsk);
1792         latency_fsnotify(tr);
1793 }
1794
1795 /**
1796  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1797  * @tr: trace array to snapshot
1798  * @tsk: the task with the latency
1799  * @cpu: The cpu that initiated the trace.
1800  * @cond_data: User data associated with a conditional snapshot
1801  *
1802  * Flip the buffers between the @tr and the max_tr and record information
1803  * about which task was the cause of this latency.
1804  */
1805 void
1806 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1807               void *cond_data)
1808 {
1809         if (tr->stop_count)
1810                 return;
1811
1812         WARN_ON_ONCE(!irqs_disabled());
1813
1814         if (!tr->allocated_snapshot) {
1815                 /* Only the nop tracer should hit this when disabling */
1816                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1817                 return;
1818         }
1819
1820         arch_spin_lock(&tr->max_lock);
1821
1822         /* Inherit the recordable setting from array_buffer */
1823         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1824                 ring_buffer_record_on(tr->max_buffer.buffer);
1825         else
1826                 ring_buffer_record_off(tr->max_buffer.buffer);
1827
1828 #ifdef CONFIG_TRACER_SNAPSHOT
1829         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1830                 goto out_unlock;
1831 #endif
1832         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1833
1834         __update_max_tr(tr, tsk, cpu);
1835
1836  out_unlock:
1837         arch_spin_unlock(&tr->max_lock);
1838 }
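
/*
 * Illustrative sketch (hedged, not lifted verbatim from any in-tree tracer):
 * a latency tracer that has just measured a new worst-case delta would
 * typically record it along these lines, with "delta" standing in for the
 * measured latency:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */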
1839
1840 /**
1841  * update_max_tr_single - only copy one trace over, and reset the rest
1842  * @tr: trace array to snapshot
1843  * @tsk: task with the latency
1844  * @cpu: the cpu of the buffer to copy.
1845  *
1846  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1847  */
1848 void
1849 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1850 {
1851         int ret;
1852
1853         if (tr->stop_count)
1854                 return;
1855
1856         WARN_ON_ONCE(!irqs_disabled());
1857         if (!tr->allocated_snapshot) {
1858                 /* Only the nop tracer should hit this when disabling */
1859                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1860                 return;
1861         }
1862
1863         arch_spin_lock(&tr->max_lock);
1864
1865         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1866
1867         if (ret == -EBUSY) {
1868                 /*
1869                  * We failed to swap the buffer due to a commit taking
1870                  * place on this CPU. We fail to record the new max, but
1871                  * we note the failure in the max trace buffer (no one
1872                  * writes directly to it) so the reason is visible when read.
1873                  */
1874                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1875                         "Failed to swap buffers due to commit in progress\n");
1876         }
1877
1878         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1879
1880         __update_max_tr(tr, tsk, cpu);
1881         arch_spin_unlock(&tr->max_lock);
1882 }
1883 #endif /* CONFIG_TRACER_MAX_TRACE */
1884
1885 static int wait_on_pipe(struct trace_iterator *iter, int full)
1886 {
1887         /* Iterators are static, they should be filled or empty */
1888         if (trace_buffer_iter(iter, iter->cpu_file))
1889                 return 0;
1890
1891         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1892                                 full);
1893 }
1894
1895 #ifdef CONFIG_FTRACE_STARTUP_TEST
1896 static bool selftests_can_run;
1897
1898 struct trace_selftests {
1899         struct list_head                list;
1900         struct tracer                   *type;
1901 };
1902
1903 static LIST_HEAD(postponed_selftests);
1904
1905 static int save_selftest(struct tracer *type)
1906 {
1907         struct trace_selftests *selftest;
1908
1909         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1910         if (!selftest)
1911                 return -ENOMEM;
1912
1913         selftest->type = type;
1914         list_add(&selftest->list, &postponed_selftests);
1915         return 0;
1916 }
1917
1918 static int run_tracer_selftest(struct tracer *type)
1919 {
1920         struct trace_array *tr = &global_trace;
1921         struct tracer *saved_tracer = tr->current_trace;
1922         int ret;
1923
1924         if (!type->selftest || tracing_selftest_disabled)
1925                 return 0;
1926
1927         /*
1928          * If a tracer registers early in boot up (before scheduling is
1929          * initialized and such), then do not run its selftest yet.
1930          * Instead, run it a little later in the boot process.
1931          */
1932         if (!selftests_can_run)
1933                 return save_selftest(type);
1934
1935         /*
1936          * Run a selftest on this tracer.
1937          * Here we reset the trace buffer, and set the current
1938          * tracer to be this tracer. The tracer can then run some
1939          * internal tracing to verify that everything is in order.
1940          * If we fail, we do not register this tracer.
1941          */
1942         tracing_reset_online_cpus(&tr->array_buffer);
1943
1944         tr->current_trace = type;
1945
1946 #ifdef CONFIG_TRACER_MAX_TRACE
1947         if (type->use_max_tr) {
1948                 /* If we expanded the buffers, make sure the max is expanded too */
1949                 if (ring_buffer_expanded)
1950                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1951                                            RING_BUFFER_ALL_CPUS);
1952                 tr->allocated_snapshot = true;
1953         }
1954 #endif
1955
1956         /* the test is responsible for initializing and enabling */
1957         pr_info("Testing tracer %s: ", type->name);
1958         ret = type->selftest(type, tr);
1959         /* the test is responsible for resetting too */
1960         tr->current_trace = saved_tracer;
1961         if (ret) {
1962                 printk(KERN_CONT "FAILED!\n");
1963                 /* Add the warning after printing 'FAILED' */
1964                 WARN_ON(1);
1965                 return -1;
1966         }
1967         /* Only reset on passing, to avoid touching corrupted buffers */
1968         tracing_reset_online_cpus(&tr->array_buffer);
1969
1970 #ifdef CONFIG_TRACER_MAX_TRACE
1971         if (type->use_max_tr) {
1972                 tr->allocated_snapshot = false;
1973
1974                 /* Shrink the max buffer again */
1975                 if (ring_buffer_expanded)
1976                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1977                                            RING_BUFFER_ALL_CPUS);
1978         }
1979 #endif
1980
1981         printk(KERN_CONT "PASSED\n");
1982         return 0;
1983 }
1984
1985 static __init int init_trace_selftests(void)
1986 {
1987         struct trace_selftests *p, *n;
1988         struct tracer *t, **last;
1989         int ret;
1990
1991         selftests_can_run = true;
1992
1993         mutex_lock(&trace_types_lock);
1994
1995         if (list_empty(&postponed_selftests))
1996                 goto out;
1997
1998         pr_info("Running postponed tracer tests:\n");
1999
2000         tracing_selftest_running = true;
2001         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2002                 /* This loop can take minutes when sanitizers are enabled, so
2003                  * let's make sure we allow RCU processing.
2004                  */
2005                 cond_resched();
2006                 ret = run_tracer_selftest(p->type);
2007                 /* If the test fails, then warn and remove from available_tracers */
2008                 if (ret < 0) {
2009                         WARN(1, "tracer: %s failed selftest, disabling\n",
2010                              p->type->name);
2011                         last = &trace_types;
2012                         for (t = trace_types; t; t = t->next) {
2013                                 if (t == p->type) {
2014                                         *last = t->next;
2015                                         break;
2016                                 }
2017                                 last = &t->next;
2018                         }
2019                 }
2020                 list_del(&p->list);
2021                 kfree(p);
2022         }
2023         tracing_selftest_running = false;
2024
2025  out:
2026         mutex_unlock(&trace_types_lock);
2027
2028         return 0;
2029 }
2030 core_initcall(init_trace_selftests);
2031 #else
2032 static inline int run_tracer_selftest(struct tracer *type)
2033 {
2034         return 0;
2035 }
2036 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2037
2038 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2039
2040 static void __init apply_trace_boot_options(void);
2041
2042 /**
2043  * register_tracer - register a tracer with the ftrace system.
2044  * @type: the plugin for the tracer
2045  *
2046  * Register a new plugin tracer.
2047  */
2048 int __init register_tracer(struct tracer *type)
2049 {
2050         struct tracer *t;
2051         int ret = 0;
2052
2053         if (!type->name) {
2054                 pr_info("Tracer must have a name\n");
2055                 return -1;
2056         }
2057
2058         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2059                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2060                 return -1;
2061         }
2062
2063         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2064                 pr_warn("Can not register tracer %s due to lockdown\n",
2065                            type->name);
2066                 return -EPERM;
2067         }
2068
2069         mutex_lock(&trace_types_lock);
2070
2071         tracing_selftest_running = true;
2072
2073         for (t = trace_types; t; t = t->next) {
2074                 if (strcmp(type->name, t->name) == 0) {
2075                         /* already found */
2076                         pr_info("Tracer %s already registered\n",
2077                                 type->name);
2078                         ret = -1;
2079                         goto out;
2080                 }
2081         }
2082
2083         if (!type->set_flag)
2084                 type->set_flag = &dummy_set_flag;
2085         if (!type->flags) {
2086                 /* allocate a dummy tracer_flags */
2087                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2088                 if (!type->flags) {
2089                         ret = -ENOMEM;
2090                         goto out;
2091                 }
2092                 type->flags->val = 0;
2093                 type->flags->opts = dummy_tracer_opt;
2094         } else
2095                 if (!type->flags->opts)
2096                         type->flags->opts = dummy_tracer_opt;
2097
2098         /* store the tracer for __set_tracer_option */
2099         type->flags->trace = type;
2100
2101         ret = run_tracer_selftest(type);
2102         if (ret < 0)
2103                 goto out;
2104
2105         type->next = trace_types;
2106         trace_types = type;
2107         add_tracer_options(&global_trace, type);
2108
2109  out:
2110         tracing_selftest_running = false;
2111         mutex_unlock(&trace_types_lock);
2112
2113         if (ret || !default_bootup_tracer)
2114                 goto out_unlock;
2115
2116         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2117                 goto out_unlock;
2118
2119         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2120         /* Do we want this tracer to start on bootup? */
2121         tracing_set_tracer(&global_trace, type->name);
2122         default_bootup_tracer = NULL;
2123
2124         apply_trace_boot_options();
2125
2126         /* Disable other selftests, since this will break them. */
2127         disable_tracing_selftest("running a tracer");
2128
2129  out_unlock:
2130         return ret;
2131 }
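
/*
 * Illustrative sketch (not part of this file): the minimum a new plugin
 * tracer provides before calling register_tracer() from its own boot-time
 * init code.  All "example_*" names are made up.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int example_tracer_register(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_register);
 */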
2132
2133 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2134 {
2135         struct trace_buffer *buffer = buf->buffer;
2136
2137         if (!buffer)
2138                 return;
2139
2140         ring_buffer_record_disable(buffer);
2141
2142         /* Make sure all commits have finished */
2143         synchronize_rcu();
2144         ring_buffer_reset_cpu(buffer, cpu);
2145
2146         ring_buffer_record_enable(buffer);
2147 }
2148
2149 void tracing_reset_online_cpus(struct array_buffer *buf)
2150 {
2151         struct trace_buffer *buffer = buf->buffer;
2152
2153         if (!buffer)
2154                 return;
2155
2156         ring_buffer_record_disable(buffer);
2157
2158         /* Make sure all commits have finished */
2159         synchronize_rcu();
2160
2161         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2162
2163         ring_buffer_reset_online_cpus(buffer);
2164
2165         ring_buffer_record_enable(buffer);
2166 }
2167
2168 /* Must have trace_types_lock held */
2169 void tracing_reset_all_online_cpus(void)
2170 {
2171         struct trace_array *tr;
2172
2173         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2174                 if (!tr->clear_trace)
2175                         continue;
2176                 tr->clear_trace = false;
2177                 tracing_reset_online_cpus(&tr->array_buffer);
2178 #ifdef CONFIG_TRACER_MAX_TRACE
2179                 tracing_reset_online_cpus(&tr->max_buffer);
2180 #endif
2181         }
2182 }
2183
2184 static int *tgid_map;
2185
2186 #define SAVED_CMDLINES_DEFAULT 128
2187 #define NO_CMDLINE_MAP UINT_MAX
2188 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2189 struct saved_cmdlines_buffer {
2190         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2191         unsigned *map_cmdline_to_pid;
2192         unsigned cmdline_num;
2193         int cmdline_idx;
2194         char *saved_cmdlines;
2195 };
2196 static struct saved_cmdlines_buffer *savedcmd;
2197
2198 /* temporarily disable recording */
2199 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2200
2201 static inline char *get_saved_cmdlines(int idx)
2202 {
2203         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2204 }
2205
2206 static inline void set_cmdline(int idx, const char *cmdline)
2207 {
2208         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2209 }
2210
2211 static int allocate_cmdlines_buffer(unsigned int val,
2212                                     struct saved_cmdlines_buffer *s)
2213 {
2214         s->map_cmdline_to_pid = kmalloc_array(val,
2215                                               sizeof(*s->map_cmdline_to_pid),
2216                                               GFP_KERNEL);
2217         if (!s->map_cmdline_to_pid)
2218                 return -ENOMEM;
2219
2220         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2221         if (!s->saved_cmdlines) {
2222                 kfree(s->map_cmdline_to_pid);
2223                 return -ENOMEM;
2224         }
2225
2226         s->cmdline_idx = 0;
2227         s->cmdline_num = val;
2228         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2229                sizeof(s->map_pid_to_cmdline));
2230         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2231                val * sizeof(*s->map_cmdline_to_pid));
2232
2233         return 0;
2234 }
2235
2236 static int trace_create_savedcmd(void)
2237 {
2238         int ret;
2239
2240         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2241         if (!savedcmd)
2242                 return -ENOMEM;
2243
2244         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2245         if (ret < 0) {
2246                 kfree(savedcmd);
2247                 savedcmd = NULL;
2248                 return -ENOMEM;
2249         }
2250
2251         return 0;
2252 }
2253
2254 int is_tracing_stopped(void)
2255 {
2256         return global_trace.stop_count;
2257 }
2258
2259 /**
2260  * tracing_start - quick start of the tracer
2261  *
2262  * If tracing is enabled but was stopped by tracing_stop,
2263  * this will start the tracer back up.
2264  */
2265 void tracing_start(void)
2266 {
2267         struct trace_buffer *buffer;
2268         unsigned long flags;
2269
2270         if (tracing_disabled)
2271                 return;
2272
2273         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2274         if (--global_trace.stop_count) {
2275                 if (global_trace.stop_count < 0) {
2276                         /* Someone screwed up their debugging */
2277                         WARN_ON_ONCE(1);
2278                         global_trace.stop_count = 0;
2279                 }
2280                 goto out;
2281         }
2282
2283         /* Prevent the buffers from switching */
2284         arch_spin_lock(&global_trace.max_lock);
2285
2286         buffer = global_trace.array_buffer.buffer;
2287         if (buffer)
2288                 ring_buffer_record_enable(buffer);
2289
2290 #ifdef CONFIG_TRACER_MAX_TRACE
2291         buffer = global_trace.max_buffer.buffer;
2292         if (buffer)
2293                 ring_buffer_record_enable(buffer);
2294 #endif
2295
2296         arch_spin_unlock(&global_trace.max_lock);
2297
2298  out:
2299         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2300 }
2301
2302 static void tracing_start_tr(struct trace_array *tr)
2303 {
2304         struct trace_buffer *buffer;
2305         unsigned long flags;
2306
2307         if (tracing_disabled)
2308                 return;
2309
2310         /* If global, we need to also start the max tracer */
2311         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2312                 return tracing_start();
2313
2314         raw_spin_lock_irqsave(&tr->start_lock, flags);
2315
2316         if (--tr->stop_count) {
2317                 if (tr->stop_count < 0) {
2318                         /* Someone screwed up their debugging */
2319                         WARN_ON_ONCE(1);
2320                         tr->stop_count = 0;
2321                 }
2322                 goto out;
2323         }
2324
2325         buffer = tr->array_buffer.buffer;
2326         if (buffer)
2327                 ring_buffer_record_enable(buffer);
2328
2329  out:
2330         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2331 }
2332
2333 /**
2334  * tracing_stop - quick stop of the tracer
2335  *
2336  * Light weight way to stop tracing. Use in conjunction with
2337  * tracing_start.
2338  */
2339 void tracing_stop(void)
2340 {
2341         struct trace_buffer *buffer;
2342         unsigned long flags;
2343
2344         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2345         if (global_trace.stop_count++)
2346                 goto out;
2347
2348         /* Prevent the buffers from switching */
2349         arch_spin_lock(&global_trace.max_lock);
2350
2351         buffer = global_trace.array_buffer.buffer;
2352         if (buffer)
2353                 ring_buffer_record_disable(buffer);
2354
2355 #ifdef CONFIG_TRACER_MAX_TRACE
2356         buffer = global_trace.max_buffer.buffer;
2357         if (buffer)
2358                 ring_buffer_record_disable(buffer);
2359 #endif
2360
2361         arch_spin_unlock(&global_trace.max_lock);
2362
2363  out:
2364         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2365 }
2366
2367 static void tracing_stop_tr(struct trace_array *tr)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         /* If global, we need to also stop the max tracer */
2373         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2374                 return tracing_stop();
2375
2376         raw_spin_lock_irqsave(&tr->start_lock, flags);
2377         if (tr->stop_count++)
2378                 goto out;
2379
2380         buffer = tr->array_buffer.buffer;
2381         if (buffer)
2382                 ring_buffer_record_disable(buffer);
2383
2384  out:
2385         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2386 }
2387
2388 static int trace_save_cmdline(struct task_struct *tsk)
2389 {
2390         unsigned pid, idx;
2391
2392         /* treat recording of idle task as a success */
2393         if (!tsk->pid)
2394                 return 1;
2395
2396         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2397                 return 0;
2398
2399         /*
2400          * It's not the end of the world if we don't get
2401          * the lock, but we also don't want to spin
2402          * nor do we want to disable interrupts,
2403          * so if we miss here, then better luck next time.
2404          */
2405         if (!arch_spin_trylock(&trace_cmdline_lock))
2406                 return 0;
2407
2408         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2409         if (idx == NO_CMDLINE_MAP) {
2410                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2411
2412                 /*
2413                  * Check whether the cmdline buffer at idx has a pid
2414                  * mapped. We are going to overwrite that entry so we
2415                  * need to clear the map_pid_to_cmdline. Otherwise we
2416                  * would read the new comm for the old pid.
2417                  */
2418                 pid = savedcmd->map_cmdline_to_pid[idx];
2419                 if (pid != NO_CMDLINE_MAP)
2420                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2421
2422                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2423                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2424
2425                 savedcmd->cmdline_idx = idx;
2426         }
2427
2428         set_cmdline(idx, tsk->comm);
2429
2430         arch_spin_unlock(&trace_cmdline_lock);
2431
2432         return 1;
2433 }
2434
2435 static void __trace_find_cmdline(int pid, char comm[])
2436 {
2437         unsigned map;
2438
2439         if (!pid) {
2440                 strcpy(comm, "<idle>");
2441                 return;
2442         }
2443
2444         if (WARN_ON_ONCE(pid < 0)) {
2445                 strcpy(comm, "<XXX>");
2446                 return;
2447         }
2448
2449         if (pid > PID_MAX_DEFAULT) {
2450                 strcpy(comm, "<...>");
2451                 return;
2452         }
2453
2454         map = savedcmd->map_pid_to_cmdline[pid];
2455         if (map != NO_CMDLINE_MAP)
2456                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2457         else
2458                 strcpy(comm, "<...>");
2459 }
2460
2461 void trace_find_cmdline(int pid, char comm[])
2462 {
2463         preempt_disable();
2464         arch_spin_lock(&trace_cmdline_lock);
2465
2466         __trace_find_cmdline(pid, comm);
2467
2468         arch_spin_unlock(&trace_cmdline_lock);
2469         preempt_enable();
2470 }
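
/*
 * Illustrative sketch: output code typically resolves a recorded pid back to
 * a comm roughly like this (the real users live in trace_output.c; "s" is a
 * trace_seq and "entry" a recorded trace_entry in this example):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
 */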
2471
2472 int trace_find_tgid(int pid)
2473 {
2474         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2475                 return 0;
2476
2477         return tgid_map[pid];
2478 }
2479
2480 static int trace_save_tgid(struct task_struct *tsk)
2481 {
2482         /* treat recording of idle task as a success */
2483         if (!tsk->pid)
2484                 return 1;
2485
2486         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2487                 return 0;
2488
2489         tgid_map[tsk->pid] = tsk->tgid;
2490         return 1;
2491 }
2492
2493 static bool tracing_record_taskinfo_skip(int flags)
2494 {
2495         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2496                 return true;
2497         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2498                 return true;
2499         if (!__this_cpu_read(trace_taskinfo_save))
2500                 return true;
2501         return false;
2502 }
2503
2504 /**
2505  * tracing_record_taskinfo - record the task info of a task
2506  *
2507  * @task:  task to record
2508  * @flags: TRACE_RECORD_CMDLINE for recording comm
2509  *         TRACE_RECORD_TGID for recording tgid
2510  */
2511 void tracing_record_taskinfo(struct task_struct *task, int flags)
2512 {
2513         bool done;
2514
2515         if (tracing_record_taskinfo_skip(flags))
2516                 return;
2517
2518         /*
2519          * Record as much task information as possible. If some fail, continue
2520          * to try to record the others.
2521          */
2522         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2523         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2524
2525         /* If recording any information failed, retry again soon. */
2526         if (!done)
2527                 return;
2528
2529         __this_cpu_write(trace_taskinfo_save, false);
2530 }
2531
2532 /**
2533  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2534  *
2535  * @prev: previous task during sched_switch
2536  * @next: next task during sched_switch
2537  * @flags: TRACE_RECORD_CMDLINE for recording comm
2538  *         TRACE_RECORD_TGID for recording tgid
2539  */
2540 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2541                                           struct task_struct *next, int flags)
2542 {
2543         bool done;
2544
2545         if (tracing_record_taskinfo_skip(flags))
2546                 return;
2547
2548         /*
2549          * Record as much task information as possible. If some fail, continue
2550          * to try to record the others.
2551          */
2552         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2553         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2555         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2556
2557         /* If recording any information failed, retry again soon. */
2558         if (!done)
2559                 return;
2560
2561         __this_cpu_write(trace_taskinfo_save, false);
2562 }
2563
2564 /* Helpers to record a specific task information */
2565 void tracing_record_cmdline(struct task_struct *task)
2566 {
2567         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2568 }
2569
2570 void tracing_record_tgid(struct task_struct *task)
2571 {
2572         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2573 }
2574
2575 /*
2576  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2577  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2578  * simplifies those functions and keeps them in sync.
2579  */
2580 enum print_line_t trace_handle_return(struct trace_seq *s)
2581 {
2582         return trace_seq_has_overflowed(s) ?
2583                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2584 }
2585 EXPORT_SYMBOL_GPL(trace_handle_return);
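
/*
 * Illustrative sketch: a trace_event output callback would normally end with
 * trace_handle_return() so an overflowing trace_seq is reported as a partial
 * line.  "example_event_print" is a made-up name; the real callbacks live in
 * trace_output.c.
 *
 *	static enum print_line_t example_event_print(struct trace_iterator *iter,
 *						     int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example output\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */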
2586
2587 void
2588 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2589                              unsigned long flags, int pc)
2590 {
2591         struct task_struct *tsk = current;
2592
2593         entry->preempt_count            = pc & 0xff;
2594         entry->pid                      = (tsk) ? tsk->pid : 0;
2595         entry->type                     = type;
2596         entry->flags =
2597 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2598                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2599 #else
2600                 TRACE_FLAG_IRQS_NOSUPPORT |
2601 #endif
2602                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2603                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2604                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2605                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2606                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2607 }
2608 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
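
/*
 * For example (illustrative): an event recorded from a hard interrupt
 * handler running with interrupts disabled ends up with both
 * TRACE_FLAG_IRQS_OFF and TRACE_FLAG_HARDIRQ set in entry->flags, which the
 * latency output format later renders as the 'd' and 'h' fields.
 */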
2609
2610 struct ring_buffer_event *
2611 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2612                           int type,
2613                           unsigned long len,
2614                           unsigned long flags, int pc)
2615 {
2616         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2617 }
2618
2619 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2620 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2621 static int trace_buffered_event_ref;
2622
2623 /**
2624  * trace_buffered_event_enable - enable buffering events
2625  *
2626  * When events are being filtered, it is quicker to use a temporary
2627  * buffer to write the event data into if there's a likely chance
2628  * that it will not be committed. The discard of the ring buffer
2629  * is not as fast as committing, and is much slower than copying
2630  * a commit.
2631  *
2632  * When an event is to be filtered, allocate per-CPU buffers to
2633  * write the event data into; if the event is then filtered and
2634  * discarded, it is simply dropped, otherwise the entire data is
2635  * committed in one shot.
2636  */
2637 void trace_buffered_event_enable(void)
2638 {
2639         struct ring_buffer_event *event;
2640         struct page *page;
2641         int cpu;
2642
2643         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2644
2645         if (trace_buffered_event_ref++)
2646                 return;
2647
2648         for_each_tracing_cpu(cpu) {
2649                 page = alloc_pages_node(cpu_to_node(cpu),
2650                                         GFP_KERNEL | __GFP_NORETRY, 0);
2651                 if (!page)
2652                         goto failed;
2653
2654                 event = page_address(page);
2655                 memset(event, 0, sizeof(*event));
2656
2657                 per_cpu(trace_buffered_event, cpu) = event;
2658
2659                 preempt_disable();
2660                 if (cpu == smp_processor_id() &&
2661                     __this_cpu_read(trace_buffered_event) !=
2662                     per_cpu(trace_buffered_event, cpu))
2663                         WARN_ON_ONCE(1);
2664                 preempt_enable();
2665         }
2666
2667         return;
2668  failed:
2669         trace_buffered_event_disable();
2670 }
2671
2672 static void enable_trace_buffered_event(void *data)
2673 {
2674         /* Probably not needed, but do it anyway */
2675         smp_rmb();
2676         this_cpu_dec(trace_buffered_event_cnt);
2677 }
2678
2679 static void disable_trace_buffered_event(void *data)
2680 {
2681         this_cpu_inc(trace_buffered_event_cnt);
2682 }
2683
2684 /**
2685  * trace_buffered_event_disable - disable buffering events
2686  *
2687  * When a filter is removed, it is faster to not use the buffered
2688  * events, and to commit directly into the ring buffer. Free up
2689  * the temp buffers when there are no more users. This requires
2690  * special synchronization with current events.
2691  */
2692 void trace_buffered_event_disable(void)
2693 {
2694         int cpu;
2695
2696         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2697
2698         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2699                 return;
2700
2701         if (--trace_buffered_event_ref)
2702                 return;
2703
2704         preempt_disable();
2705         /* For each CPU, set the buffer as used. */
2706         smp_call_function_many(tracing_buffer_mask,
2707                                disable_trace_buffered_event, NULL, 1);
2708         preempt_enable();
2709
2710         /* Wait for all current users to finish */
2711         synchronize_rcu();
2712
2713         for_each_tracing_cpu(cpu) {
2714                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2715                 per_cpu(trace_buffered_event, cpu) = NULL;
2716         }
2717         /*
2718          * Make sure trace_buffered_event is NULL before clearing
2719          * trace_buffered_event_cnt.
2720          */
2721         smp_wmb();
2722
2723         preempt_disable();
2724         /* Do the work on each cpu */
2725         smp_call_function_many(tracing_buffer_mask,
2726                                enable_trace_buffered_event, NULL, 1);
2727         preempt_enable();
2728 }
2729
2730 static struct trace_buffer *temp_buffer;
2731
2732 struct ring_buffer_event *
2733 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2734                           struct trace_event_file *trace_file,
2735                           int type, unsigned long len,
2736                           unsigned long flags, int pc)
2737 {
2738         struct ring_buffer_event *entry;
2739         int val;
2740
2741         *current_rb = trace_file->tr->array_buffer.buffer;
2742
2743         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2744              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2745             (entry = this_cpu_read(trace_buffered_event))) {
2746                 /* Try to use the per cpu buffer first */
2747                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2748                 if (val == 1) {
2749                         trace_event_setup(entry, type, flags, pc);
2750                         entry->array[0] = len;
2751                         return entry;
2752                 }
2753                 this_cpu_dec(trace_buffered_event_cnt);
2754         }
2755
2756         entry = __trace_buffer_lock_reserve(*current_rb,
2757                                             type, len, flags, pc);
2758         /*
2759          * If tracing is off, but we have triggers enabled
2760          * we still need to look at the event data. Use the temp_buffer
2761          * to store the trace event for the trigger to use. It's recursion
2762          * safe and will not be recorded anywhere.
2763          */
2764         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2765                 *current_rb = temp_buffer;
2766                 entry = __trace_buffer_lock_reserve(*current_rb,
2767                                                     type, len, flags, pc);
2768         }
2769         return entry;
2770 }
2771 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2772
2773 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2774 static DEFINE_MUTEX(tracepoint_printk_mutex);
2775
2776 static void output_printk(struct trace_event_buffer *fbuffer)
2777 {
2778         struct trace_event_call *event_call;
2779         struct trace_event_file *file;
2780         struct trace_event *event;
2781         unsigned long flags;
2782         struct trace_iterator *iter = tracepoint_print_iter;
2783
2784         /* We should never get here if iter is NULL */
2785         if (WARN_ON_ONCE(!iter))
2786                 return;
2787
2788         event_call = fbuffer->trace_file->event_call;
2789         if (!event_call || !event_call->event.funcs ||
2790             !event_call->event.funcs->trace)
2791                 return;
2792
2793         file = fbuffer->trace_file;
2794         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2795             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2796              !filter_match_preds(file->filter, fbuffer->entry)))
2797                 return;
2798
2799         event = &fbuffer->trace_file->event_call->event;
2800
2801         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2802         trace_seq_init(&iter->seq);
2803         iter->ent = fbuffer->entry;
2804         event_call->event.funcs->trace(iter, 0, event);
2805         trace_seq_putc(&iter->seq, 0);
2806         printk("%s", iter->seq.buffer);
2807
2808         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2809 }
2810
2811 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2812                              void *buffer, size_t *lenp,
2813                              loff_t *ppos)
2814 {
2815         int save_tracepoint_printk;
2816         int ret;
2817
2818         mutex_lock(&tracepoint_printk_mutex);
2819         save_tracepoint_printk = tracepoint_printk;
2820
2821         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2822
2823         /*
2824          * This will force exiting early, as tracepoint_printk
2825          * is always zero when tracepoint_print_iter is not allocated
2826          */
2827         if (!tracepoint_print_iter)
2828                 tracepoint_printk = 0;
2829
2830         if (save_tracepoint_printk == tracepoint_printk)
2831                 goto out;
2832
2833         if (tracepoint_printk)
2834                 static_key_enable(&tracepoint_printk_key.key);
2835         else
2836                 static_key_disable(&tracepoint_printk_key.key);
2837
2838  out:
2839         mutex_unlock(&tracepoint_printk_mutex);
2840
2841         return ret;
2842 }
2843
2844 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2845 {
2846         if (static_key_false(&tracepoint_printk_key.key))
2847                 output_printk(fbuffer);
2848
2849         if (static_branch_unlikely(&trace_event_exports_enabled))
2850                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2851         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2852                                     fbuffer->event, fbuffer->entry,
2853                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2854 }
2855 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2856
2857 /*
2858  * Skip 3:
2859  *
2860  *   trace_buffer_unlock_commit_regs()
2861  *   trace_event_buffer_commit()
2862  *   trace_event_raw_event_xxx()
2863  */
2864 # define STACK_SKIP 3
2865
2866 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2867                                      struct trace_buffer *buffer,
2868                                      struct ring_buffer_event *event,
2869                                      unsigned long flags, int pc,
2870                                      struct pt_regs *regs)
2871 {
2872         __buffer_unlock_commit(buffer, event);
2873
2874         /*
2875          * If regs is not set, then skip the necessary functions.
2876          * Note, we can still get here via blktrace, wakeup tracer
2877          * and mmiotrace, but that's ok if they lose a function or
2878          * two. They are not that meaningful.
2879          */
2880         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2881         ftrace_trace_userstack(tr, buffer, flags, pc);
2882 }
2883
2884 /*
2885  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2886  */
2887 void
2888 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2889                                    struct ring_buffer_event *event)
2890 {
2891         __buffer_unlock_commit(buffer, event);
2892 }
2893
2894 void
2895 trace_function(struct trace_array *tr,
2896                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2897                int pc)
2898 {
2899         struct trace_event_call *call = &event_function;
2900         struct trace_buffer *buffer = tr->array_buffer.buffer;
2901         struct ring_buffer_event *event;
2902         struct ftrace_entry *entry;
2903
2904         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2905                                             flags, pc);
2906         if (!event)
2907                 return;
2908         entry   = ring_buffer_event_data(event);
2909         entry->ip                       = ip;
2910         entry->parent_ip                = parent_ip;
2911
2912         if (!call_filter_check_discard(call, entry, buffer, event)) {
2913                 if (static_branch_unlikely(&trace_function_exports_enabled))
2914                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2915                 __buffer_unlock_commit(buffer, event);
2916         }
2917 }
2918
2919 #ifdef CONFIG_STACKTRACE
2920
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING   4
2923
2924 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2925
2926 struct ftrace_stack {
2927         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929
2930
2931 struct ftrace_stacks {
2932         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2933 };
2934
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937
2938 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2939                                  unsigned long flags,
2940                                  int skip, int pc, struct pt_regs *regs)
2941 {
2942         struct trace_event_call *call = &event_kernel_stack;
2943         struct ring_buffer_event *event;
2944         unsigned int size, nr_entries;
2945         struct ftrace_stack *fstack;
2946         struct stack_entry *entry;
2947         int stackidx;
2948
2949         /*
2950          * Add one, for this function and the call to stack_trace_save()
2951          * If regs is set, then these functions will not be in the way.
2952          */
2953 #ifndef CONFIG_UNWINDER_ORC
2954         if (!regs)
2955                 skip++;
2956 #endif
2957
2958         preempt_disable_notrace();
2959
2960         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961
2962         /* This should never happen. If it does, yell once and skip */
2963         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964                 goto out;
2965
2966         /*
2967          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968          * interrupt will either see the value pre increment or post
2969          * increment. If the interrupt happens pre increment it will have
2970          * restored the counter when it returns.  We just need a barrier to
2971          * keep gcc from moving things around.
2972          */
2973         barrier();
2974
2975         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976         size = ARRAY_SIZE(fstack->calls);
2977
2978         if (regs) {
2979                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980                                                    size, skip);
2981         } else {
2982                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2983         }
2984
2985         size = nr_entries * sizeof(unsigned long);
2986         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2987                                             sizeof(*entry) + size, flags, pc);
2988         if (!event)
2989                 goto out;
2990         entry = ring_buffer_event_data(event);
2991
2992         memcpy(&entry->caller, fstack->calls, size);
2993         entry->size = nr_entries;
2994
2995         if (!call_filter_check_discard(call, entry, buffer, event))
2996                 __buffer_unlock_commit(buffer, event);
2997
2998  out:
2999         /* Again, don't let gcc optimize things here */
3000         barrier();
3001         __this_cpu_dec(ftrace_stack_reserve);
3002         preempt_enable_notrace();
3003
3004 }
3005
3006 static inline void ftrace_trace_stack(struct trace_array *tr,
3007                                       struct trace_buffer *buffer,
3008                                       unsigned long flags,
3009                                       int skip, int pc, struct pt_regs *regs)
3010 {
3011         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3012                 return;
3013
3014         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3015 }
3016
3017 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3018                    int pc)
3019 {
3020         struct trace_buffer *buffer = tr->array_buffer.buffer;
3021
3022         if (rcu_is_watching()) {
3023                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3024                 return;
3025         }
3026
3027         /*
3028          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3029          * but if the above rcu_is_watching() failed, then the NMI
3030          * triggered someplace critical, and rcu_irq_enter() should
3031          * not be called from NMI.
3032          */
3033         if (unlikely(in_nmi()))
3034                 return;
3035
3036         rcu_irq_enter_irqson();
3037         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3038         rcu_irq_exit_irqson();
3039 }
3040
3041 /**
3042  * trace_dump_stack - record a stack back trace in the trace buffer
3043  * @skip: Number of functions to skip (helper handlers)
3044  */
3045 void trace_dump_stack(int skip)
3046 {
3047         unsigned long flags;
3048
3049         if (tracing_disabled || tracing_selftest_running)
3050                 return;
3051
3052         local_save_flags(flags);
3053
3054 #ifndef CONFIG_UNWINDER_ORC
3055         /* Skip 1 to skip this function. */
3056         skip++;
3057 #endif
3058         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3059                              flags, skip, preempt_count(), NULL);
3060 }
3061 EXPORT_SYMBOL_GPL(trace_dump_stack);
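
/*
 * Illustrative sketch: a temporary debugging hack in some driver could record
 * who reached a code path by dropping a backtrace into the trace buffer:
 *
 *	trace_dump_stack(0);
 *
 * With @skip == 0 no extra callers are skipped beyond trace_dump_stack()
 * itself (already accounted for above on non-ORC unwinders).
 */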
3062
3063 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3064 static DEFINE_PER_CPU(int, user_stack_count);
3065
3066 static void
3067 ftrace_trace_userstack(struct trace_array *tr,
3068                        struct trace_buffer *buffer, unsigned long flags, int pc)
3069 {
3070         struct trace_event_call *call = &event_user_stack;
3071         struct ring_buffer_event *event;
3072         struct userstack_entry *entry;
3073
3074         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3075                 return;
3076
3077         /*
3078          * NMIs cannot handle page faults, even with fixups.
3079          * Saving the user stack can (and often does) fault.
3080          */
3081         if (unlikely(in_nmi()))
3082                 return;
3083
3084         /*
3085          * prevent recursion, since the user stack tracing may
3086          * trigger other kernel events.
3087          */
3088         preempt_disable();
3089         if (__this_cpu_read(user_stack_count))
3090                 goto out;
3091
3092         __this_cpu_inc(user_stack_count);
3093
3094         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3095                                             sizeof(*entry), flags, pc);
3096         if (!event)
3097                 goto out_drop_count;
3098         entry   = ring_buffer_event_data(event);
3099
3100         entry->tgid             = current->tgid;
3101         memset(&entry->caller, 0, sizeof(entry->caller));
3102
3103         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3104         if (!call_filter_check_discard(call, entry, buffer, event))
3105                 __buffer_unlock_commit(buffer, event);
3106
3107  out_drop_count:
3108         __this_cpu_dec(user_stack_count);
3109  out:
3110         preempt_enable();
3111 }
3112 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3113 static void ftrace_trace_userstack(struct trace_array *tr,
3114                                    struct trace_buffer *buffer,
3115                                    unsigned long flags, int pc)
3116 {
3117 }
3118 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3119
3120 #endif /* CONFIG_STACKTRACE */
3121
3122 /* created for use with alloc_percpu */
3123 struct trace_buffer_struct {
3124         int nesting;
3125         char buffer[4][TRACE_BUF_SIZE];
3126 };
3127
3128 static struct trace_buffer_struct *trace_percpu_buffer;
3129
3130 /*
3131  * This allows for lockless recording.  If we're nested too deeply, then
3132  * this returns NULL.
3133  */
3134 static char *get_trace_buf(void)
3135 {
3136         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3137
3138         if (!buffer || buffer->nesting >= 4)
3139                 return NULL;
3140
3141         buffer->nesting++;
3142
3143         /* Interrupts must see nesting incremented before we use the buffer */
3144         barrier();
3145         return &buffer->buffer[buffer->nesting - 1][0];
3146 }
3147
3148 static void put_trace_buf(void)
3149 {
3150         /* Don't let the decrement of nesting leak before this */
3151         barrier();
3152         this_cpu_dec(trace_percpu_buffer->nesting);
3153 }
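
/*
 * Illustrative sketch: the canonical pairing of the two helpers above, as
 * used by trace_vbprintk()/trace_vprintk() further down (tbuffer, len, fmt
 * and args are locals of those callers):
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */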
3154
3155 static int alloc_percpu_trace_buffer(void)
3156 {
3157         struct trace_buffer_struct *buffers;
3158
3159         if (trace_percpu_buffer)
3160                 return 0;
3161
3162         buffers = alloc_percpu(struct trace_buffer_struct);
3163         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3164                 return -ENOMEM;
3165
3166         trace_percpu_buffer = buffers;
3167         return 0;
3168 }
3169
3170 static int buffers_allocated;
3171
3172 void trace_printk_init_buffers(void)
3173 {
3174         if (buffers_allocated)
3175                 return;
3176
3177         if (alloc_percpu_trace_buffer())
3178                 return;
3179
3180         /* trace_printk() is for debug use only. Don't use it in production. */
3181
3182         pr_warn("\n");
3183         pr_warn("**********************************************************\n");
3184         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185         pr_warn("**                                                      **\n");
3186         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3187         pr_warn("**                                                      **\n");
3188         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3189         pr_warn("** unsafe for production use.                           **\n");
3190         pr_warn("**                                                      **\n");
3191         pr_warn("** If you see this message and you are not debugging    **\n");
3192         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3193         pr_warn("**                                                      **\n");
3194         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3195         pr_warn("**********************************************************\n");
3196
3197         /* Expand the buffers to set size */
3198         tracing_update_buffers();
3199
3200         buffers_allocated = 1;
3201
3202         /*
3203          * trace_printk_init_buffers() can be called by modules.
3204          * If that happens, then we need to start cmdline recording
3205          * directly here. If the global_trace.buffer is already
3206          * allocated here, then this was called by module code.
3207          */
3208         if (global_trace.array_buffer.buffer)
3209                 tracing_start_cmdline_record();
3210 }
3211 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3212
3213 void trace_printk_start_comm(void)
3214 {
3215         /* Start tracing comms if trace printk is set */
3216         if (!buffers_allocated)
3217                 return;
3218         tracing_start_cmdline_record();
3219 }
3220
3221 static void trace_printk_start_stop_comm(int enabled)
3222 {
3223         if (!buffers_allocated)
3224                 return;
3225
3226         if (enabled)
3227                 tracing_start_cmdline_record();
3228         else
3229                 tracing_stop_cmdline_record();
3230 }
3231
3232 /**
3233  * trace_vbprintk - write binary msg to tracing buffer
3234  * @ip:    The address of the caller
3235  * @fmt:   The string format to write to the buffer
3236  * @args:  Arguments for @fmt
3237  */
3238 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3239 {
3240         struct trace_event_call *call = &event_bprint;
3241         struct ring_buffer_event *event;
3242         struct trace_buffer *buffer;
3243         struct trace_array *tr = &global_trace;
3244         struct bprint_entry *entry;
3245         unsigned long flags;
3246         char *tbuffer;
3247         int len = 0, size, pc;
3248
3249         if (unlikely(tracing_selftest_running || tracing_disabled))
3250                 return 0;
3251
3252         /* Don't pollute graph traces with trace_vprintk internals */
3253         pause_graph_tracing();
3254
3255         pc = preempt_count();
3256         preempt_disable_notrace();
3257
3258         tbuffer = get_trace_buf();
3259         if (!tbuffer) {
3260                 len = 0;
3261                 goto out_nobuffer;
3262         }
3263
3264         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3265
3266         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3267                 goto out_put;
3268
3269         local_save_flags(flags);
3270         size = sizeof(*entry) + sizeof(u32) * len;
3271         buffer = tr->array_buffer.buffer;
3272         ring_buffer_nest_start(buffer);
3273         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3274                                             flags, pc);
3275         if (!event)
3276                 goto out;
3277         entry = ring_buffer_event_data(event);
3278         entry->ip                       = ip;
3279         entry->fmt                      = fmt;
3280
3281         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3282         if (!call_filter_check_discard(call, entry, buffer, event)) {
3283                 __buffer_unlock_commit(buffer, event);
3284                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3285         }
3286
3287 out:
3288         ring_buffer_nest_end(buffer);
3289 out_put:
3290         put_trace_buf();
3291
3292 out_nobuffer:
3293         preempt_enable_notrace();
3294         unpause_graph_tracing();
3295
3296         return len;
3297 }
3298 EXPORT_SYMBOL_GPL(trace_vbprintk);
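/*
 * Sketch of how a printf-style caller would typically forward its varargs
 * into trace_vbprintk(); the wrapper name below is hypothetical, only the
 * va_start()/va_end() pattern is the point:
 *
 *	int my_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */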
3299
3300 __printf(3, 0)
3301 static int
3302 __trace_array_vprintk(struct trace_buffer *buffer,
3303                       unsigned long ip, const char *fmt, va_list args)
3304 {
3305         struct trace_event_call *call = &event_print;
3306         struct ring_buffer_event *event;
3307         int len = 0, size, pc;
3308         struct print_entry *entry;
3309         unsigned long flags;
3310         char *tbuffer;
3311
3312         if (tracing_disabled || tracing_selftest_running)
3313                 return 0;
3314
3315         /* Don't pollute graph traces with trace_vprintk internals */
3316         pause_graph_tracing();
3317
3318         pc = preempt_count();
3319         preempt_disable_notrace();
3320
3322         tbuffer = get_trace_buf();
3323         if (!tbuffer) {
3324                 len = 0;
3325                 goto out_nobuffer;
3326         }
3327
3328         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3329
3330         local_save_flags(flags);
3331         size = sizeof(*entry) + len + 1;
3332         ring_buffer_nest_start(buffer);
3333         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3334                                             flags, pc);
3335         if (!event)
3336                 goto out;
3337         entry = ring_buffer_event_data(event);
3338         entry->ip = ip;
3339
3340         memcpy(&entry->buf, tbuffer, len + 1);
3341         if (!call_filter_check_discard(call, entry, buffer, event)) {
3342                 __buffer_unlock_commit(buffer, event);
3343                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3344         }
3345
3346 out:
3347         ring_buffer_nest_end(buffer);
3348         put_trace_buf();
3349
3350 out_nobuffer:
3351         preempt_enable_notrace();
3352         unpause_graph_tracing();
3353
3354         return len;
3355 }
3356
3357 __printf(3, 0)
3358 int trace_array_vprintk(struct trace_array *tr,
3359                         unsigned long ip, const char *fmt, va_list args)
3360 {
3361         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3362 }
3363
3364 /**
3365  * trace_array_printk - Print a message to a specific instance
3366  * @tr: The instance trace_array descriptor
3367  * @ip: The instruction pointer that this is called from.
3368  * @fmt: The format to print (printf format)
3369  *
3370  * If a subsystem sets up its own instance, it has the right to
3371  * printk strings into its tracing instance buffer using this
3372  * function. Note, this function will not write into the top level
3373  * buffer (use trace_printk() for that), as the top level buffer
3374  * should only contain events that can be individually disabled.
3375  * trace_printk() is only meant for debugging a kernel, and should
3376  * never be incorporated into normal use.
3377  *
3378  * trace_array_printk() can be used, as it will not add noise to the
3379  * top level tracing buffer.
3380  *
3381  * Note, trace_array_init_printk() must be called on @tr before this
3382  * can be used.
3383  */
3384 __printf(3, 0)
3385 int trace_array_printk(struct trace_array *tr,
3386                        unsigned long ip, const char *fmt, ...)
3387 {
3388         int ret;
3389         va_list ap;
3390
3391         if (!tr)
3392                 return -ENOENT;
3393
3394         /* This is only allowed for created instances */
3395         if (tr == &global_trace)
3396                 return 0;
3397
3398         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3399                 return 0;
3400
3401         va_start(ap, fmt);
3402         ret = trace_array_vprintk(tr, ip, fmt, ap);
3403         va_end(ap);
3404         return ret;
3405 }
3406 EXPORT_SYMBOL_GPL(trace_array_printk);
3407
3408 /**
3409  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3410  * @tr: The trace array to initialize the buffers for
3411  *
3412  * As trace_array_printk() only writes into instances, such calls are OK
3413  * to have in the kernel (unlike trace_printk()). This needs to be called
3414  * before trace_array_printk() can be used on a trace_array.
3415  */
3416 int trace_array_init_printk(struct trace_array *tr)
3417 {
3418         if (!tr)
3419                 return -ENOENT;
3420
3421         /* This is only allowed for created instances */
3422         if (tr == &global_trace)
3423                 return -EINVAL;
3424
3425         return alloc_percpu_trace_buffer();
3426 }
3427 EXPORT_SYMBOL_GPL(trace_array_init_printk);
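/*
 * Minimal usage sketch for the two functions above; "my_instance" and the
 * surrounding context are hypothetical, while trace_array_get_by_name(),
 * trace_array_put() and _THIS_IP_ are the usual kernel interfaces:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *	trace_array_put(tr);
 */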
3428
3429 __printf(3, 4)
3430 int trace_array_printk_buf(struct trace_buffer *buffer,
3431                            unsigned long ip, const char *fmt, ...)
3432 {
3433         int ret;
3434         va_list ap;
3435
3436         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3437                 return 0;
3438
3439         va_start(ap, fmt);
3440         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3441         va_end(ap);
3442         return ret;
3443 }
3444
3445 __printf(2, 0)
3446 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3447 {
3448         return trace_array_vprintk(&global_trace, ip, fmt, args);
3449 }
3450 EXPORT_SYMBOL_GPL(trace_vprintk);
3451
3452 static void trace_iterator_increment(struct trace_iterator *iter)
3453 {
3454         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3455
3456         iter->idx++;
3457         if (buf_iter)
3458                 ring_buffer_iter_advance(buf_iter);
3459 }
3460
3461 static struct trace_entry *
3462 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3463                 unsigned long *lost_events)
3464 {
3465         struct ring_buffer_event *event;
3466         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3467
3468         if (buf_iter) {
3469                 event = ring_buffer_iter_peek(buf_iter, ts);
3470                 if (lost_events)
3471                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3472                                 (unsigned long)-1 : 0;
3473         } else {
3474                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3475                                          lost_events);
3476         }
3477
3478         if (event) {
3479                 iter->ent_size = ring_buffer_event_length(event);
3480                 return ring_buffer_event_data(event);
3481         }
3482         iter->ent_size = 0;
3483         return NULL;
3484 }
3485
3486 static struct trace_entry *
3487 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3488                   unsigned long *missing_events, u64 *ent_ts)
3489 {
3490         struct trace_buffer *buffer = iter->array_buffer->buffer;
3491         struct trace_entry *ent, *next = NULL;
3492         unsigned long lost_events = 0, next_lost = 0;
3493         int cpu_file = iter->cpu_file;
3494         u64 next_ts = 0, ts;
3495         int next_cpu = -1;
3496         int next_size = 0;
3497         int cpu;
3498
3499         /*
3500          * If we are in a per_cpu trace file, don't bother iterating over
3501          * all CPUs; peek at that one directly.
3502          */
3503         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3504                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3505                         return NULL;
3506                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3507                 if (ent_cpu)
3508                         *ent_cpu = cpu_file;
3509
3510                 return ent;
3511         }
3512
3513         for_each_tracing_cpu(cpu) {
3514
3515                 if (ring_buffer_empty_cpu(buffer, cpu))
3516                         continue;
3517
3518                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3519
3520                 /*
3521                  * Pick the entry with the smallest timestamp:
3522                  */
3523                 if (ent && (!next || ts < next_ts)) {
3524                         next = ent;
3525                         next_cpu = cpu;
3526                         next_ts = ts;
3527                         next_lost = lost_events;
3528                         next_size = iter->ent_size;
3529                 }
3530         }
3531
3532         iter->ent_size = next_size;
3533
3534         if (ent_cpu)
3535                 *ent_cpu = next_cpu;
3536
3537         if (ent_ts)
3538                 *ent_ts = next_ts;
3539
3540         if (missing_events)
3541                 *missing_events = next_lost;
3542
3543         return next;
3544 }
3545
3546 #define STATIC_TEMP_BUF_SIZE    128
3547 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3548
3549 /* Find the next real entry, without updating the iterator itself */
3550 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3551                                           int *ent_cpu, u64 *ent_ts)
3552 {
3553         /* __find_next_entry will reset ent_size */
3554         int ent_size = iter->ent_size;
3555         struct trace_entry *entry;
3556
3557         /*
3558          * If called from ftrace_dump(), then the iter->temp buffer
3559          * will be the static_temp_buf and not created from kmalloc.
3560          * If the entry size is greater than the buffer, we cannot
3561          * save it. Just return NULL in that case. This is only
3562          * used to add markers when two consecutive events' time
3563          * stamps have a large delta. See trace_print_lat_context().
3564          */
3565         if (iter->temp == static_temp_buf &&
3566             STATIC_TEMP_BUF_SIZE < ent_size)
3567                 return NULL;
3568
3569         /*
3570          * __find_next_entry() may call peek_next_entry(), which may call
3571          * ring_buffer_peek(), and that can leave the contents of iter->ent
3572          * undefined. Copy iter->ent now.
3573          */
3574         if (iter->ent && iter->ent != iter->temp) {
3575                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3576                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3577                         void *temp;
3578                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3579                         if (!temp)
3580                                 return NULL;
3581                         kfree(iter->temp);
3582                         iter->temp = temp;
3583                         iter->temp_size = iter->ent_size;
3584                 }
3585                 memcpy(iter->temp, iter->ent, iter->ent_size);
3586                 iter->ent = iter->temp;
3587         }
3588         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3589         /* Put back the original ent_size */
3590         iter->ent_size = ent_size;
3591
3592         return entry;
3593 }
3594
3595 /* Find the next real entry, and increment the iterator to the next entry */
3596 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3597 {
3598         iter->ent = __find_next_entry(iter, &iter->cpu,
3599                                       &iter->lost_events, &iter->ts);
3600
3601         if (iter->ent)
3602                 trace_iterator_increment(iter);
3603
3604         return iter->ent ? iter : NULL;
3605 }
3606
3607 static void trace_consume(struct trace_iterator *iter)
3608 {
3609         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3610                             &iter->lost_events);
3611 }
3612
3613 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3614 {
3615         struct trace_iterator *iter = m->private;
3616         int i = (int)*pos;
3617         void *ent;
3618
3619         WARN_ON_ONCE(iter->leftover);
3620
3621         (*pos)++;
3622
3623         /* can't go backwards */
3624         if (iter->idx > i)
3625                 return NULL;
3626
3627         if (iter->idx < 0)
3628                 ent = trace_find_next_entry_inc(iter);
3629         else
3630                 ent = iter;
3631
3632         while (ent && iter->idx < i)
3633                 ent = trace_find_next_entry_inc(iter);
3634
3635         iter->pos = *pos;
3636
3637         return ent;
3638 }
3639
3640 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3641 {
3642         struct ring_buffer_iter *buf_iter;
3643         unsigned long entries = 0;
3644         u64 ts;
3645
3646         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3647
3648         buf_iter = trace_buffer_iter(iter, cpu);
3649         if (!buf_iter)
3650                 return;
3651
3652         ring_buffer_iter_reset(buf_iter);
3653
3654         /*
3655          * With the max latency tracers, a reset may never have taken
3656          * place on a CPU. This is evident from the timestamp being
3657          * before the start of the buffer.
3658          */
3659         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3660                 if (ts >= iter->array_buffer->time_start)
3661                         break;
3662                 entries++;
3663                 ring_buffer_iter_advance(buf_iter);
3664         }
3665
3666         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3667 }
3668
3669 /*
3670  * The current tracer is copied to avoid holding a global lock
3671  * all around.
3672  */
3673 static void *s_start(struct seq_file *m, loff_t *pos)
3674 {
3675         struct trace_iterator *iter = m->private;
3676         struct trace_array *tr = iter->tr;
3677         int cpu_file = iter->cpu_file;
3678         void *p = NULL;
3679         loff_t l = 0;
3680         int cpu;
3681
3682         /*
3683          * Copy the tracer to avoid using a global lock all around.
3684          * iter->trace is a copy of current_trace; the name pointer
3685          * may be compared instead of using strcmp(), as iter->trace->name
3686          * will point to the same string as current_trace->name.
3687          */
3688         mutex_lock(&trace_types_lock);
3689         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3690                 *iter->trace = *tr->current_trace;
3691         mutex_unlock(&trace_types_lock);
3692
3693 #ifdef CONFIG_TRACER_MAX_TRACE
3694         if (iter->snapshot && iter->trace->use_max_tr)
3695                 return ERR_PTR(-EBUSY);
3696 #endif
3697
3698         if (!iter->snapshot)
3699                 atomic_inc(&trace_record_taskinfo_disabled);
3700
3701         if (*pos != iter->pos) {
3702                 iter->ent = NULL;
3703                 iter->cpu = 0;
3704                 iter->idx = -1;
3705
3706                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3707                         for_each_tracing_cpu(cpu)
3708                                 tracing_iter_reset(iter, cpu);
3709                 } else
3710                         tracing_iter_reset(iter, cpu_file);
3711
3712                 iter->leftover = 0;
3713                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3714                         ;
3715
3716         } else {
3717                 /*
3718                  * If we overflowed the seq_file before, then we want
3719                  * to just reuse the trace_seq buffer again.
3720                  */
3721                 if (iter->leftover)
3722                         p = iter;
3723                 else {
3724                         l = *pos - 1;
3725                         p = s_next(m, p, &l);
3726                 }
3727         }
3728
3729         trace_event_read_lock();
3730         trace_access_lock(cpu_file);
3731         return p;
3732 }
3733
3734 static void s_stop(struct seq_file *m, void *p)
3735 {
3736         struct trace_iterator *iter = m->private;
3737
3738 #ifdef CONFIG_TRACER_MAX_TRACE
3739         if (iter->snapshot && iter->trace->use_max_tr)
3740                 return;
3741 #endif
3742
3743         if (!iter->snapshot)
3744                 atomic_dec(&trace_record_taskinfo_disabled);
3745
3746         trace_access_unlock(iter->cpu_file);
3747         trace_event_read_unlock();
3748 }
3749
3750 static void
3751 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3752                       unsigned long *entries, int cpu)
3753 {
3754         unsigned long count;
3755
3756         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3757         /*
3758          * If this buffer has skipped entries, then we hold all
3759          * entries for the trace and we need to ignore the
3760          * ones before the time stamp.
3761          */
3762         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3763                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3764                 /* total is the same as the entries */
3765                 *total = count;
3766         } else
3767                 *total = count +
3768                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3769         *entries = count;
3770 }
3771
3772 static void
3773 get_total_entries(struct array_buffer *buf,
3774                   unsigned long *total, unsigned long *entries)
3775 {
3776         unsigned long t, e;
3777         int cpu;
3778
3779         *total = 0;
3780         *entries = 0;
3781
3782         for_each_tracing_cpu(cpu) {
3783                 get_total_entries_cpu(buf, &t, &e, cpu);
3784                 *total += t;
3785                 *entries += e;
3786         }
3787 }
3788
3789 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3790 {
3791         unsigned long total, entries;
3792
3793         if (!tr)
3794                 tr = &global_trace;
3795
3796         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3797
3798         return entries;
3799 }
3800
3801 unsigned long trace_total_entries(struct trace_array *tr)
3802 {
3803         unsigned long total, entries;
3804
3805         if (!tr)
3806                 tr = &global_trace;
3807
3808         get_total_entries(&tr->array_buffer, &total, &entries);
3809
3810         return entries;
3811 }
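/*
 * Both helpers above accept a NULL trace_array and fall back to the
 * top-level buffer, e.g. (sketch):
 *
 *	unsigned long n = trace_total_entries(NULL);	// global_trace entries
 */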
3812
3813 static void print_lat_help_header(struct seq_file *m)
3814 {
3815         seq_puts(m, "#                    _------=> CPU#            \n"
3816                     "#                   / _-----=> irqs-off        \n"
3817                     "#                  | / _----=> need-resched    \n"
3818                     "#                  || / _---=> hardirq/softirq \n"
3819                     "#                  ||| / _--=> preempt-depth   \n"
3820                     "#                  |||| /     delay            \n"
3821                     "#  cmd     pid     ||||| time  |   caller      \n"
3822                     "#     \\   /        |||||  \\    |   /         \n");
3823 }
3824
3825 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3826 {
3827         unsigned long total;
3828         unsigned long entries;
3829
3830         get_total_entries(buf, &total, &entries);
3831         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3832                    entries, total, num_online_cpus());
3833         seq_puts(m, "#\n");
3834 }
3835
3836 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3837                                    unsigned int flags)
3838 {
3839         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3840
3841         print_event_info(buf, m);
3842
3843         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3844         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3845 }
3846
3847 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3848                                        unsigned int flags)
3849 {
3850         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3851         const char *space = "            ";
3852         int prec = tgid ? 12 : 2;
3853
3854         print_event_info(buf, m);
3855
3856         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3857         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3858         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3859         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3860         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3861         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3862         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3863 }
3864
3865 void
3866 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3867 {
3868         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3869         struct array_buffer *buf = iter->array_buffer;
3870         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3871         struct tracer *type = iter->trace;
3872         unsigned long entries;
3873         unsigned long total;
3874         const char *name;
3875
3876         name = type->name;
3877
3878         get_total_entries(buf, &total, &entries);
3879
3880         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3881                    name, UTS_RELEASE);
3882         seq_puts(m, "# -----------------------------------"
3883                  "---------------------------------\n");
3884         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3885                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3886                    nsecs_to_usecs(data->saved_latency),
3887                    entries,
3888                    total,
3889                    buf->cpu,
3890 #if defined(CONFIG_PREEMPT_NONE)
3891                    "server",
3892 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3893                    "desktop",
3894 #elif defined(CONFIG_PREEMPT)
3895                    "preempt",
3896 #elif defined(CONFIG_PREEMPT_RT)
3897                    "preempt_rt",
3898 #else
3899                    "unknown",
3900 #endif
3901                    /* These are reserved for later use */
3902                    0, 0, 0, 0);
3903 #ifdef CONFIG_SMP
3904         seq_printf(m, " #P:%d)\n", num_online_cpus());
3905 #else
3906         seq_puts(m, ")\n");
3907 #endif
3908         seq_puts(m, "#    -----------------\n");
3909         seq_printf(m, "#    | task: %.16s-%d "
3910                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3911                    data->comm, data->pid,
3912                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3913                    data->policy, data->rt_priority);
3914         seq_puts(m, "#    -----------------\n");
3915
3916         if (data->critical_start) {
3917                 seq_puts(m, "#  => started at: ");
3918                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3919                 trace_print_seq(m, &iter->seq);
3920                 seq_puts(m, "\n#  => ended at:   ");
3921                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3922                 trace_print_seq(m, &iter->seq);
3923                 seq_puts(m, "\n#\n");
3924         }
3925
3926         seq_puts(m, "#\n");
3927 }
3928
3929 static void test_cpu_buff_start(struct trace_iterator *iter)
3930 {
3931         struct trace_seq *s = &iter->seq;
3932         struct trace_array *tr = iter->tr;
3933
3934         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3935                 return;
3936
3937         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3938                 return;
3939
3940         if (cpumask_available(iter->started) &&
3941             cpumask_test_cpu(iter->cpu, iter->started))
3942                 return;
3943
3944         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3945                 return;
3946
3947         if (cpumask_available(iter->started))
3948                 cpumask_set_cpu(iter->cpu, iter->started);
3949
3950         /* Don't print the "buffer started" note for the first entry of the trace */
3951         if (iter->idx > 1)
3952                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3953                                 iter->cpu);
3954 }
3955
3956 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3957 {
3958         struct trace_array *tr = iter->tr;
3959         struct trace_seq *s = &iter->seq;
3960         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3961         struct trace_entry *entry;
3962         struct trace_event *event;
3963
3964         entry = iter->ent;
3965
3966         test_cpu_buff_start(iter);
3967
3968         event = ftrace_find_event(entry->type);
3969
3970         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3971                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3972                         trace_print_lat_context(iter);
3973                 else
3974                         trace_print_context(iter);
3975         }
3976
3977         if (trace_seq_has_overflowed(s))
3978                 return TRACE_TYPE_PARTIAL_LINE;
3979
3980         if (event)
3981                 return event->funcs->trace(iter, sym_flags, event);
3982
3983         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3984
3985         return trace_handle_return(s);
3986 }
3987
3988 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3989 {
3990         struct trace_array *tr = iter->tr;
3991         struct trace_seq *s = &iter->seq;
3992         struct trace_entry *entry;
3993         struct trace_event *event;
3994
3995         entry = iter->ent;
3996
3997         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3998                 trace_seq_printf(s, "%d %d %llu ",
3999                                  entry->pid, iter->cpu, iter->ts);
4000
4001         if (trace_seq_has_overflowed(s))
4002                 return TRACE_TYPE_PARTIAL_LINE;
4003
4004         event = ftrace_find_event(entry->type);
4005         if (event)
4006                 return event->funcs->raw(iter, 0, event);
4007
4008         trace_seq_printf(s, "%d ?\n", entry->type);
4009
4010         return trace_handle_return(s);
4011 }
4012
4013 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4014 {
4015         struct trace_array *tr = iter->tr;
4016         struct trace_seq *s = &iter->seq;
4017         unsigned char newline = '\n';
4018         struct trace_entry *entry;
4019         struct trace_event *event;
4020
4021         entry = iter->ent;
4022
4023         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4024                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4025                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4026                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4027                 if (trace_seq_has_overflowed(s))
4028                         return TRACE_TYPE_PARTIAL_LINE;
4029         }
4030
4031         event = ftrace_find_event(entry->type);
4032         if (event) {
4033                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4034                 if (ret != TRACE_TYPE_HANDLED)
4035                         return ret;
4036         }
4037
4038         SEQ_PUT_FIELD(s, newline);
4039
4040         return trace_handle_return(s);
4041 }
4042
4043 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4044 {
4045         struct trace_array *tr = iter->tr;
4046         struct trace_seq *s = &iter->seq;
4047         struct trace_entry *entry;
4048         struct trace_event *event;
4049
4050         entry = iter->ent;
4051
4052         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4053                 SEQ_PUT_FIELD(s, entry->pid);
4054                 SEQ_PUT_FIELD(s, iter->cpu);
4055                 SEQ_PUT_FIELD(s, iter->ts);
4056                 if (trace_seq_has_overflowed(s))
4057                         return TRACE_TYPE_PARTIAL_LINE;
4058         }
4059
4060         event = ftrace_find_event(entry->type);
4061         return event ? event->funcs->binary(iter, 0, event) :
4062                 TRACE_TYPE_HANDLED;
4063 }
4064
4065 int trace_empty(struct trace_iterator *iter)
4066 {
4067         struct ring_buffer_iter *buf_iter;
4068         int cpu;
4069
4070         /* If we are looking at one CPU buffer, only check that one */
4071         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4072                 cpu = iter->cpu_file;
4073                 buf_iter = trace_buffer_iter(iter, cpu);
4074                 if (buf_iter) {
4075                         if (!ring_buffer_iter_empty(buf_iter))
4076                                 return 0;
4077                 } else {
4078                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4079                                 return 0;
4080                 }
4081                 return 1;
4082         }
4083
4084         for_each_tracing_cpu(cpu) {
4085                 buf_iter = trace_buffer_iter(iter, cpu);
4086                 if (buf_iter) {
4087                         if (!ring_buffer_iter_empty(buf_iter))
4088                                 return 0;
4089                 } else {
4090                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4091                                 return 0;
4092                 }
4093         }
4094
4095         return 1;
4096 }
4097
4098 /*  Called with trace_event_read_lock() held. */
4099 enum print_line_t print_trace_line(struct trace_iterator *iter)
4100 {
4101         struct trace_array *tr = iter->tr;
4102         unsigned long trace_flags = tr->trace_flags;
4103         enum print_line_t ret;
4104
4105         if (iter->lost_events) {
4106                 if (iter->lost_events == (unsigned long)-1)
4107                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4108                                          iter->cpu);
4109                 else
4110                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4111                                          iter->cpu, iter->lost_events);
4112                 if (trace_seq_has_overflowed(&iter->seq))
4113                         return TRACE_TYPE_PARTIAL_LINE;
4114         }
4115
4116         if (iter->trace && iter->trace->print_line) {
4117                 ret = iter->trace->print_line(iter);
4118                 if (ret != TRACE_TYPE_UNHANDLED)
4119                         return ret;
4120         }
4121
4122         if (iter->ent->type == TRACE_BPUTS &&
4123                         trace_flags & TRACE_ITER_PRINTK &&
4124                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4125                 return trace_print_bputs_msg_only(iter);
4126
4127         if (iter->ent->type == TRACE_BPRINT &&
4128                         trace_flags & TRACE_ITER_PRINTK &&
4129                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4130                 return trace_print_bprintk_msg_only(iter);
4131
4132         if (iter->ent->type == TRACE_PRINT &&
4133                         trace_flags & TRACE_ITER_PRINTK &&
4134                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4135                 return trace_print_printk_msg_only(iter);
4136
4137         if (trace_flags & TRACE_ITER_BIN)
4138                 return print_bin_fmt(iter);
4139
4140         if (trace_flags & TRACE_ITER_HEX)
4141                 return print_hex_fmt(iter);
4142
4143         if (trace_flags & TRACE_ITER_RAW)
4144                 return print_raw_fmt(iter);
4145
4146         return print_trace_fmt(iter);
4147 }
4148
4149 void trace_latency_header(struct seq_file *m)
4150 {
4151         struct trace_iterator *iter = m->private;
4152         struct trace_array *tr = iter->tr;
4153
4154         /* print nothing if the buffers are empty */
4155         if (trace_empty(iter))
4156                 return;
4157
4158         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4159                 print_trace_header(m, iter);
4160
4161         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4162                 print_lat_help_header(m);
4163 }
4164
4165 void trace_default_header(struct seq_file *m)
4166 {
4167         struct trace_iterator *iter = m->private;
4168         struct trace_array *tr = iter->tr;
4169         unsigned long trace_flags = tr->trace_flags;
4170
4171         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4172                 return;
4173
4174         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4175                 /* print nothing if the buffers are empty */
4176                 if (trace_empty(iter))
4177                         return;
4178                 print_trace_header(m, iter);
4179                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4180                         print_lat_help_header(m);
4181         } else {
4182                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4183                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4184                                 print_func_help_header_irq(iter->array_buffer,
4185                                                            m, trace_flags);
4186                         else
4187                                 print_func_help_header(iter->array_buffer, m,
4188                                                        trace_flags);
4189                 }
4190         }
4191 }
4192
4193 static void test_ftrace_alive(struct seq_file *m)
4194 {
4195         if (!ftrace_is_dead())
4196                 return;
4197         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4198                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4199 }
4200
4201 #ifdef CONFIG_TRACER_MAX_TRACE
4202 static void show_snapshot_main_help(struct seq_file *m)
4203 {
4204         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4205                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4206                     "#                      Takes a snapshot of the main buffer.\n"
4207                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4208                     "#                      (Doesn't have to be '2', works with any number that\n"
4209                     "#                       is not a '0' or '1')\n");
4210 }
4211
4212 static void show_snapshot_percpu_help(struct seq_file *m)
4213 {
4214         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4215 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4216         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4217                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4218 #else
4219         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4220                     "#                     Must use main snapshot file to allocate.\n");
4221 #endif
4222         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4223                     "#                      (Doesn't have to be '2', works with any number that\n"
4224                     "#                       is not a '0' or '1')\n");
4225 }
4226
4227 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4228 {
4229         if (iter->tr->allocated_snapshot)
4230                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4231         else
4232                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4233
4234         seq_puts(m, "# Snapshot commands:\n");
4235         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4236                 show_snapshot_main_help(m);
4237         else
4238                 show_snapshot_percpu_help(m);
4239 }
4240 #else
4241 /* Should never be called */
4242 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4243 #endif
4244
4245 static int s_show(struct seq_file *m, void *v)
4246 {
4247         struct trace_iterator *iter = v;
4248         int ret;
4249
4250         if (iter->ent == NULL) {
4251                 if (iter->tr) {
4252                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4253                         seq_puts(m, "#\n");
4254                         test_ftrace_alive(m);
4255                 }
4256                 if (iter->snapshot && trace_empty(iter))
4257                         print_snapshot_help(m, iter);
4258                 else if (iter->trace && iter->trace->print_header)
4259                         iter->trace->print_header(m);
4260                 else
4261                         trace_default_header(m);
4262
4263         } else if (iter->leftover) {
4264                 /*
4265                  * If we filled the seq_file buffer earlier, we
4266                  * want to just show it now.
4267                  */
4268                 ret = trace_print_seq(m, &iter->seq);
4269
4270                 /* ret should this time be zero, but you never know */
4271                 iter->leftover = ret;
4272
4273         } else {
4274                 print_trace_line(iter);
4275                 ret = trace_print_seq(m, &iter->seq);
4276                 /*
4277                  * If we overflow the seq_file buffer, then it will
4278                  * ask us for this data again at start up.
4279                  * Use that instead.
4280                  *  ret is 0 if seq_file write succeeded.
4281                  *        -1 otherwise.
4282                  */
4283                 iter->leftover = ret;
4284         }
4285
4286         return 0;
4287 }
4288
4289 /*
4290  * Should be used after trace_array_get(); trace_types_lock
4291  * ensures that i_cdev was already initialized.
4292  */
4293 static inline int tracing_get_cpu(struct inode *inode)
4294 {
4295         if (inode->i_cdev) /* See trace_create_cpu_file() */
4296                 return (long)inode->i_cdev - 1;
4297         return RING_BUFFER_ALL_CPUS;
4298 }
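/*
 * The encoding read back above is set up when the per-CPU files are
 * created; roughly (see trace_create_cpu_file(), sketch only):
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);
 *
 * so a NULL i_cdev naturally means "all CPUs".
 */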
4299
4300 static const struct seq_operations tracer_seq_ops = {
4301         .start          = s_start,
4302         .next           = s_next,
4303         .stop           = s_stop,
4304         .show           = s_show,
4305 };
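/*
 * Rough seq_file call order for the iterator above (a sketch, not the
 * actual seq_read() implementation, which also handles buffering and
 * overflow):
 *
 *	p = s_start(m, &pos);
 *	while (p && !IS_ERR(p)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 */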
4306
4307 static struct trace_iterator *
4308 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4309 {
4310         struct trace_array *tr = inode->i_private;
4311         struct trace_iterator *iter;
4312         int cpu;
4313
4314         if (tracing_disabled)
4315                 return ERR_PTR(-ENODEV);
4316
4317         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4318         if (!iter)
4319                 return ERR_PTR(-ENOMEM);
4320
4321         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4322                                     GFP_KERNEL);
4323         if (!iter->buffer_iter)
4324                 goto release;
4325
4326         /*
4327          * trace_find_next_entry() may need to save off iter->ent.
4328          * It will place it into the iter->temp buffer. As most
4329          * events are less than 128 bytes, allocate a buffer of that size.
4330          * If one is greater, then trace_find_next_entry() will
4331          * allocate a new buffer to adjust for the bigger iter->ent.
4332          * It's not critical if it fails to get allocated here.
4333          */
4334         iter->temp = kmalloc(128, GFP_KERNEL);
4335         if (iter->temp)
4336                 iter->temp_size = 128;
4337
4338         /*
4339          * We make a copy of the current tracer to avoid concurrent
4340          * changes on it while we are reading.
4341          */
4342         mutex_lock(&trace_types_lock);
4343         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4344         if (!iter->trace)
4345                 goto fail;
4346
4347         *iter->trace = *tr->current_trace;
4348
4349         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4350                 goto fail;
4351
4352         iter->tr = tr;
4353
4354 #ifdef CONFIG_TRACER_MAX_TRACE
4355         /* Currently only the top directory has a snapshot */
4356         if (tr->current_trace->print_max || snapshot)
4357                 iter->array_buffer = &tr->max_buffer;
4358         else
4359 #endif
4360                 iter->array_buffer = &tr->array_buffer;
4361         iter->snapshot = snapshot;
4362         iter->pos = -1;
4363         iter->cpu_file = tracing_get_cpu(inode);
4364         mutex_init(&iter->mutex);
4365
4366         /* Notify the tracer early; before we stop tracing. */
4367         if (iter->trace->open)
4368                 iter->trace->open(iter);
4369
4370         /* Annotate start of buffers if we had overruns */
4371         if (ring_buffer_overruns(iter->array_buffer->buffer))
4372                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4373
4374         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4375         if (trace_clocks[tr->clock_id].in_ns)
4376                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4377
4378         /*
4379          * If pause-on-trace is enabled, then stop the trace while
4380          * dumping, unless this is the "snapshot" file
4381          */
4382         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4383                 tracing_stop_tr(tr);
4384
4385         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4386                 for_each_tracing_cpu(cpu) {
4387                         iter->buffer_iter[cpu] =
4388                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4389                                                          cpu, GFP_KERNEL);
4390                 }
4391                 ring_buffer_read_prepare_sync();
4392                 for_each_tracing_cpu(cpu) {
4393                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4394                         tracing_iter_reset(iter, cpu);
4395                 }
4396         } else {
4397                 cpu = iter->cpu_file;
4398                 iter->buffer_iter[cpu] =
4399                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4400                                                  cpu, GFP_KERNEL);
4401                 ring_buffer_read_prepare_sync();
4402                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4403                 tracing_iter_reset(iter, cpu);
4404         }
4405
4406         mutex_unlock(&trace_types_lock);
4407
4408         return iter;
4409
4410  fail:
4411         mutex_unlock(&trace_types_lock);
4412         kfree(iter->trace);
4413         kfree(iter->temp);
4414         kfree(iter->buffer_iter);
4415 release:
4416         seq_release_private(inode, file);
4417         return ERR_PTR(-ENOMEM);
4418 }
4419
4420 int tracing_open_generic(struct inode *inode, struct file *filp)
4421 {
4422         int ret;
4423
4424         ret = tracing_check_open_get_tr(NULL);
4425         if (ret)
4426                 return ret;
4427
4428         filp->private_data = inode->i_private;
4429         return 0;
4430 }
4431
4432 bool tracing_is_disabled(void)
4433 {
4434         return (tracing_disabled) ? true : false;
4435 }
4436
4437 /*
4438  * Open and update trace_array ref count.
4439  * Must have the current trace_array passed to it.
4440  */
4441 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4442 {
4443         struct trace_array *tr = inode->i_private;
4444         int ret;
4445
4446         ret = tracing_check_open_get_tr(tr);
4447         if (ret)
4448                 return ret;
4449
4450         filp->private_data = inode->i_private;
4451
4452         return 0;
4453 }
4454
4455 static int tracing_release(struct inode *inode, struct file *file)
4456 {
4457         struct trace_array *tr = inode->i_private;
4458         struct seq_file *m = file->private_data;
4459         struct trace_iterator *iter;
4460         int cpu;
4461
4462         if (!(file->f_mode & FMODE_READ)) {
4463                 trace_array_put(tr);
4464                 return 0;
4465         }
4466
4467         /* Writes do not use seq_file */
4468         iter = m->private;
4469         mutex_lock(&trace_types_lock);
4470
4471         for_each_tracing_cpu(cpu) {
4472                 if (iter->buffer_iter[cpu])
4473                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4474         }
4475
4476         if (iter->trace && iter->trace->close)
4477                 iter->trace->close(iter);
4478
4479         if (!iter->snapshot && tr->stop_count)
4480                 /* reenable tracing if it was previously enabled */
4481                 tracing_start_tr(tr);
4482
4483         __trace_array_put(tr);
4484
4485         mutex_unlock(&trace_types_lock);
4486
4487         mutex_destroy(&iter->mutex);
4488         free_cpumask_var(iter->started);
4489         kfree(iter->temp);
4490         kfree(iter->trace);
4491         kfree(iter->buffer_iter);
4492         seq_release_private(inode, file);
4493
4494         return 0;
4495 }
4496
4497 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4498 {
4499         struct trace_array *tr = inode->i_private;
4500
4501         trace_array_put(tr);
4502         return 0;
4503 }
4504
4505 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4506 {
4507         struct trace_array *tr = inode->i_private;
4508
4509         trace_array_put(tr);
4510
4511         return single_release(inode, file);
4512 }
4513
4514 static int tracing_open(struct inode *inode, struct file *file)
4515 {
4516         struct trace_array *tr = inode->i_private;
4517         struct trace_iterator *iter;
4518         int ret;
4519
4520         ret = tracing_check_open_get_tr(tr);
4521         if (ret)
4522                 return ret;
4523
4524         /* If this file was open for write, then erase contents */
4525         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4526                 int cpu = tracing_get_cpu(inode);
4527                 struct array_buffer *trace_buf = &tr->array_buffer;
4528
4529 #ifdef CONFIG_TRACER_MAX_TRACE
4530                 if (tr->current_trace->print_max)
4531                         trace_buf = &tr->max_buffer;
4532 #endif
4533
4534                 if (cpu == RING_BUFFER_ALL_CPUS)
4535                         tracing_reset_online_cpus(trace_buf);
4536                 else
4537                         tracing_reset_cpu(trace_buf, cpu);
4538         }
4539
4540         if (file->f_mode & FMODE_READ) {
4541                 iter = __tracing_open(inode, file, false);
4542                 if (IS_ERR(iter))
4543                         ret = PTR_ERR(iter);
4544                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4545                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4546         }
4547
4548         if (ret < 0)
4549                 trace_array_put(tr);
4550
4551         return ret;
4552 }
4553
4554 /*
4555  * Some tracers are not suitable for instance buffers.
4556  * A tracer is always available for the global array (toplevel);
4557  * for an instance it is only available if it explicitly allows it.
4558  */
4559 static bool
4560 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4561 {
4562         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4563 }
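/*
 * A tracer opts in to instance buffers by setting allow_instances in its
 * definition; hypothetical sketch:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name			= "my_tracer",
 *		.allow_instances	= true,
 *		// ... init/reset callbacks etc. ...
 *	};
 */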
4564
4565 /* Find the next tracer that this trace array may use */
4566 static struct tracer *
4567 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4568 {
4569         while (t && !trace_ok_for_array(t, tr))
4570                 t = t->next;
4571
4572         return t;
4573 }
4574
4575 static void *
4576 t_next(struct seq_file *m, void *v, loff_t *pos)
4577 {
4578         struct trace_array *tr = m->private;
4579         struct tracer *t = v;
4580
4581         (*pos)++;
4582
4583         if (t)
4584                 t = get_tracer_for_array(tr, t->next);
4585
4586         return t;
4587 }
4588
4589 static void *t_start(struct seq_file *m, loff_t *pos)
4590 {
4591         struct trace_array *tr = m->private;
4592         struct tracer *t;
4593         loff_t l = 0;
4594
4595         mutex_lock(&trace_types_lock);
4596
4597         t = get_tracer_for_array(tr, trace_types);
4598         for (; t && l < *pos; t = t_next(m, t, &l))
4599                 ;
4600
4601         return t;
4602 }
4603
4604 static void t_stop(struct seq_file *m, void *p)
4605 {
4606         mutex_unlock(&trace_types_lock);
4607 }
4608
4609 static int t_show(struct seq_file *m, void *v)
4610 {
4611         struct tracer *t = v;
4612
4613         if (!t)
4614                 return 0;
4615
4616         seq_puts(m, t->name);
4617         if (t->next)
4618                 seq_putc(m, ' ');
4619         else
4620                 seq_putc(m, '\n');
4621
4622         return 0;
4623 }
4624
4625 static const struct seq_operations show_traces_seq_ops = {
4626         .start          = t_start,
4627         .next           = t_next,
4628         .stop           = t_stop,
4629         .show           = t_show,
4630 };
4631
4632 static int show_traces_open(struct inode *inode, struct file *file)
4633 {
4634         struct trace_array *tr = inode->i_private;
4635         struct seq_file *m;
4636         int ret;
4637
4638         ret = tracing_check_open_get_tr(tr);
4639         if (ret)
4640                 return ret;
4641
4642         ret = seq_open(file, &show_traces_seq_ops);
4643         if (ret) {
4644                 trace_array_put(tr);
4645                 return ret;
4646         }
4647
4648         m = file->private_data;
4649         m->private = tr;
4650
4651         return 0;
4652 }
4653
4654 static int show_traces_release(struct inode *inode, struct file *file)
4655 {
4656         struct trace_array *tr = inode->i_private;
4657
4658         trace_array_put(tr);
4659         return seq_release(inode, file);
4660 }
4661
4662 static ssize_t
4663 tracing_write_stub(struct file *filp, const char __user *ubuf,
4664                    size_t count, loff_t *ppos)
4665 {
4666         return count;
4667 }
4668
4669 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4670 {
4671         int ret;
4672
4673         if (file->f_mode & FMODE_READ)
4674                 ret = seq_lseek(file, offset, whence);
4675         else
4676                 file->f_pos = ret = 0;
4677
4678         return ret;
4679 }
4680
4681 static const struct file_operations tracing_fops = {
4682         .open           = tracing_open,
4683         .read           = seq_read,
4684         .write          = tracing_write_stub,
4685         .llseek         = tracing_lseek,
4686         .release        = tracing_release,
4687 };
4688
4689 static const struct file_operations show_traces_fops = {
4690         .open           = show_traces_open,
4691         .read           = seq_read,
4692         .llseek         = seq_lseek,
4693         .release        = show_traces_release,
4694 };
4695
4696 static ssize_t
4697 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4698                      size_t count, loff_t *ppos)
4699 {
4700         struct trace_array *tr = file_inode(filp)->i_private;
4701         char *mask_str;
4702         int len;
4703
4704         len = snprintf(NULL, 0, "%*pb\n",
4705                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4706         mask_str = kmalloc(len, GFP_KERNEL);
4707         if (!mask_str)
4708                 return -ENOMEM;
4709
4710         len = snprintf(mask_str, len, "%*pb\n",
4711                        cpumask_pr_args(tr->tracing_cpumask));
4712         if (len >= count) {
4713                 count = -EINVAL;
4714                 goto out_err;
4715         }
4716         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4717
4718 out_err:
4719         kfree(mask_str);
4720
4721         return count;
4722 }
4723
4724 int tracing_set_cpumask(struct trace_array *tr,
4725                         cpumask_var_t tracing_cpumask_new)
4726 {
4727         int cpu;
4728
4729         if (!tr)
4730                 return -EINVAL;
4731
4732         local_irq_disable();
4733         arch_spin_lock(&tr->max_lock);
4734         for_each_tracing_cpu(cpu) {
4735                 /*
4736                  * Increase/decrease the disabled counter if we are
4737                  * about to flip a bit in the cpumask:
4738                  */
4739                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4740                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4741                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4742                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4743                 }
4744                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4745                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4746                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4747                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4748                 }
4749         }
4750         arch_spin_unlock(&tr->max_lock);
4751         local_irq_enable();
4752
4753         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4754
4755         return 0;
4756 }
4757
4758 static ssize_t
4759 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4760                       size_t count, loff_t *ppos)
4761 {
4762         struct trace_array *tr = file_inode(filp)->i_private;
4763         cpumask_var_t tracing_cpumask_new;
4764         int err;
4765
4766         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4767                 return -ENOMEM;
4768
4769         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4770         if (err)
4771                 goto err_free;
4772
4773         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4774         if (err)
4775                 goto err_free;
4776
4777         free_cpumask_var(tracing_cpumask_new);
4778
4779         return count;
4780
4781 err_free:
4782         free_cpumask_var(tracing_cpumask_new);
4783
4784         return err;
4785 }
4786
4787 static const struct file_operations tracing_cpumask_fops = {
4788         .open           = tracing_open_generic_tr,
4789         .read           = tracing_cpumask_read,
4790         .write          = tracing_cpumask_write,
4791         .release        = tracing_release_generic_tr,
4792         .llseek         = generic_file_llseek,
4793 };
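/*
 * From user space the file backed by the fops above is the tracefs
 * "tracing_cpumask" file, which takes a hex CPU mask. Assuming the usual
 * /sys/kernel/tracing mount point, restricting tracing to CPUs 0-1 looks
 * like:
 *
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 */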
4794
4795 static int tracing_trace_options_show(struct seq_file *m, void *v)
4796 {
4797         struct tracer_opt *trace_opts;
4798         struct trace_array *tr = m->private;
4799         u32 tracer_flags;
4800         int i;
4801
4802         mutex_lock(&trace_types_lock);
4803         tracer_flags = tr->current_trace->flags->val;
4804         trace_opts = tr->current_trace->flags->opts;
4805
4806         for (i = 0; trace_options[i]; i++) {
4807                 if (tr->trace_flags & (1 << i))
4808                         seq_printf(m, "%s\n", trace_options[i]);
4809                 else
4810                         seq_printf(m, "no%s\n", trace_options[i]);
4811         }
4812
4813         for (i = 0; trace_opts[i].name; i++) {
4814                 if (tracer_flags & trace_opts[i].bit)
4815                         seq_printf(m, "%s\n", trace_opts[i].name);
4816                 else
4817                         seq_printf(m, "no%s\n", trace_opts[i].name);
4818         }
4819         mutex_unlock(&trace_types_lock);
4820
4821         return 0;
4822 }
4823
4824 static int __set_tracer_option(struct trace_array *tr,
4825                                struct tracer_flags *tracer_flags,
4826                                struct tracer_opt *opts, int neg)
4827 {
4828         struct tracer *trace = tracer_flags->trace;
4829         int ret;
4830
4831         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4832         if (ret)
4833                 return ret;
4834
4835         if (neg)
4836                 tracer_flags->val &= ~opts->bit;
4837         else
4838                 tracer_flags->val |= opts->bit;
4839         return 0;
4840 }
4841
4842 /* Try to assign a tracer specific option */
4843 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4844 {
4845         struct tracer *trace = tr->current_trace;
4846         struct tracer_flags *tracer_flags = trace->flags;
4847         struct tracer_opt *opts = NULL;
4848         int i;
4849
4850         for (i = 0; tracer_flags->opts[i].name; i++) {
4851                 opts = &tracer_flags->opts[i];
4852
4853                 if (strcmp(cmp, opts->name) == 0)
4854                         return __set_tracer_option(tr, trace->flags, opts, neg);
4855         }
4856
4857         return -EINVAL;
4858 }
4859
4860 /* Some tracers require overwrite to stay enabled */
4861 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4862 {
4863         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4864                 return -1;
4865
4866         return 0;
4867 }
4868
4869 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4870 {
4871         if ((mask == TRACE_ITER_RECORD_TGID) ||
4872             (mask == TRACE_ITER_RECORD_CMD))
4873                 lockdep_assert_held(&event_mutex);
4874
4875         /* do nothing if flag is already set */
4876         if (!!(tr->trace_flags & mask) == !!enabled)
4877                 return 0;
4878
4879         /* Give the tracer a chance to approve the change */
4880         if (tr->current_trace->flag_changed)
4881                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4882                         return -EINVAL;
4883
4884         if (enabled)
4885                 tr->trace_flags |= mask;
4886         else
4887                 tr->trace_flags &= ~mask;
4888
4889         if (mask == TRACE_ITER_RECORD_CMD)
4890                 trace_event_enable_cmd_record(enabled);
4891
4892         if (mask == TRACE_ITER_RECORD_TGID) {
4893                 if (!tgid_map)
4894                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4895                                            sizeof(*tgid_map),
4896                                            GFP_KERNEL);
4897                 if (!tgid_map) {
4898                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4899                         return -ENOMEM;
4900                 }
4901
4902                 trace_event_enable_tgid_record(enabled);
4903         }
4904
4905         if (mask == TRACE_ITER_EVENT_FORK)
4906                 trace_event_follow_fork(tr, enabled);
4907
4908         if (mask == TRACE_ITER_FUNC_FORK)
4909                 ftrace_pid_follow_fork(tr, enabled);
4910
4911         if (mask == TRACE_ITER_OVERWRITE) {
4912                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4913 #ifdef CONFIG_TRACER_MAX_TRACE
4914                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4915 #endif
4916         }
4917
4918         if (mask == TRACE_ITER_PRINTK) {
4919                 trace_printk_start_stop_comm(enabled);
4920                 trace_printk_control(enabled);
4921         }
4922
4923         return 0;
4924 }
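/*
 * Note that the TRACE_ITER_RECORD_TGID case above lazily allocates
 * tgid_map (PID_MAX_DEFAULT + 1 entries) the first time the option is
 * turned on, e.g. via something like "echo 1 > options/record-tgid";
 * that map is what the saved_tgids file and tgid lookups read from.
 */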
4925
4926 int trace_set_options(struct trace_array *tr, char *option)
4927 {
4928         char *cmp;
4929         int neg = 0;
4930         int ret;
4931         size_t orig_len = strlen(option);
4932         int len;
4933
4934         cmp = strstrip(option);
4935
4936         len = str_has_prefix(cmp, "no");
4937         if (len)
4938                 neg = 1;
4939
4940         cmp += len;
4941
4942         mutex_lock(&event_mutex);
4943         mutex_lock(&trace_types_lock);
4944
4945         ret = match_string(trace_options, -1, cmp);
4946         /* If the name is not a core option, try the tracer-specific options */
4947         if (ret < 0)
4948                 ret = set_tracer_option(tr, cmp, neg);
4949         else
4950                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4951
4952         mutex_unlock(&trace_types_lock);
4953         mutex_unlock(&event_mutex);
4954
4955         /*
4956          * If the first trailing whitespace is replaced with '\0' by strstrip,
4957          * turn it back into a space.
4958          */
4959         if (orig_len > strlen(option))
4960                 option[strlen(option)] = ' ';
4961
4962         return ret;
4963 }
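/*
 * For example, a write of "nosym-offset" to trace_options is stripped of
 * its "no" prefix, matched against the core trace_options[] table and
 * cleared via set_tracer_flag(); a name that is not a core option falls
 * through to the current tracer's private options in set_tracer_option().
 */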
4964
4965 static void __init apply_trace_boot_options(void)
4966 {
4967         char *buf = trace_boot_options_buf;
4968         char *option;
4969
4970         while (true) {
4971                 option = strsep(&buf, ",");
4972
4973                 if (!option)
4974                         break;
4975
4976                 if (*option)
4977                         trace_set_options(&global_trace, option);
4978
4979                 /* Put back the comma to allow this to be called again */
4980                 if (buf)
4981                         *(buf - 1) = ',';
4982         }
4983 }
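/*
 * trace_boot_options_buf typically comes from a "trace_options=opt1,opt2"
 * style kernel command line. strsep() consumes the commas while walking
 * the list, so they are put back above to keep the buffer intact in case
 * it is ever parsed again.
 */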
4984
4985 static ssize_t
4986 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4987                         size_t cnt, loff_t *ppos)
4988 {
4989         struct seq_file *m = filp->private_data;
4990         struct trace_array *tr = m->private;
4991         char buf[64];
4992         int ret;
4993
4994         if (cnt >= sizeof(buf))
4995                 return -EINVAL;
4996
4997         if (copy_from_user(buf, ubuf, cnt))
4998                 return -EFAULT;
4999
5000         buf[cnt] = 0;
5001
5002         ret = trace_set_options(tr, buf);
5003         if (ret < 0)
5004                 return ret;
5005
5006         *ppos += cnt;
5007
5008         return cnt;
5009 }
5010
5011 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5012 {
5013         struct trace_array *tr = inode->i_private;
5014         int ret;
5015
5016         ret = tracing_check_open_get_tr(tr);
5017         if (ret)
5018                 return ret;
5019
5020         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5021         if (ret < 0)
5022                 trace_array_put(tr);
5023
5024         return ret;
5025 }
5026
5027 static const struct file_operations tracing_iter_fops = {
5028         .open           = tracing_trace_options_open,
5029         .read           = seq_read,
5030         .llseek         = seq_lseek,
5031         .release        = tracing_single_release_tr,
5032         .write          = tracing_trace_options_write,
5033 };
5034
5035 static const char readme_msg[] =
5036         "tracing mini-HOWTO:\n\n"
5037         "# echo 0 > tracing_on : quick way to disable tracing\n"
5038         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5039         " Important files:\n"
5040         "  trace\t\t\t- The static contents of the buffer\n"
5041         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5042         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5043         "  current_tracer\t- function and latency tracers\n"
5044         "  available_tracers\t- list of configured tracers for current_tracer\n"
5045         "  error_log\t- error log for failed commands (that support it)\n"
5046         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5047         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5048         "  trace_clock\t\t-change the clock used to order events\n"
5049         "       local:   Per cpu clock but may not be synced across CPUs\n"
5050         "      global:   Synced across CPUs but slows tracing down.\n"
5051         "     counter:   Not a clock, but just an increment\n"
5052         "      uptime:   Jiffy counter from time of boot\n"
5053         "        perf:   Same clock that perf events use\n"
5054 #ifdef CONFIG_X86_64
5055         "     x86-tsc:   TSC cycle counter\n"
5056 #endif
5057         "\n  timestamp_mode\t-view the mode used to timestamp events\n"
5058         "       delta:   Delta difference against a buffer-wide timestamp\n"
5059         "    absolute:   Absolute (standalone) timestamp\n"
5060         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5061         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5062         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5063         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5064         "\t\t\t  Remove sub-buffer with rmdir\n"
5065         "  trace_options\t\t- Set format or modify how tracing happens\n"
5066         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5067         "\t\t\t  option name\n"
5068         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5069 #ifdef CONFIG_DYNAMIC_FTRACE
5070         "\n  available_filter_functions - list of functions that can be filtered on\n"
5071         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5072         "\t\t\t  functions\n"
5073         "\t     accepts: func_full_name or glob-matching-pattern\n"
5074         "\t     modules: Can select a group via module\n"
5075         "\t      Format: :mod:<module-name>\n"
5076         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5077         "\t    triggers: a command to perform when function is hit\n"
5078         "\t      Format: <function>:<trigger>[:count]\n"
5079         "\t     trigger: traceon, traceoff\n"
5080         "\t\t      enable_event:<system>:<event>\n"
5081         "\t\t      disable_event:<system>:<event>\n"
5082 #ifdef CONFIG_STACKTRACE
5083         "\t\t      stacktrace\n"
5084 #endif
5085 #ifdef CONFIG_TRACER_SNAPSHOT
5086         "\t\t      snapshot\n"
5087 #endif
5088         "\t\t      dump\n"
5089         "\t\t      cpudump\n"
5090         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5091         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5092         "\t     The first one will disable tracing every time do_fault is hit\n"
5093         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5094         "\t       The first time do trap is hit and it disables tracing, the\n"
5095         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5096         "\t       the counter will not decrement. It only decrements when the\n"
5097         "\t       trigger did work\n"
5098         "\t     To remove trigger without count:\n"
5099         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5100         "\t     To remove trigger with a count:\n"
5101         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5102         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5103         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5104         "\t    modules: Can select a group via module command :mod:\n"
5105         "\t    Does not accept triggers\n"
5106 #endif /* CONFIG_DYNAMIC_FTRACE */
5107 #ifdef CONFIG_FUNCTION_TRACER
5108         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5109         "\t\t    (function)\n"
5110         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5111         "\t\t    (function)\n"
5112 #endif
5113 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5114         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5115         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5116         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5117 #endif
5118 #ifdef CONFIG_TRACER_SNAPSHOT
5119         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5120         "\t\t\t  snapshot buffer. Read the contents for more\n"
5121         "\t\t\t  information\n"
5122 #endif
5123 #ifdef CONFIG_STACK_TRACER
5124         "  stack_trace\t\t- Shows the max stack trace when active\n"
5125         "  stack_max_size\t- Shows current max stack size that was traced\n"
5126         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5127         "\t\t\t  new trace)\n"
5128 #ifdef CONFIG_DYNAMIC_FTRACE
5129         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5130         "\t\t\t  traces\n"
5131 #endif
5132 #endif /* CONFIG_STACK_TRACER */
5133 #ifdef CONFIG_DYNAMIC_EVENTS
5134         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5135         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5136 #endif
5137 #ifdef CONFIG_KPROBE_EVENTS
5138         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5139         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5140 #endif
5141 #ifdef CONFIG_UPROBE_EVENTS
5142         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5143         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5144 #endif
5145 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5146         "\t  accepts: event-definitions (one definition per line)\n"
5147         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5148         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5149 #ifdef CONFIG_HIST_TRIGGERS
5150         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5151 #endif
5152         "\t           -:[<group>/]<event>\n"
5153 #ifdef CONFIG_KPROBE_EVENTS
5154         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5155   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5156 #endif
5157 #ifdef CONFIG_UPROBE_EVENTS
5158   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5159 #endif
5160         "\t     args: <name>=fetcharg[:type]\n"
5161         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5162 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5163         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5164 #else
5165         "\t           $stack<index>, $stack, $retval, $comm,\n"
5166 #endif
5167         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5168         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5169         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5170         "\t           <type>\\[<array-size>\\]\n"
5171 #ifdef CONFIG_HIST_TRIGGERS
5172         "\t    field: <stype> <name>;\n"
5173         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5174         "\t           [unsigned] char/int/long\n"
5175 #endif
5176 #endif
5177         "  events/\t\t- Directory containing all trace event subsystems:\n"
5178         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5179         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5180         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5181         "\t\t\t  events\n"
5182         "      filter\t\t- If set, only events passing filter are traced\n"
5183         "  events/<system>/<event>/\t- Directory containing control files for\n"
5184         "\t\t\t  <event>:\n"
5185         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5186         "      filter\t\t- If set, only events passing filter are traced\n"
5187         "      trigger\t\t- If set, a command to perform when event is hit\n"
5188         "\t    Format: <trigger>[:count][if <filter>]\n"
5189         "\t   trigger: traceon, traceoff\n"
5190         "\t            enable_event:<system>:<event>\n"
5191         "\t            disable_event:<system>:<event>\n"
5192 #ifdef CONFIG_HIST_TRIGGERS
5193         "\t            enable_hist:<system>:<event>\n"
5194         "\t            disable_hist:<system>:<event>\n"
5195 #endif
5196 #ifdef CONFIG_STACKTRACE
5197         "\t\t    stacktrace\n"
5198 #endif
5199 #ifdef CONFIG_TRACER_SNAPSHOT
5200         "\t\t    snapshot\n"
5201 #endif
5202 #ifdef CONFIG_HIST_TRIGGERS
5203         "\t\t    hist (see below)\n"
5204 #endif
5205         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5206         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5207         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5208         "\t                  events/block/block_unplug/trigger\n"
5209         "\t   The first disables tracing every time block_unplug is hit.\n"
5210         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5211         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5212         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5213         "\t   Like function triggers, the counter is only decremented if it\n"
5214         "\t    enabled or disabled tracing.\n"
5215         "\t   To remove a trigger without a count:\n"
5216         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5217         "\t   To remove a trigger with a count:\n"
5218         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5219         "\t   Filters can be ignored when removing a trigger.\n"
5220 #ifdef CONFIG_HIST_TRIGGERS
5221         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5222         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5223         "\t            [:values=<field1[,field2,...]>]\n"
5224         "\t            [:sort=<field1[,field2,...]>]\n"
5225         "\t            [:size=#entries]\n"
5226         "\t            [:pause][:continue][:clear]\n"
5227         "\t            [:name=histname1]\n"
5228         "\t            [:<handler>.<action>]\n"
5229         "\t            [if <filter>]\n\n"
5230         "\t    When a matching event is hit, an entry is added to a hash\n"
5231         "\t    table using the key(s) and value(s) named, and the value of a\n"
5232         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5233         "\t    correspond to fields in the event's format description.  Keys\n"
5234         "\t    can be any field, or the special string 'stacktrace'.\n"
5235         "\t    Compound keys consisting of up to two fields can be specified\n"
5236         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5237         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5238         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5239         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5240         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5241         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5242         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5243         "\t    its histogram data will be shared with other triggers of the\n"
5244         "\t    same name, and trigger hits will update this common data.\n\n"
5245         "\t    Reading the 'hist' file for the event will dump the hash\n"
5246         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5247         "\t    triggers attached to an event, there will be a table for each\n"
5248         "\t    trigger in the output.  The table displayed for a named\n"
5249         "\t    trigger will be the same as any other instance having the\n"
5250         "\t    same name.  The default format used to display a given field\n"
5251         "\t    can be modified by appending any of the following modifiers\n"
5252         "\t    to the field name, as applicable:\n\n"
5253         "\t            .hex        display a number as a hex value\n"
5254         "\t            .sym        display an address as a symbol\n"
5255         "\t            .sym-offset display an address as a symbol and offset\n"
5256         "\t            .execname   display a common_pid as a program name\n"
5257         "\t            .syscall    display a syscall id as a syscall name\n"
5258         "\t            .log2       display log2 value rather than raw number\n"
5259         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5260         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5261         "\t    trigger or to start a hist trigger but not log any events\n"
5262         "\t    until told to do so.  'continue' can be used to start or\n"
5263         "\t    restart a paused hist trigger.\n\n"
5264         "\t    The 'clear' parameter will clear the contents of a running\n"
5265         "\t    hist trigger and leave its current paused/active state\n"
5266         "\t    unchanged.\n\n"
5267         "\t    The enable_hist and disable_hist triggers can be used to\n"
5268         "\t    have one event conditionally start and stop another event's\n"
5269         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5270         "\t    the enable_event and disable_event triggers.\n\n"
5271         "\t    Hist trigger handlers and actions are executed whenever a\n"
5272         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5273         "\t        <handler>.<action>\n\n"
5274         "\t    The available handlers are:\n\n"
5275         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5276         "\t        onmax(var)               - invoke if var exceeds current max\n"
5277         "\t        onchange(var)            - invoke action if var changes\n\n"
5278         "\t    The available actions are:\n\n"
5279         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5280         "\t        save(field,...)                      - save current event fields\n"
5281 #ifdef CONFIG_TRACER_SNAPSHOT
5282         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5283 #endif
5284 #ifdef CONFIG_SYNTH_EVENTS
5285         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5286         "\t  Write into this file to define/undefine new synthetic events.\n"
5287         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5288 #endif
5289 #endif
5290 ;
5291
5292 static ssize_t
5293 tracing_readme_read(struct file *filp, char __user *ubuf,
5294                        size_t cnt, loff_t *ppos)
5295 {
5296         return simple_read_from_buffer(ubuf, cnt, ppos,
5297                                         readme_msg, strlen(readme_msg));
5298 }
5299
5300 static const struct file_operations tracing_readme_fops = {
5301         .open           = tracing_open_generic,
5302         .read           = tracing_readme_read,
5303         .llseek         = generic_file_llseek,
5304 };
5305
5306 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5307 {
5308         int *ptr = v;
5309
5310         if (*pos || m->count)
5311                 ptr++;
5312
5313         (*pos)++;
5314
5315         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5316                 if (trace_find_tgid(*ptr))
5317                         return ptr;
5318         }
5319
5320         return NULL;
5321 }
5322
5323 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5324 {
5325         void *v;
5326         loff_t l = 0;
5327
5328         if (!tgid_map)
5329                 return NULL;
5330
5331         v = &tgid_map[0];
5332         while (l <= *pos) {
5333                 v = saved_tgids_next(m, v, &l);
5334                 if (!v)
5335                         return NULL;
5336         }
5337
5338         return v;
5339 }
5340
5341 static void saved_tgids_stop(struct seq_file *m, void *v)
5342 {
5343 }
5344
5345 static int saved_tgids_show(struct seq_file *m, void *v)
5346 {
5347         int pid = (int *)v - tgid_map;
5348
5349         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5350         return 0;
5351 }
5352
5353 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5354         .start          = saved_tgids_start,
5355         .stop           = saved_tgids_stop,
5356         .next           = saved_tgids_next,
5357         .show           = saved_tgids_show,
5358 };
5359
5360 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5361 {
5362         int ret;
5363
5364         ret = tracing_check_open_get_tr(NULL);
5365         if (ret)
5366                 return ret;
5367
5368         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5369 }
5370
5371
5372 static const struct file_operations tracing_saved_tgids_fops = {
5373         .open           = tracing_saved_tgids_open,
5374         .read           = seq_read,
5375         .llseek         = seq_lseek,
5376         .release        = seq_release,
5377 };
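/*
 * Reading saved_tgids walks tgid_map with the seq_file iterator above and
 * emits one "<pid> <tgid>" pair per line for every pid that has a
 * recorded tgid.
 */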
5378
5379 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5380 {
5381         unsigned int *ptr = v;
5382
5383         if (*pos || m->count)
5384                 ptr++;
5385
5386         (*pos)++;
5387
5388         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5389              ptr++) {
5390                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5391                         continue;
5392
5393                 return ptr;
5394         }
5395
5396         return NULL;
5397 }
5398
5399 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5400 {
5401         void *v;
5402         loff_t l = 0;
5403
5404         preempt_disable();
5405         arch_spin_lock(&trace_cmdline_lock);
5406
5407         v = &savedcmd->map_cmdline_to_pid[0];
5408         while (l <= *pos) {
5409                 v = saved_cmdlines_next(m, v, &l);
5410                 if (!v)
5411                         return NULL;
5412         }
5413
5414         return v;
5415 }
5416
5417 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5418 {
5419         arch_spin_unlock(&trace_cmdline_lock);
5420         preempt_enable();
5421 }
5422
5423 static int saved_cmdlines_show(struct seq_file *m, void *v)
5424 {
5425         char buf[TASK_COMM_LEN];
5426         unsigned int *pid = v;
5427
5428         __trace_find_cmdline(*pid, buf);
5429         seq_printf(m, "%d %s\n", *pid, buf);
5430         return 0;
5431 }
5432
5433 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5434         .start          = saved_cmdlines_start,
5435         .next           = saved_cmdlines_next,
5436         .stop           = saved_cmdlines_stop,
5437         .show           = saved_cmdlines_show,
5438 };
5439
5440 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5441 {
5442         int ret;
5443
5444         ret = tracing_check_open_get_tr(NULL);
5445         if (ret)
5446                 return ret;
5447
5448         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5449 }
5450
5451 static const struct file_operations tracing_saved_cmdlines_fops = {
5452         .open           = tracing_saved_cmdlines_open,
5453         .read           = seq_read,
5454         .llseek         = seq_lseek,
5455         .release        = seq_release,
5456 };
5457
5458 static ssize_t
5459 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5460                                  size_t cnt, loff_t *ppos)
5461 {
5462         char buf[64];
5463         int r;
5464
5465         arch_spin_lock(&trace_cmdline_lock);
5466         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5467         arch_spin_unlock(&trace_cmdline_lock);
5468
5469         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5470 }
5471
5472 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5473 {
5474         kfree(s->saved_cmdlines);
5475         kfree(s->map_cmdline_to_pid);
5476         kfree(s);
5477 }
5478
5479 static int tracing_resize_saved_cmdlines(unsigned int val)
5480 {
5481         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5482
5483         s = kmalloc(sizeof(*s), GFP_KERNEL);
5484         if (!s)
5485                 return -ENOMEM;
5486
5487         if (allocate_cmdlines_buffer(val, s) < 0) {
5488                 kfree(s);
5489                 return -ENOMEM;
5490         }
5491
5492         arch_spin_lock(&trace_cmdline_lock);
5493         savedcmd_temp = savedcmd;
5494         savedcmd = s;
5495         arch_spin_unlock(&trace_cmdline_lock);
5496         free_saved_cmdlines_buffer(savedcmd_temp);
5497
5498         return 0;
5499 }
5500
5501 static ssize_t
5502 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5503                                   size_t cnt, loff_t *ppos)
5504 {
5505         unsigned long val;
5506         int ret;
5507
5508         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5509         if (ret)
5510                 return ret;
5511
5512         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5513         if (!val || val > PID_MAX_DEFAULT)
5514                 return -EINVAL;
5515
5516         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5517         if (ret < 0)
5518                 return ret;
5519
5520         *ppos += cnt;
5521
5522         return cnt;
5523 }
5524
5525 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5526         .open           = tracing_open_generic,
5527         .read           = tracing_saved_cmdlines_size_read,
5528         .write          = tracing_saved_cmdlines_size_write,
5529 };
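/*
 * For example, "echo 1024 > saved_cmdlines_size" allocates a fresh
 * saved_cmdlines_buffer sized for 1024 entries, swaps it in under
 * trace_cmdline_lock and frees the old buffer; previously saved comms
 * are not copied over.
 */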
5530
5531 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5532 static union trace_eval_map_item *
5533 update_eval_map(union trace_eval_map_item *ptr)
5534 {
5535         if (!ptr->map.eval_string) {
5536                 if (ptr->tail.next) {
5537                         ptr = ptr->tail.next;
5538                         /* Set ptr to the next real item (skip head) */
5539                         ptr++;
5540                 } else
5541                         return NULL;
5542         }
5543         return ptr;
5544 }
5545
5546 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5547 {
5548         union trace_eval_map_item *ptr = v;
5549
5550         /*
5551          * Paranoid! If ptr points to end, we don't want to increment past it.
5552          * This really should never happen.
5553          */
5554         (*pos)++;
5555         ptr = update_eval_map(ptr);
5556         if (WARN_ON_ONCE(!ptr))
5557                 return NULL;
5558
5559         ptr++;
5560         ptr = update_eval_map(ptr);
5561
5562         return ptr;
5563 }
5564
5565 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5566 {
5567         union trace_eval_map_item *v;
5568         loff_t l = 0;
5569
5570         mutex_lock(&trace_eval_mutex);
5571
5572         v = trace_eval_maps;
5573         if (v)
5574                 v++;
5575
5576         while (v && l < *pos) {
5577                 v = eval_map_next(m, v, &l);
5578         }
5579
5580         return v;
5581 }
5582
5583 static void eval_map_stop(struct seq_file *m, void *v)
5584 {
5585         mutex_unlock(&trace_eval_mutex);
5586 }
5587
5588 static int eval_map_show(struct seq_file *m, void *v)
5589 {
5590         union trace_eval_map_item *ptr = v;
5591
5592         seq_printf(m, "%s %ld (%s)\n",
5593                    ptr->map.eval_string, ptr->map.eval_value,
5594                    ptr->map.system);
5595
5596         return 0;
5597 }
5598
5599 static const struct seq_operations tracing_eval_map_seq_ops = {
5600         .start          = eval_map_start,
5601         .next           = eval_map_next,
5602         .stop           = eval_map_stop,
5603         .show           = eval_map_show,
5604 };
5605
5606 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5607 {
5608         int ret;
5609
5610         ret = tracing_check_open_get_tr(NULL);
5611         if (ret)
5612                 return ret;
5613
5614         return seq_open(filp, &tracing_eval_map_seq_ops);
5615 }
5616
5617 static const struct file_operations tracing_eval_map_fops = {
5618         .open           = tracing_eval_map_open,
5619         .read           = seq_read,
5620         .llseek         = seq_lseek,
5621         .release        = seq_release,
5622 };
5623
5624 static inline union trace_eval_map_item *
5625 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5626 {
5627         /* Return tail of array given the head */
5628         return ptr + ptr->head.length + 1;
5629 }
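/*
 * Each block chained onto trace_eval_maps is laid out roughly as:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map length-1 ][ tail: next ]
 *
 * so stepping "length + 1" items past the head lands on the tail, whose
 * ->tail.next points to the next module's block (or is NULL).
 */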
5630
5631 static void
5632 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5633                            int len)
5634 {
5635         struct trace_eval_map **stop;
5636         struct trace_eval_map **map;
5637         union trace_eval_map_item *map_array;
5638         union trace_eval_map_item *ptr;
5639
5640         stop = start + len;
5641
5642         /*
5643          * The trace_eval_maps contains the map plus a head and tail item,
5644          * where the head holds the module and length of array, and the
5645          * tail holds a pointer to the next list.
5646          */
5647         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5648         if (!map_array) {
5649                 pr_warn("Unable to allocate trace eval mapping\n");
5650                 return;
5651         }
5652
5653         mutex_lock(&trace_eval_mutex);
5654
5655         if (!trace_eval_maps)
5656                 trace_eval_maps = map_array;
5657         else {
5658                 ptr = trace_eval_maps;
5659                 for (;;) {
5660                         ptr = trace_eval_jmp_to_tail(ptr);
5661                         if (!ptr->tail.next)
5662                                 break;
5663                         ptr = ptr->tail.next;
5664
5665                 }
5666                 ptr->tail.next = map_array;
5667         }
5668         map_array->head.mod = mod;
5669         map_array->head.length = len;
5670         map_array++;
5671
5672         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5673                 map_array->map = **map;
5674                 map_array++;
5675         }
5676         memset(map_array, 0, sizeof(*map_array));
5677
5678         mutex_unlock(&trace_eval_mutex);
5679 }
5680
5681 static void trace_create_eval_file(struct dentry *d_tracer)
5682 {
5683         trace_create_file("eval_map", 0444, d_tracer,
5684                           NULL, &tracing_eval_map_fops);
5685 }
5686
5687 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5688 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5689 static inline void trace_insert_eval_map_file(struct module *mod,
5690                               struct trace_eval_map **start, int len) { }
5691 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5692
5693 static void trace_insert_eval_map(struct module *mod,
5694                                   struct trace_eval_map **start, int len)
5695 {
5696         struct trace_eval_map **map;
5697
5698         if (len <= 0)
5699                 return;
5700
5701         map = start;
5702
5703         trace_event_eval_update(map, len);
5704
5705         trace_insert_eval_map_file(mod, start, len);
5706 }
5707
5708 static ssize_t
5709 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5710                        size_t cnt, loff_t *ppos)
5711 {
5712         struct trace_array *tr = filp->private_data;
5713         char buf[MAX_TRACER_SIZE+2];
5714         int r;
5715
5716         mutex_lock(&trace_types_lock);
5717         r = sprintf(buf, "%s\n", tr->current_trace->name);
5718         mutex_unlock(&trace_types_lock);
5719
5720         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5721 }
5722
5723 int tracer_init(struct tracer *t, struct trace_array *tr)
5724 {
5725         tracing_reset_online_cpus(&tr->array_buffer);
5726         return t->init(tr);
5727 }
5728
5729 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5730 {
5731         int cpu;
5732
5733         for_each_tracing_cpu(cpu)
5734                 per_cpu_ptr(buf->data, cpu)->entries = val;
5735 }
5736
5737 #ifdef CONFIG_TRACER_MAX_TRACE
5738 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5739 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5740                                         struct array_buffer *size_buf, int cpu_id)
5741 {
5742         int cpu, ret = 0;
5743
5744         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5745                 for_each_tracing_cpu(cpu) {
5746                         ret = ring_buffer_resize(trace_buf->buffer,
5747                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5748                         if (ret < 0)
5749                                 break;
5750                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5751                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5752                 }
5753         } else {
5754                 ret = ring_buffer_resize(trace_buf->buffer,
5755                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5756                 if (ret == 0)
5757                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5758                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5759         }
5760
5761         return ret;
5762 }
5763 #endif /* CONFIG_TRACER_MAX_TRACE */
5764
5765 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5766                                         unsigned long size, int cpu)
5767 {
5768         int ret;
5769
5770         /*
5771          * If kernel or user changes the size of the ring buffer
5772          * we use the size that was given, and we can forget about
5773          * expanding it later.
5774          */
5775         ring_buffer_expanded = true;
5776
5777         /* May be called before buffers are initialized */
5778         if (!tr->array_buffer.buffer)
5779                 return 0;
5780
5781         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5782         if (ret < 0)
5783                 return ret;
5784
5785 #ifdef CONFIG_TRACER_MAX_TRACE
5786         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5787             !tr->current_trace->use_max_tr)
5788                 goto out;
5789
5790         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5791         if (ret < 0) {
5792                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5793                                                      &tr->array_buffer, cpu);
5794                 if (r < 0) {
5795                         /*
5796                          * AARGH! We are left with a different
5797                          * sized max buffer!!!!
5798                          * The max buffer is our "snapshot" buffer.
5799                          * When a tracer needs a snapshot (one of the
5800                          * latency tracers), it swaps the max buffer
5801                          * with the saved snapshot. We succeeded in
5802                          * updating the size of the main buffer, but failed to
5803                          * update the size of the max buffer. But when we tried
5804                          * to reset the main buffer to the original size, we
5805                          * failed there too. This is very unlikely to
5806                          * happen, but if it does, warn and kill all
5807                          * tracing.
5808                          */
5809                         WARN_ON(1);
5810                         tracing_disabled = 1;
5811                 }
5812                 return ret;
5813         }
5814
5815         if (cpu == RING_BUFFER_ALL_CPUS)
5816                 set_buffer_entries(&tr->max_buffer, size);
5817         else
5818                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5819
5820  out:
5821 #endif /* CONFIG_TRACER_MAX_TRACE */
5822
5823         if (cpu == RING_BUFFER_ALL_CPUS)
5824                 set_buffer_entries(&tr->array_buffer, size);
5825         else
5826                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5827
5828         return ret;
5829 }
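/*
 * With CONFIG_TRACER_MAX_TRACE, the "max" (snapshot) buffer above is only
 * resized along with the main buffer for the global trace array and only
 * while the current tracer actually uses it; otherwise the max-buffer
 * path is skipped and just the per-cpu entry counts are updated.
 */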
5830
5831 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5832                                   unsigned long size, int cpu_id)
5833 {
5834         int ret = size;
5835
5836         mutex_lock(&trace_types_lock);
5837
5838         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5839                 /* make sure this cpu is enabled in the mask */
5840                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5841                         ret = -EINVAL;
5842                         goto out;
5843                 }
5844         }
5845
5846         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5847         if (ret < 0)
5848                 ret = -ENOMEM;
5849
5850 out:
5851         mutex_unlock(&trace_types_lock);
5852
5853         return ret;
5854 }
5855
5856
5857 /**
5858  * tracing_update_buffers - used by tracing facility to expand ring buffers
5859  *
5860  * To save memory when tracing is configured in but never used, the ring
5861  * buffers are set to a minimum size. Once a user starts to use the
5862  * tracing facility, they need to grow
5863  * to their default size.
5864  *
5865  * This function is to be called when a tracer is about to be used.
5866  */
5867 int tracing_update_buffers(void)
5868 {
5869         int ret = 0;
5870
5871         mutex_lock(&trace_types_lock);
5872         if (!ring_buffer_expanded)
5873                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5874                                                 RING_BUFFER_ALL_CPUS);
5875         mutex_unlock(&trace_types_lock);
5876
5877         return ret;
5878 }
5879
5880 struct trace_option_dentry;
5881
5882 static void
5883 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5884
5885 /*
5886  * Used to clear out the tracer before deletion of an instance.
5887  * Must have trace_types_lock held.
5888  */
5889 static void tracing_set_nop(struct trace_array *tr)
5890 {
5891         if (tr->current_trace == &nop_trace)
5892                 return;
5893
5894         tr->current_trace->enabled--;
5895
5896         if (tr->current_trace->reset)
5897                 tr->current_trace->reset(tr);
5898
5899         tr->current_trace = &nop_trace;
5900 }
5901
5902 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5903 {
5904         /* Only enable if the directory has been created already. */
5905         if (!tr->dir)
5906                 return;
5907
5908         create_trace_option_files(tr, t);
5909 }
5910
5911 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5912 {
5913         struct tracer *t;
5914 #ifdef CONFIG_TRACER_MAX_TRACE
5915         bool had_max_tr;
5916 #endif
5917         int ret = 0;
5918
5919         mutex_lock(&trace_types_lock);
5920
5921         if (!ring_buffer_expanded) {
5922                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5923                                                 RING_BUFFER_ALL_CPUS);
5924                 if (ret < 0)
5925                         goto out;
5926                 ret = 0;
5927         }
5928
5929         for (t = trace_types; t; t = t->next) {
5930                 if (strcmp(t->name, buf) == 0)
5931                         break;
5932         }
5933         if (!t) {
5934                 ret = -EINVAL;
5935                 goto out;
5936         }
5937         if (t == tr->current_trace)
5938                 goto out;
5939
5940 #ifdef CONFIG_TRACER_SNAPSHOT
5941         if (t->use_max_tr) {
5942                 arch_spin_lock(&tr->max_lock);
5943                 if (tr->cond_snapshot)
5944                         ret = -EBUSY;
5945                 arch_spin_unlock(&tr->max_lock);
5946                 if (ret)
5947                         goto out;
5948         }
5949 #endif
5950         /* Some tracers won't work on kernel command line */
5951         if (system_state < SYSTEM_RUNNING && t->noboot) {
5952                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5953                         t->name);
5954                 goto out;
5955         }
5956
5957         /* Some tracers are only allowed for the top level buffer */
5958         if (!trace_ok_for_array(t, tr)) {
5959                 ret = -EINVAL;
5960                 goto out;
5961         }
5962
5963         /* If trace pipe files are being read, we can't change the tracer */
5964         if (tr->trace_ref) {
5965                 ret = -EBUSY;
5966                 goto out;
5967         }
5968
5969         trace_branch_disable();
5970
5971         tr->current_trace->enabled--;
5972
5973         if (tr->current_trace->reset)
5974                 tr->current_trace->reset(tr);
5975
5976         /* Current trace needs to be nop_trace before synchronize_rcu */
5977         tr->current_trace = &nop_trace;
5978
5979 #ifdef CONFIG_TRACER_MAX_TRACE
5980         had_max_tr = tr->allocated_snapshot;
5981
5982         if (had_max_tr && !t->use_max_tr) {
5983                 /*
5984                  * We need to make sure that the update_max_tr sees that
5985                  * current_trace changed to nop_trace to keep it from
5986                  * swapping the buffers after we resize it.
5987                  * The update_max_tr is called with interrupts disabled,
5988                  * so a synchronize_rcu() is sufficient.
5989                  */
5990                 synchronize_rcu();
5991                 free_snapshot(tr);
5992         }
5993 #endif
5994
5995 #ifdef CONFIG_TRACER_MAX_TRACE
5996         if (t->use_max_tr && !had_max_tr) {
5997                 ret = tracing_alloc_snapshot_instance(tr);
5998                 if (ret < 0)
5999                         goto out;
6000         }
6001 #endif
6002
6003         if (t->init) {
6004                 ret = tracer_init(t, tr);
6005                 if (ret)
6006                         goto out;
6007         }
6008
6009         tr->current_trace = t;
6010         tr->current_trace->enabled++;
6011         trace_branch_enable(tr);
6012  out:
6013         mutex_unlock(&trace_types_lock);
6014
6015         return ret;
6016 }
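/*
 * tracing_set_tracer() above is what a write like
 * "echo function > current_tracer" ultimately calls: the old tracer is
 * torn down and replaced by nop_trace before the new one is initialized,
 * and the switch is refused with -EBUSY while trace_pipe readers hold a
 * reference (tr->trace_ref).
 */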
6017
6018 static ssize_t
6019 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6020                         size_t cnt, loff_t *ppos)
6021 {
6022         struct trace_array *tr = filp->private_data;
6023         char buf[MAX_TRACER_SIZE+1];
6024         int i;
6025         size_t ret;
6026         int err;
6027
6028         ret = cnt;
6029
6030         if (cnt > MAX_TRACER_SIZE)
6031                 cnt = MAX_TRACER_SIZE;
6032
6033         if (copy_from_user(buf, ubuf, cnt))
6034                 return -EFAULT;
6035
6036         buf[cnt] = 0;
6037
6038         /* strip ending whitespace. */
6039         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6040                 buf[i] = 0;
6041
6042         err = tracing_set_tracer(tr, buf);
6043         if (err)
6044                 return err;
6045
6046         *ppos += ret;
6047
6048         return ret;
6049 }
6050
6051 static ssize_t
6052 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6053                    size_t cnt, loff_t *ppos)
6054 {
6055         char buf[64];
6056         int r;
6057
6058         r = snprintf(buf, sizeof(buf), "%ld\n",
6059                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6060         if (r > sizeof(buf))
6061                 r = sizeof(buf);
6062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6063 }
6064
6065 static ssize_t
6066 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6067                     size_t cnt, loff_t *ppos)
6068 {
6069         unsigned long val;
6070         int ret;
6071
6072         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6073         if (ret)
6074                 return ret;
6075
6076         *ptr = val * 1000;
6077
6078         return cnt;
6079 }
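/*
 * These helpers expose latency values in microseconds: reads convert the
 * stored nanoseconds with nsecs_to_usecs() (printing -1 for the "unset"
 * value), while writes multiply the user's microseconds by 1000 before
 * storing them.
 */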
6080
6081 static ssize_t
6082 tracing_thresh_read(struct file *filp, char __user *ubuf,
6083                     size_t cnt, loff_t *ppos)
6084 {
6085         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6086 }
6087
6088 static ssize_t
6089 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6090                      size_t cnt, loff_t *ppos)
6091 {
6092         struct trace_array *tr = filp->private_data;
6093         int ret;
6094
6095         mutex_lock(&trace_types_lock);
6096         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6097         if (ret < 0)
6098                 goto out;
6099
6100         if (tr->current_trace->update_thresh) {
6101                 ret = tr->current_trace->update_thresh(tr);
6102                 if (ret < 0)
6103                         goto out;
6104         }
6105
6106         ret = cnt;
6107 out:
6108         mutex_unlock(&trace_types_lock);
6109
6110         return ret;
6111 }
6112
6113 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6114
6115 static ssize_t
6116 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6117                      size_t cnt, loff_t *ppos)
6118 {
6119         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6120 }
6121
6122 static ssize_t
6123 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6124                       size_t cnt, loff_t *ppos)
6125 {
6126         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6127 }
6128
6129 #endif
6130
6131 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6132 {
6133         struct trace_array *tr = inode->i_private;
6134         struct trace_iterator *iter;
6135         int ret;
6136
6137         ret = tracing_check_open_get_tr(tr);
6138         if (ret)
6139                 return ret;
6140
6141         mutex_lock(&trace_types_lock);
6142
6143         /* create a buffer to store the information to pass to userspace */
6144         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6145         if (!iter) {
6146                 ret = -ENOMEM;
6147                 __trace_array_put(tr);
6148                 goto out;
6149         }
6150
6151         trace_seq_init(&iter->seq);
6152         iter->trace = tr->current_trace;
6153
6154         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6155                 ret = -ENOMEM;
6156                 goto fail;
6157         }
6158
6159         /* trace pipe does not show start of buffer */
6160         cpumask_setall(iter->started);
6161
6162         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6163                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6164
6165         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6166         if (trace_clocks[tr->clock_id].in_ns)
6167                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6168
6169         iter->tr = tr;
6170         iter->array_buffer = &tr->array_buffer;
6171         iter->cpu_file = tracing_get_cpu(inode);
6172         mutex_init(&iter->mutex);
6173         filp->private_data = iter;
6174
6175         if (iter->trace->pipe_open)
6176                 iter->trace->pipe_open(iter);
6177
6178         nonseekable_open(inode, filp);
6179
6180         tr->trace_ref++;
6181 out:
6182         mutex_unlock(&trace_types_lock);
6183         return ret;
6184
6185 fail:
6186         kfree(iter);
6187         __trace_array_put(tr);
6188         mutex_unlock(&trace_types_lock);
6189         return ret;
6190 }
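/*
 * Each open of trace_pipe gets its own iterator and bumps tr->trace_ref;
 * tracing_set_tracer() checks that count and refuses to switch tracers
 * while a consuming reader is still active.
 */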
6191
6192 static int tracing_release_pipe(struct inode *inode, struct file *file)
6193 {
6194         struct trace_iterator *iter = file->private_data;
6195         struct trace_array *tr = inode->i_private;
6196
6197         mutex_lock(&trace_types_lock);
6198
6199         tr->trace_ref--;
6200
6201         if (iter->trace->pipe_close)
6202                 iter->trace->pipe_close(iter);
6203
6204         mutex_unlock(&trace_types_lock);
6205
6206         free_cpumask_var(iter->started);
6207         mutex_destroy(&iter->mutex);
6208         kfree(iter);
6209
6210         trace_array_put(tr);
6211
6212         return 0;
6213 }
6214
6215 static __poll_t
6216 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6217 {
6218         struct trace_array *tr = iter->tr;
6219
6220         /* Iterators are static, they should be filled or empty */
6221         if (trace_buffer_iter(iter, iter->cpu_file))
6222                 return EPOLLIN | EPOLLRDNORM;
6223
6224         if (tr->trace_flags & TRACE_ITER_BLOCK)
6225                 /*
6226                  * Always select as readable when in blocking mode
6227                  */
6228                 return EPOLLIN | EPOLLRDNORM;
6229         else
6230                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6231                                              filp, poll_table);
6232 }
6233
6234 static __poll_t
6235 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6236 {
6237         struct trace_iterator *iter = filp->private_data;
6238
6239         return trace_poll(iter, filp, poll_table);
6240 }
6241
6242 /* Must be called with iter->mutex held. */
6243 static int tracing_wait_pipe(struct file *filp)
6244 {
6245         struct trace_iterator *iter = filp->private_data;
6246         int ret;
6247
6248         while (trace_empty(iter)) {
6249
6250                 if (filp->f_flags & O_NONBLOCK)
6251                         return -EAGAIN;
6253
6254                 /*
6255                  * We block until we read something and tracing is disabled.
6256                  * We still block if tracing is disabled, but we have never
6257                  * read anything. This allows a user to cat this file, and
6258                  * then enable tracing. But after we have read something,
6259                  * we give an EOF when tracing is again disabled.
6260                  *
6261                  * iter->pos will be 0 if we haven't read anything.
6262                  */
6263                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6264                         break;
6265
6266                 mutex_unlock(&iter->mutex);
6267
6268                 ret = wait_on_pipe(iter, 0);
6269
6270                 mutex_lock(&iter->mutex);
6271
6272                 if (ret)
6273                         return ret;
6274         }
6275
6276         return 1;
6277 }
6278
6279 /*
6280  * Consumer reader.
6281  */
6282 static ssize_t
6283 tracing_read_pipe(struct file *filp, char __user *ubuf,
6284                   size_t cnt, loff_t *ppos)
6285 {
6286         struct trace_iterator *iter = filp->private_data;
6287         ssize_t sret;
6288
6289         /*
6290          * Avoid more than one consumer on a single file descriptor
6291          * This is just a matter of trace coherency; the ring buffer itself
6292          * is protected.
6293          */
6294         mutex_lock(&iter->mutex);
6295
6296         /* return any leftover data */
6297         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6298         if (sret != -EBUSY)
6299                 goto out;
6300
6301         trace_seq_init(&iter->seq);
6302
6303         if (iter->trace->read) {
6304                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6305                 if (sret)
6306                         goto out;
6307         }
6308
6309 waitagain:
6310         sret = tracing_wait_pipe(filp);
6311         if (sret <= 0)
6312                 goto out;
6313
6314         /* stop when tracing is finished */
6315         if (trace_empty(iter)) {
6316                 sret = 0;
6317                 goto out;
6318         }
6319
6320         if (cnt >= PAGE_SIZE)
6321                 cnt = PAGE_SIZE - 1;
6322
6323         /* reset all but tr, trace, and overruns */
6324         memset(&iter->seq, 0,
6325                sizeof(struct trace_iterator) -
6326                offsetof(struct trace_iterator, seq));
6327         cpumask_clear(iter->started);
6328         trace_seq_init(&iter->seq);
6329         iter->pos = -1;
6330
6331         trace_event_read_lock();
6332         trace_access_lock(iter->cpu_file);
6333         while (trace_find_next_entry_inc(iter) != NULL) {
6334                 enum print_line_t ret;
6335                 int save_len = iter->seq.seq.len;
6336
6337                 ret = print_trace_line(iter);
6338                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6339                         /* don't print partial lines */
6340                         iter->seq.seq.len = save_len;
6341                         break;
6342                 }
6343                 if (ret != TRACE_TYPE_NO_CONSUME)
6344                         trace_consume(iter);
6345
6346                 if (trace_seq_used(&iter->seq) >= cnt)
6347                         break;
6348
6349                 /*
6350                  * Setting the full flag means we reached the trace_seq buffer
6351                  * size and should have left via the partial-line condition above.
6352                  * If we get here, one of the trace_seq_* functions was misused.
6353                  */
6354                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6355                           iter->ent->type);
6356         }
6357         trace_access_unlock(iter->cpu_file);
6358         trace_event_read_unlock();
6359
6360         /* Now copy what we have to the user */
6361         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6362         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6363                 trace_seq_init(&iter->seq);
6364
6365         /*
6366          * If there was nothing to send to user, in spite of consuming trace
6367          * entries, go back to wait for more entries.
6368          */
6369         if (sret == -EBUSY)
6370                 goto waitagain;
6371
6372 out:
6373         mutex_unlock(&iter->mutex);
6374
6375         return sret;
6376 }
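
/*
 * Illustrative user-space sketch (not kernel code): a consuming read loop
 * against the file backed by tracing_read_pipe() (conventionally trace_pipe;
 * the path is an assumption).  Each read destroys the events it returns, and
 * the read blocks until data is available unless O_NONBLOCK is set, per
 * tracing_wait_pipe() above.
 *
 *      int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *      char buf[4096];
 *      ssize_t n;
 *
 *      while ((n = read(fd, buf, sizeof(buf))) > 0)
 *              write(STDOUT_FILENO, buf, n);   // stream events as they arrive
 */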
6377
6378 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6379                                      unsigned int idx)
6380 {
6381         __free_page(spd->pages[idx]);
6382 }
6383
6384 static size_t
6385 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6386 {
6387         size_t count;
6388         int save_len;
6389         int ret;
6390
6391         /* Seq buffer is page-sized, exactly what we need. */
6392         for (;;) {
6393                 save_len = iter->seq.seq.len;
6394                 ret = print_trace_line(iter);
6395
6396                 if (trace_seq_has_overflowed(&iter->seq)) {
6397                         iter->seq.seq.len = save_len;
6398                         break;
6399                 }
6400
6401                 /*
6402                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6403                  * should only be returned if iter->seq overflowed. But
6404                  * check it anyway to be safe.
6405                  */
6406                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6407                         iter->seq.seq.len = save_len;
6408                         break;
6409                 }
6410
6411                 count = trace_seq_used(&iter->seq) - save_len;
6412                 if (rem < count) {
6413                         rem = 0;
6414                         iter->seq.seq.len = save_len;
6415                         break;
6416                 }
6417
6418                 if (ret != TRACE_TYPE_NO_CONSUME)
6419                         trace_consume(iter);
6420                 rem -= count;
6421                 if (!trace_find_next_entry_inc(iter))   {
6422                         rem = 0;
6423                         iter->ent = NULL;
6424                         break;
6425                 }
6426         }
6427
6428         return rem;
6429 }
6430
6431 static ssize_t tracing_splice_read_pipe(struct file *filp,
6432                                         loff_t *ppos,
6433                                         struct pipe_inode_info *pipe,
6434                                         size_t len,
6435                                         unsigned int flags)
6436 {
6437         struct page *pages_def[PIPE_DEF_BUFFERS];
6438         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6439         struct trace_iterator *iter = filp->private_data;
6440         struct splice_pipe_desc spd = {
6441                 .pages          = pages_def,
6442                 .partial        = partial_def,
6443                 .nr_pages       = 0, /* This gets updated below. */
6444                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6445                 .ops            = &default_pipe_buf_ops,
6446                 .spd_release    = tracing_spd_release_pipe,
6447         };
6448         ssize_t ret;
6449         size_t rem;
6450         unsigned int i;
6451
6452         if (splice_grow_spd(pipe, &spd))
6453                 return -ENOMEM;
6454
6455         mutex_lock(&iter->mutex);
6456
6457         if (iter->trace->splice_read) {
6458                 ret = iter->trace->splice_read(iter, filp,
6459                                                ppos, pipe, len, flags);
6460                 if (ret)
6461                         goto out_err;
6462         }
6463
6464         ret = tracing_wait_pipe(filp);
6465         if (ret <= 0)
6466                 goto out_err;
6467
6468         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6469                 ret = -EFAULT;
6470                 goto out_err;
6471         }
6472
6473         trace_event_read_lock();
6474         trace_access_lock(iter->cpu_file);
6475
6476         /* Fill as many pages as possible. */
6477         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6478                 spd.pages[i] = alloc_page(GFP_KERNEL);
6479                 if (!spd.pages[i])
6480                         break;
6481
6482                 rem = tracing_fill_pipe_page(rem, iter);
6483
6484                 /* Copy the data into the page, so we can start over. */
6485                 ret = trace_seq_to_buffer(&iter->seq,
6486                                           page_address(spd.pages[i]),
6487                                           trace_seq_used(&iter->seq));
6488                 if (ret < 0) {
6489                         __free_page(spd.pages[i]);
6490                         break;
6491                 }
6492                 spd.partial[i].offset = 0;
6493                 spd.partial[i].len = trace_seq_used(&iter->seq);
6494
6495                 trace_seq_init(&iter->seq);
6496         }
6497
6498         trace_access_unlock(iter->cpu_file);
6499         trace_event_read_unlock();
6500         mutex_unlock(&iter->mutex);
6501
6502         spd.nr_pages = i;
6503
6504         if (i)
6505                 ret = splice_to_pipe(pipe, &spd);
6506         else
6507                 ret = 0;
6508 out:
6509         splice_shrink_spd(&spd);
6510         return ret;
6511
6512 out_err:
6513         mutex_unlock(&iter->mutex);
6514         goto out;
6515 }
6516
6517 static ssize_t
6518 tracing_entries_read(struct file *filp, char __user *ubuf,
6519                      size_t cnt, loff_t *ppos)
6520 {
6521         struct inode *inode = file_inode(filp);
6522         struct trace_array *tr = inode->i_private;
6523         int cpu = tracing_get_cpu(inode);
6524         char buf[64];
6525         int r = 0;
6526         ssize_t ret;
6527
6528         mutex_lock(&trace_types_lock);
6529
6530         if (cpu == RING_BUFFER_ALL_CPUS) {
6531                 int cpu, buf_size_same;
6532                 unsigned long size;
6533
6534                 size = 0;
6535                 buf_size_same = 1;
6536                 /* check if all cpu sizes are the same */
6537                 for_each_tracing_cpu(cpu) {
6538                         /* fill in the size from the first enabled cpu */
6539                         if (size == 0)
6540                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6541                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6542                                 buf_size_same = 0;
6543                                 break;
6544                         }
6545                 }
6546
6547                 if (buf_size_same) {
6548                         if (!ring_buffer_expanded)
6549                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6550                                             size >> 10,
6551                                             trace_buf_size >> 10);
6552                         else
6553                                 r = sprintf(buf, "%lu\n", size >> 10);
6554                 } else
6555                         r = sprintf(buf, "X\n");
6556         } else
6557                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6558
6559         mutex_unlock(&trace_types_lock);
6560
6561         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6562         return ret;
6563 }
6564
6565 static ssize_t
6566 tracing_entries_write(struct file *filp, const char __user *ubuf,
6567                       size_t cnt, loff_t *ppos)
6568 {
6569         struct inode *inode = file_inode(filp);
6570         struct trace_array *tr = inode->i_private;
6571         unsigned long val;
6572         int ret;
6573
6574         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6575         if (ret)
6576                 return ret;
6577
6578         /* must have at least 1 entry */
6579         if (!val)
6580                 return -EINVAL;
6581
6582         /* value is in KB */
6583         val <<= 10;
6584         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6585         if (ret < 0)
6586                 return ret;
6587
6588         *ppos += cnt;
6589
6590         return cnt;
6591 }
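
/*
 * Illustrative user-space sketch (not kernel code): resizing the ring buffer
 * through the file backed by tracing_entries_write().  This is typically
 * exposed as buffer_size_kb (and per_cpu/cpuN/buffer_size_kb for a single
 * CPU); those paths are assumptions here.  The written value is taken in
 * KiB, as the "val <<= 10" above shows.
 *
 *      int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *      write(fd, "4096", 4);   // request 4096 KiB of ring buffer per CPU
 */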
6592
6593 static ssize_t
6594 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6595                                 size_t cnt, loff_t *ppos)
6596 {
6597         struct trace_array *tr = filp->private_data;
6598         char buf[64];
6599         int r, cpu;
6600         unsigned long size = 0, expanded_size = 0;
6601
6602         mutex_lock(&trace_types_lock);
6603         for_each_tracing_cpu(cpu) {
6604                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6605                 if (!ring_buffer_expanded)
6606                         expanded_size += trace_buf_size >> 10;
6607         }
6608         if (ring_buffer_expanded)
6609                 r = sprintf(buf, "%lu\n", size);
6610         else
6611                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6612         mutex_unlock(&trace_types_lock);
6613
6614         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6615 }
6616
6617 static ssize_t
6618 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6619                           size_t cnt, loff_t *ppos)
6620 {
6621         /*
6622          * There is no need to read what the user has written; this function
6623          * exists only so that "echo" to this file does not return an error.
6624          */
6625
6626         *ppos += cnt;
6627
6628         return cnt;
6629 }
6630
6631 static int
6632 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6633 {
6634         struct trace_array *tr = inode->i_private;
6635
6636         /* disable tracing? */
6637         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6638                 tracer_tracing_off(tr);
6639         /* resize the ring buffer to 0 */
6640         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6641
6642         trace_array_put(tr);
6643
6644         return 0;
6645 }
6646
6647 static ssize_t
6648 tracing_mark_write(struct file *filp, const char __user *ubuf,
6649                                         size_t cnt, loff_t *fpos)
6650 {
6651         struct trace_array *tr = filp->private_data;
6652         struct ring_buffer_event *event;
6653         enum event_trigger_type tt = ETT_NONE;
6654         struct trace_buffer *buffer;
6655         struct print_entry *entry;
6656         unsigned long irq_flags;
6657         ssize_t written;
6658         int size;
6659         int len;
6660
6661 /* Used in tracing_mark_raw_write() as well */
6662 #define FAULTED_STR "<faulted>"
6663 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6664
6665         if (tracing_disabled)
6666                 return -EINVAL;
6667
6668         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6669                 return -EINVAL;
6670
6671         if (cnt > TRACE_BUF_SIZE)
6672                 cnt = TRACE_BUF_SIZE;
6673
6674         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6675
6676         local_save_flags(irq_flags);
6677         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6678
6679         /* If less than "<faulted>", then make sure we can still add that */
6680         if (cnt < FAULTED_SIZE)
6681                 size += FAULTED_SIZE - cnt;
6682
6683         buffer = tr->array_buffer.buffer;
6684         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6685                                             irq_flags, preempt_count());
6686         if (unlikely(!event))
6687                 /* Ring buffer disabled, return as if not open for write */
6688                 return -EBADF;
6689
6690         entry = ring_buffer_event_data(event);
6691         entry->ip = _THIS_IP_;
6692
6693         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6694         if (len) {
6695                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6696                 cnt = FAULTED_SIZE;
6697                 written = -EFAULT;
6698         } else
6699                 written = cnt;
6700
6701         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6702                 /* do not add \n before testing triggers, but add \0 */
6703                 entry->buf[cnt] = '\0';
6704                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6705         }
6706
6707         if (entry->buf[cnt - 1] != '\n') {
6708                 entry->buf[cnt] = '\n';
6709                 entry->buf[cnt + 1] = '\0';
6710         } else
6711                 entry->buf[cnt] = '\0';
6712
6713         if (static_branch_unlikely(&trace_marker_exports_enabled))
6714                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6715         __buffer_unlock_commit(buffer, event);
6716
6717         if (tt)
6718                 event_triggers_post_call(tr->trace_marker_file, tt);
6719
6720         if (written > 0)
6721                 *fpos += written;
6722
6723         return written;
6724 }
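
/*
 * Illustrative user-space sketch (not kernel code): injecting a text marker
 * through the file backed by tracing_mark_write() (conventionally
 * trace_marker; the path is an assumption).  The kernel appends a newline if
 * the write does not end with one, and records the string as a TRACE_PRINT
 * event.
 *
 *      int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *      const char *msg = "frame start";
 *
 *      write(fd, msg, strlen(msg));
 */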
6725
6726 /* Limit it for now to 3K (including tag) */
6727 #define RAW_DATA_MAX_SIZE (1024*3)
6728
6729 static ssize_t
6730 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6731                                         size_t cnt, loff_t *fpos)
6732 {
6733         struct trace_array *tr = filp->private_data;
6734         struct ring_buffer_event *event;
6735         struct trace_buffer *buffer;
6736         struct raw_data_entry *entry;
6737         unsigned long irq_flags;
6738         ssize_t written;
6739         int size;
6740         int len;
6741
6742 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6743
6744         if (tracing_disabled)
6745                 return -EINVAL;
6746
6747         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6748                 return -EINVAL;
6749
6750         /* The marker must at least have a tag id */
6751         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6752                 return -EINVAL;
6753
6754         if (cnt > TRACE_BUF_SIZE)
6755                 cnt = TRACE_BUF_SIZE;
6756
6757         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6758
6759         local_save_flags(irq_flags);
6760         size = sizeof(*entry) + cnt;
6761         if (cnt < FAULT_SIZE_ID)
6762                 size += FAULT_SIZE_ID - cnt;
6763
6764         buffer = tr->array_buffer.buffer;
6765         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6766                                             irq_flags, preempt_count());
6767         if (!event)
6768                 /* Ring buffer disabled, return as if not open for write */
6769                 return -EBADF;
6770
6771         entry = ring_buffer_event_data(event);
6772
6773         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6774         if (len) {
6775                 entry->id = -1;
6776                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6777                 written = -EFAULT;
6778         } else
6779                 written = cnt;
6780
6781         __buffer_unlock_commit(buffer, event);
6782
6783         if (written > 0)
6784                 *fpos += written;
6785
6786         return written;
6787 }
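
/*
 * Illustrative user-space sketch (not kernel code): writing a binary marker
 * through the file backed by tracing_mark_raw_write() (conventionally
 * trace_marker_raw; the path is an assumption).  As checked above, the write
 * must begin with an int tag id and may be followed by opaque payload bytes,
 * up to RAW_DATA_MAX_SIZE in total.
 *
 *      struct {
 *              unsigned int    id;
 *              char            payload[8];
 *      } rec = { .id = 42, .payload = "abcdefg" };
 *
 *      int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *      write(fd, &rec, sizeof(rec));
 */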
6788
6789 static int tracing_clock_show(struct seq_file *m, void *v)
6790 {
6791         struct trace_array *tr = m->private;
6792         int i;
6793
6794         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6795                 seq_printf(m,
6796                         "%s%s%s%s", i ? " " : "",
6797                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6798                         i == tr->clock_id ? "]" : "");
6799         seq_putc(m, '\n');
6800
6801         return 0;
6802 }
6803
6804 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6805 {
6806         int i;
6807
6808         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6809                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6810                         break;
6811         }
6812         if (i == ARRAY_SIZE(trace_clocks))
6813                 return -EINVAL;
6814
6815         mutex_lock(&trace_types_lock);
6816
6817         tr->clock_id = i;
6818
6819         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6820
6821         /*
6822          * New clock may not be consistent with the previous clock.
6823          * Reset the buffer so that it doesn't have incomparable timestamps.
6824          */
6825         tracing_reset_online_cpus(&tr->array_buffer);
6826
6827 #ifdef CONFIG_TRACER_MAX_TRACE
6828         if (tr->max_buffer.buffer)
6829                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6830         tracing_reset_online_cpus(&tr->max_buffer);
6831 #endif
6832
6833         mutex_unlock(&trace_types_lock);
6834
6835         return 0;
6836 }
6837
6838 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6839                                    size_t cnt, loff_t *fpos)
6840 {
6841         struct seq_file *m = filp->private_data;
6842         struct trace_array *tr = m->private;
6843         char buf[64];
6844         const char *clockstr;
6845         int ret;
6846
6847         if (cnt >= sizeof(buf))
6848                 return -EINVAL;
6849
6850         if (copy_from_user(buf, ubuf, cnt))
6851                 return -EFAULT;
6852
6853         buf[cnt] = 0;
6854
6855         clockstr = strstrip(buf);
6856
6857         ret = tracing_set_clock(tr, clockstr);
6858         if (ret)
6859                 return ret;
6860
6861         *fpos += cnt;
6862
6863         return cnt;
6864 }
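
/*
 * Illustrative user-space sketch (not kernel code): switching the trace clock
 * through the file backed by tracing_clock_write() (conventionally
 * trace_clock; the path is an assumption).  Reading the file lists the
 * available clocks with the current one in brackets, per tracing_clock_show();
 * writing a listed name selects it, and tracing_set_clock() resets the ring
 * buffer so timestamps remain comparable.
 *
 *      int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *      write(fd, "global", 6);         // e.g. select the "global" clock
 */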
6865
6866 static int tracing_clock_open(struct inode *inode, struct file *file)
6867 {
6868         struct trace_array *tr = inode->i_private;
6869         int ret;
6870
6871         ret = tracing_check_open_get_tr(tr);
6872         if (ret)
6873                 return ret;
6874
6875         ret = single_open(file, tracing_clock_show, inode->i_private);
6876         if (ret < 0)
6877                 trace_array_put(tr);
6878
6879         return ret;
6880 }
6881
6882 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6883 {
6884         struct trace_array *tr = m->private;
6885
6886         mutex_lock(&trace_types_lock);
6887
6888         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6889                 seq_puts(m, "delta [absolute]\n");
6890         else
6891                 seq_puts(m, "[delta] absolute\n");
6892
6893         mutex_unlock(&trace_types_lock);
6894
6895         return 0;
6896 }
6897
6898 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6899 {
6900         struct trace_array *tr = inode->i_private;
6901         int ret;
6902
6903         ret = tracing_check_open_get_tr(tr);
6904         if (ret)
6905                 return ret;
6906
6907         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6908         if (ret < 0)
6909                 trace_array_put(tr);
6910
6911         return ret;
6912 }
6913
6914 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6915 {
6916         int ret = 0;
6917
6918         mutex_lock(&trace_types_lock);
6919
6920         if (abs && tr->time_stamp_abs_ref++)
6921                 goto out;
6922
6923         if (!abs) {
6924                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6925                         ret = -EINVAL;
6926                         goto out;
6927                 }
6928
6929                 if (--tr->time_stamp_abs_ref)
6930                         goto out;
6931         }
6932
6933         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6934
6935 #ifdef CONFIG_TRACER_MAX_TRACE
6936         if (tr->max_buffer.buffer)
6937                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6938 #endif
6939  out:
6940         mutex_unlock(&trace_types_lock);
6941
6942         return ret;
6943 }
6944
6945 struct ftrace_buffer_info {
6946         struct trace_iterator   iter;
6947         void                    *spare;
6948         unsigned int            spare_cpu;
6949         unsigned int            read;
6950 };
6951
6952 #ifdef CONFIG_TRACER_SNAPSHOT
6953 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6954 {
6955         struct trace_array *tr = inode->i_private;
6956         struct trace_iterator *iter;
6957         struct seq_file *m;
6958         int ret;
6959
6960         ret = tracing_check_open_get_tr(tr);
6961         if (ret)
6962                 return ret;
6963
6964         if (file->f_mode & FMODE_READ) {
6965                 iter = __tracing_open(inode, file, true);
6966                 if (IS_ERR(iter))
6967                         ret = PTR_ERR(iter);
6968         } else {
6969                 /* Writes still need the seq_file to hold the private data */
6970                 ret = -ENOMEM;
6971                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6972                 if (!m)
6973                         goto out;
6974                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6975                 if (!iter) {
6976                         kfree(m);
6977                         goto out;
6978                 }
6979                 ret = 0;
6980
6981                 iter->tr = tr;
6982                 iter->array_buffer = &tr->max_buffer;
6983                 iter->cpu_file = tracing_get_cpu(inode);
6984                 m->private = iter;
6985                 file->private_data = m;
6986         }
6987 out:
6988         if (ret < 0)
6989                 trace_array_put(tr);
6990
6991         return ret;
6992 }
6993
6994 static ssize_t
6995 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6996                        loff_t *ppos)
6997 {
6998         struct seq_file *m = filp->private_data;
6999         struct trace_iterator *iter = m->private;
7000         struct trace_array *tr = iter->tr;
7001         unsigned long val;
7002         int ret;
7003
7004         ret = tracing_update_buffers();
7005         if (ret < 0)
7006                 return ret;
7007
7008         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7009         if (ret)
7010                 return ret;
7011
7012         mutex_lock(&trace_types_lock);
7013
7014         if (tr->current_trace->use_max_tr) {
7015                 ret = -EBUSY;
7016                 goto out;
7017         }
7018
7019         arch_spin_lock(&tr->max_lock);
7020         if (tr->cond_snapshot)
7021                 ret = -EBUSY;
7022         arch_spin_unlock(&tr->max_lock);
7023         if (ret)
7024                 goto out;
7025
7026         switch (val) {
7027         case 0:
7028                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7029                         ret = -EINVAL;
7030                         break;
7031                 }
7032                 if (tr->allocated_snapshot)
7033                         free_snapshot(tr);
7034                 break;
7035         case 1:
7036 /* Only allow per-cpu swap if the ring buffer supports it */
7037 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7038                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7039                         ret = -EINVAL;
7040                         break;
7041                 }
7042 #endif
7043                 if (tr->allocated_snapshot)
7044                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7045                                         &tr->array_buffer, iter->cpu_file);
7046                 else
7047                         ret = tracing_alloc_snapshot_instance(tr);
7048                 if (ret < 0)
7049                         break;
7050                 local_irq_disable();
7051                 /* Now, we're going to swap */
7052                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7053                         update_max_tr(tr, current, smp_processor_id(), NULL);
7054                 else
7055                         update_max_tr_single(tr, current, iter->cpu_file);
7056                 local_irq_enable();
7057                 break;
7058         default:
7059                 if (tr->allocated_snapshot) {
7060                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7061                                 tracing_reset_online_cpus(&tr->max_buffer);
7062                         else
7063                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7064                 }
7065                 break;
7066         }
7067
7068         if (ret >= 0) {
7069                 *ppos += cnt;
7070                 ret = cnt;
7071         }
7072 out:
7073         mutex_unlock(&trace_types_lock);
7074         return ret;
7075 }
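
/*
 * Illustrative user-space sketch (not kernel code): driving the snapshot file
 * backed by tracing_snapshot_write() (the tracefs path is an assumption).
 * Per the switch statement above: writing 1 allocates the max buffer if
 * needed and swaps it with the live buffer (takes a snapshot), writing 0
 * frees the snapshot buffer (only valid on the all-CPU file), and any other
 * value simply clears the snapshot buffer.
 *
 *      int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *      write(fd, "1", 1);      // take a snapshot of the live buffer
 *      write(fd, "2", 1);      // clear the snapshot buffer
 *      write(fd, "0", 1);      // free the snapshot buffer
 */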
7076
7077 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7078 {
7079         struct seq_file *m = file->private_data;
7080         int ret;
7081
7082         ret = tracing_release(inode, file);
7083
7084         if (file->f_mode & FMODE_READ)
7085                 return ret;
7086
7087         /* If write only, the seq_file is just a stub */
7088         if (m)
7089                 kfree(m->private);
7090         kfree(m);
7091
7092         return 0;
7093 }
7094
7095 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7096 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7097                                     size_t count, loff_t *ppos);
7098 static int tracing_buffers_release(struct inode *inode, struct file *file);
7099 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7100                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7101
7102 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7103 {
7104         struct ftrace_buffer_info *info;
7105         int ret;
7106
7107         /* The following checks for tracefs lockdown */
7108         ret = tracing_buffers_open(inode, filp);
7109         if (ret < 0)
7110                 return ret;
7111
7112         info = filp->private_data;
7113
7114         if (info->iter.trace->use_max_tr) {
7115                 tracing_buffers_release(inode, filp);
7116                 return -EBUSY;
7117         }
7118
7119         info->iter.snapshot = true;
7120         info->iter.array_buffer = &info->iter.tr->max_buffer;
7121
7122         return ret;
7123 }
7124
7125 #endif /* CONFIG_TRACER_SNAPSHOT */
7126
7127
7128 static const struct file_operations tracing_thresh_fops = {
7129         .open           = tracing_open_generic,
7130         .read           = tracing_thresh_read,
7131         .write          = tracing_thresh_write,
7132         .llseek         = generic_file_llseek,
7133 };
7134
7135 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7136 static const struct file_operations tracing_max_lat_fops = {
7137         .open           = tracing_open_generic,
7138         .read           = tracing_max_lat_read,
7139         .write          = tracing_max_lat_write,
7140         .llseek         = generic_file_llseek,
7141 };
7142 #endif
7143
7144 static const struct file_operations set_tracer_fops = {
7145         .open           = tracing_open_generic,
7146         .read           = tracing_set_trace_read,
7147         .write          = tracing_set_trace_write,
7148         .llseek         = generic_file_llseek,
7149 };
7150
7151 static const struct file_operations tracing_pipe_fops = {
7152         .open           = tracing_open_pipe,
7153         .poll           = tracing_poll_pipe,
7154         .read           = tracing_read_pipe,
7155         .splice_read    = tracing_splice_read_pipe,
7156         .release        = tracing_release_pipe,
7157         .llseek         = no_llseek,
7158 };
7159
7160 static const struct file_operations tracing_entries_fops = {
7161         .open           = tracing_open_generic_tr,
7162         .read           = tracing_entries_read,
7163         .write          = tracing_entries_write,
7164         .llseek         = generic_file_llseek,
7165         .release        = tracing_release_generic_tr,
7166 };
7167
7168 static const struct file_operations tracing_total_entries_fops = {
7169         .open           = tracing_open_generic_tr,
7170         .read           = tracing_total_entries_read,
7171         .llseek         = generic_file_llseek,
7172         .release        = tracing_release_generic_tr,
7173 };
7174
7175 static const struct file_operations tracing_free_buffer_fops = {
7176         .open           = tracing_open_generic_tr,
7177         .write          = tracing_free_buffer_write,
7178         .release        = tracing_free_buffer_release,
7179 };
7180
7181 static const struct file_operations tracing_mark_fops = {
7182         .open           = tracing_open_generic_tr,
7183         .write          = tracing_mark_write,
7184         .llseek         = generic_file_llseek,
7185         .release        = tracing_release_generic_tr,
7186 };
7187
7188 static const struct file_operations tracing_mark_raw_fops = {
7189         .open           = tracing_open_generic_tr,
7190         .write          = tracing_mark_raw_write,
7191         .llseek         = generic_file_llseek,
7192         .release        = tracing_release_generic_tr,
7193 };
7194
7195 static const struct file_operations trace_clock_fops = {
7196         .open           = tracing_clock_open,
7197         .read           = seq_read,
7198         .llseek         = seq_lseek,
7199         .release        = tracing_single_release_tr,
7200         .write          = tracing_clock_write,
7201 };
7202
7203 static const struct file_operations trace_time_stamp_mode_fops = {
7204         .open           = tracing_time_stamp_mode_open,
7205         .read           = seq_read,
7206         .llseek         = seq_lseek,
7207         .release        = tracing_single_release_tr,
7208 };
7209
7210 #ifdef CONFIG_TRACER_SNAPSHOT
7211 static const struct file_operations snapshot_fops = {
7212         .open           = tracing_snapshot_open,
7213         .read           = seq_read,
7214         .write          = tracing_snapshot_write,
7215         .llseek         = tracing_lseek,
7216         .release        = tracing_snapshot_release,
7217 };
7218
7219 static const struct file_operations snapshot_raw_fops = {
7220         .open           = snapshot_raw_open,
7221         .read           = tracing_buffers_read,
7222         .release        = tracing_buffers_release,
7223         .splice_read    = tracing_buffers_splice_read,
7224         .llseek         = no_llseek,
7225 };
7226
7227 #endif /* CONFIG_TRACER_SNAPSHOT */
7228
7229 #define TRACING_LOG_ERRS_MAX    8
7230 #define TRACING_LOG_LOC_MAX     128
7231
7232 #define CMD_PREFIX "  Command: "
7233
7234 struct err_info {
7235         const char      **errs; /* ptr to loc-specific array of err strings */
7236         u8              type;   /* index into errs -> specific err string */
7237         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7238         u64             ts;
7239 };
7240
7241 struct tracing_log_err {
7242         struct list_head        list;
7243         struct err_info         info;
7244         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7245         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7246 };
7247
7248 static DEFINE_MUTEX(tracing_err_log_lock);
7249
7250 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7251 {
7252         struct tracing_log_err *err;
7253
7254         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7255                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7256                 if (!err)
7257                         err = ERR_PTR(-ENOMEM);
7258                 tr->n_err_log_entries++;
7259
7260                 return err;
7261         }
7262
7263         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7264         list_del(&err->list);
7265
7266         return err;
7267 }
7268
7269 /**
7270  * err_pos - find the position of a string within a command for error careting
7271  * @cmd: The tracing command that caused the error
7272  * @str: The string to position the caret at within @cmd
7273  *
7274  * Finds the position of the first occurrence of @str within @cmd.  The
7275  * return value can be passed to tracing_log_err() for caret placement
7276  * within @cmd.
7277  *
7278  * Returns the index within @cmd of the first occurrence of @str or 0
7279  * if @str was not found.
7280  */
7281 unsigned int err_pos(char *cmd, const char *str)
7282 {
7283         char *found;
7284
7285         if (WARN_ON(!strlen(cmd)))
7286                 return 0;
7287
7288         found = strstr(cmd, str);
7289         if (found)
7290                 return found - cmd;
7291
7292         return 0;
7293 }
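
/*
 * Illustrative only (hypothetical command string): err_pos() returns the
 * offset of the offending token so the error_log caret lines up under it.
 *
 *      char cmd[] = "hist:keys=bogus";
 *
 *      err_pos(cmd, "bogus");  // returns 10, the index of "bogus" in cmd
 */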
7294
7295 /**
7296  * tracing_log_err - write an error to the tracing error log
7297  * @tr: The associated trace array for the error (NULL for top level array)
7298  * @loc: A string describing where the error occurred
7299  * @cmd: The tracing command that caused the error
7300  * @errs: The array of loc-specific static error strings
7301  * @type: The index into errs[], which produces the specific static err string
7302  * @pos: The position the caret should be placed in the cmd
7303  *
7304  * Writes an error into tracing/error_log of the form:
7305  *
7306  * <loc>: error: <text>
7307  *   Command: <cmd>
7308  *              ^
7309  *
7310  * tracing/error_log is a small log file containing the last
7311  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7312  * unless there has been a tracing error, and the error log can be
7313  * cleared and have its memory freed by writing the empty string in
7314  * truncation mode to it, i.e. echo > tracing/error_log.
7315  *
7316  * NOTE: the @errs array along with the @type param are used to
7317  * produce a static error string - this string is not copied and saved
7318  * when the error is logged - only a pointer to it is saved.  See
7319  * existing callers for examples of how static strings are typically
7320  * defined for use with tracing_log_err().
7321  */
7322 void tracing_log_err(struct trace_array *tr,
7323                      const char *loc, const char *cmd,
7324                      const char **errs, u8 type, u8 pos)
7325 {
7326         struct tracing_log_err *err;
7327
7328         if (!tr)
7329                 tr = &global_trace;
7330
7331         mutex_lock(&tracing_err_log_lock);
7332         err = get_tracing_log_err(tr);
7333         if (PTR_ERR(err) == -ENOMEM) {
7334                 mutex_unlock(&tracing_err_log_lock);
7335                 return;
7336         }
7337
7338         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7339         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7340
7341         err->info.errs = errs;
7342         err->info.type = type;
7343         err->info.pos = pos;
7344         err->info.ts = local_clock();
7345
7346         list_add_tail(&err->list, &tr->err_log);
7347         mutex_unlock(&tracing_err_log_lock);
7348 }
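
/*
 * Illustrative only (hypothetical names, not an actual caller): how a tracing
 * command parser might report an error.  The my_cmd_errs[] array and its
 * index are made up for this sketch; see the real callers of
 * tracing_log_err() for the actual conventions.
 *
 *      static const char *my_cmd_errs[] = {
 *              "Unknown key field",            // index 0
 *      };
 *
 *      char cmd[] = "hist:keys=bogus";
 *
 *      tracing_log_err(tr, "hist:sched:sched_switch", cmd,
 *                      my_cmd_errs, 0, err_pos(cmd, "bogus"));
 *
 * This logs "hist:sched:sched_switch: error: Unknown key field" followed by
 * the command, placing the caret under "bogus" in the Command line.
 */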
7349
7350 static void clear_tracing_err_log(struct trace_array *tr)
7351 {
7352         struct tracing_log_err *err, *next;
7353
7354         mutex_lock(&tracing_err_log_lock);
7355         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7356                 list_del(&err->list);
7357                 kfree(err);
7358         }
7359
7360         tr->n_err_log_entries = 0;
7361         mutex_unlock(&tracing_err_log_lock);
7362 }
7363
7364 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7365 {
7366         struct trace_array *tr = m->private;
7367
7368         mutex_lock(&tracing_err_log_lock);
7369
7370         return seq_list_start(&tr->err_log, *pos);
7371 }
7372
7373 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7374 {
7375         struct trace_array *tr = m->private;
7376
7377         return seq_list_next(v, &tr->err_log, pos);
7378 }
7379
7380 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7381 {
7382         mutex_unlock(&tracing_err_log_lock);
7383 }
7384
7385 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7386 {
7387         u8 i;
7388
7389         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7390                 seq_putc(m, ' ');
7391         for (i = 0; i < pos; i++)
7392                 seq_putc(m, ' ');
7393         seq_puts(m, "^\n");
7394 }
7395
7396 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7397 {
7398         struct tracing_log_err *err = v;
7399
7400         if (err) {
7401                 const char *err_text = err->info.errs[err->info.type];
7402                 u64 sec = err->info.ts;
7403                 u32 nsec;
7404
7405                 nsec = do_div(sec, NSEC_PER_SEC);
7406                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7407                            err->loc, err_text);
7408                 seq_printf(m, "%s", err->cmd);
7409                 tracing_err_log_show_pos(m, err->info.pos);
7410         }
7411
7412         return 0;
7413 }
7414
7415 static const struct seq_operations tracing_err_log_seq_ops = {
7416         .start  = tracing_err_log_seq_start,
7417         .next   = tracing_err_log_seq_next,
7418         .stop   = tracing_err_log_seq_stop,
7419         .show   = tracing_err_log_seq_show
7420 };
7421
7422 static int tracing_err_log_open(struct inode *inode, struct file *file)
7423 {
7424         struct trace_array *tr = inode->i_private;
7425         int ret = 0;
7426
7427         ret = tracing_check_open_get_tr(tr);
7428         if (ret)
7429                 return ret;
7430
7431         /* If this file was opened for write, then erase contents */
7432         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7433                 clear_tracing_err_log(tr);
7434
7435         if (file->f_mode & FMODE_READ) {
7436                 ret = seq_open(file, &tracing_err_log_seq_ops);
7437                 if (!ret) {
7438                         struct seq_file *m = file->private_data;
7439                         m->private = tr;
7440                 } else {
7441                         trace_array_put(tr);
7442                 }
7443         }
7444         return ret;
7445 }
7446
7447 static ssize_t tracing_err_log_write(struct file *file,
7448                                      const char __user *buffer,
7449                                      size_t count, loff_t *ppos)
7450 {
7451         return count;
7452 }
7453
7454 static int tracing_err_log_release(struct inode *inode, struct file *file)
7455 {
7456         struct trace_array *tr = inode->i_private;
7457
7458         trace_array_put(tr);
7459
7460         if (file->f_mode & FMODE_READ)
7461                 seq_release(inode, file);
7462
7463         return 0;
7464 }
7465
7466 static const struct file_operations tracing_err_log_fops = {
7467         .open           = tracing_err_log_open,
7468         .write          = tracing_err_log_write,
7469         .read           = seq_read,
7470         .llseek         = seq_lseek,
7471         .release        = tracing_err_log_release,
7472 };
7473
7474 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7475 {
7476         struct trace_array *tr = inode->i_private;
7477         struct ftrace_buffer_info *info;
7478         int ret;
7479
7480         ret = tracing_check_open_get_tr(tr);
7481         if (ret)
7482                 return ret;
7483
7484         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7485         if (!info) {
7486                 trace_array_put(tr);
7487                 return -ENOMEM;
7488         }
7489
7490         mutex_lock(&trace_types_lock);
7491
7492         info->iter.tr           = tr;
7493         info->iter.cpu_file     = tracing_get_cpu(inode);
7494         info->iter.trace        = tr->current_trace;
7495         info->iter.array_buffer = &tr->array_buffer;
7496         info->spare             = NULL;
7497         /* Force reading ring buffer for first read */
7498         info->read              = (unsigned int)-1;
7499
7500         filp->private_data = info;
7501
7502         tr->trace_ref++;
7503
7504         mutex_unlock(&trace_types_lock);
7505
7506         ret = nonseekable_open(inode, filp);
7507         if (ret < 0)
7508                 trace_array_put(tr);
7509
7510         return ret;
7511 }
7512
7513 static __poll_t
7514 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7515 {
7516         struct ftrace_buffer_info *info = filp->private_data;
7517         struct trace_iterator *iter = &info->iter;
7518
7519         return trace_poll(iter, filp, poll_table);
7520 }
7521
7522 static ssize_t
7523 tracing_buffers_read(struct file *filp, char __user *ubuf,
7524                      size_t count, loff_t *ppos)
7525 {
7526         struct ftrace_buffer_info *info = filp->private_data;
7527         struct trace_iterator *iter = &info->iter;
7528         ssize_t ret = 0;
7529         ssize_t size;
7530
7531         if (!count)
7532                 return 0;
7533
7534 #ifdef CONFIG_TRACER_MAX_TRACE
7535         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7536                 return -EBUSY;
7537 #endif
7538
7539         if (!info->spare) {
7540                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7541                                                           iter->cpu_file);
7542                 if (IS_ERR(info->spare)) {
7543                         ret = PTR_ERR(info->spare);
7544                         info->spare = NULL;
7545                 } else {
7546                         info->spare_cpu = iter->cpu_file;
7547                 }
7548         }
7549         if (!info->spare)
7550                 return ret;
7551
7552         /* Do we have previous read data to read? */
7553         if (info->read < PAGE_SIZE)
7554                 goto read;
7555
7556  again:
7557         trace_access_lock(iter->cpu_file);
7558         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7559                                     &info->spare,
7560                                     count,
7561                                     iter->cpu_file, 0);
7562         trace_access_unlock(iter->cpu_file);
7563
7564         if (ret < 0) {
7565                 if (trace_empty(iter)) {
7566                         if ((filp->f_flags & O_NONBLOCK))
7567                                 return -EAGAIN;
7568
7569                         ret = wait_on_pipe(iter, 0);
7570                         if (ret)
7571                                 return ret;
7572
7573                         goto again;
7574                 }
7575                 return 0;
7576         }
7577
7578         info->read = 0;
7579  read:
7580         size = PAGE_SIZE - info->read;
7581         if (size > count)
7582                 size = count;
7583
7584         ret = copy_to_user(ubuf, info->spare + info->read, size);
7585         if (ret == size)
7586                 return -EFAULT;
7587
7588         size -= ret;
7589
7590         *ppos += size;
7591         info->read += size;
7592
7593         return size;
7594 }
7595
7596 static int tracing_buffers_release(struct inode *inode, struct file *file)
7597 {
7598         struct ftrace_buffer_info *info = file->private_data;
7599         struct trace_iterator *iter = &info->iter;
7600
7601         mutex_lock(&trace_types_lock);
7602
7603         iter->tr->trace_ref--;
7604
7605         __trace_array_put(iter->tr);
7606
7607         if (info->spare)
7608                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7609                                            info->spare_cpu, info->spare);
7610         kvfree(info);
7611
7612         mutex_unlock(&trace_types_lock);
7613
7614         return 0;
7615 }
7616
7617 struct buffer_ref {
7618         struct trace_buffer     *buffer;
7619         void                    *page;
7620         int                     cpu;
7621         refcount_t              refcount;
7622 };
7623
7624 static void buffer_ref_release(struct buffer_ref *ref)
7625 {
7626         if (!refcount_dec_and_test(&ref->refcount))
7627                 return;
7628         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7629         kfree(ref);
7630 }
7631
7632 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7633                                     struct pipe_buffer *buf)
7634 {
7635         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7636
7637         buffer_ref_release(ref);
7638         buf->private = 0;
7639 }
7640
7641 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7642                                 struct pipe_buffer *buf)
7643 {
7644         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7645
7646         if (refcount_read(&ref->refcount) > INT_MAX/2)
7647                 return false;
7648
7649         refcount_inc(&ref->refcount);
7650         return true;
7651 }
7652
7653 /* Pipe buffer operations for a buffer. */
7654 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7655         .release                = buffer_pipe_buf_release,
7656         .get                    = buffer_pipe_buf_get,
7657 };
7658
7659 /*
7660  * Callback from splice_to_pipe(), if we need to release some pages
7661  * at the end of the spd in case we errored out in filling the pipe.
7662  */
7663 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7664 {
7665         struct buffer_ref *ref =
7666                 (struct buffer_ref *)spd->partial[i].private;
7667
7668         buffer_ref_release(ref);
7669         spd->partial[i].private = 0;
7670 }
7671
7672 static ssize_t
7673 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7674                             struct pipe_inode_info *pipe, size_t len,
7675                             unsigned int flags)
7676 {
7677         struct ftrace_buffer_info *info = file->private_data;
7678         struct trace_iterator *iter = &info->iter;
7679         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7680         struct page *pages_def[PIPE_DEF_BUFFERS];
7681         struct splice_pipe_desc spd = {
7682                 .pages          = pages_def,
7683                 .partial        = partial_def,
7684                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7685                 .ops            = &buffer_pipe_buf_ops,
7686                 .spd_release    = buffer_spd_release,
7687         };
7688         struct buffer_ref *ref;
7689         int entries, i;
7690         ssize_t ret = 0;
7691
7692 #ifdef CONFIG_TRACER_MAX_TRACE
7693         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7694                 return -EBUSY;
7695 #endif
7696
7697         if (*ppos & (PAGE_SIZE - 1))
7698                 return -EINVAL;
7699
7700         if (len & (PAGE_SIZE - 1)) {
7701                 if (len < PAGE_SIZE)
7702                         return -EINVAL;
7703                 len &= PAGE_MASK;
7704         }
7705
7706         if (splice_grow_spd(pipe, &spd))
7707                 return -ENOMEM;
7708
7709  again:
7710         trace_access_lock(iter->cpu_file);
7711         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7712
7713         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7714                 struct page *page;
7715                 int r;
7716
7717                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7718                 if (!ref) {
7719                         ret = -ENOMEM;
7720                         break;
7721                 }
7722
7723                 refcount_set(&ref->refcount, 1);
7724                 ref->buffer = iter->array_buffer->buffer;
7725                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7726                 if (IS_ERR(ref->page)) {
7727                         ret = PTR_ERR(ref->page);
7728                         ref->page = NULL;
7729                         kfree(ref);
7730                         break;
7731                 }
7732                 ref->cpu = iter->cpu_file;
7733
7734                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7735                                           len, iter->cpu_file, 1);
7736                 if (r < 0) {
7737                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7738                                                    ref->page);
7739                         kfree(ref);
7740                         break;
7741                 }
7742
7743                 page = virt_to_page(ref->page);
7744
7745                 spd.pages[i] = page;
7746                 spd.partial[i].len = PAGE_SIZE;
7747                 spd.partial[i].offset = 0;
7748                 spd.partial[i].private = (unsigned long)ref;
7749                 spd.nr_pages++;
7750                 *ppos += PAGE_SIZE;
7751
7752                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7753         }
7754
7755         trace_access_unlock(iter->cpu_file);
7756         spd.nr_pages = i;
7757
7758         /* did we read anything? */
7759         if (!spd.nr_pages) {
7760                 if (ret)
7761                         goto out;
7762
7763                 ret = -EAGAIN;
7764                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7765                         goto out;
7766
7767                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7768                 if (ret)
7769                         goto out;
7770
7771                 goto again;
7772         }
7773
7774         ret = splice_to_pipe(pipe, &spd);
7775 out:
7776         splice_shrink_spd(&spd);
7777
7778         return ret;
7779 }
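
/*
 * Illustrative user-space sketch (not kernel code): pulling raw, page-sized
 * ring buffer data out through splice(), as served by
 * tracing_buffers_splice_read().  The per-CPU trace_pipe_raw path is an
 * assumption here.  As the checks above require, the requested length must
 * be a multiple of the page size (4096 is assumed below).
 *
 *      int trace_fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                          O_RDONLY);
 *      int out_fd = open("cpu0.raw", O_WRONLY | O_CREAT, 0644);
 *      int pfd[2];
 *
 *      pipe(pfd);
 *      // Move one page of raw data: ring buffer -> pipe -> output file.
 *      splice(trace_fd, NULL, pfd[1], NULL, 4096, 0);
 *      splice(pfd[0], NULL, out_fd, NULL, 4096, 0);
 */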
7780
7781 static const struct file_operations tracing_buffers_fops = {
7782         .open           = tracing_buffers_open,
7783         .read           = tracing_buffers_read,
7784         .poll           = tracing_buffers_poll,
7785         .release        = tracing_buffers_release,
7786         .splice_read    = tracing_buffers_splice_read,
7787         .llseek         = no_llseek,
7788 };
7789
7790 static ssize_t
7791 tracing_stats_read(struct file *filp, char __user *ubuf,
7792                    size_t count, loff_t *ppos)
7793 {
7794         struct inode *inode = file_inode(filp);
7795         struct trace_array *tr = inode->i_private;
7796         struct array_buffer *trace_buf = &tr->array_buffer;
7797         int cpu = tracing_get_cpu(inode);
7798         struct trace_seq *s;
7799         unsigned long cnt;
7800         unsigned long long t;
7801         unsigned long usec_rem;
7802
7803         s = kmalloc(sizeof(*s), GFP_KERNEL);
7804         if (!s)
7805                 return -ENOMEM;
7806
7807         trace_seq_init(s);
7808
7809         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7810         trace_seq_printf(s, "entries: %ld\n", cnt);
7811
7812         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7813         trace_seq_printf(s, "overrun: %ld\n", cnt);
7814
7815         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7816         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7817
7818         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7819         trace_seq_printf(s, "bytes: %ld\n", cnt);
7820
7821         if (trace_clocks[tr->clock_id].in_ns) {
7822                 /* local or global for trace_clock */
7823                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7824                 usec_rem = do_div(t, USEC_PER_SEC);
7825                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7826                                                                 t, usec_rem);
7827
7828                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7829                 usec_rem = do_div(t, USEC_PER_SEC);
7830                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7831         } else {
7832                 /* counter or tsc mode for trace_clock */
7833                 trace_seq_printf(s, "oldest event ts: %llu\n",
7834                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7835
7836                 trace_seq_printf(s, "now ts: %llu\n",
7837                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7838         }
7839
7840         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7841         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7842
7843         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7844         trace_seq_printf(s, "read events: %ld\n", cnt);
7845
7846         count = simple_read_from_buffer(ubuf, count, ppos,
7847                                         s->buffer, trace_seq_used(s));
7848
7849         kfree(s);
7850
7851         return count;
7852 }
7853
7854 static const struct file_operations tracing_stats_fops = {
7855         .open           = tracing_open_generic_tr,
7856         .read           = tracing_stats_read,
7857         .llseek         = generic_file_llseek,
7858         .release        = tracing_release_generic_tr,
7859 };
7860
7861 #ifdef CONFIG_DYNAMIC_FTRACE
7862
7863 static ssize_t
7864 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7865                   size_t cnt, loff_t *ppos)
7866 {
7867         ssize_t ret;
7868         char *buf;
7869         int r;
7870
7871         /* 256 should be plenty to hold the amount needed */
7872         buf = kmalloc(256, GFP_KERNEL);
7873         if (!buf)
7874                 return -ENOMEM;
7875
7876         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7877                       ftrace_update_tot_cnt,
7878                       ftrace_number_of_pages,
7879                       ftrace_number_of_groups);
7880
7881         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7882         kfree(buf);
7883         return ret;
7884 }
7885
7886 static const struct file_operations tracing_dyn_info_fops = {
7887         .open           = tracing_open_generic,
7888         .read           = tracing_read_dyn_info,
7889         .llseek         = generic_file_llseek,
7890 };
7891 #endif /* CONFIG_DYNAMIC_FTRACE */
7892
7893 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7894 static void
7895 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7896                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7897                 void *data)
7898 {
7899         tracing_snapshot_instance(tr);
7900 }
7901
7902 static void
7903 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7904                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7905                       void *data)
7906 {
7907         struct ftrace_func_mapper *mapper = data;
7908         long *count = NULL;
7909
7910         if (mapper)
7911                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7912
7913         if (count) {
7914
7915                 if (*count <= 0)
7916                         return;
7917
7918                 (*count)--;
7919         }
7920
7921         tracing_snapshot_instance(tr);
7922 }
7923
7924 static int
7925 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7926                       struct ftrace_probe_ops *ops, void *data)
7927 {
7928         struct ftrace_func_mapper *mapper = data;
7929         long *count = NULL;
7930
7931         seq_printf(m, "%ps:", (void *)ip);
7932
7933         seq_puts(m, "snapshot");
7934
7935         if (mapper)
7936                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7937
7938         if (count)
7939                 seq_printf(m, ":count=%ld\n", *count);
7940         else
7941                 seq_puts(m, ":unlimited\n");
7942
7943         return 0;
7944 }
7945
7946 static int
7947 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7948                      unsigned long ip, void *init_data, void **data)
7949 {
7950         struct ftrace_func_mapper *mapper = *data;
7951
7952         if (!mapper) {
7953                 mapper = allocate_ftrace_func_mapper();
7954                 if (!mapper)
7955                         return -ENOMEM;
7956                 *data = mapper;
7957         }
7958
7959         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7960 }
7961
7962 static void
7963 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7964                      unsigned long ip, void *data)
7965 {
7966         struct ftrace_func_mapper *mapper = data;
7967
7968         if (!ip) {
7969                 if (!mapper)
7970                         return;
7971                 free_ftrace_func_mapper(mapper, NULL);
7972                 return;
7973         }
7974
7975         ftrace_func_mapper_remove_ip(mapper, ip);
7976 }
7977
7978 static struct ftrace_probe_ops snapshot_probe_ops = {
7979         .func                   = ftrace_snapshot,
7980         .print                  = ftrace_snapshot_print,
7981 };
7982
7983 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7984         .func                   = ftrace_count_snapshot,
7985         .print                  = ftrace_snapshot_print,
7986         .init                   = ftrace_snapshot_init,
7987         .free                   = ftrace_snapshot_free,
7988 };
7989
7990 static int
7991 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7992                                char *glob, char *cmd, char *param, int enable)
7993 {
7994         struct ftrace_probe_ops *ops;
7995         void *count = (void *)-1;
7996         char *number;
7997         int ret;
7998
7999         if (!tr)
8000                 return -ENODEV;
8001
8002         /* hash funcs only work with set_ftrace_filter */
8003         if (!enable)
8004                 return -EINVAL;
8005
8006         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8007
8008         if (glob[0] == '!')
8009                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8010
8011         if (!param)
8012                 goto out_reg;
8013
8014         number = strsep(&param, ":");
8015
8016         if (!strlen(number))
8017                 goto out_reg;
8018
8019         /*
8020          * We use the callback data field (which is a pointer)
8021          * as our counter.
8022          */
8023         ret = kstrtoul(number, 0, (unsigned long *)&count);
8024         if (ret)
8025                 return ret;
8026
8027  out_reg:
8028         ret = tracing_alloc_snapshot_instance(tr);
8029         if (ret < 0)
8030                 goto out;
8031
8032         ret = register_ftrace_function_probe(glob, tr, ops, count);
8033
8034  out:
8035         return ret < 0 ? ret : 0;
8036 }
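/*
 * Usage sketch added for illustration (not part of the original file):
 * the "snapshot" command handled above is driven through
 * set_ftrace_filter, e.g.:
 *
 *	# echo 'schedule:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *	# echo '!schedule:snapshot:0' >> /sys/kernel/tracing/set_ftrace_filter
 *
 * The first line snapshots only on the first three hits of schedule()
 * (the count decremented in ftrace_count_snapshot()); omitting ":3"
 * snapshots on every hit. The '!' line unregisters the probe, matching
 * the glob[0] == '!' check above. schedule() is just an example target.
 */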
8037
8038 static struct ftrace_func_command ftrace_snapshot_cmd = {
8039         .name                   = "snapshot",
8040         .func                   = ftrace_trace_snapshot_callback,
8041 };
8042
8043 static __init int register_snapshot_cmd(void)
8044 {
8045         return register_ftrace_command(&ftrace_snapshot_cmd);
8046 }
8047 #else
8048 static inline __init int register_snapshot_cmd(void) { return 0; }
8049 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8050
8051 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8052 {
8053         if (WARN_ON(!tr->dir))
8054                 return ERR_PTR(-ENODEV);
8055
8056         /* Top directory uses NULL as the parent */
8057         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8058                 return NULL;
8059
8060         /* All sub buffers have a descriptor */
8061         return tr->dir;
8062 }
8063
8064 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8065 {
8066         struct dentry *d_tracer;
8067
8068         if (tr->percpu_dir)
8069                 return tr->percpu_dir;
8070
8071         d_tracer = tracing_get_dentry(tr);
8072         if (IS_ERR(d_tracer))
8073                 return NULL;
8074
8075         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8076
8077         MEM_FAIL(!tr->percpu_dir,
8078                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8079
8080         return tr->percpu_dir;
8081 }
8082
8083 static struct dentry *
8084 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8085                       void *data, long cpu, const struct file_operations *fops)
8086 {
8087         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8088
8089         if (ret) /* See tracing_get_cpu() */
8090                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8091         return ret;
8092 }
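/*
 * Worked example added for clarity (not part of the original file):
 * for cpu 2 the assignment above stores (void *)3 in i_cdev. The reader
 * side, tracing_get_cpu() in trace.h, is expected to undo the encoding
 * along these lines:
 *
 *	static inline int tracing_get_cpu(struct inode *inode)
 *	{
 *		if (inode->i_cdev)
 *			return (long)inode->i_cdev - 1;
 *		return RING_BUFFER_ALL_CPUS;
 *	}
 *
 * The +1/-1 shift keeps cpu 0 distinguishable from a NULL i_cdev, which
 * stands for "all CPUs".
 */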
8093
8094 static void
8095 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8096 {
8097         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8098         struct dentry *d_cpu;
8099         char cpu_dir[30]; /* 30 characters should be more than enough */
8100
8101         if (!d_percpu)
8102                 return;
8103
8104         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8105         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8106         if (!d_cpu) {
8107                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8108                 return;
8109         }
8110
8111         /* per cpu trace_pipe */
8112         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8113                                 tr, cpu, &tracing_pipe_fops);
8114
8115         /* per cpu trace */
8116         trace_create_cpu_file("trace", 0644, d_cpu,
8117                                 tr, cpu, &tracing_fops);
8118
8119         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8120                                 tr, cpu, &tracing_buffers_fops);
8121
8122         trace_create_cpu_file("stats", 0444, d_cpu,
8123                                 tr, cpu, &tracing_stats_fops);
8124
8125         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8126                                 tr, cpu, &tracing_entries_fops);
8127
8128 #ifdef CONFIG_TRACER_SNAPSHOT
8129         trace_create_cpu_file("snapshot", 0644, d_cpu,
8130                                 tr, cpu, &snapshot_fops);
8131
8132         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8133                                 tr, cpu, &snapshot_raw_fops);
8134 #endif
8135 }
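/*
 * Resulting layout, shown for illustration (not part of the original
 * file): for each tracing CPU the function above creates a directory
 * such as:
 *
 *	/sys/kernel/tracing/per_cpu/cpu0/
 *		trace  trace_pipe  trace_pipe_raw  stats  buffer_size_kb
 *		snapshot  snapshot_raw	(only with CONFIG_TRACER_SNAPSHOT)
 *
 * so e.g. "cat per_cpu/cpu0/trace" shows only the events recorded on
 * CPU 0.
 */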
8136
8137 #ifdef CONFIG_FTRACE_SELFTEST
8138 /* Let selftest have access to static functions in this file */
8139 #include "trace_selftest.c"
8140 #endif
8141
8142 static ssize_t
8143 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8144                         loff_t *ppos)
8145 {
8146         struct trace_option_dentry *topt = filp->private_data;
8147         char *buf;
8148
8149         if (topt->flags->val & topt->opt->bit)
8150                 buf = "1\n";
8151         else
8152                 buf = "0\n";
8153
8154         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8155 }
8156
8157 static ssize_t
8158 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8159                          loff_t *ppos)
8160 {
8161         struct trace_option_dentry *topt = filp->private_data;
8162         unsigned long val;
8163         int ret;
8164
8165         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8166         if (ret)
8167                 return ret;
8168
8169         if (val != 0 && val != 1)
8170                 return -EINVAL;
8171
8172         if (!!(topt->flags->val & topt->opt->bit) != val) {
8173                 mutex_lock(&trace_types_lock);
8174                 ret = __set_tracer_option(topt->tr, topt->flags,
8175                                           topt->opt, !val);
8176                 mutex_unlock(&trace_types_lock);
8177                 if (ret)
8178                         return ret;
8179         }
8180
8181         *ppos += cnt;
8182
8183         return cnt;
8184 }
8185
8186
8187 static const struct file_operations trace_options_fops = {
8188         .open = tracing_open_generic,
8189         .read = trace_options_read,
8190         .write = trace_options_write,
8191         .llseek = generic_file_llseek,
8192 };
8193
8194 /*
8195  * In order to pass in both the trace_array descriptor as well as the index
8196  * to the flag that the trace option file represents, the trace_array
8197  * has a character array of trace_flags_index[], which holds the index
8198  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8199  * The address of this character array is passed to the flag option file
8200  * read/write callbacks.
8201  *
8202  * In order to extract both the index and the trace_array descriptor,
8203  * get_tr_index() uses the following algorithm.
8204  *
8205  *   idx = *ptr;
8206  *
8207  * As the pointer points at the index entry, and index[n] == n,
8208  * dereferencing it yields the index value directly.
8209  *
8210  * Then, to get the trace_array descriptor, subtract that index
8211  * from the pointer to get back to the start of the index array:
8212  *
8213  *   ptr - idx == &index[0]
8214  *
8215  * Then a simple container_of() from that pointer gets us to the
8216  * trace_array descriptor.
8217  */
8218 static void get_tr_index(void *data, struct trace_array **ptr,
8219                          unsigned int *pindex)
8220 {
8221         *pindex = *(unsigned char *)data;
8222
8223         *ptr = container_of(data - *pindex, struct trace_array,
8224                             trace_flags_index);
8225 }
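/*
 * Worked example added for clarity (not part of the original file):
 * if the option file for flag bit 3 was created with
 * data == &tr->trace_flags_index[3], then:
 *
 *	unsigned int idx = *(unsigned char *)data;	// idx == 3
 *	void *base = data - idx;			// &tr->trace_flags_index[0]
 *	struct trace_array *tr2 = container_of(base, struct trace_array,
 *					       trace_flags_index);
 *
 * which recovers the enclosing trace_array exactly as get_tr_index()
 * does above.
 */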
8226
8227 static ssize_t
8228 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8229                         loff_t *ppos)
8230 {
8231         void *tr_index = filp->private_data;
8232         struct trace_array *tr;
8233         unsigned int index;
8234         char *buf;
8235
8236         get_tr_index(tr_index, &tr, &index);
8237
8238         if (tr->trace_flags & (1 << index))
8239                 buf = "1\n";
8240         else
8241                 buf = "0\n";
8242
8243         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8244 }
8245
8246 static ssize_t
8247 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8248                          loff_t *ppos)
8249 {
8250         void *tr_index = filp->private_data;
8251         struct trace_array *tr;
8252         unsigned int index;
8253         unsigned long val;
8254         int ret;
8255
8256         get_tr_index(tr_index, &tr, &index);
8257
8258         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8259         if (ret)
8260                 return ret;
8261
8262         if (val != 0 && val != 1)
8263                 return -EINVAL;
8264
8265         mutex_lock(&event_mutex);
8266         mutex_lock(&trace_types_lock);
8267         ret = set_tracer_flag(tr, 1 << index, val);
8268         mutex_unlock(&trace_types_lock);
8269         mutex_unlock(&event_mutex);
8270
8271         if (ret < 0)
8272                 return ret;
8273
8274         *ppos += cnt;
8275
8276         return cnt;
8277 }
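/*
 * Usage sketch added for illustration (not part of the original file):
 * the files generated from trace_options[] live under the "options"
 * directory and accept only 0 or 1, e.g. for the TRACE_ITER_OVERWRITE
 * flag:
 *
 *	# echo 0 > /sys/kernel/tracing/options/overwrite
 *	# cat /sys/kernel/tracing/options/overwrite
 *	0
 *
 * Any other value is rejected with -EINVAL by trace_options_core_write()
 * above.
 */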
8278
8279 static const struct file_operations trace_options_core_fops = {
8280         .open = tracing_open_generic,
8281         .read = trace_options_core_read,
8282         .write = trace_options_core_write,
8283         .llseek = generic_file_llseek,
8284 };
8285
8286 struct dentry *trace_create_file(const char *name,
8287                                  umode_t mode,
8288                                  struct dentry *parent,
8289                                  void *data,
8290                                  const struct file_operations *fops)
8291 {
8292         struct dentry *ret;
8293
8294         ret = tracefs_create_file(name, mode, parent, data, fops);
8295         if (!ret)
8296                 pr_warn("Could not create tracefs '%s' entry\n", name);
8297
8298         return ret;
8299 }
8300
8301
8302 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8303 {
8304         struct dentry *d_tracer;
8305
8306         if (tr->options)
8307                 return tr->options;
8308
8309         d_tracer = tracing_get_dentry(tr);
8310         if (IS_ERR(d_tracer))
8311                 return NULL;
8312
8313         tr->options = tracefs_create_dir("options", d_tracer);
8314         if (!tr->options) {
8315                 pr_warn("Could not create tracefs directory 'options'\n");
8316                 return NULL;
8317         }
8318
8319         return tr->options;
8320 }
8321
8322 static void
8323 create_trace_option_file(struct trace_array *tr,
8324                          struct trace_option_dentry *topt,
8325                          struct tracer_flags *flags,
8326                          struct tracer_opt *opt)
8327 {
8328         struct dentry *t_options;
8329
8330         t_options = trace_options_init_dentry(tr);
8331         if (!t_options)
8332                 return;
8333
8334         topt->flags = flags;
8335         topt->opt = opt;
8336         topt->tr = tr;
8337
8338         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8339                                     &trace_options_fops);
8340
8341 }
8342
8343 static void
8344 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8345 {
8346         struct trace_option_dentry *topts;
8347         struct trace_options *tr_topts;
8348         struct tracer_flags *flags;
8349         struct tracer_opt *opts;
8350         int cnt;
8351         int i;
8352
8353         if (!tracer)
8354                 return;
8355
8356         flags = tracer->flags;
8357
8358         if (!flags || !flags->opts)
8359                 return;
8360
8361         /*
8362          * If this is an instance, only create flags for tracers
8363          * the instance may have.
8364          */
8365         if (!trace_ok_for_array(tracer, tr))
8366                 return;
8367
8368         for (i = 0; i < tr->nr_topts; i++) {
8369                 /* Make sure there are no duplicate flags. */
8370                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8371                         return;
8372         }
8373
8374         opts = flags->opts;
8375
8376         for (cnt = 0; opts[cnt].name; cnt++)
8377                 ;
8378
8379         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8380         if (!topts)
8381                 return;
8382
8383         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8384                             GFP_KERNEL);
8385         if (!tr_topts) {
8386                 kfree(topts);
8387                 return;
8388         }
8389
8390         tr->topts = tr_topts;
8391         tr->topts[tr->nr_topts].tracer = tracer;
8392         tr->topts[tr->nr_topts].topts = topts;
8393         tr->nr_topts++;
8394
8395         for (cnt = 0; opts[cnt].name; cnt++) {
8396                 create_trace_option_file(tr, &topts[cnt], flags,
8397                                          &opts[cnt]);
8398                 MEM_FAIL(topts[cnt].entry == NULL,
8399                           "Failed to create trace option: %s",
8400                           opts[cnt].name);
8401         }
8402 }
8403
8404 static struct dentry *
8405 create_trace_option_core_file(struct trace_array *tr,
8406                               const char *option, long index)
8407 {
8408         struct dentry *t_options;
8409
8410         t_options = trace_options_init_dentry(tr);
8411         if (!t_options)
8412                 return NULL;
8413
8414         return trace_create_file(option, 0644, t_options,
8415                                  (void *)&tr->trace_flags_index[index],
8416                                  &trace_options_core_fops);
8417 }
8418
8419 static void create_trace_options_dir(struct trace_array *tr)
8420 {
8421         struct dentry *t_options;
8422         bool top_level = tr == &global_trace;
8423         int i;
8424
8425         t_options = trace_options_init_dentry(tr);
8426         if (!t_options)
8427                 return;
8428
8429         for (i = 0; trace_options[i]; i++) {
8430                 if (top_level ||
8431                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8432                         create_trace_option_core_file(tr, trace_options[i], i);
8433         }
8434 }
8435
8436 static ssize_t
8437 rb_simple_read(struct file *filp, char __user *ubuf,
8438                size_t cnt, loff_t *ppos)
8439 {
8440         struct trace_array *tr = filp->private_data;
8441         char buf[64];
8442         int r;
8443
8444         r = tracer_tracing_is_on(tr);
8445         r = sprintf(buf, "%d\n", r);
8446
8447         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8448 }
8449
8450 static ssize_t
8451 rb_simple_write(struct file *filp, const char __user *ubuf,
8452                 size_t cnt, loff_t *ppos)
8453 {
8454         struct trace_array *tr = filp->private_data;
8455         struct trace_buffer *buffer = tr->array_buffer.buffer;
8456         unsigned long val;
8457         int ret;
8458
8459         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8460         if (ret)
8461                 return ret;
8462
8463         if (buffer) {
8464                 mutex_lock(&trace_types_lock);
8465                 if (!!val == tracer_tracing_is_on(tr)) {
8466                         val = 0; /* do nothing */
8467                 } else if (val) {
8468                         tracer_tracing_on(tr);
8469                         if (tr->current_trace->start)
8470                                 tr->current_trace->start(tr);
8471                 } else {
8472                         tracer_tracing_off(tr);
8473                         if (tr->current_trace->stop)
8474                                 tr->current_trace->stop(tr);
8475                 }
8476                 mutex_unlock(&trace_types_lock);
8477         }
8478
8479         (*ppos)++;
8480
8481         return cnt;
8482 }
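/*
 * Usage note added for illustration (not part of the original file):
 * rb_simple_write() backs the per-instance "tracing_on" file, so tracing
 * can be paused and resumed without changing the current tracer:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on
 *	# echo 1 > /sys/kernel/tracing/tracing_on
 *
 * This is the same knob referred to by the sysrq-z comment in
 * ftrace_dump() further below.
 */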
8483
8484 static const struct file_operations rb_simple_fops = {
8485         .open           = tracing_open_generic_tr,
8486         .read           = rb_simple_read,
8487         .write          = rb_simple_write,
8488         .release        = tracing_release_generic_tr,
8489         .llseek         = default_llseek,
8490 };
8491
8492 static ssize_t
8493 buffer_percent_read(struct file *filp, char __user *ubuf,
8494                     size_t cnt, loff_t *ppos)
8495 {
8496         struct trace_array *tr = filp->private_data;
8497         char buf[64];
8498         int r;
8499
8500         r = tr->buffer_percent;
8501         r = sprintf(buf, "%d\n", r);
8502
8503         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8504 }
8505
8506 static ssize_t
8507 buffer_percent_write(struct file *filp, const char __user *ubuf,
8508                      size_t cnt, loff_t *ppos)
8509 {
8510         struct trace_array *tr = filp->private_data;
8511         unsigned long val;
8512         int ret;
8513
8514         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8515         if (ret)
8516                 return ret;
8517
8518         if (val > 100)
8519                 return -EINVAL;
8520
8521         if (!val)
8522                 val = 1;
8523
8524         tr->buffer_percent = val;
8525
8526         (*ppos)++;
8527
8528         return cnt;
8529 }
8530
8531 static const struct file_operations buffer_percent_fops = {
8532         .open           = tracing_open_generic_tr,
8533         .read           = buffer_percent_read,
8534         .write          = buffer_percent_write,
8535         .release        = tracing_release_generic_tr,
8536         .llseek         = default_llseek,
8537 };
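/*
 * Usage note added for illustration (not part of the original file;
 * semantics summarized, not taken verbatim from this file):
 * "buffer_percent" is intended to set how full the ring buffer must be
 * before blocked readers (e.g. of trace_pipe_raw) are woken:
 *
 *	# echo 50 > /sys/kernel/tracing/buffer_percent
 *
 * Values above 100 are rejected with -EINVAL, and a write of 0 is stored
 * as 1 by buffer_percent_write() above.
 */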
8538
8539 static struct dentry *trace_instance_dir;
8540
8541 static void
8542 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8543
8544 static int
8545 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8546 {
8547         enum ring_buffer_flags rb_flags;
8548
8549         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8550
8551         buf->tr = tr;
8552
8553         buf->buffer = ring_buffer_alloc(size, rb_flags);
8554         if (!buf->buffer)
8555                 return -ENOMEM;
8556
8557         buf->data = alloc_percpu(struct trace_array_cpu);
8558         if (!buf->data) {
8559                 ring_buffer_free(buf->buffer);
8560                 buf->buffer = NULL;
8561                 return -ENOMEM;
8562         }
8563
8564         /* Allocate the first page for all buffers */
8565         set_buffer_entries(&tr->array_buffer,
8566                            ring_buffer_size(tr->array_buffer.buffer, 0));
8567
8568         return 0;
8569 }
8570
8571 static int allocate_trace_buffers(struct trace_array *tr, int size)
8572 {
8573         int ret;
8574
8575         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8576         if (ret)
8577                 return ret;
8578
8579 #ifdef CONFIG_TRACER_MAX_TRACE
8580         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8581                                     allocate_snapshot ? size : 1);
8582         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8583                 ring_buffer_free(tr->array_buffer.buffer);
8584                 tr->array_buffer.buffer = NULL;
8585                 free_percpu(tr->array_buffer.data);
8586                 tr->array_buffer.data = NULL;
8587                 return -ENOMEM;
8588         }
8589         tr->allocated_snapshot = allocate_snapshot;
8590
8591         /*
8592          * Only the top level trace array gets its snapshot allocated
8593          * from the kernel command line.
8594          */
8595         allocate_snapshot = false;
8596 #endif
8597
8598         return 0;
8599 }
8600
8601 static void free_trace_buffer(struct array_buffer *buf)
8602 {
8603         if (buf->buffer) {
8604                 ring_buffer_free(buf->buffer);
8605                 buf->buffer = NULL;
8606                 free_percpu(buf->data);
8607                 buf->data = NULL;
8608         }
8609 }
8610
8611 static void free_trace_buffers(struct trace_array *tr)
8612 {
8613         if (!tr)
8614                 return;
8615
8616         free_trace_buffer(&tr->array_buffer);
8617
8618 #ifdef CONFIG_TRACER_MAX_TRACE
8619         free_trace_buffer(&tr->max_buffer);
8620 #endif
8621 }
8622
8623 static void init_trace_flags_index(struct trace_array *tr)
8624 {
8625         int i;
8626
8627         /* Used by the trace options files */
8628         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8629                 tr->trace_flags_index[i] = i;
8630 }
8631
8632 static void __update_tracer_options(struct trace_array *tr)
8633 {
8634         struct tracer *t;
8635
8636         for (t = trace_types; t; t = t->next)
8637                 add_tracer_options(tr, t);
8638 }
8639
8640 static void update_tracer_options(struct trace_array *tr)
8641 {
8642         mutex_lock(&trace_types_lock);
8643         __update_tracer_options(tr);
8644         mutex_unlock(&trace_types_lock);
8645 }
8646
8647 /* Must have trace_types_lock held */
8648 struct trace_array *trace_array_find(const char *instance)
8649 {
8650         struct trace_array *tr, *found = NULL;
8651
8652         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8653                 if (tr->name && strcmp(tr->name, instance) == 0) {
8654                         found = tr;
8655                         break;
8656                 }
8657         }
8658
8659         return found;
8660 }
8661
8662 struct trace_array *trace_array_find_get(const char *instance)
8663 {
8664         struct trace_array *tr;
8665
8666         mutex_lock(&trace_types_lock);
8667         tr = trace_array_find(instance);
8668         if (tr)
8669                 tr->ref++;
8670         mutex_unlock(&trace_types_lock);
8671
8672         return tr;
8673 }
8674
8675 static int trace_array_create_dir(struct trace_array *tr)
8676 {
8677         int ret;
8678
8679         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8680         if (!tr->dir)
8681                 return -EINVAL;
8682
8683         ret = event_trace_add_tracer(tr->dir, tr);
8684         if (ret)
8685                 tracefs_remove(tr->dir);
8686
8687         init_tracer_tracefs(tr, tr->dir);
8688         __update_tracer_options(tr);
8689
8690         return ret;
8691 }
8692
8693 static struct trace_array *trace_array_create(const char *name)
8694 {
8695         struct trace_array *tr;
8696         int ret;
8697
8698         ret = -ENOMEM;
8699         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8700         if (!tr)
8701                 return ERR_PTR(ret);
8702
8703         tr->name = kstrdup(name, GFP_KERNEL);
8704         if (!tr->name)
8705                 goto out_free_tr;
8706
8707         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8708                 goto out_free_tr;
8709
8710         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8711
8712         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8713
8714         raw_spin_lock_init(&tr->start_lock);
8715
8716         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8717
8718         tr->current_trace = &nop_trace;
8719
8720         INIT_LIST_HEAD(&tr->systems);
8721         INIT_LIST_HEAD(&tr->events);
8722         INIT_LIST_HEAD(&tr->hist_vars);
8723         INIT_LIST_HEAD(&tr->err_log);
8724
8725         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8726                 goto out_free_tr;
8727
8728         if (ftrace_allocate_ftrace_ops(tr) < 0)
8729                 goto out_free_tr;
8730
8731         ftrace_init_trace_array(tr);
8732
8733         init_trace_flags_index(tr);
8734
8735         if (trace_instance_dir) {
8736                 ret = trace_array_create_dir(tr);
8737                 if (ret)
8738                         goto out_free_tr;
8739         } else
8740                 __trace_early_add_events(tr);
8741
8742         list_add(&tr->list, &ftrace_trace_arrays);
8743
8744         tr->ref++;
8745
8746         return tr;
8747
8748  out_free_tr:
8749         ftrace_free_ftrace_ops(tr);
8750         free_trace_buffers(tr);
8751         free_cpumask_var(tr->tracing_cpumask);
8752         kfree(tr->name);
8753         kfree(tr);
8754
8755         return ERR_PTR(ret);
8756 }
8757
8758 static int instance_mkdir(const char *name)
8759 {
8760         struct trace_array *tr;
8761         int ret;
8762
8763         mutex_lock(&event_mutex);
8764         mutex_lock(&trace_types_lock);
8765
8766         ret = -EEXIST;
8767         if (trace_array_find(name))
8768                 goto out_unlock;
8769
8770         tr = trace_array_create(name);
8771
8772         ret = PTR_ERR_OR_ZERO(tr);
8773
8774 out_unlock:
8775         mutex_unlock(&trace_types_lock);
8776         mutex_unlock(&event_mutex);
8777         return ret;
8778 }
8779
8780 /**
8781  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8782  * @name: The name of the trace array to be looked up/created.
8783  *
8784  * Returns a pointer to the trace array with the given name, or NULL
8785  * if it cannot be created.
8786  *
8787  * NOTE: This function increments the reference counter associated with the
8788  * trace array returned. This makes sure it cannot be freed while in use.
8789  * Use trace_array_put() once the trace array is no longer needed.
8790  * If the trace_array is to be freed, trace_array_destroy() needs to
8791  * be called after the trace_array_put(), or simply let user space delete
8792  * it from the tracefs instances directory. But until the
8793  * trace_array_put() is called, user space cannot delete it.
8794  *
8795  */
8796 struct trace_array *trace_array_get_by_name(const char *name)
8797 {
8798         struct trace_array *tr;
8799
8800         mutex_lock(&event_mutex);
8801         mutex_lock(&trace_types_lock);
8802
8803         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8804                 if (tr->name && strcmp(tr->name, name) == 0)
8805                         goto out_unlock;
8806         }
8807
8808         tr = trace_array_create(name);
8809
8810         if (IS_ERR(tr))
8811                 tr = NULL;
8812 out_unlock:
8813         if (tr)
8814                 tr->ref++;
8815
8816         mutex_unlock(&trace_types_lock);
8817         mutex_unlock(&event_mutex);
8818         return tr;
8819 }
8820 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
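/*
 * Minimal usage sketch for the exported instance API, added for
 * illustration; the module below is hypothetical and not part of this
 * file:
 *
 *	static struct trace_array *my_tr;
 *
 *	static int __init my_mod_init(void)
 *	{
 *		my_tr = trace_array_get_by_name("my_instance");
 *		if (!my_tr)
 *			return -ENOMEM;
 *		trace_array_printk(my_tr, _THIS_IP_, "hello\n");
 *		return 0;
 *	}
 *
 *	static void __exit my_mod_exit(void)
 *	{
 *		trace_array_put(my_tr);
 *		trace_array_destroy(my_tr);
 *	}
 *
 * trace_array_put() drops the reference taken by
 * trace_array_get_by_name(), and trace_array_destroy() then removes the
 * instance, following the ordering described in the kernel-doc above.
 */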
8821
8822 static int __remove_instance(struct trace_array *tr)
8823 {
8824         int i;
8825
8826         /* Reference counter for a newly created trace array = 1. */
8827         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8828                 return -EBUSY;
8829
8830         list_del(&tr->list);
8831
8832         /* Disable all the flags that were enabled coming in */
8833         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8834                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8835                         set_tracer_flag(tr, 1 << i, 0);
8836         }
8837
8838         tracing_set_nop(tr);
8839         clear_ftrace_function_probes(tr);
8840         event_trace_del_tracer(tr);
8841         ftrace_clear_pids(tr);
8842         ftrace_destroy_function_files(tr);
8843         tracefs_remove(tr->dir);
8844         free_trace_buffers(tr);
8845
8846         for (i = 0; i < tr->nr_topts; i++) {
8847                 kfree(tr->topts[i].topts);
8848         }
8849         kfree(tr->topts);
8850
8851         free_cpumask_var(tr->tracing_cpumask);
8852         kfree(tr->name);
8853         kfree(tr);
8854
8855         return 0;
8856 }
8857
8858 int trace_array_destroy(struct trace_array *this_tr)
8859 {
8860         struct trace_array *tr;
8861         int ret;
8862
8863         if (!this_tr)
8864                 return -EINVAL;
8865
8866         mutex_lock(&event_mutex);
8867         mutex_lock(&trace_types_lock);
8868
8869         ret = -ENODEV;
8870
8871         /* Make sure the trace array exists before destroying it. */
8872         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8873                 if (tr == this_tr) {
8874                         ret = __remove_instance(tr);
8875                         break;
8876                 }
8877         }
8878
8879         mutex_unlock(&trace_types_lock);
8880         mutex_unlock(&event_mutex);
8881
8882         return ret;
8883 }
8884 EXPORT_SYMBOL_GPL(trace_array_destroy);
8885
8886 static int instance_rmdir(const char *name)
8887 {
8888         struct trace_array *tr;
8889         int ret;
8890
8891         mutex_lock(&event_mutex);
8892         mutex_lock(&trace_types_lock);
8893
8894         ret = -ENODEV;
8895         tr = trace_array_find(name);
8896         if (tr)
8897                 ret = __remove_instance(tr);
8898
8899         mutex_unlock(&trace_types_lock);
8900         mutex_unlock(&event_mutex);
8901
8902         return ret;
8903 }
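/*
 * Usage sketch added for illustration (not part of the original file):
 * instance_mkdir() and instance_rmdir() are the tracefs callbacks behind
 * the "instances" directory, so user space manages trace arrays with
 * plain mkdir/rmdir:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still holds extra
 * references (see the tr->ref check in __remove_instance()).
 */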
8904
8905 static __init void create_trace_instances(struct dentry *d_tracer)
8906 {
8907         struct trace_array *tr;
8908
8909         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8910                                                          instance_mkdir,
8911                                                          instance_rmdir);
8912         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8913                 return;
8914
8915         mutex_lock(&event_mutex);
8916         mutex_lock(&trace_types_lock);
8917
8918         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8919                 if (!tr->name)
8920                         continue;
8921                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8922                              "Failed to create instance directory\n"))
8923                         break;
8924         }
8925
8926         mutex_unlock(&trace_types_lock);
8927         mutex_unlock(&event_mutex);
8928 }
8929
8930 static void
8931 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8932 {
8933         struct trace_event_file *file;
8934         int cpu;
8935
8936         trace_create_file("available_tracers", 0444, d_tracer,
8937                         tr, &show_traces_fops);
8938
8939         trace_create_file("current_tracer", 0644, d_tracer,
8940                         tr, &set_tracer_fops);
8941
8942         trace_create_file("tracing_cpumask", 0644, d_tracer,
8943                           tr, &tracing_cpumask_fops);
8944
8945         trace_create_file("trace_options", 0644, d_tracer,
8946                           tr, &tracing_iter_fops);
8947
8948         trace_create_file("trace", 0644, d_tracer,
8949                           tr, &tracing_fops);
8950
8951         trace_create_file("trace_pipe", 0444, d_tracer,
8952                           tr, &tracing_pipe_fops);
8953
8954         trace_create_file("buffer_size_kb", 0644, d_tracer,
8955                           tr, &tracing_entries_fops);
8956
8957         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8958                           tr, &tracing_total_entries_fops);
8959
8960         trace_create_file("free_buffer", 0200, d_tracer,
8961                           tr, &tracing_free_buffer_fops);
8962
8963         trace_create_file("trace_marker", 0220, d_tracer,
8964                           tr, &tracing_mark_fops);
8965
8966         file = __find_event_file(tr, "ftrace", "print");
8967         if (file && file->dir)
8968                 trace_create_file("trigger", 0644, file->dir, file,
8969                                   &event_trigger_fops);
8970         tr->trace_marker_file = file;
8971
8972         trace_create_file("trace_marker_raw", 0220, d_tracer,
8973                           tr, &tracing_mark_raw_fops);
8974
8975         trace_create_file("trace_clock", 0644, d_tracer, tr,
8976                           &trace_clock_fops);
8977
8978         trace_create_file("tracing_on", 0644, d_tracer,
8979                           tr, &rb_simple_fops);
8980
8981         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8982                           &trace_time_stamp_mode_fops);
8983
8984         tr->buffer_percent = 50;
8985
8986         trace_create_file("buffer_percent", 0444, d_tracer,
8987                         tr, &buffer_percent_fops);
8988
8989         create_trace_options_dir(tr);
8990
8991 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8992         trace_create_maxlat_file(tr, d_tracer);
8993 #endif
8994
8995         if (ftrace_create_function_files(tr, d_tracer))
8996                 MEM_FAIL(1, "Could not allocate function filter files");
8997
8998 #ifdef CONFIG_TRACER_SNAPSHOT
8999         trace_create_file("snapshot", 0644, d_tracer,
9000                           tr, &snapshot_fops);
9001 #endif
9002
9003         trace_create_file("error_log", 0644, d_tracer,
9004                           tr, &tracing_err_log_fops);
9005
9006         for_each_tracing_cpu(cpu)
9007                 tracing_init_tracefs_percpu(tr, cpu);
9008
9009         ftrace_init_tracefs(tr, d_tracer);
9010 }
9011
9012 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9013 {
9014         struct vfsmount *mnt;
9015         struct file_system_type *type;
9016
9017         /*
9018          * To maintain backward compatibility for tools that mount
9019          * debugfs to get to the tracing facility, tracefs is automatically
9020          * mounted to the debugfs/tracing directory.
9021          */
9022         type = get_fs_type("tracefs");
9023         if (!type)
9024                 return NULL;
9025         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9026         put_filesystem(type);
9027         if (IS_ERR(mnt))
9028                 return NULL;
9029         mntget(mnt);
9030
9031         return mnt;
9032 }
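/*
 * Note added for illustration (not part of the original file): with the
 * automount in place the same tracefs hierarchy is reachable from both
 * locations:
 *
 *	/sys/kernel/tracing/		(native tracefs mount point)
 *	/sys/kernel/debug/tracing/	(automounted for older tools)
 *
 * A manual mount is also possible, e.g.:
 *
 *	# mount -t tracefs nodev /sys/kernel/tracing
 */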
9033
9034 /**
9035  * tracing_init_dentry - initialize top level trace array
9036  *
9037  * This is called when creating files or directories in the tracing
9038  * directory. It is called via fs_initcall() from the boot up code, and
9039  * returns 0 once the top level tracing directory has been set up.
9040  */
9041 int tracing_init_dentry(void)
9042 {
9043         struct trace_array *tr = &global_trace;
9044
9045         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9046                 pr_warn("Tracing disabled due to lockdown\n");
9047                 return -EPERM;
9048         }
9049
9050         /* The top level trace array uses NULL as parent */
9051         if (tr->dir)
9052                 return 0;
9053
9054         if (WARN_ON(!tracefs_initialized()))
9055                 return -ENODEV;
9056
9057         /*
9058          * As there may still be users that expect the tracing
9059          * files to exist in debugfs/tracing, we must automount
9060          * the tracefs file system there, so older tools still
9061          * work with the newer kernel.
9062          */
9063         tr->dir = debugfs_create_automount("tracing", NULL,
9064                                            trace_automount, NULL);
9065
9066         return 0;
9067 }
9068
9069 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9070 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9071
9072 static struct workqueue_struct *eval_map_wq __initdata;
9073 static struct work_struct eval_map_work __initdata;
9074
9075 static void __init eval_map_work_func(struct work_struct *work)
9076 {
9077         int len;
9078
9079         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9080         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9081 }
9082
9083 static int __init trace_eval_init(void)
9084 {
9085         INIT_WORK(&eval_map_work, eval_map_work_func);
9086
9087         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9088         if (!eval_map_wq) {
9089                 pr_err("Unable to allocate eval_map_wq\n");
9090                 /* Do work here */
9091                 eval_map_work_func(&eval_map_work);
9092                 return -ENOMEM;
9093         }
9094
9095         queue_work(eval_map_wq, &eval_map_work);
9096         return 0;
9097 }
9098
9099 static int __init trace_eval_sync(void)
9100 {
9101         /* Make sure the eval map updates are finished */
9102         if (eval_map_wq)
9103                 destroy_workqueue(eval_map_wq);
9104         return 0;
9105 }
9106
9107 late_initcall_sync(trace_eval_sync);
9108
9109
9110 #ifdef CONFIG_MODULES
9111 static void trace_module_add_evals(struct module *mod)
9112 {
9113         if (!mod->num_trace_evals)
9114                 return;
9115
9116         /*
9117          * Modules with a bad taint do not have events created;
9118          * do not bother with their eval maps (enums) either.
9119          */
9120         if (trace_module_has_bad_taint(mod))
9121                 return;
9122
9123         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9124 }
9125
9126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9127 static void trace_module_remove_evals(struct module *mod)
9128 {
9129         union trace_eval_map_item *map;
9130         union trace_eval_map_item **last = &trace_eval_maps;
9131
9132         if (!mod->num_trace_evals)
9133                 return;
9134
9135         mutex_lock(&trace_eval_mutex);
9136
9137         map = trace_eval_maps;
9138
9139         while (map) {
9140                 if (map->head.mod == mod)
9141                         break;
9142                 map = trace_eval_jmp_to_tail(map);
9143                 last = &map->tail.next;
9144                 map = map->tail.next;
9145         }
9146         if (!map)
9147                 goto out;
9148
9149         *last = trace_eval_jmp_to_tail(map)->tail.next;
9150         kfree(map);
9151  out:
9152         mutex_unlock(&trace_eval_mutex);
9153 }
9154 #else
9155 static inline void trace_module_remove_evals(struct module *mod) { }
9156 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9157
9158 static int trace_module_notify(struct notifier_block *self,
9159                                unsigned long val, void *data)
9160 {
9161         struct module *mod = data;
9162
9163         switch (val) {
9164         case MODULE_STATE_COMING:
9165                 trace_module_add_evals(mod);
9166                 break;
9167         case MODULE_STATE_GOING:
9168                 trace_module_remove_evals(mod);
9169                 break;
9170         }
9171
9172         return NOTIFY_OK;
9173 }
9174
9175 static struct notifier_block trace_module_nb = {
9176         .notifier_call = trace_module_notify,
9177         .priority = 0,
9178 };
9179 #endif /* CONFIG_MODULES */
9180
9181 static __init int tracer_init_tracefs(void)
9182 {
9183         int ret;
9184
9185         trace_access_lock_init();
9186
9187         ret = tracing_init_dentry();
9188         if (ret)
9189                 return 0;
9190
9191         event_trace_init();
9192
9193         init_tracer_tracefs(&global_trace, NULL);
9194         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9195
9196         trace_create_file("tracing_thresh", 0644, NULL,
9197                         &global_trace, &tracing_thresh_fops);
9198
9199         trace_create_file("README", 0444, NULL,
9200                         NULL, &tracing_readme_fops);
9201
9202         trace_create_file("saved_cmdlines", 0444, NULL,
9203                         NULL, &tracing_saved_cmdlines_fops);
9204
9205         trace_create_file("saved_cmdlines_size", 0644, NULL,
9206                           NULL, &tracing_saved_cmdlines_size_fops);
9207
9208         trace_create_file("saved_tgids", 0444, NULL,
9209                         NULL, &tracing_saved_tgids_fops);
9210
9211         trace_eval_init();
9212
9213         trace_create_eval_file(NULL);
9214
9215 #ifdef CONFIG_MODULES
9216         register_module_notifier(&trace_module_nb);
9217 #endif
9218
9219 #ifdef CONFIG_DYNAMIC_FTRACE
9220         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9221                         NULL, &tracing_dyn_info_fops);
9222 #endif
9223
9224         create_trace_instances(NULL);
9225
9226         update_tracer_options(&global_trace);
9227
9228         return 0;
9229 }
9230
9231 static int trace_panic_handler(struct notifier_block *this,
9232                                unsigned long event, void *unused)
9233 {
9234         if (ftrace_dump_on_oops)
9235                 ftrace_dump(ftrace_dump_on_oops);
9236         return NOTIFY_OK;
9237 }
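/*
 * Usage note added for illustration (not part of the original file):
 * the panic and die handlers here are gated by ftrace_dump_on_oops,
 * which is normally set from the kernel command line or via sysctl,
 * e.g.:
 *
 *	ftrace_dump_on_oops		(boot parameter, dump all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that oopsed)
 *	# echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * These select between DUMP_ALL and DUMP_ORIG in ftrace_dump() below.
 */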
9238
9239 static struct notifier_block trace_panic_notifier = {
9240         .notifier_call  = trace_panic_handler,
9241         .next           = NULL,
9242         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9243 };
9244
9245 static int trace_die_handler(struct notifier_block *self,
9246                              unsigned long val,
9247                              void *data)
9248 {
9249         switch (val) {
9250         case DIE_OOPS:
9251                 if (ftrace_dump_on_oops)
9252                         ftrace_dump(ftrace_dump_on_oops);
9253                 break;
9254         default:
9255                 break;
9256         }
9257         return NOTIFY_OK;
9258 }
9259
9260 static struct notifier_block trace_die_notifier = {
9261         .notifier_call = trace_die_handler,
9262         .priority = 200
9263 };
9264
9265 /*
9266  * printk is limited to a maximum of 1024 bytes; we really don't need it
9267  * that big. Nothing should be printing 1000 characters anyway.
9268  */
9269 #define TRACE_MAX_PRINT         1000
9270
9271 /*
9272  * Define here KERN_TRACE so that we have one place to modify
9273  * it if we decide to change what log level the ftrace dump
9274  * should be at.
9275  */
9276 #define KERN_TRACE              KERN_EMERG
9277
9278 void
9279 trace_printk_seq(struct trace_seq *s)
9280 {
9281         /* Probably should print a warning here. */
9282         if (s->seq.len >= TRACE_MAX_PRINT)
9283                 s->seq.len = TRACE_MAX_PRINT;
9284
9285         /*
9286          * More paranoid code. Although the buffer size is set to
9287          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9288          * an extra layer of protection.
9289          */
9290         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9291                 s->seq.len = s->seq.size - 1;
9292
9293         /* should already be NUL terminated, but we are paranoid. */
9294         s->buffer[s->seq.len] = 0;
9295
9296         printk(KERN_TRACE "%s", s->buffer);
9297
9298         trace_seq_init(s);
9299 }
9300
9301 void trace_init_global_iter(struct trace_iterator *iter)
9302 {
9303         iter->tr = &global_trace;
9304         iter->trace = iter->tr->current_trace;
9305         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9306         iter->array_buffer = &global_trace.array_buffer;
9307
9308         if (iter->trace && iter->trace->open)
9309                 iter->trace->open(iter);
9310
9311         /* Annotate start of buffers if we had overruns */
9312         if (ring_buffer_overruns(iter->array_buffer->buffer))
9313                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9314
9315         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9316         if (trace_clocks[iter->tr->clock_id].in_ns)
9317                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9318 }
9319
9320 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9321 {
9322         /* use static because iter can be a bit big for the stack */
9323         static struct trace_iterator iter;
9324         static atomic_t dump_running;
9325         struct trace_array *tr = &global_trace;
9326         unsigned int old_userobj;
9327         unsigned long flags;
9328         int cnt = 0, cpu;
9329
9330         /* Only allow one dump user at a time. */
9331         if (atomic_inc_return(&dump_running) != 1) {
9332                 atomic_dec(&dump_running);
9333                 return;
9334         }
9335
9336         /*
9337          * Always turn off tracing when we dump.
9338          * We don't need to show trace output of what happens
9339          * between multiple crashes.
9340          *
9341          * If the user does a sysrq-z, then they can re-enable
9342          * tracing with echo 1 > tracing_on.
9343          */
9344         tracing_off();
9345
9346         local_irq_save(flags);
9347         printk_nmi_direct_enter();
9348
9349         /* Simulate the iterator */
9350         trace_init_global_iter(&iter);
9351         /* Cannot use kmalloc for iter.temp */
9352         iter.temp = static_temp_buf;
9353         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9354
9355         for_each_tracing_cpu(cpu) {
9356                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9357         }
9358
9359         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9360
9361         /* don't look at user memory in panic mode */
9362         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9363
9364         switch (oops_dump_mode) {
9365         case DUMP_ALL:
9366                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9367                 break;
9368         case DUMP_ORIG:
9369                 iter.cpu_file = raw_smp_processor_id();
9370                 break;
9371         case DUMP_NONE:
9372                 goto out_enable;
9373         default:
9374                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9375                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9376         }
9377
9378         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9379
9380         /* Did function tracer already get disabled? */
9381         if (ftrace_is_dead()) {
9382                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9383                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9384         }
9385
9386         /*
9387          * We need to stop all tracing on all CPUs to read
9388          * the next buffer. This is a bit expensive, but is
9389          * not done often. We print all that we can read,
9390          * and then release the locks again.
9391          */
9392
9393         while (!trace_empty(&iter)) {
9394
9395                 if (!cnt)
9396                         printk(KERN_TRACE "---------------------------------\n");
9397
9398                 cnt++;
9399
9400                 trace_iterator_reset(&iter);
9401                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9402
9403                 if (trace_find_next_entry_inc(&iter) != NULL) {
9404                         int ret;
9405
9406                         ret = print_trace_line(&iter);
9407                         if (ret != TRACE_TYPE_NO_CONSUME)
9408                                 trace_consume(&iter);
9409                 }
9410                 touch_nmi_watchdog();
9411
9412                 trace_printk_seq(&iter.seq);
9413         }
9414
9415         if (!cnt)
9416                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9417         else
9418                 printk(KERN_TRACE "---------------------------------\n");
9419
9420  out_enable:
9421         tr->trace_flags |= old_userobj;
9422
9423         for_each_tracing_cpu(cpu) {
9424                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9425         }
9426         atomic_dec(&dump_running);
9427         printk_nmi_direct_exit();
9428         local_irq_restore(flags);
9429 }
9430 EXPORT_SYMBOL_GPL(ftrace_dump);
9431
9432 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9433 {
9434         char **argv;
9435         int argc, ret;
9436
9437         argc = 0;
9438         ret = 0;
9439         argv = argv_split(GFP_KERNEL, buf, &argc);
9440         if (!argv)
9441                 return -ENOMEM;
9442
9443         if (argc)
9444                 ret = createfn(argc, argv);
9445
9446         argv_free(argv);
9447
9448         return ret;
9449 }
9450
9451 #define WRITE_BUFSIZE  4096
9452
9453 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9454                                 size_t count, loff_t *ppos,
9455                                 int (*createfn)(int, char **))
9456 {
9457         char *kbuf, *buf, *tmp;
9458         int ret = 0;
9459         size_t done = 0;
9460         size_t size;
9461
9462         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9463         if (!kbuf)
9464                 return -ENOMEM;
9465
9466         while (done < count) {
9467                 size = count - done;
9468
9469                 if (size >= WRITE_BUFSIZE)
9470                         size = WRITE_BUFSIZE - 1;
9471
9472                 if (copy_from_user(kbuf, buffer + done, size)) {
9473                         ret = -EFAULT;
9474                         goto out;
9475                 }
9476                 kbuf[size] = '\0';
9477                 buf = kbuf;
9478                 do {
9479                         tmp = strchr(buf, '\n');
9480                         if (tmp) {
9481                                 *tmp = '\0';
9482                                 size = tmp - buf + 1;
9483                         } else {
9484                                 size = strlen(buf);
9485                                 if (done + size < count) {
9486                                         if (buf != kbuf)
9487                                                 break;
9488                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9489                                         pr_warn("Line length is too long: Should be less than %d\n",
9490                                                 WRITE_BUFSIZE - 2);
9491                                         ret = -EINVAL;
9492                                         goto out;
9493                                 }
9494                         }
9495                         done += size;
9496
9497                         /* Remove comments */
9498                         tmp = strchr(buf, '#');
9499
9500                         if (tmp)
9501                                 *tmp = '\0';
9502
9503                         ret = trace_run_command(buf, createfn);
9504                         if (ret)
9505                                 goto out;
9506                         buf += size;
9507
9508                 } while (done < count);
9509         }
9510         ret = done;
9511
9512 out:
9513         kfree(kbuf);
9514
9515         return ret;
9516 }
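/*
 * Example added for illustration (not part of the original file):
 * dynamic-event interfaces such as kprobe_events feed their writes
 * through trace_parse_run_command(), so a command like
 *
 *	# echo 'p:myprobe do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *
 * arrives here, is split on newlines, has '#' comments stripped, and is
 * passed to the caller's createfn() one line at a time via
 * trace_run_command(). The probe name above is just an example.
 */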
9517
9518 __init static int tracer_alloc_buffers(void)
9519 {
9520         int ring_buf_size;
9521         int ret = -ENOMEM;
9522
9523
9524         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9525                 pr_warn("Tracing disabled due to lockdown\n");
9526                 return -EPERM;
9527         }
9528
9529         /*
9530          * Make sure we don't accidentally add more trace options
9531          * than we have bits for.
9532          */
9533         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9534
9535         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9536                 goto out;
9537
9538         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9539                 goto out_free_buffer_mask;
9540
9541         /* Only allocate trace_printk buffers if a trace_printk exists */
9542         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9543                 /* Must be called before global_trace.buffer is allocated */
9544                 trace_printk_init_buffers();
9545
9546         /* To save memory, keep the ring buffer size to its minimum */
9547         if (ring_buffer_expanded)
9548                 ring_buf_size = trace_buf_size;
9549         else
9550                 ring_buf_size = 1;
9551
9552         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9553         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9554
9555         raw_spin_lock_init(&global_trace.start_lock);
9556
9557         /*
9558          * The prepare callback allocates some memory for the ring buffer. We
9559          * don't free the buffer if the CPU goes down. If we were to free
9560          * the buffer, then the user would lose any trace that was in the
9561          * buffer. The memory will be removed once the "instance" is removed.
9562          */
9563         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9564                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9565                                       NULL);
9566         if (ret < 0)
9567                 goto out_free_cpumask;
9568         /* Used for event triggers */
9569         ret = -ENOMEM;
9570         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9571         if (!temp_buffer)
9572                 goto out_rm_hp_state;
9573
9574         if (trace_create_savedcmd() < 0)
9575                 goto out_free_temp_buffer;
9576
9577         /* TODO: make the number of buffers hot pluggable with CPUs */
9578         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9579                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9580                 goto out_free_savedcmd;
9581         }
9582
9583         if (global_trace.buffer_disabled)
9584                 tracing_off();
9585
9586         if (trace_boot_clock) {
9587                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9588                 if (ret < 0)
9589                         pr_warn("Trace clock %s not defined, going back to default\n",
9590                                 trace_boot_clock);
9591         }
9592
9593         /*
9594          * register_tracer() might reference current_trace, so it
9595          * needs to be set before we register anything. This is
9596          * just a bootstrap of current_trace anyway.
9597          */
9598         global_trace.current_trace = &nop_trace;
9599
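        /*
         * max_lock mainly protects the swapping of the snapshot (max)
         * buffers performed by the latency tracers.
         */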
9600         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9601
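        /* Use the global function tracer ops for the top-level trace array */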
9602         ftrace_init_global_array_ops(&global_trace);
9603
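        /* Set up the option flag indexes used by this array's option files */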
9604         init_trace_flags_index(&global_trace);
9605
9606         register_tracer(&nop_trace);
9607
9608         /* Function tracing may start here (via kernel command line) */
9609         init_function_trace();
9610
9611         /* All seems OK, enable tracing */
9612         tracing_disabled = 0;
9613
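        /*
         * Register panic and die notifiers so the trace buffers can be
         * dumped on a crash when ftrace_dump_on_oops is set.
         */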
9614         atomic_notifier_chain_register(&panic_notifier_list,
9615                                        &trace_panic_notifier);
9616
9617         register_die_notifier(&trace_die_notifier);
9618
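        /* Mark this as the top-level (global) trace array */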
9619         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9620
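        /* Initialize this array's lists and add it to the list of all trace arrays */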
9621         INIT_LIST_HEAD(&global_trace.systems);
9622         INIT_LIST_HEAD(&global_trace.events);
9623         INIT_LIST_HEAD(&global_trace.hist_vars);
9624         INIT_LIST_HEAD(&global_trace.err_log);
9625         list_add(&global_trace.list, &ftrace_trace_arrays);
9626
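        /* Apply any trace options given with "trace_options=" on the command line */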
9627         apply_trace_boot_options();
9628
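        /* Register the "snapshot" function command, if snapshots are configured */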
9629         register_snapshot_cmd();
9630
9631         return 0;
9632
9633 out_free_savedcmd:
9634         free_saved_cmdlines_buffer(savedcmd);
9635 out_free_temp_buffer:
9636         ring_buffer_free(temp_buffer);
9637 out_rm_hp_state:
9638         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9639 out_free_cpumask:
9640         free_cpumask_var(global_trace.tracing_cpumask);
9641 out_free_buffer_mask:
9642         free_cpumask_var(tracing_buffer_mask);
9643 out:
9644         return ret;
9645 }
9646
9647 void __init early_trace_init(void)
9648 {
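        /*
         * If "tp_printk" was given on the command line, allocate the
         * iterator used to send tracepoint output to printk().
         */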
9649         if (tracepoint_printk) {
9650                 tracepoint_print_iter =
9651                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9652                 if (MEM_FAIL(!tracepoint_print_iter,
9653                              "Failed to allocate trace iterator\n"))
9654                         tracepoint_printk = 0;
9655                 else
9656                         static_key_enable(&tracepoint_printk_key.key);
9657         }
9658         tracer_alloc_buffers();
9659 }
9660
9661 void __init trace_init(void)
9662 {
9663         trace_event_init();
9664 }
9665
9666 __init static int clear_boot_tracer(void)
9667 {
9668         /*
9669          * The name of the default boot-up tracer is stored in an init
9670          * section that is freed after boot. This function runs as a late
9671          * initcall: if the boot tracer was never registered by now, clear
9672          * the pointer so that a later registration does not access the
9673          * memory that is about to be freed.
9674          */
9675         if (!default_bootup_tracer)
9676                 return 0;
9677
9678         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9679                default_bootup_tracer);
9680         default_bootup_tracer = NULL;
9681
9682         return 0;
9683 }
9684
9685 fs_initcall(tracer_init_tracefs);
9686 late_initcall_sync(clear_boot_tracer);
9687
9688 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9689 __init static int tracing_set_default_clock(void)
9690 {
9691         /* sched_clock_stable() is determined in late_initcall */
9692         if (!trace_boot_clock && !sched_clock_stable()) {
9693                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9694                         pr_warn("Cannot set tracing clock due to lockdown\n");
9695                         return -EPERM;
9696                 }
9697
9698                 printk(KERN_WARNING
9699                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9700                        "If you want to keep using the local clock, then add:\n"
9701                        "  \"trace_clock=local\"\n"
9702                        "on the kernel command line\n");
9703                 tracing_set_clock(&global_trace, "global");
9704         }
9705
9706         return 0;
9707 }
9708 late_initcall_sync(tracing_set_default_clock);
9709 #endif