1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54
55 #include "trace.h"
56 #include "trace_output.h"
57
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will look into the ring-buffer to count the
62  * entries inserted during the selftest, although concurrent
63  * insertions into the ring-buffer, such as trace_printk, could occur
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73
74 void __init disable_tracing_selftest(const char *reason)
75 {
76         if (!tracing_selftest_disabled) {
77                 tracing_selftest_disabled = true;
78                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
79         }
80 }
81 #else
82 #define tracing_selftest_running        0
83 #define tracing_selftest_disabled       0
84 #endif
85
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94         { }
95 };
96
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100         return 0;
101 }
102
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 and will be set to zero if the initialization
113  * of the tracer is successful. That is the only place that ever
114  * clears it.
115  */
116 static int tracing_disabled = 1;
117
118 cpumask_var_t __read_mostly     tracing_buffer_mask;
119
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops
132  * Set it to 1 to dump the buffers of all CPUs
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
134  */
135
136 enum ftrace_dump_mode ftrace_dump_on_oops;
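/*
 * For example (matching the description above and the boot-time handler
 * below), either of the following enables dumping the buffers of all CPUs:
 *
 *	ftrace_dump_on_oops			(kernel command line)
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * and "ftrace_dump_on_oops=orig_cpu" (or writing 2 to the sysctl) limits
 * the dump to the CPU that triggered the oops.
 */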
137
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144         struct module                   *mod;
145         unsigned long                   length;
146 };
147
148 union trace_eval_map_item;
149
150 struct trace_eval_map_tail {
151         /*
152          * "end" is first and points to NULL as it must be different
153          * from "mod" or "eval_string"
154          */
155         union trace_eval_map_item       *next;
156         const char                      *end;   /* points to NULL */
157 };
158
159 static DEFINE_MUTEX(trace_eval_mutex);
160
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169         struct trace_eval_map           map;
170         struct trace_eval_map_head      head;
171         struct trace_eval_map_tail      tail;
172 };
173
174 static union trace_eval_map_item *trace_eval_maps;
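/*
 * A minimal sketch (for illustration only) of walking the layout
 * described above; the real iteration code lives further down in this
 * file:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long len = ptr->head.length;
 *
 *		ptr++;			// skip the head element
 *		// ptr[0] .. ptr[len - 1] are the saved trace_eval_map entries
 *		ptr += len;		// the tail element follows the maps
 *		ptr = ptr->tail.next;	// next saved array, or NULL
 *	}
 */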
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179                                    struct trace_buffer *buffer,
180                                    unsigned int trace_ctx);
181
182 #define MAX_TRACER_SIZE         100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188
189 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
190 static int boot_instance_index;
191
192 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_snapshot_index;
194
195 static int __init set_cmdline_ftrace(char *str)
196 {
197         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
198         default_bootup_tracer = bootup_tracer_buf;
199         /* We are using ftrace early, expand it */
200         trace_set_ring_buffer_expanded(NULL);
201         return 1;
202 }
203 __setup("ftrace=", set_cmdline_ftrace);
204
205 static int __init set_ftrace_dump_on_oops(char *str)
206 {
207         if (*str++ != '=' || !*str || !strcmp("1", str)) {
208                 ftrace_dump_on_oops = DUMP_ALL;
209                 return 1;
210         }
211
212         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
213                 ftrace_dump_on_oops = DUMP_ORIG;
214                 return 1;
215         }
216
217         return 0;
218 }
219 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
220
221 static int __init stop_trace_on_warning(char *str)
222 {
223         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
224                 __disable_trace_on_warning = 1;
225         return 1;
226 }
227 __setup("traceoff_on_warning", stop_trace_on_warning);
228
229 static int __init boot_alloc_snapshot(char *str)
230 {
231         char *slot = boot_snapshot_info + boot_snapshot_index;
232         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
233         int ret;
234
235         if (str[0] == '=') {
236                 str++;
237                 if (strlen(str) >= left)
238                         return -1;
239
240                 ret = snprintf(slot, left, "%s\t", str);
241                 boot_snapshot_index += ret;
242         } else {
243                 allocate_snapshot = true;
244                 /* We also need the main ring buffer expanded */
245                 trace_set_ring_buffer_expanded(NULL);
246         }
247         return 1;
248 }
249 __setup("alloc_snapshot", boot_alloc_snapshot);
250
251
252 static int __init boot_snapshot(char *str)
253 {
254         snapshot_at_boot = true;
255         boot_alloc_snapshot(str);
256         return 1;
257 }
258 __setup("ftrace_boot_snapshot", boot_snapshot);
259
260
261 static int __init boot_instance(char *str)
262 {
263         char *slot = boot_instance_info + boot_instance_index;
264         int left = sizeof(boot_instance_info) - boot_instance_index;
265         int ret;
266
267         if (strlen(str) >= left)
268                 return -1;
269
270         ret = snprintf(slot, left, "%s\t", str);
271         boot_instance_index += ret;
272
273         return 1;
274 }
275 __setup("trace_instance=", boot_instance);
276
277
278 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
279
280 static int __init set_trace_boot_options(char *str)
281 {
282         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
283         return 1;
284 }
285 __setup("trace_options=", set_trace_boot_options);
286
287 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
288 static char *trace_boot_clock __initdata;
289
290 static int __init set_trace_boot_clock(char *str)
291 {
292         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
293         trace_boot_clock = trace_boot_clock_buf;
294         return 1;
295 }
296 __setup("trace_clock=", set_trace_boot_clock);
297
298 static int __init set_tracepoint_printk(char *str)
299 {
300         /* Ignore the "tp_printk_stop_on_boot" param */
301         if (*str == '_')
302                 return 0;
303
304         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
305                 tracepoint_printk = 1;
306         return 1;
307 }
308 __setup("tp_printk", set_tracepoint_printk);
309
310 static int __init set_tracepoint_printk_stop(char *str)
311 {
312         tracepoint_printk_stop_on_boot = true;
313         return 1;
314 }
315 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
316
317 unsigned long long ns2usecs(u64 nsec)
318 {
319         nsec += 500;
320         do_div(nsec, 1000);
321         return nsec;
322 }
323
324 static void
325 trace_process_export(struct trace_export *export,
326                struct ring_buffer_event *event, int flag)
327 {
328         struct trace_entry *entry;
329         unsigned int size = 0;
330
331         if (export->flags & flag) {
332                 entry = ring_buffer_event_data(event);
333                 size = ring_buffer_event_length(event);
334                 export->write(export, entry, size);
335         }
336 }
337
338 static DEFINE_MUTEX(ftrace_export_lock);
339
340 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
341
342 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
344 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
345
346 static inline void ftrace_exports_enable(struct trace_export *export)
347 {
348         if (export->flags & TRACE_EXPORT_FUNCTION)
349                 static_branch_inc(&trace_function_exports_enabled);
350
351         if (export->flags & TRACE_EXPORT_EVENT)
352                 static_branch_inc(&trace_event_exports_enabled);
353
354         if (export->flags & TRACE_EXPORT_MARKER)
355                 static_branch_inc(&trace_marker_exports_enabled);
356 }
357
358 static inline void ftrace_exports_disable(struct trace_export *export)
359 {
360         if (export->flags & TRACE_EXPORT_FUNCTION)
361                 static_branch_dec(&trace_function_exports_enabled);
362
363         if (export->flags & TRACE_EXPORT_EVENT)
364                 static_branch_dec(&trace_event_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_MARKER)
367                 static_branch_dec(&trace_marker_exports_enabled);
368 }
369
370 static void ftrace_exports(struct ring_buffer_event *event, int flag)
371 {
372         struct trace_export *export;
373
374         preempt_disable_notrace();
375
376         export = rcu_dereference_raw_check(ftrace_exports_list);
377         while (export) {
378                 trace_process_export(export, event, flag);
379                 export = rcu_dereference_raw_check(export->next);
380         }
381
382         preempt_enable_notrace();
383 }
384
385 static inline void
386 add_trace_export(struct trace_export **list, struct trace_export *export)
387 {
388         rcu_assign_pointer(export->next, *list);
389         /*
390          * We are entering export into the list but another
391          * CPU might be walking that list. We need to make sure
392          * the export->next pointer is valid before another CPU sees
393          * the export pointer added to the list.
394          */
395         rcu_assign_pointer(*list, export);
396 }
397
398 static inline int
399 rm_trace_export(struct trace_export **list, struct trace_export *export)
400 {
401         struct trace_export **p;
402
403         for (p = list; *p != NULL; p = &(*p)->next)
404                 if (*p == export)
405                         break;
406
407         if (*p != export)
408                 return -1;
409
410         rcu_assign_pointer(*p, (*p)->next);
411
412         return 0;
413 }
414
415 static inline void
416 add_ftrace_export(struct trace_export **list, struct trace_export *export)
417 {
418         ftrace_exports_enable(export);
419
420         add_trace_export(list, export);
421 }
422
423 static inline int
424 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
425 {
426         int ret;
427
428         ret = rm_trace_export(list, export);
429         ftrace_exports_disable(export);
430
431         return ret;
432 }
433
434 int register_ftrace_export(struct trace_export *export)
435 {
436         if (WARN_ON_ONCE(!export->write))
437                 return -1;
438
439         mutex_lock(&ftrace_export_lock);
440
441         add_ftrace_export(&ftrace_exports_list, export);
442
443         mutex_unlock(&ftrace_export_lock);
444
445         return 0;
446 }
447 EXPORT_SYMBOL_GPL(register_ftrace_export);
448
449 int unregister_ftrace_export(struct trace_export *export)
450 {
451         int ret;
452
453         mutex_lock(&ftrace_export_lock);
454
455         ret = rm_ftrace_export(&ftrace_exports_list, export);
456
457         mutex_unlock(&ftrace_export_lock);
458
459         return ret;
460 }
461 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
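/*
 * A minimal sketch of a trace_export user, assuming the callback
 * signature from <linux/trace.h>; the names "my_write" and "my_export"
 * are purely illustrative:
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		// forward the binary trace entry (e.g. to a device)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */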
462
463 /* trace_flags holds trace_options default values */
464 #define TRACE_DEFAULT_FLAGS                                             \
465         (FUNCTION_DEFAULT_FLAGS |                                       \
466          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
467          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
468          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
469          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
470          TRACE_ITER_HASH_PTR)
471
472 /* trace_options that are only supported by global_trace */
473 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
474                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
475
476 /* trace_flags that are default zero for instances */
477 #define ZEROED_TRACE_FLAGS \
478         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
479
480 /*
481  * The global_trace is the descriptor that holds the top-level tracing
482  * buffers for the live tracing.
483  */
484 static struct trace_array global_trace = {
485         .trace_flags = TRACE_DEFAULT_FLAGS,
486 };
487
488 void trace_set_ring_buffer_expanded(struct trace_array *tr)
489 {
490         if (!tr)
491                 tr = &global_trace;
492         tr->ring_buffer_expanded = true;
493 }
494
495 LIST_HEAD(ftrace_trace_arrays);
496
497 int trace_array_get(struct trace_array *this_tr)
498 {
499         struct trace_array *tr;
500         int ret = -ENODEV;
501
502         mutex_lock(&trace_types_lock);
503         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504                 if (tr == this_tr) {
505                         tr->ref++;
506                         ret = 0;
507                         break;
508                 }
509         }
510         mutex_unlock(&trace_types_lock);
511
512         return ret;
513 }
514
515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517         WARN_ON(!this_tr->ref);
518         this_tr->ref--;
519 }
520
521 /**
522  * trace_array_put - Decrement the reference counter for this trace array.
523  * @this_tr : pointer to the trace array
524  *
525  * NOTE: Use this when we no longer need the trace array returned by
526  * trace_array_get_by_name(). This ensures the trace array can be later
527  * destroyed.
528  *
529  */
530 void trace_array_put(struct trace_array *this_tr)
531 {
532         if (!this_tr)
533                 return;
534
535         mutex_lock(&trace_types_lock);
536         __trace_array_put(this_tr);
537         mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
540
541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543         int ret;
544
545         ret = security_locked_down(LOCKDOWN_TRACEFS);
546         if (ret)
547                 return ret;
548
549         if (tracing_disabled)
550                 return -ENODEV;
551
552         if (tr && trace_array_get(tr) < 0)
553                 return -ENODEV;
554
555         return 0;
556 }
557
558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559                               struct trace_buffer *buffer,
560                               struct ring_buffer_event *event)
561 {
562         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563             !filter_match_preds(call->filter, rec)) {
564                 __trace_event_discard_commit(buffer, event);
565                 return 1;
566         }
567
568         return 0;
569 }
570
571 /**
572  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573  * @filtered_pids: The list of pids to check
574  * @search_pid: The PID to find in @filtered_pids
575  *
576  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577  */
578 bool
579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581         return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583
584 /**
585  * trace_ignore_this_task - should a task be ignored for tracing
586  * @filtered_pids: The list of pids to check
587  * @filtered_no_pids: The list of pids not to be traced
588  * @task: The task that should be ignored if not filtered
589  *
590  * Checks if @task should be traced or not from @filtered_pids.
591  * Returns true if @task should *NOT* be traced.
592  * Returns false if @task should be traced.
593  */
594 bool
595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596                        struct trace_pid_list *filtered_no_pids,
597                        struct task_struct *task)
598 {
599         /*
600          * If filtered_no_pids is not empty, and the task's pid is listed
601          * in filtered_no_pids, then return true.
602          * Otherwise, if filtered_pids is empty, that means we can
603          * trace all tasks. If it has content, then only trace pids
604          * within filtered_pids.
605          */
606
607         return (filtered_pids &&
608                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
609                 (filtered_no_pids &&
610                  trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
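/*
 * For example: with filtered_pids = {42} and filtered_no_pids empty,
 * only pid 42 is traced (this returns true for every other task).
 * With filtered_pids empty and filtered_no_pids = {42}, every task
 * except pid 42 is traced.
 */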
612
613 /**
614  * trace_filter_add_remove_task - Add or remove a task from a pid_list
615  * @pid_list: The list to modify
616  * @self: The current task for fork or NULL for exit
617  * @task: The task to add or remove
618  *
619  * If adding a task, if @self is defined, the task is only added if @self
620  * is also included in @pid_list. This happens on fork and tasks should
621  * only be added when the parent is listed. If @self is NULL, then the
622  * @task pid will be removed from the list, which would happen on exit
623  * of a task.
624  */
625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626                                   struct task_struct *self,
627                                   struct task_struct *task)
628 {
629         if (!pid_list)
630                 return;
631
632         /* For forks, we only add if the forking task is listed */
633         if (self) {
634                 if (!trace_find_filtered_pid(pid_list, self->pid))
635                         return;
636         }
637
638         /* "self" is set for forks, and NULL for exits */
639         if (self)
640                 trace_pid_list_set(pid_list, task->pid);
641         else
642                 trace_pid_list_clear(pid_list, task->pid);
643 }
644
645 /**
646  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647  * @pid_list: The pid list to show
648  * @v: The last pid that was shown (+1 of the actual pid so that zero can be displayed)
649  * @pos: The position of the file
650  *
651  * This is used by the seq_file "next" operation to iterate the pids
652  * listed in a trace_pid_list structure.
653  *
654  * Returns the pid+1 as we want to display pid of zero, but NULL would
655  * stop the iteration.
656  */
657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659         long pid = (unsigned long)v;
660         unsigned int next;
661
662         (*pos)++;
663
664         /* pid already is +1 of the actual previous bit */
665         if (trace_pid_list_next(pid_list, pid, &next) < 0)
666                 return NULL;
667
668         pid = next;
669
670         /* Return pid + 1 to allow zero to be represented */
671         return (void *)(pid + 1);
672 }
673
674 /**
675  * trace_pid_start - Used for seq_file to start reading pid lists
676  * @pid_list: The pid list to show
677  * @pos: The position of the file
678  *
679  * This is used by seq_file "start" operation to start the iteration
680  * of listing pids.
681  *
682  * Returns the pid+1 as we want to display pid of zero, but NULL would
683  * stop the iteration.
684  */
685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687         unsigned long pid;
688         unsigned int first;
689         loff_t l = 0;
690
691         if (trace_pid_list_first(pid_list, &first) < 0)
692                 return NULL;
693
694         pid = first;
695
696         /* Return pid + 1 so that zero can be the exit value */
697         for (pid++; pid && l < *pos;
698              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699                 ;
700         return (void *)pid;
701 }
702
703 /**
704  * trace_pid_show - show the current pid in seq_file processing
705  * @m: The seq_file structure to write into
706  * @v: A void pointer of the pid (+1) value to display
707  *
708  * Can be directly used by seq_file operations to display the current
709  * pid value.
710  */
711 int trace_pid_show(struct seq_file *m, void *v)
712 {
713         unsigned long pid = (unsigned long)v - 1;
714
715         seq_printf(m, "%lu\n", pid);
716         return 0;
717 }
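/*
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to
 * back a seq_file that dumps a trace_pid_list. A minimal sketch of how a
 * caller might wire them up ("my_pid_list" and the wrappers are only
 * illustrative; real users also take the locks they need in start/stop):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */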
718
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE            127
721
722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723                     struct trace_pid_list **new_pid_list,
724                     const char __user *ubuf, size_t cnt)
725 {
726         struct trace_pid_list *pid_list;
727         struct trace_parser parser;
728         unsigned long val;
729         int nr_pids = 0;
730         ssize_t read = 0;
731         ssize_t ret;
732         loff_t pos;
733         pid_t pid;
734
735         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736                 return -ENOMEM;
737
738         /*
739          * Always create a new array. The write is an all-or-nothing
740          * operation: a new array is always created when the user adds
741          * new pids. If the operation fails, then the current list is
742          * not modified.
743          */
744         pid_list = trace_pid_list_alloc();
745         if (!pid_list) {
746                 trace_parser_put(&parser);
747                 return -ENOMEM;
748         }
749
750         if (filtered_pids) {
751                 /* copy the current bits to the new max */
752                 ret = trace_pid_list_first(filtered_pids, &pid);
753                 while (!ret) {
754                         trace_pid_list_set(pid_list, pid);
755                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756                         nr_pids++;
757                 }
758         }
759
760         ret = 0;
761         while (cnt > 0) {
762
763                 pos = 0;
764
765                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
766                 if (ret < 0)
767                         break;
768
769                 read += ret;
770                 ubuf += ret;
771                 cnt -= ret;
772
773                 if (!trace_parser_loaded(&parser))
774                         break;
775
776                 ret = -EINVAL;
777                 if (kstrtoul(parser.buffer, 0, &val))
778                         break;
779
780                 pid = (pid_t)val;
781
782                 if (trace_pid_list_set(pid_list, pid) < 0) {
783                         ret = -1;
784                         break;
785                 }
786                 nr_pids++;
787
788                 trace_parser_clear(&parser);
789                 ret = 0;
790         }
791         trace_parser_put(&parser);
792
793         if (ret < 0) {
794                 trace_pid_list_free(pid_list);
795                 return ret;
796         }
797
798         if (!nr_pids) {
799                 /* Cleared the list of pids */
800                 trace_pid_list_free(pid_list);
801                 pid_list = NULL;
802         }
803
804         *new_pid_list = pid_list;
805
806         return read;
807 }
808
809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811         u64 ts;
812
813         /* Early boot up does not have a buffer yet */
814         if (!buf->buffer)
815                 return trace_clock_local();
816
817         ts = ring_buffer_time_stamp(buf->buffer);
818         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819
820         return ts;
821 }
822
823 u64 ftrace_now(int cpu)
824 {
825         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827
828 /**
829  * tracing_is_enabled - Show if global_trace has been enabled
830  *
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled", which can be used in fast paths such
833  * as the irqsoff tracer. But it may be inaccurate due to races. If you
834  * need to know the accurate state, use tracing_is_on() which is a little
835  * slower, but accurate.
836  */
837 int tracing_is_enabled(void)
838 {
839         /*
840          * For quick access (irqsoff uses this in fast path), just
841          * return the mirror variable of the state of the ring buffer.
842          * It's a little racy, but we don't really care.
843          */
844         smp_rmb();
845         return !global_trace.buffer_disabled;
846 }
847
848 /*
849  * trace_buf_size is the size in bytes that is allocated
850  * for a buffer. Note, the number of bytes is always rounded
851  * to page size.
852  *
853  * This number is purposely set to a low number of 16384.
854  * If a dump on oops happens, it is much appreciated not to have
855  * to wait for all that output. Anyway, this is configurable at
856  * both boot time and run time.
857  */
858 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
859
860 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer            *trace_types __read_mostly;
864
865 /*
866  * trace_types_lock is used to protect the trace_types list.
867  */
868 DEFINE_MUTEX(trace_types_lock);
869
870 /*
871  * serialize the access of the ring buffer
872  *
873  * The ring buffer serializes readers, but that is only low-level protection.
874  * The validity of the events (returned by ring_buffer_peek(), etc.)
875  * is not protected by the ring buffer.
876  *
877  * The content of events may become garbage if we allow other processes to
878  * consume these events concurrently:
879  *   A) the page of the consumed events may become a normal page
880  *      (not a reader page) in the ring buffer, and this page will be
881  *      rewritten by the events producer.
882  *   B) the page of the consumed events may become a page for splice_read,
883  *      and this page will be returned to the system.
884  *
885  * These primitives allow multiple processes to access different per-CPU
886  * ring buffers concurrently.
887  *
888  * These primitives don't distinguish read-only and read-consume access.
889  * Multiple read-only accesses are also serialized.
890  */
891
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895
896 static inline void trace_access_lock(int cpu)
897 {
898         if (cpu == RING_BUFFER_ALL_CPUS) {
899                 /* gain it for accessing the whole ring buffer. */
900                 down_write(&all_cpu_access_lock);
901         } else {
902                 /* gain it for accessing a cpu ring buffer. */
903
904                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905                 down_read(&all_cpu_access_lock);
906
907                 /* Secondly block other access to this @cpu ring buffer. */
908                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
909         }
910 }
911
912 static inline void trace_access_unlock(int cpu)
913 {
914         if (cpu == RING_BUFFER_ALL_CPUS) {
915                 up_write(&all_cpu_access_lock);
916         } else {
917                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918                 up_read(&all_cpu_access_lock);
919         }
920 }
921
922 static inline void trace_access_lock_init(void)
923 {
924         int cpu;
925
926         for_each_possible_cpu(cpu)
927                 mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929
930 #else
931
932 static DEFINE_MUTEX(access_lock);
933
934 static inline void trace_access_lock(int cpu)
935 {
936         (void)cpu;
937         mutex_lock(&access_lock);
938 }
939
940 static inline void trace_access_unlock(int cpu)
941 {
942         (void)cpu;
943         mutex_unlock(&access_lock);
944 }
945
946 static inline void trace_access_lock_init(void)
947 {
948 }
949
950 #endif
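/*
 * A typical (purely illustrative) use of the primitives above when
 * consuming events from one CPU's buffer:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	// ... use the event while the lock is held ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a CPU number takes the lock
 * for the whole ring buffer.
 */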
951
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954                                  unsigned int trace_ctx,
955                                  int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957                                       struct trace_buffer *buffer,
958                                       unsigned int trace_ctx,
959                                       int skip, struct pt_regs *regs);
960
961 #else
962 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
963                                         unsigned int trace_ctx,
964                                         int skip, struct pt_regs *regs)
965 {
966 }
967 static inline void ftrace_trace_stack(struct trace_array *tr,
968                                       struct trace_buffer *buffer,
969                                       unsigned long trace_ctx,
970                                       int skip, struct pt_regs *regs)
971 {
972 }
973
974 #endif
975
976 static __always_inline void
977 trace_event_setup(struct ring_buffer_event *event,
978                   int type, unsigned int trace_ctx)
979 {
980         struct trace_entry *ent = ring_buffer_event_data(event);
981
982         tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984
985 static __always_inline struct ring_buffer_event *
986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987                           int type,
988                           unsigned long len,
989                           unsigned int trace_ctx)
990 {
991         struct ring_buffer_event *event;
992
993         event = ring_buffer_lock_reserve(buffer, len);
994         if (event != NULL)
995                 trace_event_setup(event, type, trace_ctx);
996
997         return event;
998 }
999
1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002         if (tr->array_buffer.buffer)
1003                 ring_buffer_record_on(tr->array_buffer.buffer);
1004         /*
1005          * This flag is looked at when buffers haven't been allocated
1006          * yet, or by some tracers (like irqsoff), that just want to
1007          * know if the ring buffer has been disabled, but it can handle
1008          * races of where it gets disabled but we still do a record.
1009          * As the check is in the fast path of the tracers, it is more
1010          * important to be fast than accurate.
1011          */
1012         tr->buffer_disabled = 0;
1013         /* Make the flag seen by readers */
1014         smp_wmb();
1015 }
1016
1017 /**
1018  * tracing_on - enable tracing buffers
1019  *
1020  * This function enables tracing buffers that may have been
1021  * disabled with tracing_off.
1022  */
1023 void tracing_on(void)
1024 {
1025         tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028
1029
1030 static __always_inline void
1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033         __this_cpu_write(trace_taskinfo_save, true);
1034
1035         /* If this is the temp buffer, we need to commit fully */
1036         if (this_cpu_read(trace_buffered_event) == event) {
1037                 /* Length is in event->array[0] */
1038                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039                 /* Release the temp buffer */
1040                 this_cpu_dec(trace_buffered_event_cnt);
1041                 /* ring_buffer_unlock_commit() enables preemption */
1042                 preempt_enable_notrace();
1043         } else
1044                 ring_buffer_unlock_commit(buffer);
1045 }
1046
1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048                        const char *str, int size)
1049 {
1050         struct ring_buffer_event *event;
1051         struct trace_buffer *buffer;
1052         struct print_entry *entry;
1053         unsigned int trace_ctx;
1054         int alloc;
1055
1056         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057                 return 0;
1058
1059         if (unlikely(tracing_selftest_running && tr == &global_trace))
1060                 return 0;
1061
1062         if (unlikely(tracing_disabled))
1063                 return 0;
1064
1065         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066
1067         trace_ctx = tracing_gen_ctx();
1068         buffer = tr->array_buffer.buffer;
1069         ring_buffer_nest_start(buffer);
1070         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071                                             trace_ctx);
1072         if (!event) {
1073                 size = 0;
1074                 goto out;
1075         }
1076
1077         entry = ring_buffer_event_data(event);
1078         entry->ip = ip;
1079
1080         memcpy(&entry->buf, str, size);
1081
1082         /* Add a newline if necessary */
1083         if (entry->buf[size - 1] != '\n') {
1084                 entry->buf[size] = '\n';
1085                 entry->buf[size + 1] = '\0';
1086         } else
1087                 entry->buf[size] = '\0';
1088
1089         __buffer_unlock_commit(buffer, event);
1090         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091  out:
1092         ring_buffer_nest_end(buffer);
1093         return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096
1097 /**
1098  * __trace_puts - write a constant string into the trace buffer.
1099  * @ip:    The address of the caller
1100  * @str:   The constant string to write
1101  * @size:  The size of the string.
1102  */
1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105         return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
1108
1109 /**
1110  * __trace_bputs - write the pointer to a constant string into trace buffer
1111  * @ip:    The address of the caller
1112  * @str:   The constant string to write to the buffer to
1113  */
1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116         struct ring_buffer_event *event;
1117         struct trace_buffer *buffer;
1118         struct bputs_entry *entry;
1119         unsigned int trace_ctx;
1120         int size = sizeof(struct bputs_entry);
1121         int ret = 0;
1122
1123         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124                 return 0;
1125
1126         if (unlikely(tracing_selftest_running || tracing_disabled))
1127                 return 0;
1128
1129         trace_ctx = tracing_gen_ctx();
1130         buffer = global_trace.array_buffer.buffer;
1131
1132         ring_buffer_nest_start(buffer);
1133         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134                                             trace_ctx);
1135         if (!event)
1136                 goto out;
1137
1138         entry = ring_buffer_event_data(event);
1139         entry->ip                       = ip;
1140         entry->str                      = str;
1141
1142         __buffer_unlock_commit(buffer, event);
1143         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144
1145         ret = 1;
1146  out:
1147         ring_buffer_nest_end(buffer);
1148         return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
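/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly;
 * they are the backends of the trace_puts() macro, which picks the
 * cheaper __trace_bputs() variant for string literals. For example:
 *
 *	trace_puts("reached the slow path\n");
 *
 * writes the message into the top-level trace buffer.
 */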
1151
1152 #ifdef CONFIG_TRACER_SNAPSHOT
1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154                                            void *cond_data)
1155 {
1156         struct tracer *tracer = tr->current_trace;
1157         unsigned long flags;
1158
1159         if (in_nmi()) {
1160                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1162                 return;
1163         }
1164
1165         if (!tr->allocated_snapshot) {
1166                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1168                 tracer_tracing_off(tr);
1169                 return;
1170         }
1171
1172         /* Note, snapshot can not be used when the tracer uses it */
1173         if (tracer->use_max_tr) {
1174                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176                 return;
1177         }
1178
1179         local_irq_save(flags);
1180         update_max_tr(tr, current, smp_processor_id(), cond_data);
1181         local_irq_restore(flags);
1182 }
1183
1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186         tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188
1189 /**
1190  * tracing_snapshot - take a snapshot of the current buffer.
1191  *
1192  * This causes a swap between the snapshot buffer and the current live
1193  * tracing buffer. You can use this to take snapshots of the live
1194  * trace when some condition is triggered, but continue to trace.
1195  *
1196  * Note, make sure to allocate the snapshot with either
1197  * a tracing_snapshot_alloc(), or by doing it manually
1198  * with: echo 1 > /sys/kernel/tracing/snapshot
1199  *
1200  * If the snapshot buffer is not allocated, it will stop tracing.
1201  * Basically making a permanent snapshot.
1202  */
1203 void tracing_snapshot(void)
1204 {
1205         struct trace_array *tr = &global_trace;
1206
1207         tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
1210
1211 /**
1212  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1213  * @tr:         The tracing instance to snapshot
1214  * @cond_data:  The data to be tested conditionally, and possibly saved
1215  *
1216  * This is the same as tracing_snapshot() except that the snapshot is
1217  * conditional - the snapshot will only happen if the
1218  * cond_snapshot.update() implementation receiving the cond_data
1219  * returns true, which means that the trace array's cond_snapshot
1220  * update() operation used the cond_data to determine whether the
1221  * snapshot should be taken, and if it was, presumably saved it along
1222  * with the snapshot.
1223  */
1224 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1225 {
1226         tracing_snapshot_instance_cond(tr, cond_data);
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229
1230 /**
1231  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232  * @tr:         The tracing instance
1233  *
1234  * When the user enables a conditional snapshot using
1235  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236  * with the snapshot.  This accessor is used to retrieve it.
1237  *
1238  * Should not be called from cond_snapshot.update(), since it takes
1239  * the tr->max_lock lock, which the code calling
1240  * cond_snapshot.update() has already done.
1241  *
1242  * Returns the cond_data associated with the trace array's snapshot.
1243  */
1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246         void *cond_data = NULL;
1247
1248         local_irq_disable();
1249         arch_spin_lock(&tr->max_lock);
1250
1251         if (tr->cond_snapshot)
1252                 cond_data = tr->cond_snapshot->cond_data;
1253
1254         arch_spin_unlock(&tr->max_lock);
1255         local_irq_enable();
1256
1257         return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260
1261 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1262                                         struct array_buffer *size_buf, int cpu_id);
1263 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264
1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267         int order;
1268         int ret;
1269
1270         if (!tr->allocated_snapshot) {
1271
1272                 /* Make the snapshot buffer have the same order as main buffer */
1273                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1274                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1275                 if (ret < 0)
1276                         return ret;
1277
1278                 /* allocate spare buffer */
1279                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1280                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1281                 if (ret < 0)
1282                         return ret;
1283
1284                 tr->allocated_snapshot = true;
1285         }
1286
1287         return 0;
1288 }
1289
1290 static void free_snapshot(struct trace_array *tr)
1291 {
1292         /*
1293          * We don't free the ring buffer; instead, we resize it because
1294          * the max_tr ring buffer has some state (e.g. ring->clock) and
1295          * we want to preserve it.
1296          */
1297         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1298         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1299         set_buffer_entries(&tr->max_buffer, 1);
1300         tracing_reset_online_cpus(&tr->max_buffer);
1301         tr->allocated_snapshot = false;
1302 }
1303
1304 /**
1305  * tracing_alloc_snapshot - allocate snapshot buffer.
1306  *
1307  * This only allocates the snapshot buffer if it isn't already
1308  * allocated - it doesn't also take a snapshot.
1309  *
1310  * This is meant to be used in cases where the snapshot buffer needs
1311  * to be set up for events that can't sleep but need to be able to
1312  * trigger a snapshot.
1313  */
1314 int tracing_alloc_snapshot(void)
1315 {
1316         struct trace_array *tr = &global_trace;
1317         int ret;
1318
1319         ret = tracing_alloc_snapshot_instance(tr);
1320         WARN_ON(ret < 0);
1321
1322         return ret;
1323 }
1324 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1325
1326 /**
1327  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1328  *
1329  * This is similar to tracing_snapshot(), but it will allocate the
1330  * snapshot buffer if it isn't already allocated. Use this only
1331  * where it is safe to sleep, as the allocation may sleep.
1332  *
1333  * This causes a swap between the snapshot buffer and the current live
1334  * tracing buffer. You can use this to take snapshots of the live
1335  * trace when some condition is triggered, but continue to trace.
1336  */
1337 void tracing_snapshot_alloc(void)
1338 {
1339         int ret;
1340
1341         ret = tracing_alloc_snapshot();
1342         if (ret < 0)
1343                 return;
1344
1345         tracing_snapshot();
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
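/*
 * Typical in-kernel usage of the snapshot API above (illustrative only):
 *
 *	tracing_alloc_snapshot();	// may sleep; do this once, early
 *	...
 *	if (hit_interesting_condition)
 *		tracing_snapshot();	// does not sleep
 *
 * The saved trace can then be read from /sys/kernel/tracing/snapshot.
 */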
1348
1349 /**
1350  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1351  * @tr:         The tracing instance
1352  * @cond_data:  User data to associate with the snapshot
1353  * @update:     Implementation of the cond_snapshot update function
1354  *
1355  * Check whether the conditional snapshot for the given instance has
1356  * already been enabled, or if the current tracer is already using a
1357  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1358  * save the cond_data and update function inside.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1363                                  cond_update_fn_t update)
1364 {
1365         struct cond_snapshot *cond_snapshot;
1366         int ret = 0;
1367
1368         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1369         if (!cond_snapshot)
1370                 return -ENOMEM;
1371
1372         cond_snapshot->cond_data = cond_data;
1373         cond_snapshot->update = update;
1374
1375         mutex_lock(&trace_types_lock);
1376
1377         ret = tracing_alloc_snapshot_instance(tr);
1378         if (ret)
1379                 goto fail_unlock;
1380
1381         if (tr->current_trace->use_max_tr) {
1382                 ret = -EBUSY;
1383                 goto fail_unlock;
1384         }
1385
1386         /*
1387          * The cond_snapshot can only change to NULL without the
1388          * trace_types_lock. We don't care if we race with it going
1389          * to NULL, but we want to make sure that it's not set to
1390          * something other than NULL when we get here, which we can
1391          * do safely with only holding the trace_types_lock and not
1392          * having to take the max_lock.
1393          */
1394         if (tr->cond_snapshot) {
1395                 ret = -EBUSY;
1396                 goto fail_unlock;
1397         }
1398
1399         local_irq_disable();
1400         arch_spin_lock(&tr->max_lock);
1401         tr->cond_snapshot = cond_snapshot;
1402         arch_spin_unlock(&tr->max_lock);
1403         local_irq_enable();
1404
1405         mutex_unlock(&trace_types_lock);
1406
1407         return ret;
1408
1409  fail_unlock:
1410         mutex_unlock(&trace_types_lock);
1411         kfree(cond_snapshot);
1412         return ret;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
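/*
 * A minimal sketch of using the conditional snapshot, assuming the
 * cond_update_fn_t prototype from trace.h ("my_update" and "my_data"
 * are only illustrative):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	// true when a snapshot should be taken
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	// snapshots only if
 *						// my_update() returned true
 *	tracing_snapshot_cond_disable(tr);
 */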
1415
1416 /**
1417  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1418  * @tr:         The tracing instance
1419  *
1420  * Check whether the conditional snapshot for the given instance is
1421  * enabled; if so, free the cond_snapshot associated with it,
1422  * otherwise return -EINVAL.
1423  *
1424  * Returns 0 if successful, error otherwise.
1425  */
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         int ret = 0;
1429
1430         local_irq_disable();
1431         arch_spin_lock(&tr->max_lock);
1432
1433         if (!tr->cond_snapshot)
1434                 ret = -EINVAL;
1435         else {
1436                 kfree(tr->cond_snapshot);
1437                 tr->cond_snapshot = NULL;
1438         }
1439
1440         arch_spin_unlock(&tr->max_lock);
1441         local_irq_enable();
1442
1443         return ret;
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1446 #else
1447 void tracing_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_snapshot);
1452 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1453 {
1454         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1455 }
1456 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1457 int tracing_alloc_snapshot(void)
1458 {
1459         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1460         return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1463 void tracing_snapshot_alloc(void)
1464 {
1465         /* Give warning */
1466         tracing_snapshot();
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1469 void *tracing_cond_snapshot_data(struct trace_array *tr)
1470 {
1471         return NULL;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1474 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1475 {
1476         return -ENODEV;
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1479 int tracing_snapshot_cond_disable(struct trace_array *tr)
1480 {
1481         return false;
1482 }
1483 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1484 #define free_snapshot(tr)       do { } while (0)
1485 #endif /* CONFIG_TRACER_SNAPSHOT */
1486
1487 void tracer_tracing_off(struct trace_array *tr)
1488 {
1489         if (tr->array_buffer.buffer)
1490                 ring_buffer_record_off(tr->array_buffer.buffer);
1491         /*
1492          * This flag is looked at when buffers haven't been allocated
1493          * yet, or by some tracers (like irqsoff), that just want to
1494          * know if the ring buffer has been disabled, but it can handle
1495          * races of where it gets disabled but we still do a record.
1496          * As the check is in the fast path of the tracers, it is more
1497          * important to be fast than accurate.
1498          */
1499         tr->buffer_disabled = 1;
1500         /* Make the flag seen by readers */
1501         smp_wmb();
1502 }
1503
1504 /**
1505  * tracing_off - turn off tracing buffers
1506  *
1507  * This function stops the tracing buffers from recording data.
1508  * It does not disable any overhead the tracers themselves may
1509  * be causing. This function simply causes all recording to
1510  * the ring buffers to fail.
1511  */
1512 void tracing_off(void)
1513 {
1514         tracer_tracing_off(&global_trace);
1515 }
1516 EXPORT_SYMBOL_GPL(tracing_off);
1517
1518 void disable_trace_on_warning(void)
1519 {
1520         if (__disable_trace_on_warning) {
1521                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1522                         "Disabling tracing due to warning\n");
1523                 tracing_off();
1524         }
1525 }
1526
1527 /**
1528  * tracer_tracing_is_on - show real state of ring buffer enabled
1529  * @tr : the trace array to know if ring buffer is enabled
1530  *
1531  * Shows real state of the ring buffer if it is enabled or not.
1532  */
1533 bool tracer_tracing_is_on(struct trace_array *tr)
1534 {
1535         if (tr->array_buffer.buffer)
1536                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1537         return !tr->buffer_disabled;
1538 }
1539
1540 /**
1541  * tracing_is_on - show state of ring buffers enabled
1542  */
1543 int tracing_is_on(void)
1544 {
1545         return tracer_tracing_is_on(&global_trace);
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_is_on);
1548
1549 static int __init set_buf_size(char *str)
1550 {
1551         unsigned long buf_size;
1552
1553         if (!str)
1554                 return 0;
1555         buf_size = memparse(str, &str);
1556         /*
1557          * nr_entries can not be zero and the startup
1558          * tests require some buffer space. Therefore
1559          * ensure we have at least 4096 bytes of buffer.
1560          */
1561         trace_buf_size = max(4096UL, buf_size);
1562         return 1;
1563 }
1564 __setup("trace_buf_size=", set_buf_size);
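/*
 * memparse() accepts the usual K/M/G suffixes, so the per-CPU buffer
 * size can be requested at boot with e.g.:
 *
 *	trace_buf_size=10M
 */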
1565
1566 static int __init set_tracing_thresh(char *str)
1567 {
1568         unsigned long threshold;
1569         int ret;
1570
1571         if (!str)
1572                 return 0;
1573         ret = kstrtoul(str, 0, &threshold);
1574         if (ret < 0)
1575                 return 0;
1576         tracing_thresh = threshold * 1000;
1577         return 1;
1578 }
1579 __setup("tracing_thresh=", set_tracing_thresh);
1580
1581 unsigned long nsecs_to_usecs(unsigned long nsecs)
1582 {
1583         return nsecs / 1000;
1584 }
1585
1586 /*
1587  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1588  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1589  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1590  * of strings in the order that the evals (enum) were defined.
1591  */
1592 #undef C
1593 #define C(a, b) b
1594
1595 /* These must match the bit positions in trace_iterator_flags */
1596 static const char *trace_options[] = {
1597         TRACE_FLAGS
1598         NULL
1599 };
1600
1601 static struct {
1602         u64 (*func)(void);
1603         const char *name;
1604         int in_ns;              /* is this clock in nanoseconds? */
1605 } trace_clocks[] = {
1606         { trace_clock_local,            "local",        1 },
1607         { trace_clock_global,           "global",       1 },
1608         { trace_clock_counter,          "counter",      0 },
1609         { trace_clock_jiffies,          "uptime",       0 },
1610         { trace_clock,                  "perf",         1 },
1611         { ktime_get_mono_fast_ns,       "mono",         1 },
1612         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1613         { ktime_get_boot_fast_ns,       "boot",         1 },
1614         { ktime_get_tai_fast_ns,        "tai",          1 },
1615         ARCH_TRACE_CLOCKS
1616 };
1617
1618 bool trace_clock_in_ns(struct trace_array *tr)
1619 {
1620         if (trace_clocks[tr->clock_id].in_ns)
1621                 return true;
1622
1623         return false;
1624 }
1625
1626 /*
1627  * trace_parser_get_init - gets the buffer for trace parser
1628  */
1629 int trace_parser_get_init(struct trace_parser *parser, int size)
1630 {
1631         memset(parser, 0, sizeof(*parser));
1632
1633         parser->buffer = kmalloc(size, GFP_KERNEL);
1634         if (!parser->buffer)
1635                 return 1;
1636
1637         parser->size = size;
1638         return 0;
1639 }
1640
1641 /*
1642  * trace_parser_put - frees the buffer for trace parser
1643  */
1644 void trace_parser_put(struct trace_parser *parser)
1645 {
1646         kfree(parser->buffer);
1647         parser->buffer = NULL;
1648 }
1649
1650 /*
1651  * trace_get_user - reads the user input string separated by  space
1652  * (matched by isspace(ch))
1653  *
1654  * For each string found the 'struct trace_parser' is updated,
1655  * and the function returns.
1656  *
1657  * Returns number of bytes read.
1658  *
1659  * See kernel/trace/trace.h for 'struct trace_parser' details.
1660  */
1661 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1662         size_t cnt, loff_t *ppos)
1663 {
1664         char ch;
1665         size_t read = 0;
1666         ssize_t ret;
1667
1668         if (!*ppos)
1669                 trace_parser_clear(parser);
1670
1671         ret = get_user(ch, ubuf++);
1672         if (ret)
1673                 goto out;
1674
1675         read++;
1676         cnt--;
1677
1678         /*
1679          * If the parser did not finish with the last write, keep reading
1680          * without skipping spaces; otherwise skip leading white space first.
1681          */
1682         if (!parser->cont) {
1683                 /* skip white space */
1684                 while (cnt && isspace(ch)) {
1685                         ret = get_user(ch, ubuf++);
1686                         if (ret)
1687                                 goto out;
1688                         read++;
1689                         cnt--;
1690                 }
1691
1692                 parser->idx = 0;
1693
1694                 /* only spaces were written */
1695                 if (isspace(ch) || !ch) {
1696                         *ppos += read;
1697                         ret = read;
1698                         goto out;
1699                 }
1700         }
1701
1702         /* read the non-space input */
1703         while (cnt && !isspace(ch) && ch) {
1704                 if (parser->idx < parser->size - 1)
1705                         parser->buffer[parser->idx++] = ch;
1706                 else {
1707                         ret = -EINVAL;
1708                         goto out;
1709                 }
1710                 ret = get_user(ch, ubuf++);
1711                 if (ret)
1712                         goto out;
1713                 read++;
1714                 cnt--;
1715         }
1716
1717         /* We either got a complete token or we have to wait for another call. */
1718         if (isspace(ch) || !ch) {
1719                 parser->buffer[parser->idx] = 0;
1720                 parser->cont = false;
1721         } else if (parser->idx < parser->size - 1) {
1722                 parser->cont = true;
1723                 parser->buffer[parser->idx++] = ch;
1724                 /* Make sure the parsed string always terminates with '\0'. */
1725                 parser->buffer[parser->idx] = 0;
1726         } else {
1727                 ret = -EINVAL;
1728                 goto out;
1729         }
1730
1731         *ppos += read;
1732         ret = read;
1733
1734 out:
1735         return ret;
1736 }
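/*
 * Example (editor's illustration): if user space writes a token in two
 * chunks, say "do_sys_ope" followed by "n vfs_read\n", the first call
 * stores the partial token and sets parser->cont, and the second call
 * appends the trailing "n" before the space terminates "do_sys_open".
 * Callers typically loop on trace_get_user() and consume parser->buffer
 * whenever trace_parser_loaded() reports data and parser->cont is clear.
 */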
1737
1738 /* TODO add a seq_buf_to_buffer() */
1739 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1740 {
1741         int len;
1742
1743         if (trace_seq_used(s) <= s->readpos)
1744                 return -EBUSY;
1745
1746         len = trace_seq_used(s) - s->readpos;
1747         if (cnt > len)
1748                 cnt = len;
1749         memcpy(buf, s->buffer + s->readpos, cnt);
1750
1751         s->readpos += cnt;
1752         return cnt;
1753 }
1754
1755 unsigned long __read_mostly     tracing_thresh;
1756
1757 #ifdef CONFIG_TRACER_MAX_TRACE
1758 static const struct file_operations tracing_max_lat_fops;
1759
1760 #ifdef LATENCY_FS_NOTIFY
1761
1762 static struct workqueue_struct *fsnotify_wq;
1763
1764 static void latency_fsnotify_workfn(struct work_struct *work)
1765 {
1766         struct trace_array *tr = container_of(work, struct trace_array,
1767                                               fsnotify_work);
1768         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1769 }
1770
1771 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1772 {
1773         struct trace_array *tr = container_of(iwork, struct trace_array,
1774                                               fsnotify_irqwork);
1775         queue_work(fsnotify_wq, &tr->fsnotify_work);
1776 }
1777
1778 static void trace_create_maxlat_file(struct trace_array *tr,
1779                                      struct dentry *d_tracer)
1780 {
1781         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1782         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1783         tr->d_max_latency = trace_create_file("tracing_max_latency",
1784                                               TRACE_MODE_WRITE,
1785                                               d_tracer, tr,
1786                                               &tracing_max_lat_fops);
1787 }
1788
1789 __init static int latency_fsnotify_init(void)
1790 {
1791         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1792                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1793         if (!fsnotify_wq) {
1794                 pr_err("Unable to allocate tr_max_lat_wq\n");
1795                 return -ENOMEM;
1796         }
1797         return 0;
1798 }
1799
1800 late_initcall_sync(latency_fsnotify_init);
1801
1802 void latency_fsnotify(struct trace_array *tr)
1803 {
1804         if (!fsnotify_wq)
1805                 return;
1806         /*
1807          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1808          * possible that we are called from __schedule() or do_idle(), which
1809          * could cause a deadlock.
1810          */
1811         irq_work_queue(&tr->fsnotify_irqwork);
1812 }
1813
1814 #else /* !LATENCY_FS_NOTIFY */
1815
1816 #define trace_create_maxlat_file(tr, d_tracer)                          \
1817         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1818                           d_tracer, tr, &tracing_max_lat_fops)
1819
1820 #endif
1821
1822 /*
1823  * Copy the new maximum trace into the separate maximum-trace
1824  * structure. (this way the maximum trace is permanently saved,
1825  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1826  */
1827 static void
1828 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1829 {
1830         struct array_buffer *trace_buf = &tr->array_buffer;
1831         struct array_buffer *max_buf = &tr->max_buffer;
1832         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1833         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1834
1835         max_buf->cpu = cpu;
1836         max_buf->time_start = data->preempt_timestamp;
1837
1838         max_data->saved_latency = tr->max_latency;
1839         max_data->critical_start = data->critical_start;
1840         max_data->critical_end = data->critical_end;
1841
1842         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1843         max_data->pid = tsk->pid;
1844         /*
1845          * If tsk == current, then use current_uid(), as that does not use
1846          * RCU. The irq tracer can be called out of RCU scope.
1847          */
1848         if (tsk == current)
1849                 max_data->uid = current_uid();
1850         else
1851                 max_data->uid = task_uid(tsk);
1852
1853         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1854         max_data->policy = tsk->policy;
1855         max_data->rt_priority = tsk->rt_priority;
1856
1857         /* record this task's comm */
1858         tracing_record_cmdline(tsk);
1859         latency_fsnotify(tr);
1860 }
1861
1862 /**
1863  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1864  * @tr: trace array to snapshot
1865  * @tsk: the task with the latency
1866  * @cpu: the cpu that initiated the trace
1867  * @cond_data: User data associated with a conditional snapshot
1868  *
1869  * Flip the buffers between the @tr and the max_tr and record information
1870  * about which task was the cause of this latency.
1871  */
1872 void
1873 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1874               void *cond_data)
1875 {
1876         if (tr->stop_count)
1877                 return;
1878
1879         WARN_ON_ONCE(!irqs_disabled());
1880
1881         if (!tr->allocated_snapshot) {
1882                 /* Only the nop tracer should hit this when disabling */
1883                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1884                 return;
1885         }
1886
1887         arch_spin_lock(&tr->max_lock);
1888
1889         /* Inherit the recordable setting from array_buffer */
1890         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1891                 ring_buffer_record_on(tr->max_buffer.buffer);
1892         else
1893                 ring_buffer_record_off(tr->max_buffer.buffer);
1894
1895 #ifdef CONFIG_TRACER_SNAPSHOT
1896         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1897                 arch_spin_unlock(&tr->max_lock);
1898                 return;
1899         }
1900 #endif
1901         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1902
1903         __update_max_tr(tr, tsk, cpu);
1904
1905         arch_spin_unlock(&tr->max_lock);
1906
1907         /* Any waiters on the old snapshot buffer need to wake up */
1908         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1909 }
1910
1911 /**
1912  * update_max_tr_single - only copy one trace over, and reset the rest
1913  * @tr: trace array to snapshot
1914  * @tsk: task with the latency
1915  * @cpu: the cpu of the buffer to copy
1916  *
1917  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1918  */
1919 void
1920 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1921 {
1922         int ret;
1923
1924         if (tr->stop_count)
1925                 return;
1926
1927         WARN_ON_ONCE(!irqs_disabled());
1928         if (!tr->allocated_snapshot) {
1929                 /* Only the nop tracer should hit this when disabling */
1930                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1931                 return;
1932         }
1933
1934         arch_spin_lock(&tr->max_lock);
1935
1936         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1937
1938         if (ret == -EBUSY) {
1939                 /*
1940                  * We failed to swap the buffer because a commit was
1941                  * taking place on this CPU, or because a resize is in
1942                  * progress. We fail to record, but we reset the max
1943                  * trace buffer (no one writes directly to it) and flag
1944                  * that it failed.
1945                  */
1946                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1947                         "Failed to swap buffers due to commit or resize in progress\n");
1948         }
1949
1950         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1951
1952         __update_max_tr(tr, tsk, cpu);
1953         arch_spin_unlock(&tr->max_lock);
1954 }
1955
1956 #endif /* CONFIG_TRACER_MAX_TRACE */
1957
1958 static int wait_on_pipe(struct trace_iterator *iter, int full)
1959 {
1960         int ret;
1961
1962         /* Iterators are static, they should be filled or empty */
1963         if (trace_buffer_iter(iter, iter->cpu_file))
1964                 return 0;
1965
1966         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1967
1968 #ifdef CONFIG_TRACER_MAX_TRACE
1969         /*
1970          * Make sure this is still the snapshot buffer, as if a snapshot were
1971          * to happen, this would now be the main buffer.
1972          */
1973         if (iter->snapshot)
1974                 iter->array_buffer = &iter->tr->max_buffer;
1975 #endif
1976         return ret;
1977 }
1978
1979 #ifdef CONFIG_FTRACE_STARTUP_TEST
1980 static bool selftests_can_run;
1981
1982 struct trace_selftests {
1983         struct list_head                list;
1984         struct tracer                   *type;
1985 };
1986
1987 static LIST_HEAD(postponed_selftests);
1988
1989 static int save_selftest(struct tracer *type)
1990 {
1991         struct trace_selftests *selftest;
1992
1993         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1994         if (!selftest)
1995                 return -ENOMEM;
1996
1997         selftest->type = type;
1998         list_add(&selftest->list, &postponed_selftests);
1999         return 0;
2000 }
2001
2002 static int run_tracer_selftest(struct tracer *type)
2003 {
2004         struct trace_array *tr = &global_trace;
2005         struct tracer *saved_tracer = tr->current_trace;
2006         int ret;
2007
2008         if (!type->selftest || tracing_selftest_disabled)
2009                 return 0;
2010
2011         /*
2012          * If a tracer registers early in boot up (before scheduling is
2013          * initialized and such), then do not run its selftests yet.
2014          * Instead, run it a little later in the boot process.
2015          */
2016         if (!selftests_can_run)
2017                 return save_selftest(type);
2018
2019         if (!tracing_is_on()) {
2020                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2021                         type->name);
2022                 return 0;
2023         }
2024
2025         /*
2026          * Run a selftest on this tracer.
2027          * Here we reset the trace buffer, and set the current
2028          * tracer to be this tracer. The tracer can then run some
2029          * internal tracing to verify that everything is in order.
2030          * If we fail, we do not register this tracer.
2031          */
2032         tracing_reset_online_cpus(&tr->array_buffer);
2033
2034         tr->current_trace = type;
2035
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037         if (type->use_max_tr) {
2038                 /* If we expanded the buffers, make sure the max is expanded too */
2039                 if (tr->ring_buffer_expanded)
2040                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2041                                            RING_BUFFER_ALL_CPUS);
2042                 tr->allocated_snapshot = true;
2043         }
2044 #endif
2045
2046         /* the test is responsible for initializing and enabling */
2047         pr_info("Testing tracer %s: ", type->name);
2048         ret = type->selftest(type, tr);
2049         /* the test is responsible for resetting too */
2050         tr->current_trace = saved_tracer;
2051         if (ret) {
2052                 printk(KERN_CONT "FAILED!\n");
2053                 /* Add the warning after printing 'FAILED' */
2054                 WARN_ON(1);
2055                 return -1;
2056         }
2057         /* Only reset on passing, to avoid touching corrupted buffers */
2058         tracing_reset_online_cpus(&tr->array_buffer);
2059
2060 #ifdef CONFIG_TRACER_MAX_TRACE
2061         if (type->use_max_tr) {
2062                 tr->allocated_snapshot = false;
2063
2064                 /* Shrink the max buffer again */
2065                 if (tr->ring_buffer_expanded)
2066                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2067                                            RING_BUFFER_ALL_CPUS);
2068         }
2069 #endif
2070
2071         printk(KERN_CONT "PASSED\n");
2072         return 0;
2073 }
2074
2075 static int do_run_tracer_selftest(struct tracer *type)
2076 {
2077         int ret;
2078
2079         /*
2080          * Tests can take a long time, especially if they are run one after the
2081          * other, as does happen during bootup when all the tracers are
2082          * registered. This could cause the soft lockup watchdog to trigger.
2083          */
2084         cond_resched();
2085
2086         tracing_selftest_running = true;
2087         ret = run_tracer_selftest(type);
2088         tracing_selftest_running = false;
2089
2090         return ret;
2091 }
2092
2093 static __init int init_trace_selftests(void)
2094 {
2095         struct trace_selftests *p, *n;
2096         struct tracer *t, **last;
2097         int ret;
2098
2099         selftests_can_run = true;
2100
2101         mutex_lock(&trace_types_lock);
2102
2103         if (list_empty(&postponed_selftests))
2104                 goto out;
2105
2106         pr_info("Running postponed tracer tests:\n");
2107
2108         tracing_selftest_running = true;
2109         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2110                 /* This loop can take minutes when sanitizers are enabled, so
2111                  * let's make sure we allow RCU processing.
2112                  */
2113                 cond_resched();
2114                 ret = run_tracer_selftest(p->type);
2115                 /* If the test fails, then warn and remove from available_tracers */
2116                 if (ret < 0) {
2117                         WARN(1, "tracer: %s failed selftest, disabling\n",
2118                              p->type->name);
2119                         last = &trace_types;
2120                         for (t = trace_types; t; t = t->next) {
2121                                 if (t == p->type) {
2122                                         *last = t->next;
2123                                         break;
2124                                 }
2125                                 last = &t->next;
2126                         }
2127                 }
2128                 list_del(&p->list);
2129                 kfree(p);
2130         }
2131         tracing_selftest_running = false;
2132
2133  out:
2134         mutex_unlock(&trace_types_lock);
2135
2136         return 0;
2137 }
2138 core_initcall(init_trace_selftests);
2139 #else
2140 static inline int run_tracer_selftest(struct tracer *type)
2141 {
2142         return 0;
2143 }
2144 static inline int do_run_tracer_selftest(struct tracer *type)
2145 {
2146         return 0;
2147 }
2148 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2149
2150 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2151
2152 static void __init apply_trace_boot_options(void);
2153
2154 /**
2155  * register_tracer - register a tracer with the ftrace system.
2156  * @type: the plugin for the tracer
2157  *
2158  * Register a new plugin tracer.
2159  */
2160 int __init register_tracer(struct tracer *type)
2161 {
2162         struct tracer *t;
2163         int ret = 0;
2164
2165         if (!type->name) {
2166                 pr_info("Tracer must have a name\n");
2167                 return -1;
2168         }
2169
2170         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2171                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2172                 return -1;
2173         }
2174
2175         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2176                 pr_warn("Can not register tracer %s due to lockdown\n",
2177                            type->name);
2178                 return -EPERM;
2179         }
2180
2181         mutex_lock(&trace_types_lock);
2182
2183         for (t = trace_types; t; t = t->next) {
2184                 if (strcmp(type->name, t->name) == 0) {
2185                         /* already found */
2186                         pr_info("Tracer %s already registered\n",
2187                                 type->name);
2188                         ret = -1;
2189                         goto out;
2190                 }
2191         }
2192
2193         if (!type->set_flag)
2194                 type->set_flag = &dummy_set_flag;
2195         if (!type->flags) {
2196                 /* allocate a dummy tracer_flags */
2197                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2198                 if (!type->flags) {
2199                         ret = -ENOMEM;
2200                         goto out;
2201                 }
2202                 type->flags->val = 0;
2203                 type->flags->opts = dummy_tracer_opt;
2204         } else if (!type->flags->opts)
2205                 type->flags->opts = dummy_tracer_opt;
2207
2208         /* store the tracer for __set_tracer_option */
2209         type->flags->trace = type;
2210
2211         ret = do_run_tracer_selftest(type);
2212         if (ret < 0)
2213                 goto out;
2214
2215         type->next = trace_types;
2216         trace_types = type;
2217         add_tracer_options(&global_trace, type);
2218
2219  out:
2220         mutex_unlock(&trace_types_lock);
2221
2222         if (ret || !default_bootup_tracer)
2223                 goto out_unlock;
2224
2225         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2226                 goto out_unlock;
2227
2228         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2229         /* Do we want this tracer to start on bootup? */
2230         tracing_set_tracer(&global_trace, type->name);
2231         default_bootup_tracer = NULL;
2232
2233         apply_trace_boot_options();
2234
2235         /* Disable other selftests, since running this tracer will break them. */
2236         disable_tracing_selftest("running a tracer");
2237
2238  out_unlock:
2239         return ret;
2240 }
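/*
 * Example (editor's sketch, not an in-tree tracer): the sketch below
 * assumes a name and an init callback are enough; real tracers usually
 * also provide ->reset and related callbacks. The missing ->set_flag
 * and ->flags members are filled in with the dummy defaults above.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static int __init my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 */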
2241
2242 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2243 {
2244         struct trace_buffer *buffer = buf->buffer;
2245
2246         if (!buffer)
2247                 return;
2248
2249         ring_buffer_record_disable(buffer);
2250
2251         /* Make sure all commits have finished */
2252         synchronize_rcu();
2253         ring_buffer_reset_cpu(buffer, cpu);
2254
2255         ring_buffer_record_enable(buffer);
2256 }
2257
2258 void tracing_reset_online_cpus(struct array_buffer *buf)
2259 {
2260         struct trace_buffer *buffer = buf->buffer;
2261
2262         if (!buffer)
2263                 return;
2264
2265         ring_buffer_record_disable(buffer);
2266
2267         /* Make sure all commits have finished */
2268         synchronize_rcu();
2269
2270         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2271
2272         ring_buffer_reset_online_cpus(buffer);
2273
2274         ring_buffer_record_enable(buffer);
2275 }
2276
2277 /* Must have trace_types_lock held */
2278 void tracing_reset_all_online_cpus_unlocked(void)
2279 {
2280         struct trace_array *tr;
2281
2282         lockdep_assert_held(&trace_types_lock);
2283
2284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2285                 if (!tr->clear_trace)
2286                         continue;
2287                 tr->clear_trace = false;
2288                 tracing_reset_online_cpus(&tr->array_buffer);
2289 #ifdef CONFIG_TRACER_MAX_TRACE
2290                 tracing_reset_online_cpus(&tr->max_buffer);
2291 #endif
2292         }
2293 }
2294
2295 void tracing_reset_all_online_cpus(void)
2296 {
2297         mutex_lock(&trace_types_lock);
2298         tracing_reset_all_online_cpus_unlocked();
2299         mutex_unlock(&trace_types_lock);
2300 }
2301
2302 /*
2303  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2304  * is the tgid last observed corresponding to pid=i.
2305  */
2306 static int *tgid_map;
2307
2308 /* The maximum valid index into tgid_map. */
2309 static size_t tgid_map_max;
2310
2311 #define SAVED_CMDLINES_DEFAULT 128
2312 #define NO_CMDLINE_MAP UINT_MAX
2313 /*
2314  * Preemption must be disabled before acquiring trace_cmdline_lock.
2315  * The various trace_arrays' max_lock must be acquired in a context
2316  * where interrupts are disabled.
2317  */
2318 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2319 struct saved_cmdlines_buffer {
2320         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2321         unsigned *map_cmdline_to_pid;
2322         unsigned cmdline_num;
2323         int cmdline_idx;
2324         char saved_cmdlines[];
2325 };
2326 static struct saved_cmdlines_buffer *savedcmd;
2327
2328 static inline char *get_saved_cmdlines(int idx)
2329 {
2330         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2331 }
2332
2333 static inline void set_cmdline(int idx, const char *cmdline)
2334 {
2335         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2336 }
2337
2338 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2339 {
2340         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2341
2342         kfree(s->map_cmdline_to_pid);
2343         kmemleak_free(s);
2344         free_pages((unsigned long)s, order);
2345 }
2346
2347 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2348 {
2349         struct saved_cmdlines_buffer *s;
2350         struct page *page;
2351         int orig_size, size;
2352         int order;
2353
2354         /* Figure out how much is needed to hold the given number of cmdlines */
2355         orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2356         order = get_order(orig_size);
2357         size = 1 << (order + PAGE_SHIFT);
2358         page = alloc_pages(GFP_KERNEL, order);
2359         if (!page)
2360                 return NULL;
2361
2362         s = page_address(page);
2363         kmemleak_alloc(s, size, 1, GFP_KERNEL);
2364         memset(s, 0, sizeof(*s));
2365
2366         /* Round up to actual allocation */
2367         val = (size - sizeof(*s)) / TASK_COMM_LEN;
2368         s->cmdline_num = val;
2369
2370         s->map_cmdline_to_pid = kmalloc_array(val,
2371                                               sizeof(*s->map_cmdline_to_pid),
2372                                               GFP_KERNEL);
2373         if (!s->map_cmdline_to_pid) {
2374                 free_saved_cmdlines_buffer(s);
2375                 return NULL;
2376         }
2377
2378         s->cmdline_idx = 0;
2379         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2380                sizeof(s->map_pid_to_cmdline));
2381         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2382                val * sizeof(*s->map_cmdline_to_pid));
2383
2384         return s;
2385 }
2386
2387 static int trace_create_savedcmd(void)
2388 {
2389         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2390
2391         return savedcmd ? 0 : -ENOMEM;
2392 }
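/*
 * Example (editor's note; numbers assume a 64-bit build with 4K pages):
 * the default request of 128 cmdlines is dwarfed by the ~128K
 * map_pid_to_cmdline[] array embedded in the struct, so
 * allocate_cmdlines_buffer() rounds the allocation up to an order-6
 * (256K) block and the "Round up to actual allocation" step then raises
 * cmdline_num to roughly 8000 entries.
 */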
2393
2394 int is_tracing_stopped(void)
2395 {
2396         return global_trace.stop_count;
2397 }
2398
2399 static void tracing_start_tr(struct trace_array *tr)
2400 {
2401         struct trace_buffer *buffer;
2402         unsigned long flags;
2403
2404         if (tracing_disabled)
2405                 return;
2406
2407         raw_spin_lock_irqsave(&tr->start_lock, flags);
2408         if (--tr->stop_count) {
2409                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2410                         /* Someone screwed up their debugging */
2411                         tr->stop_count = 0;
2412                 }
2413                 goto out;
2414         }
2415
2416         /* Prevent the buffers from switching */
2417         arch_spin_lock(&tr->max_lock);
2418
2419         buffer = tr->array_buffer.buffer;
2420         if (buffer)
2421                 ring_buffer_record_enable(buffer);
2422
2423 #ifdef CONFIG_TRACER_MAX_TRACE
2424         buffer = tr->max_buffer.buffer;
2425         if (buffer)
2426                 ring_buffer_record_enable(buffer);
2427 #endif
2428
2429         arch_spin_unlock(&tr->max_lock);
2430
2431  out:
2432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434
2435 /**
2436  * tracing_start - quick start of the tracer
2437  *
2438  * If tracing is enabled but was stopped by tracing_stop,
2439  * this will start the tracer back up.
2440  */
2441 void tracing_start(void)
2443 {
2444         return tracing_start_tr(&global_trace);
2445 }
2446
2447 static void tracing_stop_tr(struct trace_array *tr)
2448 {
2449         struct trace_buffer *buffer;
2450         unsigned long flags;
2451
2452         raw_spin_lock_irqsave(&tr->start_lock, flags);
2453         if (tr->stop_count++)
2454                 goto out;
2455
2456         /* Prevent the buffers from switching */
2457         arch_spin_lock(&tr->max_lock);
2458
2459         buffer = tr->array_buffer.buffer;
2460         if (buffer)
2461                 ring_buffer_record_disable(buffer);
2462
2463 #ifdef CONFIG_TRACER_MAX_TRACE
2464         buffer = tr->max_buffer.buffer;
2465         if (buffer)
2466                 ring_buffer_record_disable(buffer);
2467 #endif
2468
2469         arch_spin_unlock(&tr->max_lock);
2470
2471  out:
2472         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2473 }
2474
2475 /**
2476  * tracing_stop - quick stop of the tracer
2477  *
2478  * Light weight way to stop tracing. Use in conjunction with
2479  * tracing_start.
2480  */
2481 void tracing_stop(void)
2482 {
2483         return tracing_stop_tr(&global_trace);
2484 }
2485
2486 static int trace_save_cmdline(struct task_struct *tsk)
2487 {
2488         unsigned tpid, idx;
2489
2490         /* treat recording of idle task as a success */
2491         if (!tsk->pid)
2492                 return 1;
2493
2494         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2495
2496         /*
2497          * It's not the end of the world if we don't get
2498          * the lock, but we also don't want to spin
2499          * nor do we want to disable interrupts,
2500          * so if we miss here, then better luck next time.
2501          *
2502          * This is called from within the scheduler and wakeup paths, so
2503          * interrupts had better be disabled and the run queue lock held.
2504          */
2505         lockdep_assert_preemption_disabled();
2506         if (!arch_spin_trylock(&trace_cmdline_lock))
2507                 return 0;
2508
2509         idx = savedcmd->map_pid_to_cmdline[tpid];
2510         if (idx == NO_CMDLINE_MAP) {
2511                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2512
2513                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2514                 savedcmd->cmdline_idx = idx;
2515         }
2516
2517         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2518         set_cmdline(idx, tsk->comm);
2519
2520         arch_spin_unlock(&trace_cmdline_lock);
2521
2522         return 1;
2523 }
2524
2525 static void __trace_find_cmdline(int pid, char comm[])
2526 {
2527         unsigned map;
2528         int tpid;
2529
2530         if (!pid) {
2531                 strcpy(comm, "<idle>");
2532                 return;
2533         }
2534
2535         if (WARN_ON_ONCE(pid < 0)) {
2536                 strcpy(comm, "<XXX>");
2537                 return;
2538         }
2539
2540         tpid = pid & (PID_MAX_DEFAULT - 1);
2541         map = savedcmd->map_pid_to_cmdline[tpid];
2542         if (map != NO_CMDLINE_MAP) {
2543                 tpid = savedcmd->map_cmdline_to_pid[map];
2544                 if (tpid == pid) {
2545                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2546                         return;
2547                 }
2548         }
2549         strcpy(comm, "<...>");
2550 }
2551
2552 void trace_find_cmdline(int pid, char comm[])
2553 {
2554         preempt_disable();
2555         arch_spin_lock(&trace_cmdline_lock);
2556
2557         __trace_find_cmdline(pid, comm);
2558
2559         arch_spin_unlock(&trace_cmdline_lock);
2560         preempt_enable();
2561 }
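/*
 * Example (editor's illustration): the map is indexed by
 * pid & (PID_MAX_DEFAULT - 1), so pids that differ by PID_MAX_DEFAULT
 * alias to the same slot. With PID_MAX_DEFAULT at 32768, pid 100 and
 * pid 32868 share slot 100: whichever task went through
 * trace_save_cmdline() last owns the comm, and a lookup for the other
 * pid falls back to "<...>" because map_cmdline_to_pid[] no longer
 * matches the requested pid.
 */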
2562
2563 static int *trace_find_tgid_ptr(int pid)
2564 {
2565         /*
2566          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2567          * if we observe a non-NULL tgid_map then we also observe the correct
2568          * tgid_map_max.
2569          */
2570         int *map = smp_load_acquire(&tgid_map);
2571
2572         if (unlikely(!map || pid > tgid_map_max))
2573                 return NULL;
2574
2575         return &map[pid];
2576 }
2577
2578 int trace_find_tgid(int pid)
2579 {
2580         int *ptr = trace_find_tgid_ptr(pid);
2581
2582         return ptr ? *ptr : 0;
2583 }
2584
2585 static int trace_save_tgid(struct task_struct *tsk)
2586 {
2587         int *ptr;
2588
2589         /* treat recording of idle task as a success */
2590         if (!tsk->pid)
2591                 return 1;
2592
2593         ptr = trace_find_tgid_ptr(tsk->pid);
2594         if (!ptr)
2595                 return 0;
2596
2597         *ptr = tsk->tgid;
2598         return 1;
2599 }
2600
2601 static bool tracing_record_taskinfo_skip(int flags)
2602 {
2603         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2604                 return true;
2605         if (!__this_cpu_read(trace_taskinfo_save))
2606                 return true;
2607         return false;
2608 }
2609
2610 /**
2611  * tracing_record_taskinfo - record the task info of a task
2612  *
2613  * @task:  task to record
2614  * @flags: TRACE_RECORD_CMDLINE for recording comm
2615  *         TRACE_RECORD_TGID for recording tgid
2616  */
2617 void tracing_record_taskinfo(struct task_struct *task, int flags)
2618 {
2619         bool done;
2620
2621         if (tracing_record_taskinfo_skip(flags))
2622                 return;
2623
2624         /*
2625          * Record as much task information as possible. If some fail, continue
2626          * to try to record the others.
2627          */
2628         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2629         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2630
2631         /* If recording any information failed, retry again soon. */
2632         if (!done)
2633                 return;
2634
2635         __this_cpu_write(trace_taskinfo_save, false);
2636 }
2637
2638 /**
2639  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2640  *
2641  * @prev: previous task during sched_switch
2642  * @next: next task during sched_switch
2643  * @flags: TRACE_RECORD_CMDLINE for recording comm
2644  *         TRACE_RECORD_TGID for recording tgid
2645  */
2646 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2647                                           struct task_struct *next, int flags)
2648 {
2649         bool done;
2650
2651         if (tracing_record_taskinfo_skip(flags))
2652                 return;
2653
2654         /*
2655          * Record as much task information as possible. If some fail, continue
2656          * to try to record the others.
2657          */
2658         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2659         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2660         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2661         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2662
2663         /* If recording any information failed, retry again soon. */
2664         if (!done)
2665                 return;
2666
2667         __this_cpu_write(trace_taskinfo_save, false);
2668 }
2669
2670 /* Helpers to record a specific task information */
2671 void tracing_record_cmdline(struct task_struct *task)
2672 {
2673         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2674 }
2675
2676 void tracing_record_tgid(struct task_struct *task)
2677 {
2678         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2679 }
2680
2681 /*
2682  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2683  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2684  * simplifies those functions and keeps them in sync.
2685  */
2686 enum print_line_t trace_handle_return(struct trace_seq *s)
2687 {
2688         return trace_seq_has_overflowed(s) ?
2689                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2690 }
2691 EXPORT_SYMBOL_GPL(trace_handle_return);
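/*
 * Example (editor's illustration): a trace_event output callback
 * typically ends with this helper so that an overflowed trace_seq is
 * reported as TRACE_TYPE_PARTIAL_LINE:
 *
 *	trace_seq_printf(s, "%lu\n", field->value);
 *	return trace_handle_return(s);
 *
 * ("field" above is hypothetical; the real users live in the
 * trace_output.c print handlers.)
 */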
2692
2693 static unsigned short migration_disable_value(void)
2694 {
2695 #if defined(CONFIG_SMP)
2696         return current->migration_disabled;
2697 #else
2698         return 0;
2699 #endif
2700 }
2701
2702 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2703 {
2704         unsigned int trace_flags = irqs_status;
2705         unsigned int pc;
2706
2707         pc = preempt_count();
2708
2709         if (pc & NMI_MASK)
2710                 trace_flags |= TRACE_FLAG_NMI;
2711         if (pc & HARDIRQ_MASK)
2712                 trace_flags |= TRACE_FLAG_HARDIRQ;
2713         if (in_serving_softirq())
2714                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2715         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2716                 trace_flags |= TRACE_FLAG_BH_OFF;
2717
2718         if (tif_need_resched())
2719                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2720         if (test_preempt_need_resched())
2721                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2722         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2723                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2724 }
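/*
 * Example (editor's illustration): the returned trace_ctx is packed as
 *
 *	bits  0- 3: preemption-disable depth (clamped to 15)
 *	bits  4- 7: migration-disable depth (clamped to 15)
 *	bits 16+  : the TRACE_FLAG_* bits computed above
 *
 * so a hard interrupt hitting a region with a preemption-disable depth
 * of 2 and one migrate_disable() yields 0x2 | (0x1 << 4) in the low
 * byte plus TRACE_FLAG_HARDIRQ shifted into the upper half.
 */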
2725
2726 struct ring_buffer_event *
2727 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2728                           int type,
2729                           unsigned long len,
2730                           unsigned int trace_ctx)
2731 {
2732         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2733 }
2734
2735 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2736 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2737 static int trace_buffered_event_ref;
2738
2739 /**
2740  * trace_buffered_event_enable - enable buffering events
2741  *
2742  * When events are being filtered, it is quicker to use a temporary
2743  * buffer to write the event data into if there's a likely chance
2744  * that it will not be committed. The discard of the ring buffer
2745  * is not as fast as committing, and is much slower than copying
2746  * a commit.
2747  *
2748  * When an event is to be filtered, per cpu buffers are allocated to
2749  * write the event data into. If the event is then filtered and
2750  * discarded, it is simply dropped; otherwise the entire data is
2751  * committed in one shot.
2752  */
2753 void trace_buffered_event_enable(void)
2754 {
2755         struct ring_buffer_event *event;
2756         struct page *page;
2757         int cpu;
2758
2759         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2760
2761         if (trace_buffered_event_ref++)
2762                 return;
2763
2764         for_each_tracing_cpu(cpu) {
2765                 page = alloc_pages_node(cpu_to_node(cpu),
2766                                         GFP_KERNEL | __GFP_NORETRY, 0);
2767                 /* This is just an optimization and can handle failures */
2768                 if (!page) {
2769                         pr_err("Failed to allocate event buffer\n");
2770                         break;
2771                 }
2772
2773                 event = page_address(page);
2774                 memset(event, 0, sizeof(*event));
2775
2776                 per_cpu(trace_buffered_event, cpu) = event;
2777
2778                 preempt_disable();
2779                 if (cpu == smp_processor_id() &&
2780                     __this_cpu_read(trace_buffered_event) !=
2781                     per_cpu(trace_buffered_event, cpu))
2782                         WARN_ON_ONCE(1);
2783                 preempt_enable();
2784         }
2785 }
2786
2787 static void enable_trace_buffered_event(void *data)
2788 {
2789         /* Probably not needed, but do it anyway */
2790         smp_rmb();
2791         this_cpu_dec(trace_buffered_event_cnt);
2792 }
2793
2794 static void disable_trace_buffered_event(void *data)
2795 {
2796         this_cpu_inc(trace_buffered_event_cnt);
2797 }
2798
2799 /**
2800  * trace_buffered_event_disable - disable buffering events
2801  *
2802  * When a filter is removed, it is faster to not use the buffered
2803  * events, and to commit directly into the ring buffer. Free up
2804  * the temp buffers when there are no more users. This requires
2805  * special synchronization with current events.
2806  */
2807 void trace_buffered_event_disable(void)
2808 {
2809         int cpu;
2810
2811         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2812
2813         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2814                 return;
2815
2816         if (--trace_buffered_event_ref)
2817                 return;
2818
2819         /* For each CPU, set the buffer as used. */
2820         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2821                          NULL, true);
2822
2823         /* Wait for all current users to finish */
2824         synchronize_rcu();
2825
2826         for_each_tracing_cpu(cpu) {
2827                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2828                 per_cpu(trace_buffered_event, cpu) = NULL;
2829         }
2830
2831         /*
2832          * Wait for all CPUs that potentially started checking if they can use
2833          * their event buffer only after the previous synchronize_rcu() call and
2834          * they still read a valid pointer from trace_buffered_event. It must be
2835          * ensured they don't see cleared trace_buffered_event_cnt else they
2836          * could wrongly decide to use the pointed-to buffer which is now freed.
2837          */
2838         synchronize_rcu();
2839
2840         /* For each CPU, relinquish the buffer */
2841         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2842                          true);
2843 }
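/*
 * Example (editor's sketch): the pair above is refcounted under
 * event_mutex, so buffering is enabled when a filter is attached and
 * disabled again when that filter goes away:
 *
 *	(filter attached, event_mutex held)
 *		trace_buffered_event_enable();
 *	(filter later removed, event_mutex held)
 *		trace_buffered_event_disable();
 */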
2844
2845 static struct trace_buffer *temp_buffer;
2846
2847 struct ring_buffer_event *
2848 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2849                           struct trace_event_file *trace_file,
2850                           int type, unsigned long len,
2851                           unsigned int trace_ctx)
2852 {
2853         struct ring_buffer_event *entry;
2854         struct trace_array *tr = trace_file->tr;
2855         int val;
2856
2857         *current_rb = tr->array_buffer.buffer;
2858
2859         if (!tr->no_filter_buffering_ref &&
2860             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2861                 preempt_disable_notrace();
2862                 /*
2863                  * Filtering is on, so try to use the per cpu buffer first.
2864                  * This buffer will simulate a ring_buffer_event,
2865                  * where the type_len is zero and the array[0] will
2866                  * hold the full length.
2867                  * (see include/linux/ring_buffer.h for details on
2868                  *  how the ring_buffer_event is structured).
2869                  *
2870                  * Using a temp buffer during filtering and copying it
2871                  * on a matched filter is quicker than writing directly
2872                  * into the ring buffer and then discarding it when
2873                  * it doesn't match. That is because the discard
2874                  * requires several atomic operations to get right.
2875                  * Copying on a match and doing nothing on a failed match
2876                  * is still quicker than skipping the copy but then having
2877                  * to discard out of the ring buffer on a failed match.
2878                  */
2879                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2880                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2881
2882                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2883
2884                         /*
2885                          * Preemption is disabled, but interrupts and NMIs
2886                          * can still come in now. If that happens after
2887                          * the above increment, then it will have to go
2888                          * back to the old method of allocating the event
2889                          * on the ring buffer, and if the filter fails, it
2890                          * will have to call ring_buffer_discard_commit()
2891                          * to remove it.
2892                          *
2893                          * Need to also check the unlikely case that the
2894                          * length is bigger than the temp buffer size.
2895                          * If that happens, then the reserve is pretty much
2896                          * guaranteed to fail, as the ring buffer currently
2897                          * only allows events less than a page. But that may
2898                          * change in the future, so let the ring buffer reserve
2899                          * handle the failure in that case.
2900                          */
2901                         if (val == 1 && likely(len <= max_len)) {
2902                                 trace_event_setup(entry, type, trace_ctx);
2903                                 entry->array[0] = len;
2904                                 /* Return with preemption disabled */
2905                                 return entry;
2906                         }
2907                         this_cpu_dec(trace_buffered_event_cnt);
2908                 }
2909                 /* __trace_buffer_lock_reserve() disables preemption */
2910                 preempt_enable_notrace();
2911         }
2912
2913         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2914                                             trace_ctx);
2915         /*
2916          * If tracing is off, but we have triggers enabled,
2917          * we still need to look at the event data. Use the temp_buffer
2918          * to store the trace event for the trigger to use. It's recursion
2919          * safe and will not be recorded anywhere.
2920          */
2921         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2922                 *current_rb = temp_buffer;
2923                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2924                                                     trace_ctx);
2925         }
2926         return entry;
2927 }
2928 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2929
2930 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2931 static DEFINE_MUTEX(tracepoint_printk_mutex);
2932
2933 static void output_printk(struct trace_event_buffer *fbuffer)
2934 {
2935         struct trace_event_call *event_call;
2936         struct trace_event_file *file;
2937         struct trace_event *event;
2938         unsigned long flags;
2939         struct trace_iterator *iter = tracepoint_print_iter;
2940
2941         /* We should never get here if iter is NULL */
2942         if (WARN_ON_ONCE(!iter))
2943                 return;
2944
2945         event_call = fbuffer->trace_file->event_call;
2946         if (!event_call || !event_call->event.funcs ||
2947             !event_call->event.funcs->trace)
2948                 return;
2949
2950         file = fbuffer->trace_file;
2951         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2952             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2953              !filter_match_preds(file->filter, fbuffer->entry)))
2954                 return;
2955
2956         event = &fbuffer->trace_file->event_call->event;
2957
2958         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2959         trace_seq_init(&iter->seq);
2960         iter->ent = fbuffer->entry;
2961         event_call->event.funcs->trace(iter, 0, event);
2962         trace_seq_putc(&iter->seq, 0);
2963         printk("%s", iter->seq.buffer);
2964
2965         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2966 }
2967
2968 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2969                              void *buffer, size_t *lenp,
2970                              loff_t *ppos)
2971 {
2972         int save_tracepoint_printk;
2973         int ret;
2974
2975         mutex_lock(&tracepoint_printk_mutex);
2976         save_tracepoint_printk = tracepoint_printk;
2977
2978         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2979
2980         /*
2981          * This will force exiting early, as tracepoint_printk
2982          * is always zero when tracepoint_print_iter is not allocated.
2983          */
2984         if (!tracepoint_print_iter)
2985                 tracepoint_printk = 0;
2986
2987         if (save_tracepoint_printk == tracepoint_printk)
2988                 goto out;
2989
2990         if (tracepoint_printk)
2991                 static_key_enable(&tracepoint_printk_key.key);
2992         else
2993                 static_key_disable(&tracepoint_printk_key.key);
2994
2995  out:
2996         mutex_unlock(&tracepoint_printk_mutex);
2997
2998         return ret;
2999 }
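/*
 * Example (editor's illustration): this handler backs the
 * kernel.tracepoint_printk sysctl, so once tracepoint_print_iter has
 * been allocated (e.g. via the tp_printk boot option) the behaviour can
 * be toggled at run time:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *	echo 0 > /proc/sys/kernel/tracepoint_printk
 *
 * Without the iterator, writes are forced back to zero above and the
 * static key stays disabled.
 */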
3000
3001 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3002 {
3003         enum event_trigger_type tt = ETT_NONE;
3004         struct trace_event_file *file = fbuffer->trace_file;
3005
3006         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3007                         fbuffer->entry, &tt))
3008                 goto discard;
3009
3010         if (static_key_false(&tracepoint_printk_key.key))
3011                 output_printk(fbuffer);
3012
3013         if (static_branch_unlikely(&trace_event_exports_enabled))
3014                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3015
3016         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3017                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3018
3019 discard:
3020         if (tt)
3021                 event_triggers_post_call(file, tt);
3022
3023 }
3024 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3025
3026 /*
3027  * Skip 3:
3028  *
3029  *   trace_buffer_unlock_commit_regs()
3030  *   trace_event_buffer_commit()
3031  *   trace_event_raw_event_xxx()
3032  */
3033 # define STACK_SKIP 3
3034
3035 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3036                                      struct trace_buffer *buffer,
3037                                      struct ring_buffer_event *event,
3038                                      unsigned int trace_ctx,
3039                                      struct pt_regs *regs)
3040 {
3041         __buffer_unlock_commit(buffer, event);
3042
3043         /*
3044          * If regs is not set, then skip the necessary functions.
3045          * Note, we can still get here via blktrace, wakeup tracer
3046          * and mmiotrace, but that's ok if they lose a function or
3047          * two. They are not that meaningful.
3048          */
3049         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3050         ftrace_trace_userstack(tr, buffer, trace_ctx);
3051 }
3052
3053 /*
3054  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3055  */
3056 void
3057 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3058                                    struct ring_buffer_event *event)
3059 {
3060         __buffer_unlock_commit(buffer, event);
3061 }
3062
3063 void
3064 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3065                parent_ip, unsigned int trace_ctx)
3066 {
3067         struct trace_event_call *call = &event_function;
3068         struct trace_buffer *buffer = tr->array_buffer.buffer;
3069         struct ring_buffer_event *event;
3070         struct ftrace_entry *entry;
3071
3072         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3073                                             trace_ctx);
3074         if (!event)
3075                 return;
3076         entry   = ring_buffer_event_data(event);
3077         entry->ip                       = ip;
3078         entry->parent_ip                = parent_ip;
3079
3080         if (!call_filter_check_discard(call, entry, buffer, event)) {
3081                 if (static_branch_unlikely(&trace_function_exports_enabled))
3082                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3083                 __buffer_unlock_commit(buffer, event);
3084         }
3085 }
3086
3087 #ifdef CONFIG_STACKTRACE
3088
3089 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3090 #define FTRACE_KSTACK_NESTING   4
3091
3092 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3093
3094 struct ftrace_stack {
3095         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3096 };
3097
3098
3099 struct ftrace_stacks {
3100         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3101 };
3102
3103 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3104 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
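/*
 * Example (editor's note on sizing): with 4K pages this gives
 * FTRACE_KSTACK_ENTRIES = 1024 saved addresses per context, and the
 * four nesting levels (normal, softirq, irq, NMI) make each per-CPU
 * ftrace_stacks roughly 4 * 1024 * sizeof(unsigned long) = 32K on a
 * 64-bit build.
 */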
3105
3106 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3107                                  unsigned int trace_ctx,
3108                                  int skip, struct pt_regs *regs)
3109 {
3110         struct trace_event_call *call = &event_kernel_stack;
3111         struct ring_buffer_event *event;
3112         unsigned int size, nr_entries;
3113         struct ftrace_stack *fstack;
3114         struct stack_entry *entry;
3115         int stackidx;
3116
3117         /*
3118          * Add one, for this function and the call to stack_trace_save().
3119          * If regs is set, then these functions will not be in the way.
3120          */
3121 #ifndef CONFIG_UNWINDER_ORC
3122         if (!regs)
3123                 skip++;
3124 #endif
3125
3126         preempt_disable_notrace();
3127
3128         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3129
3130         /* This should never happen. If it does, yell once and skip */
3131         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3132                 goto out;
3133
3134         /*
3135          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3136          * interrupt will either see the value pre increment or post
3137          * increment. If the interrupt happens pre increment it will have
3138          * restored the counter when it returns.  We just need a barrier to
3139          * keep gcc from moving things around.
3140          */
3141         barrier();
3142
3143         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3144         size = ARRAY_SIZE(fstack->calls);
3145
3146         if (regs) {
3147                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3148                                                    size, skip);
3149         } else {
3150                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3151         }
3152
3153         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3154                                     struct_size(entry, caller, nr_entries),
3155                                     trace_ctx);
3156         if (!event)
3157                 goto out;
3158         entry = ring_buffer_event_data(event);
3159
3160         entry->size = nr_entries;
3161         memcpy(&entry->caller, fstack->calls,
3162                flex_array_size(entry, caller, nr_entries));
3163
3164         if (!call_filter_check_discard(call, entry, buffer, event))
3165                 __buffer_unlock_commit(buffer, event);
3166
3167  out:
3168         /* Again, don't let gcc optimize things here */
3169         barrier();
3170         __this_cpu_dec(ftrace_stack_reserve);
3171         preempt_enable_notrace();
3172
3173 }
3174
3175 static inline void ftrace_trace_stack(struct trace_array *tr,
3176                                       struct trace_buffer *buffer,
3177                                       unsigned int trace_ctx,
3178                                       int skip, struct pt_regs *regs)
3179 {
3180         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3181                 return;
3182
3183         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3184 }
3185
3186 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3187                    int skip)
3188 {
3189         struct trace_buffer *buffer = tr->array_buffer.buffer;
3190
3191         if (rcu_is_watching()) {
3192                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3193                 return;
3194         }
3195
3196         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3197                 return;
3198
3199         /*
3200          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3201          * but if the above rcu_is_watching() failed, then the NMI
3202          * triggered someplace critical, and ct_irq_enter() should
3203          * not be called from NMI.
3204          */
3205         if (unlikely(in_nmi()))
3206                 return;
3207
3208         ct_irq_enter_irqson();
3209         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3210         ct_irq_exit_irqson();
3211 }
3212
3213 /**
3214  * trace_dump_stack - record a stack back trace in the trace buffer
3215  * @skip: Number of functions to skip (helper handlers)
3216  */
3217 void trace_dump_stack(int skip)
3218 {
3219         if (tracing_disabled || tracing_selftest_running)
3220                 return;
3221
3222 #ifndef CONFIG_UNWINDER_ORC
3223         /* Skip 1 to skip this function. */
3224         skip++;
3225 #endif
3226         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3227                              tracing_gen_ctx(), skip, NULL);
3228 }
3229 EXPORT_SYMBOL_GPL(trace_dump_stack);
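
/*
 * Illustrative use only (my_debug_hook() and suspicious_condition() are
 * hypothetical names): a debugging site can record the current kernel
 * stack into the global trace buffer like this:
 *
 *	void my_debug_hook(void)
 *	{
 *		if (suspicious_condition())
 *			trace_dump_stack(0);
 *	}
 *
 * @skip only trims additional helper frames; trace_dump_stack() already
 * compensates for its own frame where the unwinder needs it.
 */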
3230
3231 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3232 static DEFINE_PER_CPU(int, user_stack_count);
3233
3234 static void
3235 ftrace_trace_userstack(struct trace_array *tr,
3236                        struct trace_buffer *buffer, unsigned int trace_ctx)
3237 {
3238         struct trace_event_call *call = &event_user_stack;
3239         struct ring_buffer_event *event;
3240         struct userstack_entry *entry;
3241
3242         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3243                 return;
3244
3245         /*
3246          * NMIs cannot handle page faults, even with fixups.
3247          * Saving the user stack can (and often does) fault.
3248          */
3249         if (unlikely(in_nmi()))
3250                 return;
3251
3252         /*
3253          * Prevent recursion, since user stack tracing may
3254          * trigger other kernel events.
3255          */
3256         preempt_disable();
3257         if (__this_cpu_read(user_stack_count))
3258                 goto out;
3259
3260         __this_cpu_inc(user_stack_count);
3261
3262         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3263                                             sizeof(*entry), trace_ctx);
3264         if (!event)
3265                 goto out_drop_count;
3266         entry   = ring_buffer_event_data(event);
3267
3268         entry->tgid             = current->tgid;
3269         memset(&entry->caller, 0, sizeof(entry->caller));
3270
3271         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3272         if (!call_filter_check_discard(call, entry, buffer, event))
3273                 __buffer_unlock_commit(buffer, event);
3274
3275  out_drop_count:
3276         __this_cpu_dec(user_stack_count);
3277  out:
3278         preempt_enable();
3279 }
3280 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3281 static void ftrace_trace_userstack(struct trace_array *tr,
3282                                    struct trace_buffer *buffer,
3283                                    unsigned int trace_ctx)
3284 {
3285 }
3286 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3287
3288 #endif /* CONFIG_STACKTRACE */
3289
3290 static inline void
3291 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3292                           unsigned long long delta)
3293 {
3294         entry->bottom_delta_ts = delta & U32_MAX;
3295         entry->top_delta_ts = (delta >> 32);
3296 }
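
/*
 * Illustrative note: the 64-bit delta is split across the two 32-bit
 * fields above, so a reader would reconstruct it as (sketch, using the
 * field names from this file):
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */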
3297
3298 void trace_last_func_repeats(struct trace_array *tr,
3299                              struct trace_func_repeats *last_info,
3300                              unsigned int trace_ctx)
3301 {
3302         struct trace_buffer *buffer = tr->array_buffer.buffer;
3303         struct func_repeats_entry *entry;
3304         struct ring_buffer_event *event;
3305         u64 delta;
3306
3307         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3308                                             sizeof(*entry), trace_ctx);
3309         if (!event)
3310                 return;
3311
3312         delta = ring_buffer_event_time_stamp(buffer, event) -
3313                 last_info->ts_last_call;
3314
3315         entry = ring_buffer_event_data(event);
3316         entry->ip = last_info->ip;
3317         entry->parent_ip = last_info->parent_ip;
3318         entry->count = last_info->count;
3319         func_repeats_set_delta_ts(entry, delta);
3320
3321         __buffer_unlock_commit(buffer, event);
3322 }
3323
3324 /* created for use with alloc_percpu */
3325 struct trace_buffer_struct {
3326         int nesting;
3327         char buffer[4][TRACE_BUF_SIZE];
3328 };
3329
3330 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3331
3332 /*
3333  * This allows for lockless recording.  If we're nested too deeply, then
3334  * this returns NULL.
3335  */
3336 static char *get_trace_buf(void)
3337 {
3338         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3339
3340         if (!trace_percpu_buffer || buffer->nesting >= 4)
3341                 return NULL;
3342
3343         buffer->nesting++;
3344
3345         /* Interrupts must see nesting incremented before we use the buffer */
3346         barrier();
3347         return &buffer->buffer[buffer->nesting - 1][0];
3348 }
3349
3350 static void put_trace_buf(void)
3351 {
3352         /* Don't let the decrement of nesting leak before this */
3353         barrier();
3354         this_cpu_dec(trace_percpu_buffer->nesting);
3355 }
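
/*
 * Illustrative pairing only (the real callers are trace_vbprintk() and
 * __trace_array_vprintk() below): a successful get_trace_buf() must be
 * matched by put_trace_buf(), with preemption disabled across the pair:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */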
3356
3357 static int alloc_percpu_trace_buffer(void)
3358 {
3359         struct trace_buffer_struct __percpu *buffers;
3360
3361         if (trace_percpu_buffer)
3362                 return 0;
3363
3364         buffers = alloc_percpu(struct trace_buffer_struct);
3365         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3366                 return -ENOMEM;
3367
3368         trace_percpu_buffer = buffers;
3369         return 0;
3370 }
3371
3372 static int buffers_allocated;
3373
3374 void trace_printk_init_buffers(void)
3375 {
3376         if (buffers_allocated)
3377                 return;
3378
3379         if (alloc_percpu_trace_buffer())
3380                 return;
3381
3382         /* trace_printk() is for debug use only. Don't use it in production. */
3383
3384         pr_warn("\n");
3385         pr_warn("**********************************************************\n");
3386         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3387         pr_warn("**                                                      **\n");
3388         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3389         pr_warn("**                                                      **\n");
3390         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3391         pr_warn("** unsafe for production use.                           **\n");
3392         pr_warn("**                                                      **\n");
3393         pr_warn("** If you see this message and you are not debugging    **\n");
3394         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3395         pr_warn("**                                                      **\n");
3396         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3397         pr_warn("**********************************************************\n");
3398
3399         /* Expand the buffers to set size */
3400         tracing_update_buffers(&global_trace);
3401
3402         buffers_allocated = 1;
3403
3404         /*
3405          * trace_printk_init_buffers() can be called by modules.
3406          * If that happens, then we need to start cmdline recording
3407          * directly here. If global_trace.array_buffer.buffer is already
3408          * allocated at this point, then this was called by module code.
3409          */
3410         if (global_trace.array_buffer.buffer)
3411                 tracing_start_cmdline_record();
3412 }
3413 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3414
3415 void trace_printk_start_comm(void)
3416 {
3417         /* Start tracing comms if trace printk is set */
3418         if (!buffers_allocated)
3419                 return;
3420         tracing_start_cmdline_record();
3421 }
3422
3423 static void trace_printk_start_stop_comm(int enabled)
3424 {
3425         if (!buffers_allocated)
3426                 return;
3427
3428         if (enabled)
3429                 tracing_start_cmdline_record();
3430         else
3431                 tracing_stop_cmdline_record();
3432 }
3433
3434 /**
3435  * trace_vbprintk - write binary msg to tracing buffer
3436  * @ip:    The address of the caller
3437  * @fmt:   The string format to write to the buffer
3438  * @args:  Arguments for @fmt
3439  */
3440 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3441 {
3442         struct trace_event_call *call = &event_bprint;
3443         struct ring_buffer_event *event;
3444         struct trace_buffer *buffer;
3445         struct trace_array *tr = &global_trace;
3446         struct bprint_entry *entry;
3447         unsigned int trace_ctx;
3448         char *tbuffer;
3449         int len = 0, size;
3450
3451         if (unlikely(tracing_selftest_running || tracing_disabled))
3452                 return 0;
3453
3454         /* Don't pollute graph traces with trace_vprintk internals */
3455         pause_graph_tracing();
3456
3457         trace_ctx = tracing_gen_ctx();
3458         preempt_disable_notrace();
3459
3460         tbuffer = get_trace_buf();
3461         if (!tbuffer) {
3462                 len = 0;
3463                 goto out_nobuffer;
3464         }
3465
3466         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3467
3468         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3469                 goto out_put;
3470
3471         size = sizeof(*entry) + sizeof(u32) * len;
3472         buffer = tr->array_buffer.buffer;
3473         ring_buffer_nest_start(buffer);
3474         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3475                                             trace_ctx);
3476         if (!event)
3477                 goto out;
3478         entry = ring_buffer_event_data(event);
3479         entry->ip                       = ip;
3480         entry->fmt                      = fmt;
3481
3482         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3483         if (!call_filter_check_discard(call, entry, buffer, event)) {
3484                 __buffer_unlock_commit(buffer, event);
3485                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3486         }
3487
3488 out:
3489         ring_buffer_nest_end(buffer);
3490 out_put:
3491         put_trace_buf();
3492
3493 out_nobuffer:
3494         preempt_enable_notrace();
3495         unpause_graph_tracing();
3496
3497         return len;
3498 }
3499 EXPORT_SYMBOL_GPL(trace_vbprintk);
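
/*
 * Hedged sketch of how a varargs wrapper hands its arguments to
 * trace_vbprintk(); the trace_printk() machinery in trace_printk.c does
 * essentially this (my_bprintk() is a made-up name):
 *
 *	int my_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *
 *		return ret;
 *	}
 */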
3500
3501 __printf(3, 0)
3502 static int
3503 __trace_array_vprintk(struct trace_buffer *buffer,
3504                       unsigned long ip, const char *fmt, va_list args)
3505 {
3506         struct trace_event_call *call = &event_print;
3507         struct ring_buffer_event *event;
3508         int len = 0, size;
3509         struct print_entry *entry;
3510         unsigned int trace_ctx;
3511         char *tbuffer;
3512
3513         if (tracing_disabled)
3514                 return 0;
3515
3516         /* Don't pollute graph traces with trace_vprintk internals */
3517         pause_graph_tracing();
3518
3519         trace_ctx = tracing_gen_ctx();
3520         preempt_disable_notrace();
3521
3522
3523         tbuffer = get_trace_buf();
3524         if (!tbuffer) {
3525                 len = 0;
3526                 goto out_nobuffer;
3527         }
3528
3529         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3530
3531         size = sizeof(*entry) + len + 1;
3532         ring_buffer_nest_start(buffer);
3533         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3534                                             trace_ctx);
3535         if (!event)
3536                 goto out;
3537         entry = ring_buffer_event_data(event);
3538         entry->ip = ip;
3539
3540         memcpy(&entry->buf, tbuffer, len + 1);
3541         if (!call_filter_check_discard(call, entry, buffer, event)) {
3542                 __buffer_unlock_commit(buffer, event);
3543                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3544         }
3545
3546 out:
3547         ring_buffer_nest_end(buffer);
3548         put_trace_buf();
3549
3550 out_nobuffer:
3551         preempt_enable_notrace();
3552         unpause_graph_tracing();
3553
3554         return len;
3555 }
3556
3557 __printf(3, 0)
3558 int trace_array_vprintk(struct trace_array *tr,
3559                         unsigned long ip, const char *fmt, va_list args)
3560 {
3561         if (tracing_selftest_running && tr == &global_trace)
3562                 return 0;
3563
3564         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3565 }
3566
3567 /**
3568  * trace_array_printk - Print a message to a specific instance
3569  * @tr: The instance trace_array descriptor
3570  * @ip: The instruction pointer that this is called from.
3571  * @fmt: The format to print (printf format)
3572  *
3573  * If a subsystem sets up its own instance, it may printk strings
3574  * into its tracing instance buffer using this function. Note, this
3575  * function will not write into the top level buffer (use
3576  * trace_printk() for that), as the top level buffer should only
3577  * contain events that can be individually disabled.
3578  * trace_printk() is only for debugging a kernel, and should never
3579  * be incorporated into normal use.
3580  *
3581  * trace_array_printk() can be used, as it will not add noise to the
3582  * top level tracing buffer.
3583  *
3584  * Note, trace_array_init_printk() must be called on @tr before this
3585  * can be used.
3586  */
3587 __printf(3, 0)
3588 int trace_array_printk(struct trace_array *tr,
3589                        unsigned long ip, const char *fmt, ...)
3590 {
3591         int ret;
3592         va_list ap;
3593
3594         if (!tr)
3595                 return -ENOENT;
3596
3597         /* This is only allowed for created instances */
3598         if (tr == &global_trace)
3599                 return 0;
3600
3601         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3602                 return 0;
3603
3604         va_start(ap, fmt);
3605         ret = trace_array_vprintk(tr, ip, fmt, ap);
3606         va_end(ap);
3607         return ret;
3608 }
3609 EXPORT_SYMBOL_GPL(trace_array_printk);
3610
3611 /**
3612  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3613  * @tr: The trace array to initialize the buffers for
3614  *
3615  * As trace_array_printk() only writes into instances, calls to it are OK
3616  * to have in the kernel (unlike trace_printk()). This needs to be called
3617  * before trace_array_printk() can be used on a trace_array.
3618  */
3619 int trace_array_init_printk(struct trace_array *tr)
3620 {
3621         if (!tr)
3622                 return -ENOENT;
3623
3624         /* This is only allowed for created instances */
3625         if (tr == &global_trace)
3626                 return -EINVAL;
3627
3628         return alloc_percpu_trace_buffer();
3629 }
3630 EXPORT_SYMBOL_GPL(trace_array_init_printk);
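
/*
 * Illustrative module usage only; the names are hypothetical, my_tr is
 * assumed to already point at a created instance (not at global_trace),
 * and obtaining it (e.g. with trace_array_get_by_name(), whose prototype
 * varies between kernel versions) is not shown:
 *
 *	static struct trace_array *my_tr;
 *
 *	static int my_init(void)
 *	{
 *		int ret = trace_array_init_printk(my_tr);
 *
 *		if (ret)
 *			return ret;
 *		trace_array_printk(my_tr, _THIS_IP_,
 *				   "hello from %s\n", "my_module");
 *		return 0;
 *	}
 */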
3631
3632 __printf(3, 4)
3633 int trace_array_printk_buf(struct trace_buffer *buffer,
3634                            unsigned long ip, const char *fmt, ...)
3635 {
3636         int ret;
3637         va_list ap;
3638
3639         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3640                 return 0;
3641
3642         va_start(ap, fmt);
3643         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3644         va_end(ap);
3645         return ret;
3646 }
3647
3648 __printf(2, 0)
3649 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3650 {
3651         return trace_array_vprintk(&global_trace, ip, fmt, args);
3652 }
3653 EXPORT_SYMBOL_GPL(trace_vprintk);
3654
3655 static void trace_iterator_increment(struct trace_iterator *iter)
3656 {
3657         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3658
3659         iter->idx++;
3660         if (buf_iter)
3661                 ring_buffer_iter_advance(buf_iter);
3662 }
3663
3664 static struct trace_entry *
3665 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3666                 unsigned long *lost_events)
3667 {
3668         struct ring_buffer_event *event;
3669         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3670
3671         if (buf_iter) {
3672                 event = ring_buffer_iter_peek(buf_iter, ts);
3673                 if (lost_events)
3674                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3675                                 (unsigned long)-1 : 0;
3676         } else {
3677                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3678                                          lost_events);
3679         }
3680
3681         if (event) {
3682                 iter->ent_size = ring_buffer_event_length(event);
3683                 return ring_buffer_event_data(event);
3684         }
3685         iter->ent_size = 0;
3686         return NULL;
3687 }
3688
3689 static struct trace_entry *
3690 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3691                   unsigned long *missing_events, u64 *ent_ts)
3692 {
3693         struct trace_buffer *buffer = iter->array_buffer->buffer;
3694         struct trace_entry *ent, *next = NULL;
3695         unsigned long lost_events = 0, next_lost = 0;
3696         int cpu_file = iter->cpu_file;
3697         u64 next_ts = 0, ts;
3698         int next_cpu = -1;
3699         int next_size = 0;
3700         int cpu;
3701
3702         /*
3703          * If we are in a per_cpu trace file, don't bother iterating over
3704          * all CPUs; peek directly at the requested one.
3705          */
3706         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3707                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3708                         return NULL;
3709                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3710                 if (ent_cpu)
3711                         *ent_cpu = cpu_file;
3712
3713                 return ent;
3714         }
3715
3716         for_each_tracing_cpu(cpu) {
3717
3718                 if (ring_buffer_empty_cpu(buffer, cpu))
3719                         continue;
3720
3721                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3722
3723                 /*
3724                  * Pick the entry with the smallest timestamp:
3725                  */
3726                 if (ent && (!next || ts < next_ts)) {
3727                         next = ent;
3728                         next_cpu = cpu;
3729                         next_ts = ts;
3730                         next_lost = lost_events;
3731                         next_size = iter->ent_size;
3732                 }
3733         }
3734
3735         iter->ent_size = next_size;
3736
3737         if (ent_cpu)
3738                 *ent_cpu = next_cpu;
3739
3740         if (ent_ts)
3741                 *ent_ts = next_ts;
3742
3743         if (missing_events)
3744                 *missing_events = next_lost;
3745
3746         return next;
3747 }
3748
3749 #define STATIC_FMT_BUF_SIZE     128
3750 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3751
3752 char *trace_iter_expand_format(struct trace_iterator *iter)
3753 {
3754         char *tmp;
3755
3756         /*
3757          * iter->tr is NULL when used with tp_printk, which means this
3758          * can get called where it is not safe to call krealloc().
3759          */
3760         if (!iter->tr || iter->fmt == static_fmt_buf)
3761                 return NULL;
3762
3763         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3764                        GFP_KERNEL);
3765         if (tmp) {
3766                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3767                 iter->fmt = tmp;
3768         }
3769
3770         return tmp;
3771 }
3772
3773 /* Returns true if the string is safe to dereference from an event */
3774 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3775                            bool star, int len)
3776 {
3777         unsigned long addr = (unsigned long)str;
3778         struct trace_event *trace_event;
3779         struct trace_event_call *event;
3780
3781         /* Ignore strings with no length */
3782         if (star && !len)
3783                 return true;
3784
3785         /* OK if part of the event data */
3786         if ((addr >= (unsigned long)iter->ent) &&
3787             (addr < (unsigned long)iter->ent + iter->ent_size))
3788                 return true;
3789
3790         /* OK if part of the temp seq buffer */
3791         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3792             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3793                 return true;
3794
3795         /* Core rodata can not be freed */
3796         if (is_kernel_rodata(addr))
3797                 return true;
3798
3799         if (trace_is_tracepoint_string(str))
3800                 return true;
3801
3802         /*
3803          * Now this could be a module event, referencing core module
3804          * data, which is OK.
3805          */
3806         if (!iter->ent)
3807                 return false;
3808
3809         trace_event = ftrace_find_event(iter->ent->type);
3810         if (!trace_event)
3811                 return false;
3812
3813         event = container_of(trace_event, struct trace_event_call, event);
3814         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3815                 return false;
3816
3817         /* Would rather have rodata, but this will suffice */
3818         if (within_module_core(addr, event->module))
3819                 return true;
3820
3821         return false;
3822 }
3823
3824 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3825
3826 static int test_can_verify_check(const char *fmt, ...)
3827 {
3828         char buf[16];
3829         va_list ap;
3830         int ret;
3831
3832         /*
3833          * The verifier depends on vsnprintf() modifying the va_list that
3834          * is passed to it, i.e. on the va_list being passed by reference.
3835          * Some architectures (like x86_32) pass it by value, which means
3836          * that vsnprintf() does not modify the caller's va_list, and the
3837          * verifier would then need to understand all the values that
3838          * vsnprintf() can consume. If it is passed by value, the verifier
3839          * is disabled.
3840          */
3841         va_start(ap, fmt);
3842         vsnprintf(buf, 16, "%d", ap);
3843         ret = va_arg(ap, int);
3844         va_end(ap);
3845
3846         return ret;
3847 }
3848
3849 static void test_can_verify(void)
3850 {
3851         if (!test_can_verify_check("%d %d", 0, 1)) {
3852                 pr_info("trace event string verifier disabled\n");
3853                 static_branch_inc(&trace_no_verify);
3854         }
3855 }
3856
3857 /**
3858  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3859  * @iter: The iterator that holds the seq buffer and the event being printed
3860  * @fmt: The format used to print the event
3861  * @ap: The va_list holding the data to print from @fmt.
3862  *
3863  * This writes the data into the @iter->seq buffer using the data from
3864  * @fmt and @ap. If the format has a %s, then the source of the string
3865  * is examined to make sure it is safe to print, otherwise it will
3866  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3867  * pointer.
3868  */
3869 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3870                          va_list ap)
3871 {
3872         const char *p = fmt;
3873         const char *str;
3874         int i, j;
3875
3876         if (WARN_ON_ONCE(!fmt))
3877                 return;
3878
3879         if (static_branch_unlikely(&trace_no_verify))
3880                 goto print;
3881
3882         /* Don't bother checking when doing a ftrace_dump() */
3883         if (iter->fmt == static_fmt_buf)
3884                 goto print;
3885
3886         while (*p) {
3887                 bool star = false;
3888                 int len = 0;
3889
3890                 j = 0;
3891
3892                 /* We only care about %s and variants */
3893                 for (i = 0; p[i]; i++) {
3894                         if (i + 1 >= iter->fmt_size) {
3895                                 /*
3896                                  * If we can't expand the copy buffer,
3897                                  * just print it.
3898                                  */
3899                                 if (!trace_iter_expand_format(iter))
3900                                         goto print;
3901                         }
3902
3903                         if (p[i] == '\\' && p[i+1]) {
3904                                 i++;
3905                                 continue;
3906                         }
3907                         if (p[i] == '%') {
3908                                 /* Need to test cases like %08.*s */
3909                                 for (j = 1; p[i+j]; j++) {
3910                                         if (isdigit(p[i+j]) ||
3911                                             p[i+j] == '.')
3912                                                 continue;
3913                                         if (p[i+j] == '*') {
3914                                                 star = true;
3915                                                 continue;
3916                                         }
3917                                         break;
3918                                 }
3919                                 if (p[i+j] == 's')
3920                                         break;
3921                                 star = false;
3922                         }
3923                         j = 0;
3924                 }
3925                 /* If no %s found then just print normally */
3926                 if (!p[i])
3927                         break;
3928
3929                 /* Copy up to the %s, and print that */
3930                 strncpy(iter->fmt, p, i);
3931                 iter->fmt[i] = '\0';
3932                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3933
3934                 /*
3935                  * If iter->seq is full, the above call no longer guarantees
3936                  * that ap is in sync with fmt processing, and further calls
3937                  * to va_arg() can return wrong positional arguments.
3938                  *
3939                  * Ensure that ap is no longer used in this case.
3940                  */
3941                 if (iter->seq.full) {
3942                         p = "";
3943                         break;
3944                 }
3945
3946                 if (star)
3947                         len = va_arg(ap, int);
3948
3949                 /* The ap now points to the string data of the %s */
3950                 str = va_arg(ap, const char *);
3951
3952                 /*
3953                  * If you hit this warning, it is likely that the
3954                  * trace event in question used %s on a string that
3955                  * was saved at the time of the event, but may not be
3956                  * around when the trace is read. Use __string(),
3957                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3958                  * instead. See samples/trace_events/trace-events-sample.h
3959                  * for reference.
3960                  */
3961                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3962                               "fmt: '%s' current_buffer: '%s'",
3963                               fmt, seq_buf_str(&iter->seq.seq))) {
3964                         int ret;
3965
3966                         /* Try to safely read the string */
3967                         if (star) {
3968                                 if (len + 1 > iter->fmt_size)
3969                                         len = iter->fmt_size - 1;
3970                                 if (len < 0)
3971                                         len = 0;
3972                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3973                                 iter->fmt[len] = 0;
3974                                 star = false;
3975                         } else {
3976                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3977                                                                   iter->fmt_size);
3978                         }
3979                         if (ret < 0)
3980                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3981                         else
3982                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3983                                                  str, iter->fmt);
3984                         str = "[UNSAFE-MEMORY]";
3985                         strcpy(iter->fmt, "%s");
3986                 } else {
3987                         strncpy(iter->fmt, p + i, j + 1);
3988                         iter->fmt[j+1] = '\0';
3989                 }
3990                 if (star)
3991                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3992                 else
3993                         trace_seq_printf(&iter->seq, iter->fmt, str);
3994
3995                 p += i + j + 1;
3996         }
3997  print:
3998         if (*p)
3999                 trace_seq_vprintf(&iter->seq, p, ap);
4000 }
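
/*
 * Hedged illustration of the pattern the warning above recommends:
 * copy the string into the event at record time with __string() and
 * __assign_str(), and print it with __get_str(), instead of recording
 * a bare pointer. Event and field names here are made up; see
 * samples/trace_events/trace-events-sample.h for the reference version.
 *
 *	TRACE_EVENT(my_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */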
4001
4002 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4003 {
4004         const char *p, *new_fmt;
4005         char *q;
4006
4007         if (WARN_ON_ONCE(!fmt))
4008                 return fmt;
4009
4010         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4011                 return fmt;
4012
4013         p = fmt;
4014         new_fmt = q = iter->fmt;
4015         while (*p) {
4016                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4017                         if (!trace_iter_expand_format(iter))
4018                                 return fmt;
4019
4020                         q += iter->fmt - new_fmt;
4021                         new_fmt = iter->fmt;
4022                 }
4023
4024                 *q++ = *p++;
4025
4026                 /* Replace %p with %px */
4027                 if (p[-1] == '%') {
4028                         if (p[0] == '%') {
4029                                 *q++ = *p++;
4030                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4031                                 *q++ = *p++;
4032                                 *q++ = 'x';
4033                         }
4034                 }
4035         }
4036         *q = '\0';
4037
4038         return new_fmt;
4039 }
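
/*
 * Worked example (illustrative format strings): with pointer hashing
 * disabled, a format such as
 *
 *	"ptr=%p ip=%pS count=%d"
 *
 * is rewritten above into
 *
 *	"ptr=%px ip=%pS count=%d"
 *
 * Only a bare %p is converted to %px; extended forms like %pS and a
 * literal %% are left untouched.
 */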
4040
4041 #define STATIC_TEMP_BUF_SIZE    128
4042 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4043
4044 /* Find the next real entry, without updating the iterator itself */
4045 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4046                                           int *ent_cpu, u64 *ent_ts)
4047 {
4048         /* __find_next_entry will reset ent_size */
4049         int ent_size = iter->ent_size;
4050         struct trace_entry *entry;
4051
4052         /*
4053          * If called from ftrace_dump(), then the iter->temp buffer
4054          * will be the static_temp_buf and not created from kmalloc.
4055          * If the entry size is greater than the buffer, we cannot
4056          * save it. Just return NULL in that case. This is only used
4057          * to add markers when two consecutive events' timestamps
4058          * have a large delta. See trace_print_lat_context().
4059          */
4060         if (iter->temp == static_temp_buf &&
4061             STATIC_TEMP_BUF_SIZE < ent_size)
4062                 return NULL;
4063
4064         /*
4065          * The __find_next_entry() may call peek_next_entry(), which may
4066          * call ring_buffer_peek(), which can make the contents of iter->ent
4067          * undefined. Need to copy iter->ent now.
4068          */
4069         if (iter->ent && iter->ent != iter->temp) {
4070                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4071                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4072                         void *temp;
4073                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4074                         if (!temp)
4075                                 return NULL;
4076                         kfree(iter->temp);
4077                         iter->temp = temp;
4078                         iter->temp_size = iter->ent_size;
4079                 }
4080                 memcpy(iter->temp, iter->ent, iter->ent_size);
4081                 iter->ent = iter->temp;
4082         }
4083         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4084         /* Put back the original ent_size */
4085         iter->ent_size = ent_size;
4086
4087         return entry;
4088 }
4089
4090 /* Find the next real entry, and increment the iterator to the next entry */
4091 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4092 {
4093         iter->ent = __find_next_entry(iter, &iter->cpu,
4094                                       &iter->lost_events, &iter->ts);
4095
4096         if (iter->ent)
4097                 trace_iterator_increment(iter);
4098
4099         return iter->ent ? iter : NULL;
4100 }
4101
4102 static void trace_consume(struct trace_iterator *iter)
4103 {
4104         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4105                             &iter->lost_events);
4106 }
4107
4108 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4109 {
4110         struct trace_iterator *iter = m->private;
4111         int i = (int)*pos;
4112         void *ent;
4113
4114         WARN_ON_ONCE(iter->leftover);
4115
4116         (*pos)++;
4117
4118         /* can't go backwards */
4119         if (iter->idx > i)
4120                 return NULL;
4121
4122         if (iter->idx < 0)
4123                 ent = trace_find_next_entry_inc(iter);
4124         else
4125                 ent = iter;
4126
4127         while (ent && iter->idx < i)
4128                 ent = trace_find_next_entry_inc(iter);
4129
4130         iter->pos = *pos;
4131
4132         return ent;
4133 }
4134
4135 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4136 {
4137         struct ring_buffer_iter *buf_iter;
4138         unsigned long entries = 0;
4139         u64 ts;
4140
4141         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4142
4143         buf_iter = trace_buffer_iter(iter, cpu);
4144         if (!buf_iter)
4145                 return;
4146
4147         ring_buffer_iter_reset(buf_iter);
4148
4149         /*
4150          * With the max latency tracers, it is possible that a reset
4151          * never took place on a CPU. This is evident from the
4152          * timestamp being before the start of the buffer.
4153          */
4154         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4155                 if (ts >= iter->array_buffer->time_start)
4156                         break;
4157                 entries++;
4158                 ring_buffer_iter_advance(buf_iter);
4159         }
4160
4161         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4162 }
4163
4164 /*
4165  * The current tracer is copied to avoid taking a global lock
4166  * all around.
4167  */
4168 static void *s_start(struct seq_file *m, loff_t *pos)
4169 {
4170         struct trace_iterator *iter = m->private;
4171         struct trace_array *tr = iter->tr;
4172         int cpu_file = iter->cpu_file;
4173         void *p = NULL;
4174         loff_t l = 0;
4175         int cpu;
4176
4177         mutex_lock(&trace_types_lock);
4178         if (unlikely(tr->current_trace != iter->trace)) {
4179                 /* Close iter->trace before switching to the new current tracer */
4180                 if (iter->trace->close)
4181                         iter->trace->close(iter);
4182                 iter->trace = tr->current_trace;
4183                 /* Reopen the new current tracer */
4184                 if (iter->trace->open)
4185                         iter->trace->open(iter);
4186         }
4187         mutex_unlock(&trace_types_lock);
4188
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190         if (iter->snapshot && iter->trace->use_max_tr)
4191                 return ERR_PTR(-EBUSY);
4192 #endif
4193
4194         if (*pos != iter->pos) {
4195                 iter->ent = NULL;
4196                 iter->cpu = 0;
4197                 iter->idx = -1;
4198
4199                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4200                         for_each_tracing_cpu(cpu)
4201                                 tracing_iter_reset(iter, cpu);
4202                 } else
4203                         tracing_iter_reset(iter, cpu_file);
4204
4205                 iter->leftover = 0;
4206                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4207                         ;
4208
4209         } else {
4210                 /*
4211                  * If we overflowed the seq_file before, then we want
4212                  * to just reuse the trace_seq buffer again.
4213                  */
4214                 if (iter->leftover)
4215                         p = iter;
4216                 else {
4217                         l = *pos - 1;
4218                         p = s_next(m, p, &l);
4219                 }
4220         }
4221
4222         trace_event_read_lock();
4223         trace_access_lock(cpu_file);
4224         return p;
4225 }
4226
4227 static void s_stop(struct seq_file *m, void *p)
4228 {
4229         struct trace_iterator *iter = m->private;
4230
4231 #ifdef CONFIG_TRACER_MAX_TRACE
4232         if (iter->snapshot && iter->trace->use_max_tr)
4233                 return;
4234 #endif
4235
4236         trace_access_unlock(iter->cpu_file);
4237         trace_event_read_unlock();
4238 }
4239
4240 static void
4241 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4242                       unsigned long *entries, int cpu)
4243 {
4244         unsigned long count;
4245
4246         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4247         /*
4248          * If this buffer has skipped entries, then we hold all
4249          * entries for the trace and we need to ignore the
4250          * ones before the time stamp.
4251          */
4252         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4253                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4254                 /* total is the same as the entries */
4255                 *total = count;
4256         } else
4257                 *total = count +
4258                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4259         *entries = count;
4260 }
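
/*
 * Illustrative numbers: if a CPU buffer currently holds 100 entries and
 * the ring buffer reports 20 overrun (overwritten) events, this yields
 * *entries = 100 and *total = 120. When skipped_entries is set, the
 * skipped ones are subtracted and total equals entries.
 */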
4261
4262 static void
4263 get_total_entries(struct array_buffer *buf,
4264                   unsigned long *total, unsigned long *entries)
4265 {
4266         unsigned long t, e;
4267         int cpu;
4268
4269         *total = 0;
4270         *entries = 0;
4271
4272         for_each_tracing_cpu(cpu) {
4273                 get_total_entries_cpu(buf, &t, &e, cpu);
4274                 *total += t;
4275                 *entries += e;
4276         }
4277 }
4278
4279 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4280 {
4281         unsigned long total, entries;
4282
4283         if (!tr)
4284                 tr = &global_trace;
4285
4286         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4287
4288         return entries;
4289 }
4290
4291 unsigned long trace_total_entries(struct trace_array *tr)
4292 {
4293         unsigned long total, entries;
4294
4295         if (!tr)
4296                 tr = &global_trace;
4297
4298         get_total_entries(&tr->array_buffer, &total, &entries);
4299
4300         return entries;
4301 }
4302
4303 static void print_lat_help_header(struct seq_file *m)
4304 {
4305         seq_puts(m, "#                    _------=> CPU#            \n"
4306                     "#                   / _-----=> irqs-off/BH-disabled\n"
4307                     "#                  | / _----=> need-resched    \n"
4308                     "#                  || / _---=> hardirq/softirq \n"
4309                     "#                  ||| / _--=> preempt-depth   \n"
4310                     "#                  |||| / _-=> migrate-disable \n"
4311                     "#                  ||||| /     delay           \n"
4312                     "#  cmd     pid     |||||| time  |   caller     \n"
4313                     "#     \\   /        ||||||  \\    |    /       \n");
4314 }
4315
4316 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4317 {
4318         unsigned long total;
4319         unsigned long entries;
4320
4321         get_total_entries(buf, &total, &entries);
4322         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4323                    entries, total, num_online_cpus());
4324         seq_puts(m, "#\n");
4325 }
4326
4327 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4328                                    unsigned int flags)
4329 {
4330         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4331
4332         print_event_info(buf, m);
4333
4334         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4335         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4336 }
4337
4338 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4339                                        unsigned int flags)
4340 {
4341         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4342         static const char space[] = "            ";
4343         int prec = tgid ? 12 : 2;
4344
4345         print_event_info(buf, m);
4346
4347         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4348         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4349         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4350         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4351         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4352         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4353         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4354         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4355 }
4356
4357 void
4358 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4359 {
4360         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4361         struct array_buffer *buf = iter->array_buffer;
4362         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4363         struct tracer *type = iter->trace;
4364         unsigned long entries;
4365         unsigned long total;
4366         const char *name = type->name;
4367
4368         get_total_entries(buf, &total, &entries);
4369
4370         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4371                    name, UTS_RELEASE);
4372         seq_puts(m, "# -----------------------------------"
4373                  "---------------------------------\n");
4374         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4375                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4376                    nsecs_to_usecs(data->saved_latency),
4377                    entries,
4378                    total,
4379                    buf->cpu,
4380                    preempt_model_none()      ? "server" :
4381                    preempt_model_voluntary() ? "desktop" :
4382                    preempt_model_full()      ? "preempt" :
4383                    preempt_model_rt()        ? "preempt_rt" :
4384                    "unknown",
4385                    /* These are reserved for later use */
4386                    0, 0, 0, 0);
4387 #ifdef CONFIG_SMP
4388         seq_printf(m, " #P:%d)\n", num_online_cpus());
4389 #else
4390         seq_puts(m, ")\n");
4391 #endif
4392         seq_puts(m, "#    -----------------\n");
4393         seq_printf(m, "#    | task: %.16s-%d "
4394                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4395                    data->comm, data->pid,
4396                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4397                    data->policy, data->rt_priority);
4398         seq_puts(m, "#    -----------------\n");
4399
4400         if (data->critical_start) {
4401                 seq_puts(m, "#  => started at: ");
4402                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4403                 trace_print_seq(m, &iter->seq);
4404                 seq_puts(m, "\n#  => ended at:   ");
4405                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4406                 trace_print_seq(m, &iter->seq);
4407                 seq_puts(m, "\n#\n");
4408         }
4409
4410         seq_puts(m, "#\n");
4411 }
4412
4413 static void test_cpu_buff_start(struct trace_iterator *iter)
4414 {
4415         struct trace_seq *s = &iter->seq;
4416         struct trace_array *tr = iter->tr;
4417
4418         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4419                 return;
4420
4421         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4422                 return;
4423
4424         if (cpumask_available(iter->started) &&
4425             cpumask_test_cpu(iter->cpu, iter->started))
4426                 return;
4427
4428         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4429                 return;
4430
4431         if (cpumask_available(iter->started))
4432                 cpumask_set_cpu(iter->cpu, iter->started);
4433
4434         /* Don't print started cpu buffer for the first entry of the trace */
4435         if (iter->idx > 1)
4436                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4437                                 iter->cpu);
4438 }
4439
4440 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4441 {
4442         struct trace_array *tr = iter->tr;
4443         struct trace_seq *s = &iter->seq;
4444         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4445         struct trace_entry *entry;
4446         struct trace_event *event;
4447
4448         entry = iter->ent;
4449
4450         test_cpu_buff_start(iter);
4451
4452         event = ftrace_find_event(entry->type);
4453
4454         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4455                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4456                         trace_print_lat_context(iter);
4457                 else
4458                         trace_print_context(iter);
4459         }
4460
4461         if (trace_seq_has_overflowed(s))
4462                 return TRACE_TYPE_PARTIAL_LINE;
4463
4464         if (event) {
4465                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4466                         return print_event_fields(iter, event);
4467                 return event->funcs->trace(iter, sym_flags, event);
4468         }
4469
4470         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4471
4472         return trace_handle_return(s);
4473 }
4474
4475 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4476 {
4477         struct trace_array *tr = iter->tr;
4478         struct trace_seq *s = &iter->seq;
4479         struct trace_entry *entry;
4480         struct trace_event *event;
4481
4482         entry = iter->ent;
4483
4484         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4485                 trace_seq_printf(s, "%d %d %llu ",
4486                                  entry->pid, iter->cpu, iter->ts);
4487
4488         if (trace_seq_has_overflowed(s))
4489                 return TRACE_TYPE_PARTIAL_LINE;
4490
4491         event = ftrace_find_event(entry->type);
4492         if (event)
4493                 return event->funcs->raw(iter, 0, event);
4494
4495         trace_seq_printf(s, "%d ?\n", entry->type);
4496
4497         return trace_handle_return(s);
4498 }
4499
4500 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4501 {
4502         struct trace_array *tr = iter->tr;
4503         struct trace_seq *s = &iter->seq;
4504         unsigned char newline = '\n';
4505         struct trace_entry *entry;
4506         struct trace_event *event;
4507
4508         entry = iter->ent;
4509
4510         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4511                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4512                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4513                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4514                 if (trace_seq_has_overflowed(s))
4515                         return TRACE_TYPE_PARTIAL_LINE;
4516         }
4517
4518         event = ftrace_find_event(entry->type);
4519         if (event) {
4520                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4521                 if (ret != TRACE_TYPE_HANDLED)
4522                         return ret;
4523         }
4524
4525         SEQ_PUT_FIELD(s, newline);
4526
4527         return trace_handle_return(s);
4528 }
4529
4530 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4531 {
4532         struct trace_array *tr = iter->tr;
4533         struct trace_seq *s = &iter->seq;
4534         struct trace_entry *entry;
4535         struct trace_event *event;
4536
4537         entry = iter->ent;
4538
4539         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4540                 SEQ_PUT_FIELD(s, entry->pid);
4541                 SEQ_PUT_FIELD(s, iter->cpu);
4542                 SEQ_PUT_FIELD(s, iter->ts);
4543                 if (trace_seq_has_overflowed(s))
4544                         return TRACE_TYPE_PARTIAL_LINE;
4545         }
4546
4547         event = ftrace_find_event(entry->type);
4548         return event ? event->funcs->binary(iter, 0, event) :
4549                 TRACE_TYPE_HANDLED;
4550 }
4551
4552 int trace_empty(struct trace_iterator *iter)
4553 {
4554         struct ring_buffer_iter *buf_iter;
4555         int cpu;
4556
4557         /* If we are looking at one CPU buffer, only check that one */
4558         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4559                 cpu = iter->cpu_file;
4560                 buf_iter = trace_buffer_iter(iter, cpu);
4561                 if (buf_iter) {
4562                         if (!ring_buffer_iter_empty(buf_iter))
4563                                 return 0;
4564                 } else {
4565                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4566                                 return 0;
4567                 }
4568                 return 1;
4569         }
4570
4571         for_each_tracing_cpu(cpu) {
4572                 buf_iter = trace_buffer_iter(iter, cpu);
4573                 if (buf_iter) {
4574                         if (!ring_buffer_iter_empty(buf_iter))
4575                                 return 0;
4576                 } else {
4577                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4578                                 return 0;
4579                 }
4580         }
4581
4582         return 1;
4583 }
4584
4585 /*  Called with trace_event_read_lock() held. */
4586 enum print_line_t print_trace_line(struct trace_iterator *iter)
4587 {
4588         struct trace_array *tr = iter->tr;
4589         unsigned long trace_flags = tr->trace_flags;
4590         enum print_line_t ret;
4591
4592         if (iter->lost_events) {
4593                 if (iter->lost_events == (unsigned long)-1)
4594                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4595                                          iter->cpu);
4596                 else
4597                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4598                                          iter->cpu, iter->lost_events);
4599                 if (trace_seq_has_overflowed(&iter->seq))
4600                         return TRACE_TYPE_PARTIAL_LINE;
4601         }
4602
4603         if (iter->trace && iter->trace->print_line) {
4604                 ret = iter->trace->print_line(iter);
4605                 if (ret != TRACE_TYPE_UNHANDLED)
4606                         return ret;
4607         }
4608
4609         if (iter->ent->type == TRACE_BPUTS &&
4610                         trace_flags & TRACE_ITER_PRINTK &&
4611                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4612                 return trace_print_bputs_msg_only(iter);
4613
4614         if (iter->ent->type == TRACE_BPRINT &&
4615                         trace_flags & TRACE_ITER_PRINTK &&
4616                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4617                 return trace_print_bprintk_msg_only(iter);
4618
4619         if (iter->ent->type == TRACE_PRINT &&
4620                         trace_flags & TRACE_ITER_PRINTK &&
4621                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4622                 return trace_print_printk_msg_only(iter);
4623
4624         if (trace_flags & TRACE_ITER_BIN)
4625                 return print_bin_fmt(iter);
4626
4627         if (trace_flags & TRACE_ITER_HEX)
4628                 return print_hex_fmt(iter);
4629
4630         if (trace_flags & TRACE_ITER_RAW)
4631                 return print_raw_fmt(iter);
4632
4633         return print_trace_fmt(iter);
4634 }
4635
4636 void trace_latency_header(struct seq_file *m)
4637 {
4638         struct trace_iterator *iter = m->private;
4639         struct trace_array *tr = iter->tr;
4640
4641         /* print nothing if the buffers are empty */
4642         if (trace_empty(iter))
4643                 return;
4644
4645         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4646                 print_trace_header(m, iter);
4647
4648         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4649                 print_lat_help_header(m);
4650 }
4651
4652 void trace_default_header(struct seq_file *m)
4653 {
4654         struct trace_iterator *iter = m->private;
4655         struct trace_array *tr = iter->tr;
4656         unsigned long trace_flags = tr->trace_flags;
4657
4658         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4659                 return;
4660
4661         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4662                 /* print nothing if the buffers are empty */
4663                 if (trace_empty(iter))
4664                         return;
4665                 print_trace_header(m, iter);
4666                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4667                         print_lat_help_header(m);
4668         } else {
4669                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4670                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4671                                 print_func_help_header_irq(iter->array_buffer,
4672                                                            m, trace_flags);
4673                         else
4674                                 print_func_help_header(iter->array_buffer, m,
4675                                                        trace_flags);
4676                 }
4677         }
4678 }
4679
4680 static void test_ftrace_alive(struct seq_file *m)
4681 {
4682         if (!ftrace_is_dead())
4683                 return;
4684         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4685                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4686 }
4687
4688 #ifdef CONFIG_TRACER_MAX_TRACE
4689 static void show_snapshot_main_help(struct seq_file *m)
4690 {
4691         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4692                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4693                     "#                      Takes a snapshot of the main buffer.\n"
4694                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4695                     "#                      (Doesn't have to be '2', works with any number that\n"
4696                     "#                       is not a '0' or '1')\n");
4697 }
4698
4699 static void show_snapshot_percpu_help(struct seq_file *m)
4700 {
4701         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4702 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4703         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4704                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4705 #else
4706         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4707                     "#                     Must use main snapshot file to allocate.\n");
4708 #endif
4709         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4710                     "#                      (Doesn't have to be '2'; it works with any number\n"
4711                     "#                       that is not a '0' or '1')\n");
4712 }
4713
4714 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4715 {
4716         if (iter->tr->allocated_snapshot)
4717                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4718         else
4719                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4720
4721         seq_puts(m, "# Snapshot commands:\n");
4722         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4723                 show_snapshot_main_help(m);
4724         else
4725                 show_snapshot_percpu_help(m);
4726 }
4727 #else
4728 /* Should never be called */
4729 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4730 #endif
4731
4732 static int s_show(struct seq_file *m, void *v)
4733 {
4734         struct trace_iterator *iter = v;
4735         int ret;
4736
4737         if (iter->ent == NULL) {
4738                 if (iter->tr) {
4739                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4740                         seq_puts(m, "#\n");
4741                         test_ftrace_alive(m);
4742                 }
4743                 if (iter->snapshot && trace_empty(iter))
4744                         print_snapshot_help(m, iter);
4745                 else if (iter->trace && iter->trace->print_header)
4746                         iter->trace->print_header(m);
4747                 else
4748                         trace_default_header(m);
4749
4750         } else if (iter->leftover) {
4751                 /*
4752                  * If we filled the seq_file buffer earlier, we
4753                  * want to just show it now.
4754                  */
4755                 ret = trace_print_seq(m, &iter->seq);
4756
4757                 /* ret should this time be zero, but you never know */
4758                 iter->leftover = ret;
4759
4760         } else {
4761                 ret = print_trace_line(iter);
4762                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4763                         iter->seq.full = 0;
4764                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4765                 }
4766                 ret = trace_print_seq(m, &iter->seq);
4767                 /*
4768                  * If we overflow the seq_file buffer, then it will
4769                  * ask us for this data again at start up.
4770                  * Use that instead.
4771                  *  ret is 0 if seq_file write succeeded.
4772                  *        -1 otherwise.
4773                  */
4774                 iter->leftover = ret;
4775         }
4776
4777         return 0;
4778 }
4779
4780 /*
4781  * Should be used after trace_array_get(), trace_types_lock
4782  * ensures that i_cdev was already initialized.
4783  */
4784 static inline int tracing_get_cpu(struct inode *inode)
4785 {
4786         if (inode->i_cdev) /* See trace_create_cpu_file() */
4787                 return (long)inode->i_cdev - 1;
4788         return RING_BUFFER_ALL_CPUS;
4789 }
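
/*
 * The per-CPU files are assumed to store (cpu + 1) in i_cdev when they are
 * created (see trace_create_cpu_file()), so that a NULL i_cdev (the
 * top-level files) maps to RING_BUFFER_ALL_CPUS and cpu 0 cannot be
 * mistaken for "unset".
 */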
4790
4791 static const struct seq_operations tracer_seq_ops = {
4792         .start          = s_start,
4793         .next           = s_next,
4794         .stop           = s_stop,
4795         .show           = s_show,
4796 };
4797
4798 /*
4799  * Note, as iter itself can be allocated and freed in different
4800  * ways, this function is only used to free its content, and not
4801  * the iterator itself. The only requirement for all the allocations
4802  * is that they must zero all fields (kzalloc), as freeing works with
4803  * either allocated content or NULL.
4804  */
4805 static void free_trace_iter_content(struct trace_iterator *iter)
4806 {
4807         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4808         if (iter->fmt != static_fmt_buf)
4809                 kfree(iter->fmt);
4810
4811         kfree(iter->temp);
4812         kfree(iter->buffer_iter);
4813         mutex_destroy(&iter->mutex);
4814         free_cpumask_var(iter->started);
4815 }
4816
4817 static struct trace_iterator *
4818 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4819 {
4820         struct trace_array *tr = inode->i_private;
4821         struct trace_iterator *iter;
4822         int cpu;
4823
4824         if (tracing_disabled)
4825                 return ERR_PTR(-ENODEV);
4826
4827         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4828         if (!iter)
4829                 return ERR_PTR(-ENOMEM);
4830
4831         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4832                                     GFP_KERNEL);
4833         if (!iter->buffer_iter)
4834                 goto release;
4835
4836         /*
4837          * trace_find_next_entry() may need to save off iter->ent.
4838          * It will place it into the iter->temp buffer. As most
4839          * events are less than 128 bytes, allocate a buffer of that size.
4840          * If one is greater, then trace_find_next_entry() will
4841          * allocate a new buffer to adjust for the bigger iter->ent.
4842          * It's not critical if it fails to get allocated here.
4843          */
4844         iter->temp = kmalloc(128, GFP_KERNEL);
4845         if (iter->temp)
4846                 iter->temp_size = 128;
4847
4848         /*
4849          * trace_event_printf() may need to modify given format
4850          * string to replace %p with %px so that it shows real address
4851          * instead of a hash value. However, that is only for event
4852          * tracing; other tracers may not need it. Defer the allocation
4853          * until it is needed.
4854          */
4855         iter->fmt = NULL;
4856         iter->fmt_size = 0;
4857
4858         mutex_lock(&trace_types_lock);
4859         iter->trace = tr->current_trace;
4860
4861         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4862                 goto fail;
4863
4864         iter->tr = tr;
4865
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867         /* Currently only the top directory has a snapshot */
4868         if (tr->current_trace->print_max || snapshot)
4869                 iter->array_buffer = &tr->max_buffer;
4870         else
4871 #endif
4872                 iter->array_buffer = &tr->array_buffer;
4873         iter->snapshot = snapshot;
4874         iter->pos = -1;
4875         iter->cpu_file = tracing_get_cpu(inode);
4876         mutex_init(&iter->mutex);
4877
4878         /* Notify the tracer early; before we stop tracing. */
4879         if (iter->trace->open)
4880                 iter->trace->open(iter);
4881
4882         /* Annotate start of buffers if we had overruns */
4883         if (ring_buffer_overruns(iter->array_buffer->buffer))
4884                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4885
4886         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4887         if (trace_clocks[tr->clock_id].in_ns)
4888                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4889
4890         /*
4891          * If pause-on-trace is enabled, then stop the trace while
4892          * dumping, unless this is the "snapshot" file
4893          */
4894         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4895                 tracing_stop_tr(tr);
4896
4897         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4898                 for_each_tracing_cpu(cpu) {
4899                         iter->buffer_iter[cpu] =
4900                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4901                                                          cpu, GFP_KERNEL);
4902                 }
4903                 ring_buffer_read_prepare_sync();
4904                 for_each_tracing_cpu(cpu) {
4905                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4906                         tracing_iter_reset(iter, cpu);
4907                 }
4908         } else {
4909                 cpu = iter->cpu_file;
4910                 iter->buffer_iter[cpu] =
4911                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4912                                                  cpu, GFP_KERNEL);
4913                 ring_buffer_read_prepare_sync();
4914                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4915                 tracing_iter_reset(iter, cpu);
4916         }
4917
4918         mutex_unlock(&trace_types_lock);
4919
4920         return iter;
4921
4922  fail:
4923         mutex_unlock(&trace_types_lock);
4924         free_trace_iter_content(iter);
4925 release:
4926         seq_release_private(inode, file);
4927         return ERR_PTR(-ENOMEM);
4928 }
4929
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(NULL);
4935         if (ret)
4936                 return ret;
4937
4938         filp->private_data = inode->i_private;
4939         return 0;
4940 }
4941
4942 bool tracing_is_disabled(void)
4943 {
4944         return (tracing_disabled) ? true : false;
4945 }
4946
4947 /*
4948  * Open and update trace_array ref count.
4949  * Must have the current trace_array passed to it.
4950  */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953         struct trace_array *tr = inode->i_private;
4954         int ret;
4955
4956         ret = tracing_check_open_get_tr(tr);
4957         if (ret)
4958                 return ret;
4959
4960         filp->private_data = inode->i_private;
4961
4962         return 0;
4963 }
4964
4965 /*
4966  * The private pointer of the inode is the trace_event_file.
4967  * Update the tr ref count associated to it.
4968  */
4969 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4970 {
4971         struct trace_event_file *file = inode->i_private;
4972         int ret;
4973
4974         ret = tracing_check_open_get_tr(file->tr);
4975         if (ret)
4976                 return ret;
4977
4978         mutex_lock(&event_mutex);
4979
4980         /* Fail if the file is marked for removal */
4981         if (file->flags & EVENT_FILE_FL_FREED) {
4982                 trace_array_put(file->tr);
4983                 ret = -ENODEV;
4984         } else {
4985                 event_file_get(file);
4986         }
4987
4988         mutex_unlock(&event_mutex);
4989         if (ret)
4990                 return ret;
4991
4992         filp->private_data = inode->i_private;
4993
4994         return 0;
4995 }
4996
4997 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4998 {
4999         struct trace_event_file *file = inode->i_private;
5000
5001         trace_array_put(file->tr);
5002         event_file_put(file);
5003
5004         return 0;
5005 }
5006
5007 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5008 {
5009         tracing_release_file_tr(inode, filp);
5010         return single_release(inode, filp);
5011 }
5012
5013 static int tracing_mark_open(struct inode *inode, struct file *filp)
5014 {
5015         stream_open(inode, filp);
5016         return tracing_open_generic_tr(inode, filp);
5017 }
5018
5019 static int tracing_release(struct inode *inode, struct file *file)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         struct seq_file *m = file->private_data;
5023         struct trace_iterator *iter;
5024         int cpu;
5025
5026         if (!(file->f_mode & FMODE_READ)) {
5027                 trace_array_put(tr);
5028                 return 0;
5029         }
5030
5031         /* Writes do not use seq_file */
5032         iter = m->private;
5033         mutex_lock(&trace_types_lock);
5034
5035         for_each_tracing_cpu(cpu) {
5036                 if (iter->buffer_iter[cpu])
5037                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5038         }
5039
5040         if (iter->trace && iter->trace->close)
5041                 iter->trace->close(iter);
5042
5043         if (!iter->snapshot && tr->stop_count)
5044                 /* reenable tracing if it was previously enabled */
5045                 tracing_start_tr(tr);
5046
5047         __trace_array_put(tr);
5048
5049         mutex_unlock(&trace_types_lock);
5050
5051         free_trace_iter_content(iter);
5052         seq_release_private(inode, file);
5053
5054         return 0;
5055 }
5056
5057 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5058 {
5059         struct trace_array *tr = inode->i_private;
5060
5061         trace_array_put(tr);
5062         return 0;
5063 }
5064
5065 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5066 {
5067         struct trace_array *tr = inode->i_private;
5068
5069         trace_array_put(tr);
5070
5071         return single_release(inode, file);
5072 }
5073
5074 static int tracing_open(struct inode *inode, struct file *file)
5075 {
5076         struct trace_array *tr = inode->i_private;
5077         struct trace_iterator *iter;
5078         int ret;
5079
5080         ret = tracing_check_open_get_tr(tr);
5081         if (ret)
5082                 return ret;
5083
5084         /* If this file was open for write, then erase contents */
5085         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5086                 int cpu = tracing_get_cpu(inode);
5087                 struct array_buffer *trace_buf = &tr->array_buffer;
5088
5089 #ifdef CONFIG_TRACER_MAX_TRACE
5090                 if (tr->current_trace->print_max)
5091                         trace_buf = &tr->max_buffer;
5092 #endif
5093
5094                 if (cpu == RING_BUFFER_ALL_CPUS)
5095                         tracing_reset_online_cpus(trace_buf);
5096                 else
5097                         tracing_reset_cpu(trace_buf, cpu);
5098         }
5099
5100         if (file->f_mode & FMODE_READ) {
5101                 iter = __tracing_open(inode, file, false);
5102                 if (IS_ERR(iter))
5103                         ret = PTR_ERR(iter);
5104                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5105                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5106         }
5107
5108         if (ret < 0)
5109                 trace_array_put(tr);
5110
5111         return ret;
5112 }
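
/*
 * This open path is what makes "echo > trace" clear the ring buffer: the
 * shell redirection opens the file with O_TRUNC and FMODE_WRITE, so the
 * branch above resets either one CPU's buffer or all of them, depending on
 * which trace file was opened.
 */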
5113
5114 /*
5115  * Some tracers are not suitable for instance buffers.
5116  * A tracer is always available for the global array (toplevel)
5117  * or if it explicitly states that it is.
5118  */
5119 static bool
5120 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5121 {
5122         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5123 }
5124
5125 /* Find the next tracer that this trace array may use */
5126 static struct tracer *
5127 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5128 {
5129         while (t && !trace_ok_for_array(t, tr))
5130                 t = t->next;
5131
5132         return t;
5133 }
5134
5135 static void *
5136 t_next(struct seq_file *m, void *v, loff_t *pos)
5137 {
5138         struct trace_array *tr = m->private;
5139         struct tracer *t = v;
5140
5141         (*pos)++;
5142
5143         if (t)
5144                 t = get_tracer_for_array(tr, t->next);
5145
5146         return t;
5147 }
5148
5149 static void *t_start(struct seq_file *m, loff_t *pos)
5150 {
5151         struct trace_array *tr = m->private;
5152         struct tracer *t;
5153         loff_t l = 0;
5154
5155         mutex_lock(&trace_types_lock);
5156
5157         t = get_tracer_for_array(tr, trace_types);
5158         for (; t && l < *pos; t = t_next(m, t, &l))
5159                         ;
5160
5161         return t;
5162 }
5163
5164 static void t_stop(struct seq_file *m, void *p)
5165 {
5166         mutex_unlock(&trace_types_lock);
5167 }
5168
5169 static int t_show(struct seq_file *m, void *v)
5170 {
5171         struct tracer *t = v;
5172
5173         if (!t)
5174                 return 0;
5175
5176         seq_puts(m, t->name);
5177         if (t->next)
5178                 seq_putc(m, ' ');
5179         else
5180                 seq_putc(m, '\n');
5181
5182         return 0;
5183 }
5184
5185 static const struct seq_operations show_traces_seq_ops = {
5186         .start          = t_start,
5187         .next           = t_next,
5188         .stop           = t_stop,
5189         .show           = t_show,
5190 };
5191
5192 static int show_traces_open(struct inode *inode, struct file *file)
5193 {
5194         struct trace_array *tr = inode->i_private;
5195         struct seq_file *m;
5196         int ret;
5197
5198         ret = tracing_check_open_get_tr(tr);
5199         if (ret)
5200                 return ret;
5201
5202         ret = seq_open(file, &show_traces_seq_ops);
5203         if (ret) {
5204                 trace_array_put(tr);
5205                 return ret;
5206         }
5207
5208         m = file->private_data;
5209         m->private = tr;
5210
5211         return 0;
5212 }
5213
5214 static int show_traces_release(struct inode *inode, struct file *file)
5215 {
5216         struct trace_array *tr = inode->i_private;
5217
5218         trace_array_put(tr);
5219         return seq_release(inode, file);
5220 }
5221
5222 static ssize_t
5223 tracing_write_stub(struct file *filp, const char __user *ubuf,
5224                    size_t count, loff_t *ppos)
5225 {
5226         return count;
5227 }
5228
5229 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5230 {
5231         int ret;
5232
5233         if (file->f_mode & FMODE_READ)
5234                 ret = seq_lseek(file, offset, whence);
5235         else
5236                 file->f_pos = ret = 0;
5237
5238         return ret;
5239 }
5240
5241 static const struct file_operations tracing_fops = {
5242         .open           = tracing_open,
5243         .read           = seq_read,
5244         .read_iter      = seq_read_iter,
5245         .splice_read    = copy_splice_read,
5246         .write          = tracing_write_stub,
5247         .llseek         = tracing_lseek,
5248         .release        = tracing_release,
5249 };
5250
5251 static const struct file_operations show_traces_fops = {
5252         .open           = show_traces_open,
5253         .read           = seq_read,
5254         .llseek         = seq_lseek,
5255         .release        = show_traces_release,
5256 };
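
/*
 * show_traces_fops is assumed to back the available_tracers file; t_show()
 * prints the registered tracer names space-separated on a single line,
 * e.g. (the exact set depends on the kernel config):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 */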
5257
5258 static ssize_t
5259 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5260                      size_t count, loff_t *ppos)
5261 {
5262         struct trace_array *tr = file_inode(filp)->i_private;
5263         char *mask_str;
5264         int len;
5265
5266         len = snprintf(NULL, 0, "%*pb\n",
5267                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5268         mask_str = kmalloc(len, GFP_KERNEL);
5269         if (!mask_str)
5270                 return -ENOMEM;
5271
5272         len = snprintf(mask_str, len, "%*pb\n",
5273                        cpumask_pr_args(tr->tracing_cpumask));
5274         if (len >= count) {
5275                 count = -EINVAL;
5276                 goto out_err;
5277         }
5278         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5279
5280 out_err:
5281         kfree(mask_str);
5282
5283         return count;
5284 }
5285
5286 int tracing_set_cpumask(struct trace_array *tr,
5287                         cpumask_var_t tracing_cpumask_new)
5288 {
5289         int cpu;
5290
5291         if (!tr)
5292                 return -EINVAL;
5293
5294         local_irq_disable();
5295         arch_spin_lock(&tr->max_lock);
5296         for_each_tracing_cpu(cpu) {
5297                 /*
5298                  * Increase/decrease the disabled counter if we are
5299                  * about to flip a bit in the cpumask:
5300                  */
5301                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5302                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5303                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5304                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5305 #ifdef CONFIG_TRACER_MAX_TRACE
5306                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5307 #endif
5308                 }
5309                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5310                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5311                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5312                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5313 #ifdef CONFIG_TRACER_MAX_TRACE
5314                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5315 #endif
5316                 }
5317         }
5318         arch_spin_unlock(&tr->max_lock);
5319         local_irq_enable();
5320
5321         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5322
5323         return 0;
5324 }
5325
5326 static ssize_t
5327 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5328                       size_t count, loff_t *ppos)
5329 {
5330         struct trace_array *tr = file_inode(filp)->i_private;
5331         cpumask_var_t tracing_cpumask_new;
5332         int err;
5333
5334         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5335                 return -ENOMEM;
5336
5337         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5338         if (err)
5339                 goto err_free;
5340
5341         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5342         if (err)
5343                 goto err_free;
5344
5345         free_cpumask_var(tracing_cpumask_new);
5346
5347         return count;
5348
5349 err_free:
5350         free_cpumask_var(tracing_cpumask_new);
5351
5352         return err;
5353 }
5354
5355 static const struct file_operations tracing_cpumask_fops = {
5356         .open           = tracing_open_generic_tr,
5357         .read           = tracing_cpumask_read,
5358         .write          = tracing_cpumask_write,
5359         .release        = tracing_release_generic_tr,
5360         .llseek         = generic_file_llseek,
5361 };
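
/*
 * A minimal usage sketch (assuming the usual tracefs mount point): the mask
 * is written and read as a hex cpumask, so limiting tracing to CPUs 0-1
 * looks like:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *   3
 */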
5362
5363 static int tracing_trace_options_show(struct seq_file *m, void *v)
5364 {
5365         struct tracer_opt *trace_opts;
5366         struct trace_array *tr = m->private;
5367         u32 tracer_flags;
5368         int i;
5369
5370         mutex_lock(&trace_types_lock);
5371         tracer_flags = tr->current_trace->flags->val;
5372         trace_opts = tr->current_trace->flags->opts;
5373
5374         for (i = 0; trace_options[i]; i++) {
5375                 if (tr->trace_flags & (1 << i))
5376                         seq_printf(m, "%s\n", trace_options[i]);
5377                 else
5378                         seq_printf(m, "no%s\n", trace_options[i]);
5379         }
5380
5381         for (i = 0; trace_opts[i].name; i++) {
5382                 if (tracer_flags & trace_opts[i].bit)
5383                         seq_printf(m, "%s\n", trace_opts[i].name);
5384                 else
5385                         seq_printf(m, "no%s\n", trace_opts[i].name);
5386         }
5387         mutex_unlock(&trace_types_lock);
5388
5389         return 0;
5390 }
5391
5392 static int __set_tracer_option(struct trace_array *tr,
5393                                struct tracer_flags *tracer_flags,
5394                                struct tracer_opt *opts, int neg)
5395 {
5396         struct tracer *trace = tracer_flags->trace;
5397         int ret;
5398
5399         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5400         if (ret)
5401                 return ret;
5402
5403         if (neg)
5404                 tracer_flags->val &= ~opts->bit;
5405         else
5406                 tracer_flags->val |= opts->bit;
5407         return 0;
5408 }
5409
5410 /* Try to assign a tracer specific option */
5411 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5412 {
5413         struct tracer *trace = tr->current_trace;
5414         struct tracer_flags *tracer_flags = trace->flags;
5415         struct tracer_opt *opts = NULL;
5416         int i;
5417
5418         for (i = 0; tracer_flags->opts[i].name; i++) {
5419                 opts = &tracer_flags->opts[i];
5420
5421                 if (strcmp(cmp, opts->name) == 0)
5422                         return __set_tracer_option(tr, trace->flags, opts, neg);
5423         }
5424
5425         return -EINVAL;
5426 }
5427
5428 /* Some tracers require overwrite to stay enabled */
5429 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5430 {
5431         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5432                 return -1;
5433
5434         return 0;
5435 }
5436
5437 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5438 {
5439         int *map;
5440
5441         if ((mask == TRACE_ITER_RECORD_TGID) ||
5442             (mask == TRACE_ITER_RECORD_CMD))
5443                 lockdep_assert_held(&event_mutex);
5444
5445         /* do nothing if flag is already set */
5446         if (!!(tr->trace_flags & mask) == !!enabled)
5447                 return 0;
5448
5449         /* Give the tracer a chance to approve the change */
5450         if (tr->current_trace->flag_changed)
5451                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5452                         return -EINVAL;
5453
5454         if (enabled)
5455                 tr->trace_flags |= mask;
5456         else
5457                 tr->trace_flags &= ~mask;
5458
5459         if (mask == TRACE_ITER_RECORD_CMD)
5460                 trace_event_enable_cmd_record(enabled);
5461
5462         if (mask == TRACE_ITER_RECORD_TGID) {
5463                 if (!tgid_map) {
5464                         tgid_map_max = pid_max;
5465                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5466                                        GFP_KERNEL);
5467
5468                         /*
5469                          * Pairs with smp_load_acquire() in
5470                          * trace_find_tgid_ptr() to ensure that if it observes
5471                          * the tgid_map we just allocated then it also observes
5472                          * the corresponding tgid_map_max value.
5473                          */
5474                         smp_store_release(&tgid_map, map);
5475                 }
5476                 if (!tgid_map) {
5477                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5478                         return -ENOMEM;
5479                 }
5480
5481                 trace_event_enable_tgid_record(enabled);
5482         }
5483
5484         if (mask == TRACE_ITER_EVENT_FORK)
5485                 trace_event_follow_fork(tr, enabled);
5486
5487         if (mask == TRACE_ITER_FUNC_FORK)
5488                 ftrace_pid_follow_fork(tr, enabled);
5489
5490         if (mask == TRACE_ITER_OVERWRITE) {
5491                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5492 #ifdef CONFIG_TRACER_MAX_TRACE
5493                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5494 #endif
5495         }
5496
5497         if (mask == TRACE_ITER_PRINTK) {
5498                 trace_printk_start_stop_comm(enabled);
5499                 trace_printk_control(enabled);
5500         }
5501
5502         return 0;
5503 }
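
/*
 * A rough sketch of the acquire side that the smp_store_release() in
 * set_tracer_flag() pairs with; the real trace_find_tgid_ptr() is assumed
 * to live earlier in this file and may differ in detail:
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (unlikely(!map || pid > tgid_map_max))
 *		return NULL;
 *	return &map[pid];
 */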
5504
5505 int trace_set_options(struct trace_array *tr, char *option)
5506 {
5507         char *cmp;
5508         int neg = 0;
5509         int ret;
5510         size_t orig_len = strlen(option);
5511         int len;
5512
5513         cmp = strstrip(option);
5514
5515         len = str_has_prefix(cmp, "no");
5516         if (len)
5517                 neg = 1;
5518
5519         cmp += len;
5520
5521         mutex_lock(&event_mutex);
5522         mutex_lock(&trace_types_lock);
5523
5524         ret = match_string(trace_options, -1, cmp);
5525         /* If no option could be set, test the specific tracer options */
5526         if (ret < 0)
5527                 ret = set_tracer_option(tr, cmp, neg);
5528         else
5529                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5530
5531         mutex_unlock(&trace_types_lock);
5532         mutex_unlock(&event_mutex);
5533
5534         /*
5535          * If the first trailing whitespace is replaced with '\0' by strstrip,
5536          * turn it back into a space.
5537          */
5538         if (orig_len > strlen(option))
5539                 option[strlen(option)] = ' ';
5540
5541         return ret;
5542 }
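
/*
 * trace_set_options() handles both the trace_options file and boot-time
 * options. For illustration (paths assume the usual tracefs mount):
 *
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *
 * A leading "no" clears the flag; names not found in trace_options[] are
 * passed to the current tracer's own options via set_tracer_option().
 */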
5543
5544 static void __init apply_trace_boot_options(void)
5545 {
5546         char *buf = trace_boot_options_buf;
5547         char *option;
5548
5549         while (true) {
5550                 option = strsep(&buf, ",");
5551
5552                 if (!option)
5553                         break;
5554
5555                 if (*option)
5556                         trace_set_options(&global_trace, option);
5557
5558                 /* Put back the comma to allow this to be called again */
5559                 if (buf)
5560                         *(buf - 1) = ',';
5561         }
5562 }
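
/*
 * trace_boot_options_buf is assumed to be filled from the kernel command
 * line, e.g. booting with:
 *
 *   trace_options=sym-offset,noirq-info
 *
 * results in trace_set_options() being called once per comma-separated
 * entry during early init.
 */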
5563
5564 static ssize_t
5565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5566                         size_t cnt, loff_t *ppos)
5567 {
5568         struct seq_file *m = filp->private_data;
5569         struct trace_array *tr = m->private;
5570         char buf[64];
5571         int ret;
5572
5573         if (cnt >= sizeof(buf))
5574                 return -EINVAL;
5575
5576         if (copy_from_user(buf, ubuf, cnt))
5577                 return -EFAULT;
5578
5579         buf[cnt] = 0;
5580
5581         ret = trace_set_options(tr, buf);
5582         if (ret < 0)
5583                 return ret;
5584
5585         *ppos += cnt;
5586
5587         return cnt;
5588 }
5589
5590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5591 {
5592         struct trace_array *tr = inode->i_private;
5593         int ret;
5594
5595         ret = tracing_check_open_get_tr(tr);
5596         if (ret)
5597                 return ret;
5598
5599         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5600         if (ret < 0)
5601                 trace_array_put(tr);
5602
5603         return ret;
5604 }
5605
5606 static const struct file_operations tracing_iter_fops = {
5607         .open           = tracing_trace_options_open,
5608         .read           = seq_read,
5609         .llseek         = seq_lseek,
5610         .release        = tracing_single_release_tr,
5611         .write          = tracing_trace_options_write,
5612 };
5613
5614 static const char readme_msg[] =
5615         "tracing mini-HOWTO:\n\n"
5616         "# echo 0 > tracing_on : quick way to disable tracing\n"
5617         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5618         " Important files:\n"
5619         "  trace\t\t\t- The static contents of the buffer\n"
5620         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5621         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5622         "  current_tracer\t- function and latency tracers\n"
5623         "  available_tracers\t- list of configured tracers for current_tracer\n"
5624         "  error_log\t- error log for failed commands (that support it)\n"
5625         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5626         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5627         "  trace_clock\t\t- change the clock used to order events\n"
5628         "       local:   Per cpu clock but may not be synced across CPUs\n"
5629         "      global:   Synced across CPUs but slows tracing down.\n"
5630         "     counter:   Not a clock, but just an increment\n"
5631         "      uptime:   Jiffy counter from time of boot\n"
5632         "        perf:   Same clock that perf events use\n"
5633 #ifdef CONFIG_X86_64
5634         "     x86-tsc:   TSC cycle counter\n"
5635 #endif
5636         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5637         "       delta:   Delta difference against a buffer-wide timestamp\n"
5638         "    absolute:   Absolute (standalone) timestamp\n"
5639         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5640         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5641         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5642         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5643         "\t\t\t  Remove sub-buffer with rmdir\n"
5644         "  trace_options\t\t- Set format or modify how tracing happens\n"
5645         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5646         "\t\t\t  option name\n"
5647         "  saved_cmdlines_size\t- echo the number of comm-pid pairs to store in here\n"
5648 #ifdef CONFIG_DYNAMIC_FTRACE
5649         "\n  available_filter_functions - list of functions that can be filtered on\n"
5650         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5651         "\t\t\t  functions\n"
5652         "\t     accepts: func_full_name or glob-matching-pattern\n"
5653         "\t     modules: Can select a group via module\n"
5654         "\t      Format: :mod:<module-name>\n"
5655         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5656         "\t    triggers: a command to perform when function is hit\n"
5657         "\t      Format: <function>:<trigger>[:count]\n"
5658         "\t     trigger: traceon, traceoff\n"
5659         "\t\t      enable_event:<system>:<event>\n"
5660         "\t\t      disable_event:<system>:<event>\n"
5661 #ifdef CONFIG_STACKTRACE
5662         "\t\t      stacktrace\n"
5663 #endif
5664 #ifdef CONFIG_TRACER_SNAPSHOT
5665         "\t\t      snapshot\n"
5666 #endif
5667         "\t\t      dump\n"
5668         "\t\t      cpudump\n"
5669         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5670         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5671         "\t     The first one will disable tracing every time do_fault is hit\n"
5672         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5673         "\t       The first time do_trap is hit and it disables tracing, the\n"
5674         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5675         "\t       the counter will not decrement. It only decrements when the\n"
5676         "\t       trigger did work\n"
5677         "\t     To remove trigger without count:\n"
5678         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5679         "\t     To remove trigger with a count:\n"
5680         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5681         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5682         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5683         "\t    modules: Can select a group via module command :mod:\n"
5684         "\t    Does not accept triggers\n"
5685 #endif /* CONFIG_DYNAMIC_FTRACE */
5686 #ifdef CONFIG_FUNCTION_TRACER
5687         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5688         "\t\t    (function)\n"
5689         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5690         "\t\t    (function)\n"
5691 #endif
5692 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5693         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5694         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5695         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5696 #endif
5697 #ifdef CONFIG_TRACER_SNAPSHOT
5698         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5699         "\t\t\t  snapshot buffer. Read the contents for more\n"
5700         "\t\t\t  information\n"
5701 #endif
5702 #ifdef CONFIG_STACK_TRACER
5703         "  stack_trace\t\t- Shows the max stack trace when active\n"
5704         "  stack_max_size\t- Shows current max stack size that was traced\n"
5705         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5706         "\t\t\t  new trace)\n"
5707 #ifdef CONFIG_DYNAMIC_FTRACE
5708         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5709         "\t\t\t  traces\n"
5710 #endif
5711 #endif /* CONFIG_STACK_TRACER */
5712 #ifdef CONFIG_DYNAMIC_EVENTS
5713         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5714         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5715 #endif
5716 #ifdef CONFIG_KPROBE_EVENTS
5717         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5718         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5719 #endif
5720 #ifdef CONFIG_UPROBE_EVENTS
5721         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5722         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5723 #endif
5724 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5725     defined(CONFIG_FPROBE_EVENTS)
5726         "\t  accepts: event-definitions (one definition per line)\n"
5727 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5728         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5729         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5730 #endif
5731 #ifdef CONFIG_FPROBE_EVENTS
5732         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5733         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5734 #endif
5735 #ifdef CONFIG_HIST_TRIGGERS
5736         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5737 #endif
5738         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5739         "\t           -:[<group>/][<event>]\n"
5740 #ifdef CONFIG_KPROBE_EVENTS
5741         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5742   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5743 #endif
5744 #ifdef CONFIG_UPROBE_EVENTS
5745   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5746 #endif
5747         "\t     args: <name>=fetcharg[:type]\n"
5748         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5749 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5750         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5751 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5752         "\t           <argname>[->field[->field|.field...]],\n"
5753 #endif
5754 #else
5755         "\t           $stack<index>, $stack, $retval, $comm,\n"
5756 #endif
5757         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5758         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5759         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5760         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5761         "\t           symstr, <type>\\[<array-size>\\]\n"
5762 #ifdef CONFIG_HIST_TRIGGERS
5763         "\t    field: <stype> <name>;\n"
5764         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5765         "\t           [unsigned] char/int/long\n"
5766 #endif
5767         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5768         "\t            of the <attached-group>/<attached-event>.\n"
5769 #endif
5770         "  events/\t\t- Directory containing all trace event subsystems:\n"
5771         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5772         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5773         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5774         "\t\t\t  events\n"
5775         "      filter\t\t- If set, only events passing filter are traced\n"
5776         "  events/<system>/<event>/\t- Directory containing control files for\n"
5777         "\t\t\t  <event>:\n"
5778         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5779         "      filter\t\t- If set, only events passing filter are traced\n"
5780         "      trigger\t\t- If set, a command to perform when event is hit\n"
5781         "\t    Format: <trigger>[:count][if <filter>]\n"
5782         "\t   trigger: traceon, traceoff\n"
5783         "\t            enable_event:<system>:<event>\n"
5784         "\t            disable_event:<system>:<event>\n"
5785 #ifdef CONFIG_HIST_TRIGGERS
5786         "\t            enable_hist:<system>:<event>\n"
5787         "\t            disable_hist:<system>:<event>\n"
5788 #endif
5789 #ifdef CONFIG_STACKTRACE
5790         "\t\t    stacktrace\n"
5791 #endif
5792 #ifdef CONFIG_TRACER_SNAPSHOT
5793         "\t\t    snapshot\n"
5794 #endif
5795 #ifdef CONFIG_HIST_TRIGGERS
5796         "\t\t    hist (see below)\n"
5797 #endif
5798         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5799         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5800         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5801         "\t                  events/block/block_unplug/trigger\n"
5802         "\t   The first disables tracing every time block_unplug is hit.\n"
5803         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5804         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5805         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5806         "\t   Like function triggers, the counter is only decremented if it\n"
5807         "\t    enabled or disabled tracing.\n"
5808         "\t   To remove a trigger without a count:\n"
5809         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5810         "\t   To remove a trigger with a count:\n"
5811         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5812         "\t   Filters can be ignored when removing a trigger.\n"
5813 #ifdef CONFIG_HIST_TRIGGERS
5814         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5815         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5816         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5817         "\t            [:values=<field1[,field2,...]>]\n"
5818         "\t            [:sort=<field1[,field2,...]>]\n"
5819         "\t            [:size=#entries]\n"
5820         "\t            [:pause][:continue][:clear]\n"
5821         "\t            [:name=histname1]\n"
5822         "\t            [:nohitcount]\n"
5823         "\t            [:<handler>.<action>]\n"
5824         "\t            [if <filter>]\n\n"
5825         "\t    Note, special fields can be used as well:\n"
5826         "\t            common_timestamp - to record current timestamp\n"
5827         "\t            common_cpu - to record the CPU the event happened on\n"
5828         "\n"
5829         "\t    A hist trigger variable can be:\n"
5830         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5831         "\t        - a reference to another variable e.g. y=$x,\n"
5832         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5833         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5834         "\n"
5835         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5836         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5837         "\t    variable reference, field or numeric literal.\n"
5838         "\n"
5839         "\t    When a matching event is hit, an entry is added to a hash\n"
5840         "\t    table using the key(s) and value(s) named, and the value of a\n"
5841         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5842         "\t    correspond to fields in the event's format description.  Keys\n"
5843         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5844         "\t    Compound keys consisting of up to two fields can be specified\n"
5845         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5846         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5847         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5848         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5849         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5850         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5851         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5852         "\t    its histogram data will be shared with other triggers of the\n"
5853         "\t    same name, and trigger hits will update this common data.\n\n"
5854         "\t    Reading the 'hist' file for the event will dump the hash\n"
5855         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5856         "\t    triggers attached to an event, there will be a table for each\n"
5857         "\t    trigger in the output.  The table displayed for a named\n"
5858         "\t    trigger will be the same as any other instance having the\n"
5859         "\t    same name.  The default format used to display a given field\n"
5860         "\t    can be modified by appending any of the following modifiers\n"
5861         "\t    to the field name, as applicable:\n\n"
5862         "\t            .hex        display a number as a hex value\n"
5863         "\t            .sym        display an address as a symbol\n"
5864         "\t            .sym-offset display an address as a symbol and offset\n"
5865         "\t            .execname   display a common_pid as a program name\n"
5866         "\t            .syscall    display a syscall id as a syscall name\n"
5867         "\t            .log2       display log2 value rather than raw number\n"
5868         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5869         "\t            .usecs      display a common_timestamp in microseconds\n"
5870         "\t            .percent    display a number as a percentage value\n"
5871         "\t            .graph      display a bar-graph of a value\n\n"
5872         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5873         "\t    trigger or to start a hist trigger but not log any events\n"
5874         "\t    until told to do so.  'continue' can be used to start or\n"
5875         "\t    restart a paused hist trigger.\n\n"
5876         "\t    The 'clear' parameter will clear the contents of a running\n"
5877         "\t    hist trigger and leave its current paused/active state\n"
5878         "\t    unchanged.\n\n"
5879         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5880         "\t    raw hitcount in the histogram.\n\n"
5881         "\t    The enable_hist and disable_hist triggers can be used to\n"
5882         "\t    have one event conditionally start and stop another event's\n"
5883         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5884         "\t    the enable_event and disable_event triggers.\n\n"
5885         "\t    Hist trigger handlers and actions are executed whenever a\n"
5886         "\t    histogram entry is added or updated.  They take the form:\n\n"
5887         "\t        <handler>.<action>\n\n"
5888         "\t    The available handlers are:\n\n"
5889         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5890         "\t        onmax(var)               - invoke if var exceeds current max\n"
5891         "\t        onchange(var)            - invoke action if var changes\n\n"
5892         "\t    The available actions are:\n\n"
5893         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5894         "\t        save(field,...)                      - save current event fields\n"
5895 #ifdef CONFIG_TRACER_SNAPSHOT
5896         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5897 #endif
5898 #ifdef CONFIG_SYNTH_EVENTS
5899         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5900         "\t  Write into this file to define/undefine new synthetic events.\n"
5901         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5902 #endif
5903 #endif
5904 ;
5905
5906 static ssize_t
5907 tracing_readme_read(struct file *filp, char __user *ubuf,
5908                        size_t cnt, loff_t *ppos)
5909 {
5910         return simple_read_from_buffer(ubuf, cnt, ppos,
5911                                         readme_msg, strlen(readme_msg));
5912 }
5913
5914 static const struct file_operations tracing_readme_fops = {
5915         .open           = tracing_open_generic,
5916         .read           = tracing_readme_read,
5917         .llseek         = generic_file_llseek,
5918 };
5919
5920 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5921 {
5922         int pid = ++(*pos);
5923
5924         return trace_find_tgid_ptr(pid);
5925 }
5926
5927 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5928 {
5929         int pid = *pos;
5930
5931         return trace_find_tgid_ptr(pid);
5932 }
5933
5934 static void saved_tgids_stop(struct seq_file *m, void *v)
5935 {
5936 }
5937
5938 static int saved_tgids_show(struct seq_file *m, void *v)
5939 {
5940         int *entry = (int *)v;
5941         int pid = entry - tgid_map;
5942         int tgid = *entry;
5943
5944         if (tgid == 0)
5945                 return SEQ_SKIP;
5946
5947         seq_printf(m, "%d %d\n", pid, tgid);
5948         return 0;
5949 }
5950
5951 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5952         .start          = saved_tgids_start,
5953         .stop           = saved_tgids_stop,
5954         .next           = saved_tgids_next,
5955         .show           = saved_tgids_show,
5956 };
5957
5958 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5959 {
5960         int ret;
5961
5962         ret = tracing_check_open_get_tr(NULL);
5963         if (ret)
5964                 return ret;
5965
5966         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5967 }
5968
5969
5970 static const struct file_operations tracing_saved_tgids_fops = {
5971         .open           = tracing_saved_tgids_open,
5972         .read           = seq_read,
5973         .llseek         = seq_lseek,
5974         .release        = seq_release,
5975 };
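
/*
 * Reading saved_tgids yields one "<pid> <tgid>" pair per line (entries with
 * a zero tgid are skipped), e.g. with illustrative values:
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1 1
 *   523 520
 */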
5976
5977 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5978 {
5979         unsigned int *ptr = v;
5980
5981         if (*pos || m->count)
5982                 ptr++;
5983
5984         (*pos)++;
5985
5986         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5987              ptr++) {
5988                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5989                         continue;
5990
5991                 return ptr;
5992         }
5993
5994         return NULL;
5995 }
5996
5997 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5998 {
5999         void *v;
6000         loff_t l = 0;
6001
6002         preempt_disable();
6003         arch_spin_lock(&trace_cmdline_lock);
6004
6005         v = &savedcmd->map_cmdline_to_pid[0];
6006         while (l <= *pos) {
6007                 v = saved_cmdlines_next(m, v, &l);
6008                 if (!v)
6009                         return NULL;
6010         }
6011
6012         return v;
6013 }
6014
6015 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6016 {
6017         arch_spin_unlock(&trace_cmdline_lock);
6018         preempt_enable();
6019 }
6020
6021 static int saved_cmdlines_show(struct seq_file *m, void *v)
6022 {
6023         char buf[TASK_COMM_LEN];
6024         unsigned int *pid = v;
6025
6026         __trace_find_cmdline(*pid, buf);
6027         seq_printf(m, "%d %s\n", *pid, buf);
6028         return 0;
6029 }
6030
6031 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6032         .start          = saved_cmdlines_start,
6033         .next           = saved_cmdlines_next,
6034         .stop           = saved_cmdlines_stop,
6035         .show           = saved_cmdlines_show,
6036 };
6037
6038 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6039 {
6040         int ret;
6041
6042         ret = tracing_check_open_get_tr(NULL);
6043         if (ret)
6044                 return ret;
6045
6046         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6047 }
6048
6049 static const struct file_operations tracing_saved_cmdlines_fops = {
6050         .open           = tracing_saved_cmdlines_open,
6051         .read           = seq_read,
6052         .llseek         = seq_lseek,
6053         .release        = seq_release,
6054 };
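
/*
 * Reading saved_cmdlines prints "<pid> <comm>" pairs, e.g. with
 * illustrative values:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1 systemd
 *   523 kworker/0:1
 */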
6055
6056 static ssize_t
6057 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6058                                  size_t cnt, loff_t *ppos)
6059 {
6060         char buf[64];
6061         int r;
6062
6063         preempt_disable();
6064         arch_spin_lock(&trace_cmdline_lock);
6065         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6066         arch_spin_unlock(&trace_cmdline_lock);
6067         preempt_enable();
6068
6069         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6070 }
6071
6072 static int tracing_resize_saved_cmdlines(unsigned int val)
6073 {
6074         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6075
6076         s = allocate_cmdlines_buffer(val);
6077         if (!s)
6078                 return -ENOMEM;
6079
6080         preempt_disable();
6081         arch_spin_lock(&trace_cmdline_lock);
6082         savedcmd_temp = savedcmd;
6083         savedcmd = s;
6084         arch_spin_unlock(&trace_cmdline_lock);
6085         preempt_enable();
6086         free_saved_cmdlines_buffer(savedcmd_temp);
6087
6088         return 0;
6089 }
6090
6091 static ssize_t
6092 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6093                                   size_t cnt, loff_t *ppos)
6094 {
6095         unsigned long val;
6096         int ret;
6097
6098         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6099         if (ret)
6100                 return ret;
6101
6102         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
6103         if (!val || val > PID_MAX_DEFAULT)
6104                 return -EINVAL;
6105
6106         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6107         if (ret < 0)
6108                 return ret;
6109
6110         *ppos += cnt;
6111
6112         return cnt;
6113 }
6114
6115 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6116         .open           = tracing_open_generic,
6117         .read           = tracing_saved_cmdlines_size_read,
6118         .write          = tracing_saved_cmdlines_size_write,
6119 };
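
/*
 * A minimal usage sketch (path assumes the usual tracefs mount): writing a
 * new size reallocates the savedcmd buffer via
 * tracing_resize_saved_cmdlines():
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   1024
 */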
6120
6121 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6122 static union trace_eval_map_item *
6123 update_eval_map(union trace_eval_map_item *ptr)
6124 {
6125         if (!ptr->map.eval_string) {
6126                 if (ptr->tail.next) {
6127                         ptr = ptr->tail.next;
6128                         /* Set ptr to the next real item (skip head) */
6129                         ptr++;
6130                 } else
6131                         return NULL;
6132         }
6133         return ptr;
6134 }
6135
6136 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6137 {
6138         union trace_eval_map_item *ptr = v;
6139
6140         /*
6141          * Paranoid! If ptr points to end, we don't want to increment past it.
6142          * This really should never happen.
6143          */
6144         (*pos)++;
6145         ptr = update_eval_map(ptr);
6146         if (WARN_ON_ONCE(!ptr))
6147                 return NULL;
6148
6149         ptr++;
6150         ptr = update_eval_map(ptr);
6151
6152         return ptr;
6153 }
6154
6155 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6156 {
6157         union trace_eval_map_item *v;
6158         loff_t l = 0;
6159
6160         mutex_lock(&trace_eval_mutex);
6161
6162         v = trace_eval_maps;
6163         if (v)
6164                 v++;
6165
6166         while (v && l < *pos) {
6167                 v = eval_map_next(m, v, &l);
6168         }
6169
6170         return v;
6171 }
6172
6173 static void eval_map_stop(struct seq_file *m, void *v)
6174 {
6175         mutex_unlock(&trace_eval_mutex);
6176 }
6177
6178 static int eval_map_show(struct seq_file *m, void *v)
6179 {
6180         union trace_eval_map_item *ptr = v;
6181
6182         seq_printf(m, "%s %ld (%s)\n",
6183                    ptr->map.eval_string, ptr->map.eval_value,
6184                    ptr->map.system);
6185
6186         return 0;
6187 }
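
/*
 * For illustration, each line emitted by eval_map_show() above has the
 * form
 *
 *	<eval_string> <eval_value> (<system>)
 *
 * e.g. a hypothetical entry "HI_SOFTIRQ 0 (irq)". The example values are
 * assumptions; the real contents depend on the eval maps registered by
 * the events and modules in the running kernel.
 */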
6188
6189 static const struct seq_operations tracing_eval_map_seq_ops = {
6190         .start          = eval_map_start,
6191         .next           = eval_map_next,
6192         .stop           = eval_map_stop,
6193         .show           = eval_map_show,
6194 };
6195
6196 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6197 {
6198         int ret;
6199
6200         ret = tracing_check_open_get_tr(NULL);
6201         if (ret)
6202                 return ret;
6203
6204         return seq_open(filp, &tracing_eval_map_seq_ops);
6205 }
6206
6207 static const struct file_operations tracing_eval_map_fops = {
6208         .open           = tracing_eval_map_open,
6209         .read           = seq_read,
6210         .llseek         = seq_lseek,
6211         .release        = seq_release,
6212 };
6213
6214 static inline union trace_eval_map_item *
6215 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6216 {
6217         /* Return tail of array given the head */
6218         return ptr + ptr->head.length + 1;
6219 }
6220
6221 static void
6222 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6223                            int len)
6224 {
6225         struct trace_eval_map **stop;
6226         struct trace_eval_map **map;
6227         union trace_eval_map_item *map_array;
6228         union trace_eval_map_item *ptr;
6229
6230         stop = start + len;
6231
6232         /*
6233          * The trace_eval_maps contains the maps plus a head and a tail item,
6234          * where the head holds the module and the length of the array, and
6235          * the tail holds a pointer to the next list.
6236          */
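        /*
         * A sketch of the resulting layout (illustrative only): for a
         * module with len == 3, the len + 2 items allocated below are
         * filled in as
         *
         *	map_array[0]	head	{ .mod = mod, .length = 3 }
         *	map_array[1..3]	map	copies of *start[0..2]
         *	map_array[4]	tail	zeroed by the final memset()
         */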
6237         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6238         if (!map_array) {
6239                 pr_warn("Unable to allocate trace eval mapping\n");
6240                 return;
6241         }
6242
6243         mutex_lock(&trace_eval_mutex);
6244
6245         if (!trace_eval_maps)
6246                 trace_eval_maps = map_array;
6247         else {
6248                 ptr = trace_eval_maps;
6249                 for (;;) {
6250                         ptr = trace_eval_jmp_to_tail(ptr);
6251                         if (!ptr->tail.next)
6252                                 break;
6253                         ptr = ptr->tail.next;
6254
6255                 }
6256                 ptr->tail.next = map_array;
6257         }
6258         map_array->head.mod = mod;
6259         map_array->head.length = len;
6260         map_array++;
6261
6262         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6263                 map_array->map = **map;
6264                 map_array++;
6265         }
6266         memset(map_array, 0, sizeof(*map_array));
6267
6268         mutex_unlock(&trace_eval_mutex);
6269 }
6270
6271 static void trace_create_eval_file(struct dentry *d_tracer)
6272 {
6273         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6274                           NULL, &tracing_eval_map_fops);
6275 }
6276
6277 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6278 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6279 static inline void trace_insert_eval_map_file(struct module *mod,
6280                               struct trace_eval_map **start, int len) { }
6281 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6282
6283 static void trace_insert_eval_map(struct module *mod,
6284                                   struct trace_eval_map **start, int len)
6285 {
6286         struct trace_eval_map **map;
6287
6288         if (len <= 0)
6289                 return;
6290
6291         map = start;
6292
6293         trace_event_eval_update(map, len);
6294
6295         trace_insert_eval_map_file(mod, start, len);
6296 }
6297
6298 static ssize_t
6299 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6300                        size_t cnt, loff_t *ppos)
6301 {
6302         struct trace_array *tr = filp->private_data;
6303         char buf[MAX_TRACER_SIZE+2];
6304         int r;
6305
6306         mutex_lock(&trace_types_lock);
6307         r = sprintf(buf, "%s\n", tr->current_trace->name);
6308         mutex_unlock(&trace_types_lock);
6309
6310         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6311 }
6312
6313 int tracer_init(struct tracer *t, struct trace_array *tr)
6314 {
6315         tracing_reset_online_cpus(&tr->array_buffer);
6316         return t->init(tr);
6317 }
6318
6319 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6320 {
6321         int cpu;
6322
6323         for_each_tracing_cpu(cpu)
6324                 per_cpu_ptr(buf->data, cpu)->entries = val;
6325 }
6326
6327 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6328 {
6329         if (cpu == RING_BUFFER_ALL_CPUS) {
6330                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6331         } else {
6332                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6333         }
6334 }
6335
6336 #ifdef CONFIG_TRACER_MAX_TRACE
6337 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6338 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6339                                         struct array_buffer *size_buf, int cpu_id)
6340 {
6341         int cpu, ret = 0;
6342
6343         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6344                 for_each_tracing_cpu(cpu) {
6345                         ret = ring_buffer_resize(trace_buf->buffer,
6346                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6347                         if (ret < 0)
6348                                 break;
6349                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6350                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6351                 }
6352         } else {
6353                 ret = ring_buffer_resize(trace_buf->buffer,
6354                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6355                 if (ret == 0)
6356                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6357                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6358         }
6359
6360         return ret;
6361 }
6362 #endif /* CONFIG_TRACER_MAX_TRACE */
6363
6364 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6365                                         unsigned long size, int cpu)
6366 {
6367         int ret;
6368
6369         /*
6370          * If kernel or user changes the size of the ring buffer
6371          * we use the size that was given, and we can forget about
6372          * expanding it later.
6373          */
6374         trace_set_ring_buffer_expanded(tr);
6375
6376         /* May be called before buffers are initialized */
6377         if (!tr->array_buffer.buffer)
6378                 return 0;
6379
6380         /* Do not allow tracing while resizing ring buffer */
6381         tracing_stop_tr(tr);
6382
6383         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6384         if (ret < 0)
6385                 goto out_start;
6386
6387 #ifdef CONFIG_TRACER_MAX_TRACE
6388         if (!tr->allocated_snapshot)
6389                 goto out;
6390
6391         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6392         if (ret < 0) {
6393                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6394                                                      &tr->array_buffer, cpu);
6395                 if (r < 0) {
6396                         /*
6397                          * AARGH! We are left with a max buffer of a
6398                          * different size!
6399                          * The max buffer is our "snapshot" buffer.
6400                          * When a tracer needs a snapshot (one of the
6401                          * latency tracers), it swaps the max buffer
6402                          * with the saved snapshot. We managed to
6403                          * update the size of the main buffer, but failed to
6404                          * update the size of the max buffer. Then, when we
6405                          * tried to reset the main buffer to the original
6406                          * size, we failed there too. This is very unlikely
6407                          * to happen, but if it does, warn and kill all
6408                          * tracing.
6409                          */
6410                         WARN_ON(1);
6411                         tracing_disabled = 1;
6412                 }
6413                 goto out_start;
6414         }
6415
6416         update_buffer_entries(&tr->max_buffer, cpu);
6417
6418  out:
6419 #endif /* CONFIG_TRACER_MAX_TRACE */
6420
6421         update_buffer_entries(&tr->array_buffer, cpu);
6422  out_start:
6423         tracing_start_tr(tr);
6424         return ret;
6425 }
6426
6427 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6428                                   unsigned long size, int cpu_id)
6429 {
6430         int ret;
6431
6432         mutex_lock(&trace_types_lock);
6433
6434         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6435                 /* make sure this cpu is enabled in the mask */
6436                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6437                         ret = -EINVAL;
6438                         goto out;
6439                 }
6440         }
6441
6442         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6443         if (ret < 0)
6444                 ret = -ENOMEM;
6445
6446 out:
6447         mutex_unlock(&trace_types_lock);
6448
6449         return ret;
6450 }
6451
6452
6453 /**
6454  * tracing_update_buffers - used by tracing facility to expand ring buffers
6455  * @tr: The tracing instance
6456  *
6457  * To save memory when tracing is never used on a system with it
6458  * configured in, the ring buffers are set to a minimum size. Once
6459  * a user starts to use the tracing facility, they need to grow
6460  * to their default size.
6461  *
6462  * This function is to be called when a tracer is about to be used.
6463  */
6464 int tracing_update_buffers(struct trace_array *tr)
6465 {
6466         int ret = 0;
6467
6468         mutex_lock(&trace_types_lock);
6469         if (!tr->ring_buffer_expanded)
6470                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6471                                                 RING_BUFFER_ALL_CPUS);
6472         mutex_unlock(&trace_types_lock);
6473
6474         return ret;
6475 }
6476
6477 struct trace_option_dentry;
6478
6479 static void
6480 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6481
6482 /*
6483  * Used to clear out the tracer before deletion of an instance.
6484  * Must have trace_types_lock held.
6485  */
6486 static void tracing_set_nop(struct trace_array *tr)
6487 {
6488         if (tr->current_trace == &nop_trace)
6489                 return;
6490
6491         tr->current_trace->enabled--;
6492
6493         if (tr->current_trace->reset)
6494                 tr->current_trace->reset(tr);
6495
6496         tr->current_trace = &nop_trace;
6497 }
6498
6499 static bool tracer_options_updated;
6500
6501 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6502 {
6503         /* Only enable if the directory has been created already. */
6504         if (!tr->dir)
6505                 return;
6506
6507         /* Only create trace option files after update_tracer_options finishes */
6508         if (!tracer_options_updated)
6509                 return;
6510
6511         create_trace_option_files(tr, t);
6512 }
6513
6514 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6515 {
6516         struct tracer *t;
6517 #ifdef CONFIG_TRACER_MAX_TRACE
6518         bool had_max_tr;
6519 #endif
6520         int ret = 0;
6521
6522         mutex_lock(&trace_types_lock);
6523
6524         if (!tr->ring_buffer_expanded) {
6525                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6526                                                 RING_BUFFER_ALL_CPUS);
6527                 if (ret < 0)
6528                         goto out;
6529                 ret = 0;
6530         }
6531
6532         for (t = trace_types; t; t = t->next) {
6533                 if (strcmp(t->name, buf) == 0)
6534                         break;
6535         }
6536         if (!t) {
6537                 ret = -EINVAL;
6538                 goto out;
6539         }
6540         if (t == tr->current_trace)
6541                 goto out;
6542
6543 #ifdef CONFIG_TRACER_SNAPSHOT
6544         if (t->use_max_tr) {
6545                 local_irq_disable();
6546                 arch_spin_lock(&tr->max_lock);
6547                 if (tr->cond_snapshot)
6548                         ret = -EBUSY;
6549                 arch_spin_unlock(&tr->max_lock);
6550                 local_irq_enable();
6551                 if (ret)
6552                         goto out;
6553         }
6554 #endif
6555         /* Some tracers won't work on kernel command line */
6556         if (system_state < SYSTEM_RUNNING && t->noboot) {
6557                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6558                         t->name);
6559                 goto out;
6560         }
6561
6562         /* Some tracers are only allowed for the top level buffer */
6563         if (!trace_ok_for_array(t, tr)) {
6564                 ret = -EINVAL;
6565                 goto out;
6566         }
6567
6568         /* If trace pipe files are being read, we can't change the tracer */
6569         if (tr->trace_ref) {
6570                 ret = -EBUSY;
6571                 goto out;
6572         }
6573
6574         trace_branch_disable();
6575
6576         tr->current_trace->enabled--;
6577
6578         if (tr->current_trace->reset)
6579                 tr->current_trace->reset(tr);
6580
6581 #ifdef CONFIG_TRACER_MAX_TRACE
6582         had_max_tr = tr->current_trace->use_max_tr;
6583
6584         /* Current trace needs to be nop_trace before synchronize_rcu */
6585         tr->current_trace = &nop_trace;
6586
6587         if (had_max_tr && !t->use_max_tr) {
6588                 /*
6589                  * We need to make sure that the update_max_tr sees that
6590                  * current_trace changed to nop_trace to keep it from
6591                  * swapping the buffers after we resize it.
6592                  * update_max_tr() is called with interrupts disabled,
6593                  * so a synchronize_rcu() is sufficient.
6594                  */
6595                 synchronize_rcu();
6596                 free_snapshot(tr);
6597         }
6598
6599         if (t->use_max_tr && !tr->allocated_snapshot) {
6600                 ret = tracing_alloc_snapshot_instance(tr);
6601                 if (ret < 0)
6602                         goto out;
6603         }
6604 #else
6605         tr->current_trace = &nop_trace;
6606 #endif
6607
6608         if (t->init) {
6609                 ret = tracer_init(t, tr);
6610                 if (ret)
6611                         goto out;
6612         }
6613
6614         tr->current_trace = t;
6615         tr->current_trace->enabled++;
6616         trace_branch_enable(tr);
6617  out:
6618         mutex_unlock(&trace_types_lock);
6619
6620         return ret;
6621 }
6622
6623 static ssize_t
6624 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6625                         size_t cnt, loff_t *ppos)
6626 {
6627         struct trace_array *tr = filp->private_data;
6628         char buf[MAX_TRACER_SIZE+1];
6629         char *name;
6630         size_t ret;
6631         int err;
6632
6633         ret = cnt;
6634
6635         if (cnt > MAX_TRACER_SIZE)
6636                 cnt = MAX_TRACER_SIZE;
6637
6638         if (copy_from_user(buf, ubuf, cnt))
6639                 return -EFAULT;
6640
6641         buf[cnt] = 0;
6642
6643         name = strim(buf);
6644
6645         err = tracing_set_tracer(tr, name);
6646         if (err)
6647                 return err;
6648
6649         *ppos += ret;
6650
6651         return ret;
6652 }
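
/*
 * Illustrative userspace usage of the write handler above. The tracefs
 * file name "current_trace" and the mount point /sys/kernel/tracing are
 * assumptions, not taken from this file:
 *
 *	int fd = open("/sys/kernel/tracing/current_trace", O_WRONLY);
 *
 *	// "nop" (nop_trace) always exists; other tracer names depend on
 *	// the kernel configuration. Surrounding whitespace is stripped by
 *	// strim() before tracing_set_tracer() looks the name up.
 *	write(fd, "nop\n", 4);
 *	close(fd);
 */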
6653
6654 static ssize_t
6655 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6656                    size_t cnt, loff_t *ppos)
6657 {
6658         char buf[64];
6659         int r;
6660
6661         r = snprintf(buf, sizeof(buf), "%ld\n",
6662                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6663         if (r > sizeof(buf))
6664                 r = sizeof(buf);
6665         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6666 }
6667
6668 static ssize_t
6669 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6670                     size_t cnt, loff_t *ppos)
6671 {
6672         unsigned long val;
6673         int ret;
6674
6675         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6676         if (ret)
6677                 return ret;
6678
6679         *ptr = val * 1000;
6680
6681         return cnt;
6682 }
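
/*
 * The two helpers above convert between the microseconds shown to
 * userspace and the nanoseconds stored in the kernel: writing the string
 * "100" stores 100 * 1000 = 100000 ns, and a stored value of
 * (unsigned long)-1 is printed as "-1". They back the tracing_thresh
 * handlers below and, under CONFIG_TRACER_MAX_TRACE, the max latency
 * handlers.
 */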
6683
6684 static ssize_t
6685 tracing_thresh_read(struct file *filp, char __user *ubuf,
6686                     size_t cnt, loff_t *ppos)
6687 {
6688         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6689 }
6690
6691 static ssize_t
6692 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6693                      size_t cnt, loff_t *ppos)
6694 {
6695         struct trace_array *tr = filp->private_data;
6696         int ret;
6697
6698         mutex_lock(&trace_types_lock);
6699         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6700         if (ret < 0)
6701                 goto out;
6702
6703         if (tr->current_trace->update_thresh) {
6704                 ret = tr->current_trace->update_thresh(tr);
6705                 if (ret < 0)
6706                         goto out;
6707         }
6708
6709         ret = cnt;
6710 out:
6711         mutex_unlock(&trace_types_lock);
6712
6713         return ret;
6714 }
6715
6716 #ifdef CONFIG_TRACER_MAX_TRACE
6717
6718 static ssize_t
6719 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6720                      size_t cnt, loff_t *ppos)
6721 {
6722         struct trace_array *tr = filp->private_data;
6723
6724         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6725 }
6726
6727 static ssize_t
6728 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6729                       size_t cnt, loff_t *ppos)
6730 {
6731         struct trace_array *tr = filp->private_data;
6732
6733         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6734 }
6735
6736 #endif
6737
6738 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6739 {
6740         if (cpu == RING_BUFFER_ALL_CPUS) {
6741                 if (cpumask_empty(tr->pipe_cpumask)) {
6742                         cpumask_setall(tr->pipe_cpumask);
6743                         return 0;
6744                 }
6745         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6746                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6747                 return 0;
6748         }
6749         return -EBUSY;
6750 }
6751
6752 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6753 {
6754         if (cpu == RING_BUFFER_ALL_CPUS) {
6755                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6756                 cpumask_clear(tr->pipe_cpumask);
6757         } else {
6758                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6759                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6760         }
6761 }
6762
6763 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6764 {
6765         struct trace_array *tr = inode->i_private;
6766         struct trace_iterator *iter;
6767         int cpu;
6768         int ret;
6769
6770         ret = tracing_check_open_get_tr(tr);
6771         if (ret)
6772                 return ret;
6773
6774         mutex_lock(&trace_types_lock);
6775         cpu = tracing_get_cpu(inode);
6776         ret = open_pipe_on_cpu(tr, cpu);
6777         if (ret)
6778                 goto fail_pipe_on_cpu;
6779
6780         /* create a buffer to store the information to pass to userspace */
6781         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6782         if (!iter) {
6783                 ret = -ENOMEM;
6784                 goto fail_alloc_iter;
6785         }
6786
6787         trace_seq_init(&iter->seq);
6788         iter->trace = tr->current_trace;
6789
6790         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6791                 ret = -ENOMEM;
6792                 goto fail;
6793         }
6794
6795         /* trace pipe does not show start of buffer */
6796         cpumask_setall(iter->started);
6797
6798         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6799                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6800
6801         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6802         if (trace_clocks[tr->clock_id].in_ns)
6803                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6804
6805         iter->tr = tr;
6806         iter->array_buffer = &tr->array_buffer;
6807         iter->cpu_file = cpu;
6808         mutex_init(&iter->mutex);
6809         filp->private_data = iter;
6810
6811         if (iter->trace->pipe_open)
6812                 iter->trace->pipe_open(iter);
6813
6814         nonseekable_open(inode, filp);
6815
6816         tr->trace_ref++;
6817
6818         mutex_unlock(&trace_types_lock);
6819         return ret;
6820
6821 fail:
6822         kfree(iter);
6823 fail_alloc_iter:
6824         close_pipe_on_cpu(tr, cpu);
6825 fail_pipe_on_cpu:
6826         __trace_array_put(tr);
6827         mutex_unlock(&trace_types_lock);
6828         return ret;
6829 }
6830
6831 static int tracing_release_pipe(struct inode *inode, struct file *file)
6832 {
6833         struct trace_iterator *iter = file->private_data;
6834         struct trace_array *tr = inode->i_private;
6835
6836         mutex_lock(&trace_types_lock);
6837
6838         tr->trace_ref--;
6839
6840         if (iter->trace->pipe_close)
6841                 iter->trace->pipe_close(iter);
6842         close_pipe_on_cpu(tr, iter->cpu_file);
6843         mutex_unlock(&trace_types_lock);
6844
6845         free_trace_iter_content(iter);
6846         kfree(iter);
6847
6848         trace_array_put(tr);
6849
6850         return 0;
6851 }
6852
6853 static __poll_t
6854 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6855 {
6856         struct trace_array *tr = iter->tr;
6857
6858         /* Iterators are static; they should be filled or empty */
6859         if (trace_buffer_iter(iter, iter->cpu_file))
6860                 return EPOLLIN | EPOLLRDNORM;
6861
6862         if (tr->trace_flags & TRACE_ITER_BLOCK)
6863                 /*
6864                  * Always select as readable when in blocking mode
6865                  */
6866                 return EPOLLIN | EPOLLRDNORM;
6867         else
6868                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6869                                              filp, poll_table, iter->tr->buffer_percent);
6870 }
6871
6872 static __poll_t
6873 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6874 {
6875         struct trace_iterator *iter = filp->private_data;
6876
6877         return trace_poll(iter, filp, poll_table);
6878 }
6879
6880 /* Must be called with iter->mutex held. */
6881 static int tracing_wait_pipe(struct file *filp)
6882 {
6883         struct trace_iterator *iter = filp->private_data;
6884         int ret;
6885
6886         while (trace_empty(iter)) {
6887
6888                 if ((filp->f_flags & O_NONBLOCK)) {
6889                         return -EAGAIN;
6890                 }
6891
6892                 /*
6893                  * We block until we read something and tracing is disabled.
6894                  * We still block if tracing is disabled, but we have never
6895                  * read anything. This allows a user to cat this file, and
6896                  * then enable tracing. But after we have read something,
6897                  * we give an EOF when tracing is again disabled.
6898                  *
6899                  * iter->pos will be 0 if we haven't read anything.
6900                  */
6901                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6902                         break;
6903
6904                 mutex_unlock(&iter->mutex);
6905
6906                 ret = wait_on_pipe(iter, 0);
6907
6908                 mutex_lock(&iter->mutex);
6909
6910                 if (ret)
6911                         return ret;
6912         }
6913
6914         return 1;
6915 }
6916
6917 /*
6918  * Consumer reader.
6919  */
6920 static ssize_t
6921 tracing_read_pipe(struct file *filp, char __user *ubuf,
6922                   size_t cnt, loff_t *ppos)
6923 {
6924         struct trace_iterator *iter = filp->private_data;
6925         ssize_t sret;
6926
6927         /*
6928          * Avoid more than one consumer on a single file descriptor.
6929          * This is just a matter of trace coherency; the ring buffer
6930          * itself is protected.
6931          */
6932         mutex_lock(&iter->mutex);
6933
6934         /* return any leftover data */
6935         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6936         if (sret != -EBUSY)
6937                 goto out;
6938
6939         trace_seq_init(&iter->seq);
6940
6941         if (iter->trace->read) {
6942                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6943                 if (sret)
6944                         goto out;
6945         }
6946
6947 waitagain:
6948         sret = tracing_wait_pipe(filp);
6949         if (sret <= 0)
6950                 goto out;
6951
6952         /* stop when tracing is finished */
6953         if (trace_empty(iter)) {
6954                 sret = 0;
6955                 goto out;
6956         }
6957
6958         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6959                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6960
6961         /* reset all but tr, trace, and overruns */
6962         trace_iterator_reset(iter);
6963         cpumask_clear(iter->started);
6964         trace_seq_init(&iter->seq);
6965
6966         trace_event_read_lock();
6967         trace_access_lock(iter->cpu_file);
6968         while (trace_find_next_entry_inc(iter) != NULL) {
6969                 enum print_line_t ret;
6970                 int save_len = iter->seq.seq.len;
6971
6972                 ret = print_trace_line(iter);
6973                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6974                         /*
6975                          * If one print_trace_line() fills the entire trace_seq in one shot,
6976                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6977                          * In this case, we need to consume it; otherwise the loop will peek
6978                          * at this event next time, resulting in an infinite loop.
6979                          */
6980                         if (save_len == 0) {
6981                                 iter->seq.full = 0;
6982                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6983                                 trace_consume(iter);
6984                                 break;
6985                         }
6986
6987                         /* In other cases, don't print partial lines */
6988                         iter->seq.seq.len = save_len;
6989                         break;
6990                 }
6991                 if (ret != TRACE_TYPE_NO_CONSUME)
6992                         trace_consume(iter);
6993
6994                 if (trace_seq_used(&iter->seq) >= cnt)
6995                         break;
6996
6997                 /*
6998                  * Setting the full flag means we reached the trace_seq buffer
6999                  * size and we should have left via the partial output condition
7000                  * above. One of the trace_seq_* functions is not used properly.
7001                  */
7002                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7003                           iter->ent->type);
7004         }
7005         trace_access_unlock(iter->cpu_file);
7006         trace_event_read_unlock();
7007
7008         /* Now copy what we have to the user */
7009         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7010         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7011                 trace_seq_init(&iter->seq);
7012
7013         /*
7014          * If there was nothing to send to user, in spite of consuming trace
7015          * entries, go back to wait for more entries.
7016          */
7017         if (sret == -EBUSY)
7018                 goto waitagain;
7019
7020 out:
7021         mutex_unlock(&iter->mutex);
7022
7023         return sret;
7024 }
7025
7026 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7027                                      unsigned int idx)
7028 {
7029         __free_page(spd->pages[idx]);
7030 }
7031
7032 static size_t
7033 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7034 {
7035         size_t count;
7036         int save_len;
7037         int ret;
7038
7039         /* Seq buffer is page-sized, exactly what we need. */
7040         for (;;) {
7041                 save_len = iter->seq.seq.len;
7042                 ret = print_trace_line(iter);
7043
7044                 if (trace_seq_has_overflowed(&iter->seq)) {
7045                         iter->seq.seq.len = save_len;
7046                         break;
7047                 }
7048
7049                 /*
7050                  * This should not be hit, because it should only
7051                  * be set if the iter->seq overflowed. But check it
7052                  * anyway to be safe.
7053                  */
7054                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7055                         iter->seq.seq.len = save_len;
7056                         break;
7057                 }
7058
7059                 count = trace_seq_used(&iter->seq) - save_len;
7060                 if (rem < count) {
7061                         rem = 0;
7062                         iter->seq.seq.len = save_len;
7063                         break;
7064                 }
7065
7066                 if (ret != TRACE_TYPE_NO_CONSUME)
7067                         trace_consume(iter);
7068                 rem -= count;
7069                 if (!trace_find_next_entry_inc(iter))   {
7070                         rem = 0;
7071                         iter->ent = NULL;
7072                         break;
7073                 }
7074         }
7075
7076         return rem;
7077 }
7078
7079 static ssize_t tracing_splice_read_pipe(struct file *filp,
7080                                         loff_t *ppos,
7081                                         struct pipe_inode_info *pipe,
7082                                         size_t len,
7083                                         unsigned int flags)
7084 {
7085         struct page *pages_def[PIPE_DEF_BUFFERS];
7086         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7087         struct trace_iterator *iter = filp->private_data;
7088         struct splice_pipe_desc spd = {
7089                 .pages          = pages_def,
7090                 .partial        = partial_def,
7091                 .nr_pages       = 0, /* This gets updated below. */
7092                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7093                 .ops            = &default_pipe_buf_ops,
7094                 .spd_release    = tracing_spd_release_pipe,
7095         };
7096         ssize_t ret;
7097         size_t rem;
7098         unsigned int i;
7099
7100         if (splice_grow_spd(pipe, &spd))
7101                 return -ENOMEM;
7102
7103         mutex_lock(&iter->mutex);
7104
7105         if (iter->trace->splice_read) {
7106                 ret = iter->trace->splice_read(iter, filp,
7107                                                ppos, pipe, len, flags);
7108                 if (ret)
7109                         goto out_err;
7110         }
7111
7112         ret = tracing_wait_pipe(filp);
7113         if (ret <= 0)
7114                 goto out_err;
7115
7116         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7117                 ret = -EFAULT;
7118                 goto out_err;
7119         }
7120
7121         trace_event_read_lock();
7122         trace_access_lock(iter->cpu_file);
7123
7124         /* Fill as many pages as possible. */
7125         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7126                 spd.pages[i] = alloc_page(GFP_KERNEL);
7127                 if (!spd.pages[i])
7128                         break;
7129
7130                 rem = tracing_fill_pipe_page(rem, iter);
7131
7132                 /* Copy the data into the page, so we can start over. */
7133                 ret = trace_seq_to_buffer(&iter->seq,
7134                                           page_address(spd.pages[i]),
7135                                           trace_seq_used(&iter->seq));
7136                 if (ret < 0) {
7137                         __free_page(spd.pages[i]);
7138                         break;
7139                 }
7140                 spd.partial[i].offset = 0;
7141                 spd.partial[i].len = trace_seq_used(&iter->seq);
7142
7143                 trace_seq_init(&iter->seq);
7144         }
7145
7146         trace_access_unlock(iter->cpu_file);
7147         trace_event_read_unlock();
7148         mutex_unlock(&iter->mutex);
7149
7150         spd.nr_pages = i;
7151
7152         if (i)
7153                 ret = splice_to_pipe(pipe, &spd);
7154         else
7155                 ret = 0;
7156 out:
7157         splice_shrink_spd(&spd);
7158         return ret;
7159
7160 out_err:
7161         mutex_unlock(&iter->mutex);
7162         goto out;
7163 }
7164
7165 static ssize_t
7166 tracing_entries_read(struct file *filp, char __user *ubuf,
7167                      size_t cnt, loff_t *ppos)
7168 {
7169         struct inode *inode = file_inode(filp);
7170         struct trace_array *tr = inode->i_private;
7171         int cpu = tracing_get_cpu(inode);
7172         char buf[64];
7173         int r = 0;
7174         ssize_t ret;
7175
7176         mutex_lock(&trace_types_lock);
7177
7178         if (cpu == RING_BUFFER_ALL_CPUS) {
7179                 int cpu, buf_size_same;
7180                 unsigned long size;
7181
7182                 size = 0;
7183                 buf_size_same = 1;
7184                 /* check if all cpu sizes are the same */
7185                 for_each_tracing_cpu(cpu) {
7186                         /* fill in the size from first enabled cpu */
7187                         if (size == 0)
7188                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7189                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7190                                 buf_size_same = 0;
7191                                 break;
7192                         }
7193                 }
7194
7195                 if (buf_size_same) {
7196                         if (!tr->ring_buffer_expanded)
7197                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7198                                             size >> 10,
7199                                             trace_buf_size >> 10);
7200                         else
7201                                 r = sprintf(buf, "%lu\n", size >> 10);
7202                 } else
7203                         r = sprintf(buf, "X\n");
7204         } else
7205                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7206
7207         mutex_unlock(&trace_types_lock);
7208
7209         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7210         return ret;
7211 }
7212
7213 static ssize_t
7214 tracing_entries_write(struct file *filp, const char __user *ubuf,
7215                       size_t cnt, loff_t *ppos)
7216 {
7217         struct inode *inode = file_inode(filp);
7218         struct trace_array *tr = inode->i_private;
7219         unsigned long val;
7220         int ret;
7221
7222         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7223         if (ret)
7224                 return ret;
7225
7226         /* must have at least 1 entry */
7227         if (!val)
7228                 return -EINVAL;
7229
7230         /* value is in KB */
7231         val <<= 10;
7232         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7233         if (ret < 0)
7234                 return ret;
7235
7236         *ppos += cnt;
7237
7238         return cnt;
7239 }
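
/*
 * Illustrative usage of the write handler above. The tracefs file name
 * "buffer_size_kb" and the mount point /sys/kernel/tracing are
 * assumptions, not taken from this file. The value is in KiB and is
 * shifted left by 10 before the ring buffer is resized:
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	// Resize the buffer(s) selected by this inode to 4096 KiB (4 MiB).
 *	write(fd, "4096", 4);
 *	close(fd);
 *
 * Whether all CPUs or a single CPU is resized depends on what
 * tracing_get_cpu() returns for the opened inode.
 */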
7240
7241 static ssize_t
7242 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7243                                 size_t cnt, loff_t *ppos)
7244 {
7245         struct trace_array *tr = filp->private_data;
7246         char buf[64];
7247         int r, cpu;
7248         unsigned long size = 0, expanded_size = 0;
7249
7250         mutex_lock(&trace_types_lock);
7251         for_each_tracing_cpu(cpu) {
7252                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7253                 if (!tr->ring_buffer_expanded)
7254                         expanded_size += trace_buf_size >> 10;
7255         }
7256         if (tr->ring_buffer_expanded)
7257                 r = sprintf(buf, "%lu\n", size);
7258         else
7259                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7260         mutex_unlock(&trace_types_lock);
7261
7262         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7263 }
7264
7265 static ssize_t
7266 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7267                           size_t cnt, loff_t *ppos)
7268 {
7269         /*
7270          * There is no need to read what the user has written; this function
7271          * exists just to make sure that there is no error when "echo" is used.
7272          */
7273
7274         *ppos += cnt;
7275
7276         return cnt;
7277 }
7278
7279 static int
7280 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7281 {
7282         struct trace_array *tr = inode->i_private;
7283
7284         /* disable tracing? */
7285         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7286                 tracer_tracing_off(tr);
7287         /* resize the ring buffer to 0 */
7288         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7289
7290         trace_array_put(tr);
7291
7292         return 0;
7293 }
7294
7295 #define TRACE_MARKER_MAX_SIZE           4096
7296
7297 static ssize_t
7298 tracing_mark_write(struct file *filp, const char __user *ubuf,
7299                                         size_t cnt, loff_t *fpos)
7300 {
7301         struct trace_array *tr = filp->private_data;
7302         struct ring_buffer_event *event;
7303         enum event_trigger_type tt = ETT_NONE;
7304         struct trace_buffer *buffer;
7305         struct print_entry *entry;
7306         int meta_size;
7307         ssize_t written;
7308         size_t size;
7309         int len;
7310
7311 /* Used in tracing_mark_raw_write() as well */
7312 #define FAULTED_STR "<faulted>"
7313 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7314
7315         if (tracing_disabled)
7316                 return -EINVAL;
7317
7318         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7319                 return -EINVAL;
7320
7321         if ((ssize_t)cnt < 0)
7322                 return -EINVAL;
7323
7324         if (cnt > TRACE_MARKER_MAX_SIZE)
7325                 cnt = TRACE_MARKER_MAX_SIZE;
7326
7327         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7328  again:
7329         size = cnt + meta_size;
7330
7331         /* If less than "<faulted>", then make sure we can still add that */
7332         if (cnt < FAULTED_SIZE)
7333                 size += FAULTED_SIZE - cnt;
7334
7335         buffer = tr->array_buffer.buffer;
7336         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7337                                             tracing_gen_ctx());
7338         if (unlikely(!event)) {
7339                 /*
7340                  * If the size was greater than what was allowed, then
7341                  * make it smaller and try again.
7342                  */
7343                 if (size > ring_buffer_max_event_size(buffer)) {
7344                         /* cnt < FAULTED_SIZE should never make size bigger than max */
7345                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7346                                 return -EBADF;
7347                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7348                         /* The above should only happen once */
7349                         if (WARN_ON_ONCE(cnt + meta_size == size))
7350                                 return -EBADF;
7351                         goto again;
7352                 }
7353
7354                 /* Ring buffer disabled, return as if not open for write */
7355                 return -EBADF;
7356         }
7357
7358         entry = ring_buffer_event_data(event);
7359         entry->ip = _THIS_IP_;
7360
7361         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7362         if (len) {
7363                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7364                 cnt = FAULTED_SIZE;
7365                 written = -EFAULT;
7366         } else
7367                 written = cnt;
7368
7369         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7370                 /* do not add \n before testing triggers, but add \0 */
7371                 entry->buf[cnt] = '\0';
7372                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7373         }
7374
7375         if (entry->buf[cnt - 1] != '\n') {
7376                 entry->buf[cnt] = '\n';
7377                 entry->buf[cnt + 1] = '\0';
7378         } else
7379                 entry->buf[cnt] = '\0';
7380
7381         if (static_branch_unlikely(&trace_marker_exports_enabled))
7382                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7383         __buffer_unlock_commit(buffer, event);
7384
7385         if (tt)
7386                 event_triggers_post_call(tr->trace_marker_file, tt);
7387
7388         return written;
7389 }
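
/*
 * Illustrative userspace usage of the handler above. The tracefs file
 * name "trace_marker" and the mount point /sys/kernel/tracing are
 * assumptions, not taken from this file:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	// Writes longer than TRACE_MARKER_MAX_SIZE are truncated, and a
 *	// missing trailing newline is appended before the event is
 *	// committed to the ring buffer.
 *	write(fd, "hello from userspace\n", 21);
 *	close(fd);
 */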
7390
7391 static ssize_t
7392 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7393                                         size_t cnt, loff_t *fpos)
7394 {
7395         struct trace_array *tr = filp->private_data;
7396         struct ring_buffer_event *event;
7397         struct trace_buffer *buffer;
7398         struct raw_data_entry *entry;
7399         ssize_t written;
7400         int size;
7401         int len;
7402
7403 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7404
7405         if (tracing_disabled)
7406                 return -EINVAL;
7407
7408         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7409                 return -EINVAL;
7410
7411         /* The marker must at least have a tag id */
7412         if (cnt < sizeof(unsigned int))
7413                 return -EINVAL;
7414
7415         size = sizeof(*entry) + cnt;
7416         if (cnt < FAULT_SIZE_ID)
7417                 size += FAULT_SIZE_ID - cnt;
7418
7419         buffer = tr->array_buffer.buffer;
7420
7421         if (size > ring_buffer_max_event_size(buffer))
7422                 return -EINVAL;
7423
7424         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7425                                             tracing_gen_ctx());
7426         if (!event)
7427                 /* Ring buffer disabled, return as if not open for write */
7428                 return -EBADF;
7429
7430         entry = ring_buffer_event_data(event);
7431
7432         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7433         if (len) {
7434                 entry->id = -1;
7435                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7436                 written = -EFAULT;
7437         } else
7438                 written = cnt;
7439
7440         __buffer_unlock_commit(buffer, event);
7441
7442         return written;
7443 }
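
/*
 * Illustrative userspace usage of the raw handler above. The tracefs
 * file name "trace_marker_raw" and the mount point /sys/kernel/tracing
 * are assumptions, not taken from this file. The payload must begin with
 * an integer tag id (copied into entry->id); the rest is opaque data:
 *
 *	struct { int id; char data[8]; } raw = { .id = 42, .data = "payload" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &raw, sizeof(raw));
 *	close(fd);
 */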
7444
7445 static int tracing_clock_show(struct seq_file *m, void *v)
7446 {
7447         struct trace_array *tr = m->private;
7448         int i;
7449
7450         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7451                 seq_printf(m,
7452                         "%s%s%s%s", i ? " " : "",
7453                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7454                         i == tr->clock_id ? "]" : "");
7455         seq_putc(m, '\n');
7456
7457         return 0;
7458 }
7459
7460 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7461 {
7462         int i;
7463
7464         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7465                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7466                         break;
7467         }
7468         if (i == ARRAY_SIZE(trace_clocks))
7469                 return -EINVAL;
7470
7471         mutex_lock(&trace_types_lock);
7472
7473         tr->clock_id = i;
7474
7475         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7476
7477         /*
7478          * New clock may not be consistent with the previous clock.
7479          * Reset the buffer so that it doesn't have incomparable timestamps.
7480          */
7481         tracing_reset_online_cpus(&tr->array_buffer);
7482
7483 #ifdef CONFIG_TRACER_MAX_TRACE
7484         if (tr->max_buffer.buffer)
7485                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7486         tracing_reset_online_cpus(&tr->max_buffer);
7487 #endif
7488
7489         mutex_unlock(&trace_types_lock);
7490
7491         return 0;
7492 }
7493
7494 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7495                                    size_t cnt, loff_t *fpos)
7496 {
7497         struct seq_file *m = filp->private_data;
7498         struct trace_array *tr = m->private;
7499         char buf[64];
7500         const char *clockstr;
7501         int ret;
7502
7503         if (cnt >= sizeof(buf))
7504                 return -EINVAL;
7505
7506         if (copy_from_user(buf, ubuf, cnt))
7507                 return -EFAULT;
7508
7509         buf[cnt] = 0;
7510
7511         clockstr = strstrip(buf);
7512
7513         ret = tracing_set_clock(tr, clockstr);
7514         if (ret)
7515                 return ret;
7516
7517         *fpos += cnt;
7518
7519         return cnt;
7520 }
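
/*
 * Illustrative usage of the write handler above. The tracefs file name
 * "trace_clock" and the mount point /sys/kernel/tracing are assumptions,
 * not taken from this file. Valid names are those in the trace_clocks[]
 * table (e.g. "local" or "global"; the exact set depends on the kernel),
 * and tracing_clock_show() marks the current one with brackets:
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *	write(fd, "global", 6);		// switching clocks also resets the buffers
 *	close(fd);
 */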
7521
7522 static int tracing_clock_open(struct inode *inode, struct file *file)
7523 {
7524         struct trace_array *tr = inode->i_private;
7525         int ret;
7526
7527         ret = tracing_check_open_get_tr(tr);
7528         if (ret)
7529                 return ret;
7530
7531         ret = single_open(file, tracing_clock_show, inode->i_private);
7532         if (ret < 0)
7533                 trace_array_put(tr);
7534
7535         return ret;
7536 }
7537
7538 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7539 {
7540         struct trace_array *tr = m->private;
7541
7542         mutex_lock(&trace_types_lock);
7543
7544         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7545                 seq_puts(m, "delta [absolute]\n");
7546         else
7547                 seq_puts(m, "[delta] absolute\n");
7548
7549         mutex_unlock(&trace_types_lock);
7550
7551         return 0;
7552 }
7553
7554 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7555 {
7556         struct trace_array *tr = inode->i_private;
7557         int ret;
7558
7559         ret = tracing_check_open_get_tr(tr);
7560         if (ret)
7561                 return ret;
7562
7563         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7564         if (ret < 0)
7565                 trace_array_put(tr);
7566
7567         return ret;
7568 }
7569
7570 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7571 {
7572         if (rbe == this_cpu_read(trace_buffered_event))
7573                 return ring_buffer_time_stamp(buffer);
7574
7575         return ring_buffer_event_time_stamp(buffer, rbe);
7576 }
7577
7578 /*
7579  * Set or disable using the per CPU trace_buffered_event when possible.
7580  */
7581 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7582 {
7583         int ret = 0;
7584
7585         mutex_lock(&trace_types_lock);
7586
7587         if (set && tr->no_filter_buffering_ref++)
7588                 goto out;
7589
7590         if (!set) {
7591                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7592                         ret = -EINVAL;
7593                         goto out;
7594                 }
7595
7596                 --tr->no_filter_buffering_ref;
7597         }
7598  out:
7599         mutex_unlock(&trace_types_lock);
7600
7601         return ret;
7602 }
7603
7604 struct ftrace_buffer_info {
7605         struct trace_iterator   iter;
7606         void                    *spare;
7607         unsigned int            spare_cpu;
7608         unsigned int            spare_size;
7609         unsigned int            read;
7610 };
7611
7612 #ifdef CONFIG_TRACER_SNAPSHOT
7613 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7614 {
7615         struct trace_array *tr = inode->i_private;
7616         struct trace_iterator *iter;
7617         struct seq_file *m;
7618         int ret;
7619
7620         ret = tracing_check_open_get_tr(tr);
7621         if (ret)
7622                 return ret;
7623
7624         if (file->f_mode & FMODE_READ) {
7625                 iter = __tracing_open(inode, file, true);
7626                 if (IS_ERR(iter))
7627                         ret = PTR_ERR(iter);
7628         } else {
7629                 /* Writes still need the seq_file to hold the private data */
7630                 ret = -ENOMEM;
7631                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7632                 if (!m)
7633                         goto out;
7634                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7635                 if (!iter) {
7636                         kfree(m);
7637                         goto out;
7638                 }
7639                 ret = 0;
7640
7641                 iter->tr = tr;
7642                 iter->array_buffer = &tr->max_buffer;
7643                 iter->cpu_file = tracing_get_cpu(inode);
7644                 m->private = iter;
7645                 file->private_data = m;
7646         }
7647 out:
7648         if (ret < 0)
7649                 trace_array_put(tr);
7650
7651         return ret;
7652 }
7653
7654 static void tracing_swap_cpu_buffer(void *tr)
7655 {
7656         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7657 }
7658
7659 static ssize_t
7660 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7661                        loff_t *ppos)
7662 {
7663         struct seq_file *m = filp->private_data;
7664         struct trace_iterator *iter = m->private;
7665         struct trace_array *tr = iter->tr;
7666         unsigned long val;
7667         int ret;
7668
7669         ret = tracing_update_buffers(tr);
7670         if (ret < 0)
7671                 return ret;
7672
7673         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7674         if (ret)
7675                 return ret;
7676
7677         mutex_lock(&trace_types_lock);
7678
7679         if (tr->current_trace->use_max_tr) {
7680                 ret = -EBUSY;
7681                 goto out;
7682         }
7683
7684         local_irq_disable();
7685         arch_spin_lock(&tr->max_lock);
7686         if (tr->cond_snapshot)
7687                 ret = -EBUSY;
7688         arch_spin_unlock(&tr->max_lock);
7689         local_irq_enable();
7690         if (ret)
7691                 goto out;
7692
7693         switch (val) {
7694         case 0:
7695                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7696                         ret = -EINVAL;
7697                         break;
7698                 }
7699                 if (tr->allocated_snapshot)
7700                         free_snapshot(tr);
7701                 break;
7702         case 1:
7703 /* Only allow per-cpu swap if the ring buffer supports it */
7704 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7705                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7706                         ret = -EINVAL;
7707                         break;
7708                 }
7709 #endif
7710                 if (tr->allocated_snapshot)
7711                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7712                                         &tr->array_buffer, iter->cpu_file);
7713                 else
7714                         ret = tracing_alloc_snapshot_instance(tr);
7715                 if (ret < 0)
7716                         break;
7717                 /* Now, we're going to swap */
7718                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7719                         local_irq_disable();
7720                         update_max_tr(tr, current, smp_processor_id(), NULL);
7721                         local_irq_enable();
7722                 } else {
7723                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7724                                                  (void *)tr, 1);
7725                 }
7726                 break;
7727         default:
7728                 if (tr->allocated_snapshot) {
7729                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7730                                 tracing_reset_online_cpus(&tr->max_buffer);
7731                         else
7732                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7733                 }
7734                 break;
7735         }
7736
7737         if (ret >= 0) {
7738                 *ppos += cnt;
7739                 ret = cnt;
7740         }
7741 out:
7742         mutex_unlock(&trace_types_lock);
7743         return ret;
7744 }
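
/*
 * For illustration, the values accepted by the write handler above
 * (the tracefs file name "snapshot" and the mount point
 * /sys/kernel/tracing are assumptions, not taken from this file):
 *
 *	0	free the allocated snapshot buffer
 *	1	allocate it if needed and swap in a snapshot now
 *	other	clear the snapshot buffer contents
 *
 * e.g. from userspace:
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *	write(fd, "1", 1);	// take a snapshot now
 *	close(fd);
 */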
7745
7746 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7747 {
7748         struct seq_file *m = file->private_data;
7749         int ret;
7750
7751         ret = tracing_release(inode, file);
7752
7753         if (file->f_mode & FMODE_READ)
7754                 return ret;
7755
7756         /* If write only, the seq_file is just a stub */
7757         if (m)
7758                 kfree(m->private);
7759         kfree(m);
7760
7761         return 0;
7762 }
7763
7764 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7765 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7766                                     size_t count, loff_t *ppos);
7767 static int tracing_buffers_release(struct inode *inode, struct file *file);
7768 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7769                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7770
7771 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7772 {
7773         struct ftrace_buffer_info *info;
7774         int ret;
7775
7776         /* The following checks for tracefs lockdown */
7777         ret = tracing_buffers_open(inode, filp);
7778         if (ret < 0)
7779                 return ret;
7780
7781         info = filp->private_data;
7782
7783         if (info->iter.trace->use_max_tr) {
7784                 tracing_buffers_release(inode, filp);
7785                 return -EBUSY;
7786         }
7787
7788         info->iter.snapshot = true;
7789         info->iter.array_buffer = &info->iter.tr->max_buffer;
7790
7791         return ret;
7792 }
7793
7794 #endif /* CONFIG_TRACER_SNAPSHOT */
7795
7796
7797 static const struct file_operations tracing_thresh_fops = {
7798         .open           = tracing_open_generic,
7799         .read           = tracing_thresh_read,
7800         .write          = tracing_thresh_write,
7801         .llseek         = generic_file_llseek,
7802 };
7803
7804 #ifdef CONFIG_TRACER_MAX_TRACE
7805 static const struct file_operations tracing_max_lat_fops = {
7806         .open           = tracing_open_generic_tr,
7807         .read           = tracing_max_lat_read,
7808         .write          = tracing_max_lat_write,
7809         .llseek         = generic_file_llseek,
7810         .release        = tracing_release_generic_tr,
7811 };
7812 #endif
7813
7814 static const struct file_operations set_tracer_fops = {
7815         .open           = tracing_open_generic_tr,
7816         .read           = tracing_set_trace_read,
7817         .write          = tracing_set_trace_write,
7818         .llseek         = generic_file_llseek,
7819         .release        = tracing_release_generic_tr,
7820 };
7821
7822 static const struct file_operations tracing_pipe_fops = {
7823         .open           = tracing_open_pipe,
7824         .poll           = tracing_poll_pipe,
7825         .read           = tracing_read_pipe,
7826         .splice_read    = tracing_splice_read_pipe,
7827         .release        = tracing_release_pipe,
7828         .llseek         = no_llseek,
7829 };
7830
7831 static const struct file_operations tracing_entries_fops = {
7832         .open           = tracing_open_generic_tr,
7833         .read           = tracing_entries_read,
7834         .write          = tracing_entries_write,
7835         .llseek         = generic_file_llseek,
7836         .release        = tracing_release_generic_tr,
7837 };
7838
7839 static const struct file_operations tracing_total_entries_fops = {
7840         .open           = tracing_open_generic_tr,
7841         .read           = tracing_total_entries_read,
7842         .llseek         = generic_file_llseek,
7843         .release        = tracing_release_generic_tr,
7844 };
7845
7846 static const struct file_operations tracing_free_buffer_fops = {
7847         .open           = tracing_open_generic_tr,
7848         .write          = tracing_free_buffer_write,
7849         .release        = tracing_free_buffer_release,
7850 };
7851
7852 static const struct file_operations tracing_mark_fops = {
7853         .open           = tracing_mark_open,
7854         .write          = tracing_mark_write,
7855         .release        = tracing_release_generic_tr,
7856 };
7857
7858 static const struct file_operations tracing_mark_raw_fops = {
7859         .open           = tracing_mark_open,
7860         .write          = tracing_mark_raw_write,
7861         .release        = tracing_release_generic_tr,
7862 };
7863
7864 static const struct file_operations trace_clock_fops = {
7865         .open           = tracing_clock_open,
7866         .read           = seq_read,
7867         .llseek         = seq_lseek,
7868         .release        = tracing_single_release_tr,
7869         .write          = tracing_clock_write,
7870 };
7871
7872 static const struct file_operations trace_time_stamp_mode_fops = {
7873         .open           = tracing_time_stamp_mode_open,
7874         .read           = seq_read,
7875         .llseek         = seq_lseek,
7876         .release        = tracing_single_release_tr,
7877 };
7878
7879 #ifdef CONFIG_TRACER_SNAPSHOT
7880 static const struct file_operations snapshot_fops = {
7881         .open           = tracing_snapshot_open,
7882         .read           = seq_read,
7883         .write          = tracing_snapshot_write,
7884         .llseek         = tracing_lseek,
7885         .release        = tracing_snapshot_release,
7886 };
7887
7888 static const struct file_operations snapshot_raw_fops = {
7889         .open           = snapshot_raw_open,
7890         .read           = tracing_buffers_read,
7891         .release        = tracing_buffers_release,
7892         .splice_read    = tracing_buffers_splice_read,
7893         .llseek         = no_llseek,
7894 };
7895
7896 #endif /* CONFIG_TRACER_SNAPSHOT */
7897
7898 /*
7899  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7900  * @filp: The active open file structure
7901  * @ubuf: The user-space buffer containing the value to be written
7902  * @cnt: The number of bytes to read from @ubuf
7903  * @ppos: The current "file" position
7904  *
7905  * This function implements the write interface for a struct trace_min_max_param.
7906  * The filp->private_data must point to a trace_min_max_param structure that
7907  * defines where to write the value, the min and the max acceptable values,
7908  * and a lock to protect the write.
7909  */
7910 static ssize_t
7911 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7912 {
7913         struct trace_min_max_param *param = filp->private_data;
7914         u64 val;
7915         int err;
7916
7917         if (!param)
7918                 return -EFAULT;
7919
7920         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7921         if (err)
7922                 return err;
7923
7924         if (param->lock)
7925                 mutex_lock(param->lock);
7926
7927         if (param->min && val < *param->min)
7928                 err = -EINVAL;
7929
7930         if (param->max && val > *param->max)
7931                 err = -EINVAL;
7932
7933         if (!err)
7934                 *param->val = val;
7935
7936         if (param->lock)
7937                 mutex_unlock(param->lock);
7938
7939         if (err)
7940                 return err;
7941
7942         return cnt;
7943 }
7944
7945 /*
7946  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7947  * @filp: The active open file structure
7948  * @ubuf: The userspace provided buffer to read value into
7949  * @cnt: The maximum number of bytes to read
7950  * @ppos: The current "file" position
7951  *
7952  * This function implements the read interface for a struct trace_min_max_param.
7953  * The filp->private_data must point to a trace_min_max_param struct with valid
7954  * data.
7955  */
7956 static ssize_t
7957 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7958 {
7959         struct trace_min_max_param *param = filp->private_data;
7960         char buf[U64_STR_SIZE];
7961         int len;
7962         u64 val;
7963
7964         if (!param)
7965                 return -EFAULT;
7966
7967         val = *param->val;
7968
7969         if (cnt > sizeof(buf))
7970                 cnt = sizeof(buf);
7971
7972         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7973
7974         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7975 }
7976
7977 const struct file_operations trace_min_max_fops = {
7978         .open           = tracing_open_generic,
7979         .read           = trace_min_max_read,
7980         .write          = trace_min_max_write,
7981 };
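
/*
 * Illustrative sketch (never built): how a u64 tunable could be wired up
 * through trace_min_max_fops, based on the fields dereferenced by the
 * read/write handlers above.  The variable names, the "example_tunable"
 * file name and the parent dentry are made up; .min/.max may be left
 * NULL to skip the corresponding bound check.
 */
#if 0
static u64 example_val = 50;
static u64 example_min = 1;
static u64 example_max = 100;
static DEFINE_MUTEX(example_lock);

static struct trace_min_max_param example_param = {
	.lock	= &example_lock,
	.val	= &example_val,
	.min	= &example_min,
	.max	= &example_max,
};

static void example_create_tunable(struct dentry *parent)
{
	trace_create_file("example_tunable", TRACE_MODE_WRITE, parent,
			  &example_param, &trace_min_max_fops);
}
#endif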
7982
7983 #define TRACING_LOG_ERRS_MAX    8
7984 #define TRACING_LOG_LOC_MAX     128
7985
7986 #define CMD_PREFIX "  Command: "
7987
7988 struct err_info {
7989         const char      **errs; /* ptr to loc-specific array of err strings */
7990         u8              type;   /* index into errs -> specific err string */
7991         u16             pos;    /* caret position */
7992         u64             ts;
7993 };
7994
7995 struct tracing_log_err {
7996         struct list_head        list;
7997         struct err_info         info;
7998         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7999         char                    *cmd;                     /* what caused err */
8000 };
8001
8002 static DEFINE_MUTEX(tracing_err_log_lock);
8003
8004 static struct tracing_log_err *alloc_tracing_log_err(int len)
8005 {
8006         struct tracing_log_err *err;
8007
8008         err = kzalloc(sizeof(*err), GFP_KERNEL);
8009         if (!err)
8010                 return ERR_PTR(-ENOMEM);
8011
8012         err->cmd = kzalloc(len, GFP_KERNEL);
8013         if (!err->cmd) {
8014                 kfree(err);
8015                 return ERR_PTR(-ENOMEM);
8016         }
8017
8018         return err;
8019 }
8020
8021 static void free_tracing_log_err(struct tracing_log_err *err)
8022 {
8023         kfree(err->cmd);
8024         kfree(err);
8025 }
8026
8027 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8028                                                    int len)
8029 {
8030         struct tracing_log_err *err;
8031         char *cmd;
8032
8033         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8034                 err = alloc_tracing_log_err(len);
8035                 if (PTR_ERR(err) != -ENOMEM)
8036                         tr->n_err_log_entries++;
8037
8038                 return err;
8039         }
8040         cmd = kzalloc(len, GFP_KERNEL);
8041         if (!cmd)
8042                 return ERR_PTR(-ENOMEM);
8043         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8044         kfree(err->cmd);
8045         err->cmd = cmd;
8046         list_del(&err->list);
8047
8048         return err;
8049 }
8050
8051 /**
8052  * err_pos - find the position of a string within a command for error careting
8053  * @cmd: The tracing command that caused the error
8054  * @str: The string to position the caret at within @cmd
8055  *
8056  * Finds the position of the first occurrence of @str within @cmd.  The
8057  * return value can be passed to tracing_log_err() for caret placement
8058  * within @cmd.
8059  *
8060  * Returns the index within @cmd of the first occurrence of @str or 0
8061  * if @str was not found.
8062  */
8063 unsigned int err_pos(char *cmd, const char *str)
8064 {
8065         char *found;
8066
8067         if (WARN_ON(!strlen(cmd)))
8068                 return 0;
8069
8070         found = strstr(cmd, str);
8071         if (found)
8072                 return found - cmd;
8073
8074         return 0;
8075 }
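
/*
 * Minimal sketch of err_pos() in isolation (never built); the command
 * string and the offending token are made up for illustration.
 */
#if 0
	char cmd[] = "hist:keys=pid:badfield";
	/* pos becomes 14, the offset of "badfield" within cmd */
	unsigned int pos = err_pos(cmd, "badfield");
#endif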
8076
8077 /**
8078  * tracing_log_err - write an error to the tracing error log
8079  * @tr: The associated trace array for the error (NULL for top level array)
8080  * @loc: A string describing where the error occurred
8081  * @cmd: The tracing command that caused the error
8082  * @errs: The array of loc-specific static error strings
8083  * @type: The index into errs[], which produces the specific static err string
8084  * @pos: The position the caret should be placed in the cmd
8085  *
8086  * Writes an error into tracing/error_log of the form:
8087  *
8088  * <loc>: error: <text>
8089  *   Command: <cmd>
8090  *              ^
8091  *
8092  * tracing/error_log is a small log file containing the last
8093  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8094  * unless there has been a tracing error, and the error log can be
8095  * cleared, and its memory freed, by opening it for write in
8096  * truncation mode, i.e. echo > tracing/error_log.
8097  *
8098  * NOTE: the @errs array along with the @type param are used to
8099  * produce a static error string - this string is not copied and saved
8100  * when the error is logged - only a pointer to it is saved.  See
8101  * existing callers for examples of how static strings are typically
8102  * defined for use with tracing_log_err().
8103  */
8104 void tracing_log_err(struct trace_array *tr,
8105                      const char *loc, const char *cmd,
8106                      const char **errs, u8 type, u16 pos)
8107 {
8108         struct tracing_log_err *err;
8109         int len = 0;
8110
8111         if (!tr)
8112                 tr = &global_trace;
8113
8114         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8115
8116         mutex_lock(&tracing_err_log_lock);
8117         err = get_tracing_log_err(tr, len);
8118         if (PTR_ERR(err) == -ENOMEM) {
8119                 mutex_unlock(&tracing_err_log_lock);
8120                 return;
8121         }
8122
8123         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8124         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8125
8126         err->info.errs = errs;
8127         err->info.type = type;
8128         err->info.pos = pos;
8129         err->info.ts = local_clock();
8130
8131         list_add_tail(&err->list, &tr->err_log);
8132         mutex_unlock(&tracing_err_log_lock);
8133 }
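
/*
 * Illustrative sketch of a hypothetical caller (never built), following
 * the pattern described above: a static, location-specific array of
 * error strings indexed by @type, with err_pos() supplying the caret
 * position.  All names here are made up.
 */
#if 0
enum { EXAMPLE_ERR_KEYWORD, EXAMPLE_ERR_DUPFIELD };

static const char *example_cmd_errs[] = {
	"Unknown keyword",	/* EXAMPLE_ERR_KEYWORD  */
	"Duplicate field",	/* EXAMPLE_ERR_DUPFIELD */
};

static void example_report_error(struct trace_array *tr, char *cmd,
				 const char *bad_token)
{
	/*
	 * Produces an error_log entry of the form documented above,
	 * with the caret placed under @bad_token within @cmd.
	 */
	tracing_log_err(tr, "example", cmd, example_cmd_errs,
			EXAMPLE_ERR_KEYWORD, err_pos(cmd, bad_token));
}
#endif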
8134
8135 static void clear_tracing_err_log(struct trace_array *tr)
8136 {
8137         struct tracing_log_err *err, *next;
8138
8139         mutex_lock(&tracing_err_log_lock);
8140         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8141                 list_del(&err->list);
8142                 free_tracing_log_err(err);
8143         }
8144
8145         tr->n_err_log_entries = 0;
8146         mutex_unlock(&tracing_err_log_lock);
8147 }
8148
8149 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8150 {
8151         struct trace_array *tr = m->private;
8152
8153         mutex_lock(&tracing_err_log_lock);
8154
8155         return seq_list_start(&tr->err_log, *pos);
8156 }
8157
8158 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8159 {
8160         struct trace_array *tr = m->private;
8161
8162         return seq_list_next(v, &tr->err_log, pos);
8163 }
8164
8165 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8166 {
8167         mutex_unlock(&tracing_err_log_lock);
8168 }
8169
8170 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8171 {
8172         u16 i;
8173
8174         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8175                 seq_putc(m, ' ');
8176         for (i = 0; i < pos; i++)
8177                 seq_putc(m, ' ');
8178         seq_puts(m, "^\n");
8179 }
8180
8181 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8182 {
8183         struct tracing_log_err *err = v;
8184
8185         if (err) {
8186                 const char *err_text = err->info.errs[err->info.type];
8187                 u64 sec = err->info.ts;
8188                 u32 nsec;
8189
8190                 nsec = do_div(sec, NSEC_PER_SEC);
8191                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8192                            err->loc, err_text);
8193                 seq_printf(m, "%s", err->cmd);
8194                 tracing_err_log_show_pos(m, err->info.pos);
8195         }
8196
8197         return 0;
8198 }
8199
8200 static const struct seq_operations tracing_err_log_seq_ops = {
8201         .start  = tracing_err_log_seq_start,
8202         .next   = tracing_err_log_seq_next,
8203         .stop   = tracing_err_log_seq_stop,
8204         .show   = tracing_err_log_seq_show
8205 };
8206
8207 static int tracing_err_log_open(struct inode *inode, struct file *file)
8208 {
8209         struct trace_array *tr = inode->i_private;
8210         int ret = 0;
8211
8212         ret = tracing_check_open_get_tr(tr);
8213         if (ret)
8214                 return ret;
8215
8216         /* If this file was opened for write, then erase contents */
8217         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8218                 clear_tracing_err_log(tr);
8219
8220         if (file->f_mode & FMODE_READ) {
8221                 ret = seq_open(file, &tracing_err_log_seq_ops);
8222                 if (!ret) {
8223                         struct seq_file *m = file->private_data;
8224                         m->private = tr;
8225                 } else {
8226                         trace_array_put(tr);
8227                 }
8228         }
8229         return ret;
8230 }
8231
8232 static ssize_t tracing_err_log_write(struct file *file,
8233                                      const char __user *buffer,
8234                                      size_t count, loff_t *ppos)
8235 {
8236         return count;
8237 }
8238
8239 static int tracing_err_log_release(struct inode *inode, struct file *file)
8240 {
8241         struct trace_array *tr = inode->i_private;
8242
8243         trace_array_put(tr);
8244
8245         if (file->f_mode & FMODE_READ)
8246                 seq_release(inode, file);
8247
8248         return 0;
8249 }
8250
8251 static const struct file_operations tracing_err_log_fops = {
8252         .open           = tracing_err_log_open,
8253         .write          = tracing_err_log_write,
8254         .read           = seq_read,
8255         .llseek         = tracing_lseek,
8256         .release        = tracing_err_log_release,
8257 };
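
/*
 * Illustrative user-space sketch (never built): clearing the error log.
 * The shell's "echo > tracing/error_log" works because the O_TRUNC open
 * is what makes tracing_err_log_open() call clear_tracing_err_log().
 * The tracefs mount point is an assumption.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/error_log", O_WRONLY | O_TRUNC);

	if (fd >= 0)
		close(fd);
	return 0;
}
#endif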
8258
8259 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8260 {
8261         struct trace_array *tr = inode->i_private;
8262         struct ftrace_buffer_info *info;
8263         int ret;
8264
8265         ret = tracing_check_open_get_tr(tr);
8266         if (ret)
8267                 return ret;
8268
8269         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8270         if (!info) {
8271                 trace_array_put(tr);
8272                 return -ENOMEM;
8273         }
8274
8275         mutex_lock(&trace_types_lock);
8276
8277         info->iter.tr           = tr;
8278         info->iter.cpu_file     = tracing_get_cpu(inode);
8279         info->iter.trace        = tr->current_trace;
8280         info->iter.array_buffer = &tr->array_buffer;
8281         info->spare             = NULL;
8282         /* Force reading ring buffer for first read */
8283         info->read              = (unsigned int)-1;
8284
8285         filp->private_data = info;
8286
8287         tr->trace_ref++;
8288
8289         mutex_unlock(&trace_types_lock);
8290
8291         ret = nonseekable_open(inode, filp);
8292         if (ret < 0)
8293                 trace_array_put(tr);
8294
8295         return ret;
8296 }
8297
8298 static __poll_t
8299 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8300 {
8301         struct ftrace_buffer_info *info = filp->private_data;
8302         struct trace_iterator *iter = &info->iter;
8303
8304         return trace_poll(iter, filp, poll_table);
8305 }
8306
8307 static ssize_t
8308 tracing_buffers_read(struct file *filp, char __user *ubuf,
8309                      size_t count, loff_t *ppos)
8310 {
8311         struct ftrace_buffer_info *info = filp->private_data;
8312         struct trace_iterator *iter = &info->iter;
8313         void *trace_data;
8314         int page_size;
8315         ssize_t ret = 0;
8316         ssize_t size;
8317
8318         if (!count)
8319                 return 0;
8320
8321 #ifdef CONFIG_TRACER_MAX_TRACE
8322         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8323                 return -EBUSY;
8324 #endif
8325
8326         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8327
8328         /* Make sure the spare matches the current sub buffer size */
8329         if (info->spare) {
8330                 if (page_size != info->spare_size) {
8331                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8332                                                    info->spare_cpu, info->spare);
8333                         info->spare = NULL;
8334                 }
8335         }
8336
8337         if (!info->spare) {
8338                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8339                                                           iter->cpu_file);
8340                 if (IS_ERR(info->spare)) {
8341                         ret = PTR_ERR(info->spare);
8342                         info->spare = NULL;
8343                 } else {
8344                         info->spare_cpu = iter->cpu_file;
8345                         info->spare_size = page_size;
8346                 }
8347         }
8348         if (!info->spare)
8349                 return ret;
8350
8351         /* Do we have previous read data to read? */
8352         if (info->read < page_size)
8353                 goto read;
8354
8355  again:
8356         trace_access_lock(iter->cpu_file);
8357         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8358                                     info->spare,
8359                                     count,
8360                                     iter->cpu_file, 0);
8361         trace_access_unlock(iter->cpu_file);
8362
8363         if (ret < 0) {
8364                 if (trace_empty(iter)) {
8365                         if ((filp->f_flags & O_NONBLOCK))
8366                                 return -EAGAIN;
8367
8368                         ret = wait_on_pipe(iter, 0);
8369                         if (ret)
8370                                 return ret;
8371
8372                         goto again;
8373                 }
8374                 return 0;
8375         }
8376
8377         info->read = 0;
8378  read:
8379         size = page_size - info->read;
8380         if (size > count)
8381                 size = count;
8382         trace_data = ring_buffer_read_page_data(info->spare);
8383         ret = copy_to_user(ubuf, trace_data + info->read, size);
8384         if (ret == size)
8385                 return -EFAULT;
8386
8387         size -= ret;
8388
8389         *ppos += size;
8390         info->read += size;
8391
8392         return size;
8393 }
8394
8395 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8396 {
8397         struct ftrace_buffer_info *info = file->private_data;
8398         struct trace_iterator *iter = &info->iter;
8399
8400         iter->wait_index++;
8401         /* Make sure the waiters see the new wait_index */
8402         smp_wmb();
8403
8404         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8405
8406         return 0;
8407 }
8408
8409 static int tracing_buffers_release(struct inode *inode, struct file *file)
8410 {
8411         struct ftrace_buffer_info *info = file->private_data;
8412         struct trace_iterator *iter = &info->iter;
8413
8414         mutex_lock(&trace_types_lock);
8415
8416         iter->tr->trace_ref--;
8417
8418         __trace_array_put(iter->tr);
8419
8420         if (info->spare)
8421                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8422                                            info->spare_cpu, info->spare);
8423         kvfree(info);
8424
8425         mutex_unlock(&trace_types_lock);
8426
8427         return 0;
8428 }
8429
8430 struct buffer_ref {
8431         struct trace_buffer     *buffer;
8432         void                    *page;
8433         int                     cpu;
8434         refcount_t              refcount;
8435 };
8436
8437 static void buffer_ref_release(struct buffer_ref *ref)
8438 {
8439         if (!refcount_dec_and_test(&ref->refcount))
8440                 return;
8441         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8442         kfree(ref);
8443 }
8444
8445 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8446                                     struct pipe_buffer *buf)
8447 {
8448         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8449
8450         buffer_ref_release(ref);
8451         buf->private = 0;
8452 }
8453
8454 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8455                                 struct pipe_buffer *buf)
8456 {
8457         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8458
8459         if (refcount_read(&ref->refcount) > INT_MAX/2)
8460                 return false;
8461
8462         refcount_inc(&ref->refcount);
8463         return true;
8464 }
8465
8466 /* Pipe buffer operations for a buffer. */
8467 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8468         .release                = buffer_pipe_buf_release,
8469         .get                    = buffer_pipe_buf_get,
8470 };
8471
8472 /*
8473  * Callback from splice_to_pipe(), if we need to release some pages
8474  * at the end of the spd in case we errored out while filling the pipe.
8475  */
8476 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8477 {
8478         struct buffer_ref *ref =
8479                 (struct buffer_ref *)spd->partial[i].private;
8480
8481         buffer_ref_release(ref);
8482         spd->partial[i].private = 0;
8483 }
8484
8485 static ssize_t
8486 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8487                             struct pipe_inode_info *pipe, size_t len,
8488                             unsigned int flags)
8489 {
8490         struct ftrace_buffer_info *info = file->private_data;
8491         struct trace_iterator *iter = &info->iter;
8492         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8493         struct page *pages_def[PIPE_DEF_BUFFERS];
8494         struct splice_pipe_desc spd = {
8495                 .pages          = pages_def,
8496                 .partial        = partial_def,
8497                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8498                 .ops            = &buffer_pipe_buf_ops,
8499                 .spd_release    = buffer_spd_release,
8500         };
8501         struct buffer_ref *ref;
8502         int page_size;
8503         int entries, i;
8504         ssize_t ret = 0;
8505
8506 #ifdef CONFIG_TRACER_MAX_TRACE
8507         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8508                 return -EBUSY;
8509 #endif
8510
8511         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8512         if (*ppos & (page_size - 1))
8513                 return -EINVAL;
8514
8515         if (len & (page_size - 1)) {
8516                 if (len < page_size)
8517                         return -EINVAL;
8518                 len &= (~(page_size - 1));
8519         }
8520
8521         if (splice_grow_spd(pipe, &spd))
8522                 return -ENOMEM;
8523
8524  again:
8525         trace_access_lock(iter->cpu_file);
8526         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8527
8528         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8529                 struct page *page;
8530                 int r;
8531
8532                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8533                 if (!ref) {
8534                         ret = -ENOMEM;
8535                         break;
8536                 }
8537
8538                 refcount_set(&ref->refcount, 1);
8539                 ref->buffer = iter->array_buffer->buffer;
8540                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8541                 if (IS_ERR(ref->page)) {
8542                         ret = PTR_ERR(ref->page);
8543                         ref->page = NULL;
8544                         kfree(ref);
8545                         break;
8546                 }
8547                 ref->cpu = iter->cpu_file;
8548
8549                 r = ring_buffer_read_page(ref->buffer, ref->page,
8550                                           len, iter->cpu_file, 1);
8551                 if (r < 0) {
8552                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8553                                                    ref->page);
8554                         kfree(ref);
8555                         break;
8556                 }
8557
8558                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8559
8560                 spd.pages[i] = page;
8561                 spd.partial[i].len = page_size;
8562                 spd.partial[i].offset = 0;
8563                 spd.partial[i].private = (unsigned long)ref;
8564                 spd.nr_pages++;
8565                 *ppos += page_size;
8566
8567                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8568         }
8569
8570         trace_access_unlock(iter->cpu_file);
8571         spd.nr_pages = i;
8572
8573         /* did we read anything? */
8574         if (!spd.nr_pages) {
8575                 long wait_index;
8576
8577                 if (ret)
8578                         goto out;
8579
8580                 ret = -EAGAIN;
8581                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8582                         goto out;
8583
8584                 wait_index = READ_ONCE(iter->wait_index);
8585
8586                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8587                 if (ret)
8588                         goto out;
8589
8590                 /* No need to wait after waking up when tracing is off */
8591                 if (!tracer_tracing_is_on(iter->tr))
8592                         goto out;
8593
8594                 /* Make sure we see the new wait_index */
8595                 smp_rmb();
8596                 if (wait_index != iter->wait_index)
8597                         goto out;
8598
8599                 goto again;
8600         }
8601
8602         ret = splice_to_pipe(pipe, &spd);
8603 out:
8604         splice_shrink_spd(&spd);
8605
8606         return ret;
8607 }
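
/*
 * Illustrative user-space sketch (never built): splicing raw sub-buffers
 * from a per-cpu trace_pipe_raw file into a pipe.  As enforced above,
 * the file offset must be sub-buffer aligned and the length is rounded
 * down to a multiple of the sub-buffer size (at least one sub-buffer).
 * The path and the 4K sub-buffer size are assumptions.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY | O_NONBLOCK);
	int pfd[2];
	ssize_t n;

	if (fd < 0 || pipe(pfd) < 0)
		return 1;

	/* Ask for up to 16 sub-buffers without blocking */
	n = splice(fd, NULL, pfd[1], NULL, 16 * 4096, SPLICE_F_NONBLOCK);

	close(pfd[0]);
	close(pfd[1]);
	close(fd);
	return n < 0 ? 1 : 0;
}
#endif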
8608
8609 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8610 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8611 {
8612         struct ftrace_buffer_info *info = file->private_data;
8613         struct trace_iterator *iter = &info->iter;
8614
8615         if (cmd)
8616                 return -ENOIOCTLCMD;
8617
8618         mutex_lock(&trace_types_lock);
8619
8620         iter->wait_index++;
8621         /* Make sure the waiters see the new wait_index */
8622         smp_wmb();
8623
8624         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8625
8626         mutex_unlock(&trace_types_lock);
8627         return 0;
8628 }
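
/*
 * Illustrative user-space sketch (never built): an ioctl with cmd 0 on a
 * trace_pipe_raw file descriptor wakes up any blocked readers, as
 * described in the comment above.  The path is an assumption.
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY);

	if (fd < 0)
		return 1;
	ioctl(fd, 0);	/* cmd 0 is the only accepted command */
	close(fd);
	return 0;
}
#endif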
8629
8630 static const struct file_operations tracing_buffers_fops = {
8631         .open           = tracing_buffers_open,
8632         .read           = tracing_buffers_read,
8633         .poll           = tracing_buffers_poll,
8634         .release        = tracing_buffers_release,
8635         .flush          = tracing_buffers_flush,
8636         .splice_read    = tracing_buffers_splice_read,
8637         .unlocked_ioctl = tracing_buffers_ioctl,
8638         .llseek         = no_llseek,
8639 };
8640
8641 static ssize_t
8642 tracing_stats_read(struct file *filp, char __user *ubuf,
8643                    size_t count, loff_t *ppos)
8644 {
8645         struct inode *inode = file_inode(filp);
8646         struct trace_array *tr = inode->i_private;
8647         struct array_buffer *trace_buf = &tr->array_buffer;
8648         int cpu = tracing_get_cpu(inode);
8649         struct trace_seq *s;
8650         unsigned long cnt;
8651         unsigned long long t;
8652         unsigned long usec_rem;
8653
8654         s = kmalloc(sizeof(*s), GFP_KERNEL);
8655         if (!s)
8656                 return -ENOMEM;
8657
8658         trace_seq_init(s);
8659
8660         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8661         trace_seq_printf(s, "entries: %ld\n", cnt);
8662
8663         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8664         trace_seq_printf(s, "overrun: %ld\n", cnt);
8665
8666         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8667         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8668
8669         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8670         trace_seq_printf(s, "bytes: %ld\n", cnt);
8671
8672         if (trace_clocks[tr->clock_id].in_ns) {
8673                 /* local or global for trace_clock */
8674                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8675                 usec_rem = do_div(t, USEC_PER_SEC);
8676                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8677                                                                 t, usec_rem);
8678
8679                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8680                 usec_rem = do_div(t, USEC_PER_SEC);
8681                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8682         } else {
8683                 /* counter or tsc mode for trace_clock */
8684                 trace_seq_printf(s, "oldest event ts: %llu\n",
8685                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8686
8687                 trace_seq_printf(s, "now ts: %llu\n",
8688                                 ring_buffer_time_stamp(trace_buf->buffer));
8689         }
8690
8691         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8692         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8693
8694         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8695         trace_seq_printf(s, "read events: %ld\n", cnt);
8696
8697         count = simple_read_from_buffer(ubuf, count, ppos,
8698                                         s->buffer, trace_seq_used(s));
8699
8700         kfree(s);
8701
8702         return count;
8703 }
8704
8705 static const struct file_operations tracing_stats_fops = {
8706         .open           = tracing_open_generic_tr,
8707         .read           = tracing_stats_read,
8708         .llseek         = generic_file_llseek,
8709         .release        = tracing_release_generic_tr,
8710 };
8711
8712 #ifdef CONFIG_DYNAMIC_FTRACE
8713
8714 static ssize_t
8715 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8716                   size_t cnt, loff_t *ppos)
8717 {
8718         ssize_t ret;
8719         char *buf;
8720         int r;
8721
8722         /* 256 should be plenty to hold the amount needed */
8723         buf = kmalloc(256, GFP_KERNEL);
8724         if (!buf)
8725                 return -ENOMEM;
8726
8727         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8728                       ftrace_update_tot_cnt,
8729                       ftrace_number_of_pages,
8730                       ftrace_number_of_groups);
8731
8732         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8733         kfree(buf);
8734         return ret;
8735 }
8736
8737 static const struct file_operations tracing_dyn_info_fops = {
8738         .open           = tracing_open_generic,
8739         .read           = tracing_read_dyn_info,
8740         .llseek         = generic_file_llseek,
8741 };
8742 #endif /* CONFIG_DYNAMIC_FTRACE */
8743
8744 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8745 static void
8746 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8747                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8748                 void *data)
8749 {
8750         tracing_snapshot_instance(tr);
8751 }
8752
8753 static void
8754 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8755                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8756                       void *data)
8757 {
8758         struct ftrace_func_mapper *mapper = data;
8759         long *count = NULL;
8760
8761         if (mapper)
8762                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8763
8764         if (count) {
8765
8766                 if (*count <= 0)
8767                         return;
8768
8769                 (*count)--;
8770         }
8771
8772         tracing_snapshot_instance(tr);
8773 }
8774
8775 static int
8776 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8777                       struct ftrace_probe_ops *ops, void *data)
8778 {
8779         struct ftrace_func_mapper *mapper = data;
8780         long *count = NULL;
8781
8782         seq_printf(m, "%ps:", (void *)ip);
8783
8784         seq_puts(m, "snapshot");
8785
8786         if (mapper)
8787                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8788
8789         if (count)
8790                 seq_printf(m, ":count=%ld\n", *count);
8791         else
8792                 seq_puts(m, ":unlimited\n");
8793
8794         return 0;
8795 }
8796
8797 static int
8798 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8799                      unsigned long ip, void *init_data, void **data)
8800 {
8801         struct ftrace_func_mapper *mapper = *data;
8802
8803         if (!mapper) {
8804                 mapper = allocate_ftrace_func_mapper();
8805                 if (!mapper)
8806                         return -ENOMEM;
8807                 *data = mapper;
8808         }
8809
8810         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8811 }
8812
8813 static void
8814 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8815                      unsigned long ip, void *data)
8816 {
8817         struct ftrace_func_mapper *mapper = data;
8818
8819         if (!ip) {
8820                 if (!mapper)
8821                         return;
8822                 free_ftrace_func_mapper(mapper, NULL);
8823                 return;
8824         }
8825
8826         ftrace_func_mapper_remove_ip(mapper, ip);
8827 }
8828
8829 static struct ftrace_probe_ops snapshot_probe_ops = {
8830         .func                   = ftrace_snapshot,
8831         .print                  = ftrace_snapshot_print,
8832 };
8833
8834 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8835         .func                   = ftrace_count_snapshot,
8836         .print                  = ftrace_snapshot_print,
8837         .init                   = ftrace_snapshot_init,
8838         .free                   = ftrace_snapshot_free,
8839 };
8840
8841 static int
8842 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8843                                char *glob, char *cmd, char *param, int enable)
8844 {
8845         struct ftrace_probe_ops *ops;
8846         void *count = (void *)-1;
8847         char *number;
8848         int ret;
8849
8850         if (!tr)
8851                 return -ENODEV;
8852
8853         /* hash funcs only work with set_ftrace_filter */
8854         if (!enable)
8855                 return -EINVAL;
8856
8857         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8858
8859         if (glob[0] == '!')
8860                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8861
8862         if (!param)
8863                 goto out_reg;
8864
8865         number = strsep(&param, ":");
8866
8867         if (!strlen(number))
8868                 goto out_reg;
8869
8870         /*
8871          * We use the callback data field (which is a pointer)
8872          * as our counter.
8873          */
8874         ret = kstrtoul(number, 0, (unsigned long *)&count);
8875         if (ret)
8876                 return ret;
8877
8878  out_reg:
8879         ret = tracing_alloc_snapshot_instance(tr);
8880         if (ret < 0)
8881                 goto out;
8882
8883         ret = register_ftrace_function_probe(glob, tr, ops, count);
8884
8885  out:
8886         return ret < 0 ? ret : 0;
8887 }
8888
8889 static struct ftrace_func_command ftrace_snapshot_cmd = {
8890         .name                   = "snapshot",
8891         .func                   = ftrace_trace_snapshot_callback,
8892 };
8893
8894 static __init int register_snapshot_cmd(void)
8895 {
8896         return register_ftrace_command(&ftrace_snapshot_cmd);
8897 }
8898 #else
8899 static inline __init int register_snapshot_cmd(void) { return 0; }
8900 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
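
/*
 * Illustrative user-space sketch (never built): arming the "snapshot"
 * command registered above via set_ftrace_filter.  The function name is
 * made up, the optional ":1" is the counter parsed by the callback, and
 * a leading '!' unregisters the probe.  The path is an assumption.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void write_filter(const char *cmd)
{
	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);

	if (fd < 0)
		return;
	write(fd, cmd, strlen(cmd));
	close(fd);
}

int main(void)
{
	write_filter("example_function:snapshot:1");	/* snapshot once   */
	write_filter("!example_function:snapshot");	/* remove it again */
	return 0;
}
#endif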
8901
8902 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8903 {
8904         if (WARN_ON(!tr->dir))
8905                 return ERR_PTR(-ENODEV);
8906
8907         /* Top directory uses NULL as the parent */
8908         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8909                 return NULL;
8910
8911         /* All sub buffers have a descriptor */
8912         return tr->dir;
8913 }
8914
8915 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8916 {
8917         struct dentry *d_tracer;
8918
8919         if (tr->percpu_dir)
8920                 return tr->percpu_dir;
8921
8922         d_tracer = tracing_get_dentry(tr);
8923         if (IS_ERR(d_tracer))
8924                 return NULL;
8925
8926         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8927
8928         MEM_FAIL(!tr->percpu_dir,
8929                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8930
8931         return tr->percpu_dir;
8932 }
8933
8934 static struct dentry *
8935 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8936                       void *data, long cpu, const struct file_operations *fops)
8937 {
8938         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8939
8940         if (ret) /* See tracing_get_cpu() */
8941                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8942         return ret;
8943 }
8944
8945 static void
8946 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8947 {
8948         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8949         struct dentry *d_cpu;
8950         char cpu_dir[30]; /* 30 characters should be more than enough */
8951
8952         if (!d_percpu)
8953                 return;
8954
8955         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8956         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8957         if (!d_cpu) {
8958                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8959                 return;
8960         }
8961
8962         /* per cpu trace_pipe */
8963         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8964                                 tr, cpu, &tracing_pipe_fops);
8965
8966         /* per cpu trace */
8967         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8968                                 tr, cpu, &tracing_fops);
8969
8970         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8971                                 tr, cpu, &tracing_buffers_fops);
8972
8973         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8974                                 tr, cpu, &tracing_stats_fops);
8975
8976         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8977                                 tr, cpu, &tracing_entries_fops);
8978
8979 #ifdef CONFIG_TRACER_SNAPSHOT
8980         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8981                                 tr, cpu, &snapshot_fops);
8982
8983         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8984                                 tr, cpu, &snapshot_raw_fops);
8985 #endif
8986 }
8987
8988 #ifdef CONFIG_FTRACE_SELFTEST
8989 /* Let selftest have access to static functions in this file */
8990 #include "trace_selftest.c"
8991 #endif
8992
8993 static ssize_t
8994 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8995                         loff_t *ppos)
8996 {
8997         struct trace_option_dentry *topt = filp->private_data;
8998         char *buf;
8999
9000         if (topt->flags->val & topt->opt->bit)
9001                 buf = "1\n";
9002         else
9003                 buf = "0\n";
9004
9005         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9006 }
9007
9008 static ssize_t
9009 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9010                          loff_t *ppos)
9011 {
9012         struct trace_option_dentry *topt = filp->private_data;
9013         unsigned long val;
9014         int ret;
9015
9016         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9017         if (ret)
9018                 return ret;
9019
9020         if (val != 0 && val != 1)
9021                 return -EINVAL;
9022
9023         if (!!(topt->flags->val & topt->opt->bit) != val) {
9024                 mutex_lock(&trace_types_lock);
9025                 ret = __set_tracer_option(topt->tr, topt->flags,
9026                                           topt->opt, !val);
9027                 mutex_unlock(&trace_types_lock);
9028                 if (ret)
9029                         return ret;
9030         }
9031
9032         *ppos += cnt;
9033
9034         return cnt;
9035 }
9036
9037 static int tracing_open_options(struct inode *inode, struct file *filp)
9038 {
9039         struct trace_option_dentry *topt = inode->i_private;
9040         int ret;
9041
9042         ret = tracing_check_open_get_tr(topt->tr);
9043         if (ret)
9044                 return ret;
9045
9046         filp->private_data = inode->i_private;
9047         return 0;
9048 }
9049
9050 static int tracing_release_options(struct inode *inode, struct file *file)
9051 {
9052         struct trace_option_dentry *topt = file->private_data;
9053
9054         trace_array_put(topt->tr);
9055         return 0;
9056 }
9057
9058 static const struct file_operations trace_options_fops = {
9059         .open = tracing_open_options,
9060         .read = trace_options_read,
9061         .write = trace_options_write,
9062         .llseek = generic_file_llseek,
9063         .release = tracing_release_options,
9064 };
9065
9066 /*
9067  * In order to pass in both the trace_array descriptor and the index of
9068  * the flag that the trace option file represents, the trace_array
9069  * has a character array of trace_flags_index[], which holds the index
9070  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9071  * The address of this character array is passed to the flag option file
9072  * read/write callbacks.
9073  *
9074  * In order to extract both the index and the trace_array descriptor,
9075  * get_tr_index() uses the following algorithm.
9076  *
9077  *   idx = *ptr;
9078  *
9079  * The pointer points at one element of that index array, and the
9080  * element's value is its own position (remember index[1] == 1).
9081  *
9082  * To get the trace_array descriptor, we then subtract that index
9083  * from the pointer, which lands us at the start of the index array:
9084  *
9085  *   ptr - idx == &index[0]
9086  *
9087  * Then a simple container_of() from that pointer gets us to the
9088  * trace_array descriptor.
9089  */
9090 static void get_tr_index(void *data, struct trace_array **ptr,
9091                          unsigned int *pindex)
9092 {
9093         *pindex = *(unsigned char *)data;
9094
9095         *ptr = container_of(data - *pindex, struct trace_array,
9096                             trace_flags_index);
9097 }
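
/*
 * Stand-alone user-space re-creation of the pointer trick documented
 * above (never built): an index array whose elements hold their own
 * position, so a pointer to one element is enough to recover both the
 * index and the containing structure.  All names are made up.
 */
#if 0
#include <stddef.h>
#include <stdio.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_array {
	unsigned int flags;
	unsigned char index[8];	/* index[i] == i, like trace_flags_index */
};

int main(void)
{
	struct demo_array d = { .flags = 0 };
	unsigned char *data;
	unsigned int idx;
	struct demo_array *back;
	int i;

	for (i = 0; i < 8; i++)
		d.index[i] = i;

	/* What would be stored as the option file's private data */
	data = &d.index[3];

	/* What get_tr_index() does to recover index and descriptor */
	idx = *data;
	back = demo_container_of(data - idx, struct demo_array, index);

	printf("idx=%u, recovered descriptor ok: %d\n", idx, back == &d);
	return 0;
}
#endif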
9098
9099 static ssize_t
9100 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9101                         loff_t *ppos)
9102 {
9103         void *tr_index = filp->private_data;
9104         struct trace_array *tr;
9105         unsigned int index;
9106         char *buf;
9107
9108         get_tr_index(tr_index, &tr, &index);
9109
9110         if (tr->trace_flags & (1 << index))
9111                 buf = "1\n";
9112         else
9113                 buf = "0\n";
9114
9115         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9116 }
9117
9118 static ssize_t
9119 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9120                          loff_t *ppos)
9121 {
9122         void *tr_index = filp->private_data;
9123         struct trace_array *tr;
9124         unsigned int index;
9125         unsigned long val;
9126         int ret;
9127
9128         get_tr_index(tr_index, &tr, &index);
9129
9130         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9131         if (ret)
9132                 return ret;
9133
9134         if (val != 0 && val != 1)
9135                 return -EINVAL;
9136
9137         mutex_lock(&event_mutex);
9138         mutex_lock(&trace_types_lock);
9139         ret = set_tracer_flag(tr, 1 << index, val);
9140         mutex_unlock(&trace_types_lock);
9141         mutex_unlock(&event_mutex);
9142
9143         if (ret < 0)
9144                 return ret;
9145
9146         *ppos += cnt;
9147
9148         return cnt;
9149 }
9150
9151 static const struct file_operations trace_options_core_fops = {
9152         .open = tracing_open_generic,
9153         .read = trace_options_core_read,
9154         .write = trace_options_core_write,
9155         .llseek = generic_file_llseek,
9156 };
9157
9158 struct dentry *trace_create_file(const char *name,
9159                                  umode_t mode,
9160                                  struct dentry *parent,
9161                                  void *data,
9162                                  const struct file_operations *fops)
9163 {
9164         struct dentry *ret;
9165
9166         ret = tracefs_create_file(name, mode, parent, data, fops);
9167         if (!ret)
9168                 pr_warn("Could not create tracefs '%s' entry\n", name);
9169
9170         return ret;
9171 }
9172
9173
9174 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9175 {
9176         struct dentry *d_tracer;
9177
9178         if (tr->options)
9179                 return tr->options;
9180
9181         d_tracer = tracing_get_dentry(tr);
9182         if (IS_ERR(d_tracer))
9183                 return NULL;
9184
9185         tr->options = tracefs_create_dir("options", d_tracer);
9186         if (!tr->options) {
9187                 pr_warn("Could not create tracefs directory 'options'\n");
9188                 return NULL;
9189         }
9190
9191         return tr->options;
9192 }
9193
9194 static void
9195 create_trace_option_file(struct trace_array *tr,
9196                          struct trace_option_dentry *topt,
9197                          struct tracer_flags *flags,
9198                          struct tracer_opt *opt)
9199 {
9200         struct dentry *t_options;
9201
9202         t_options = trace_options_init_dentry(tr);
9203         if (!t_options)
9204                 return;
9205
9206         topt->flags = flags;
9207         topt->opt = opt;
9208         topt->tr = tr;
9209
9210         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9211                                         t_options, topt, &trace_options_fops);
9212
9213 }
9214
9215 static void
9216 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9217 {
9218         struct trace_option_dentry *topts;
9219         struct trace_options *tr_topts;
9220         struct tracer_flags *flags;
9221         struct tracer_opt *opts;
9222         int cnt;
9223         int i;
9224
9225         if (!tracer)
9226                 return;
9227
9228         flags = tracer->flags;
9229
9230         if (!flags || !flags->opts)
9231                 return;
9232
9233         /*
9234          * If this is an instance, only create flags for tracers
9235          * the instance may have.
9236          */
9237         if (!trace_ok_for_array(tracer, tr))
9238                 return;
9239
9240         for (i = 0; i < tr->nr_topts; i++) {
9241                 /* Make sure there are no duplicate flags. */
9242                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9243                         return;
9244         }
9245
9246         opts = flags->opts;
9247
9248         for (cnt = 0; opts[cnt].name; cnt++)
9249                 ;
9250
9251         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9252         if (!topts)
9253                 return;
9254
9255         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9256                             GFP_KERNEL);
9257         if (!tr_topts) {
9258                 kfree(topts);
9259                 return;
9260         }
9261
9262         tr->topts = tr_topts;
9263         tr->topts[tr->nr_topts].tracer = tracer;
9264         tr->topts[tr->nr_topts].topts = topts;
9265         tr->nr_topts++;
9266
9267         for (cnt = 0; opts[cnt].name; cnt++) {
9268                 create_trace_option_file(tr, &topts[cnt], flags,
9269                                          &opts[cnt]);
9270                 MEM_FAIL(topts[cnt].entry == NULL,
9271                           "Failed to create trace option: %s",
9272                           opts[cnt].name);
9273         }
9274 }
9275
9276 static struct dentry *
9277 create_trace_option_core_file(struct trace_array *tr,
9278                               const char *option, long index)
9279 {
9280         struct dentry *t_options;
9281
9282         t_options = trace_options_init_dentry(tr);
9283         if (!t_options)
9284                 return NULL;
9285
9286         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9287                                  (void *)&tr->trace_flags_index[index],
9288                                  &trace_options_core_fops);
9289 }
9290
9291 static void create_trace_options_dir(struct trace_array *tr)
9292 {
9293         struct dentry *t_options;
9294         bool top_level = tr == &global_trace;
9295         int i;
9296
9297         t_options = trace_options_init_dentry(tr);
9298         if (!t_options)
9299                 return;
9300
9301         for (i = 0; trace_options[i]; i++) {
9302                 if (top_level ||
9303                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9304                         create_trace_option_core_file(tr, trace_options[i], i);
9305         }
9306 }
9307
9308 static ssize_t
9309 rb_simple_read(struct file *filp, char __user *ubuf,
9310                size_t cnt, loff_t *ppos)
9311 {
9312         struct trace_array *tr = filp->private_data;
9313         char buf[64];
9314         int r;
9315
9316         r = tracer_tracing_is_on(tr);
9317         r = sprintf(buf, "%d\n", r);
9318
9319         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9320 }
9321
9322 static ssize_t
9323 rb_simple_write(struct file *filp, const char __user *ubuf,
9324                 size_t cnt, loff_t *ppos)
9325 {
9326         struct trace_array *tr = filp->private_data;
9327         struct trace_buffer *buffer = tr->array_buffer.buffer;
9328         unsigned long val;
9329         int ret;
9330
9331         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9332         if (ret)
9333                 return ret;
9334
9335         if (buffer) {
9336                 mutex_lock(&trace_types_lock);
9337                 if (!!val == tracer_tracing_is_on(tr)) {
9338                         val = 0; /* do nothing */
9339                 } else if (val) {
9340                         tracer_tracing_on(tr);
9341                         if (tr->current_trace->start)
9342                                 tr->current_trace->start(tr);
9343                 } else {
9344                         tracer_tracing_off(tr);
9345                         if (tr->current_trace->stop)
9346                                 tr->current_trace->stop(tr);
9347                         /* Wake up any waiters */
9348                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9349                 }
9350                 mutex_unlock(&trace_types_lock);
9351         }
9352
9353         (*ppos)++;
9354
9355         return cnt;
9356 }
9357
9358 static const struct file_operations rb_simple_fops = {
9359         .open           = tracing_open_generic_tr,
9360         .read           = rb_simple_read,
9361         .write          = rb_simple_write,
9362         .release        = tracing_release_generic_tr,
9363         .llseek         = default_llseek,
9364 };
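
/*
 * Illustrative user-space sketch (never built): toggling the control
 * file backed by rb_simple_fops ("tracing_on" in the upstream tree; the
 * file name and the tracefs mount point are assumptions here).  Writing
 * "0" stops the tracer and wakes any waiters, a non-zero value starts it.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void tracing_switch(const char *val)
{
	int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);

	if (fd < 0)
		return;
	write(fd, val, 1);
	close(fd);
}

int main(void)
{
	tracing_switch("0");	/* tracer_tracing_off() + wake waiters */
	tracing_switch("1");	/* tracer_tracing_on()                 */
	return 0;
}
#endif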
9365
9366 static ssize_t
9367 buffer_percent_read(struct file *filp, char __user *ubuf,
9368                     size_t cnt, loff_t *ppos)
9369 {
9370         struct trace_array *tr = filp->private_data;
9371         char buf[64];
9372         int r;
9373
9374         r = tr->buffer_percent;
9375         r = sprintf(buf, "%d\n", r);
9376
9377         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9378 }
9379
9380 static ssize_t
9381 buffer_percent_write(struct file *filp, const char __user *ubuf,
9382                      size_t cnt, loff_t *ppos)
9383 {
9384         struct trace_array *tr = filp->private_data;
9385         unsigned long val;
9386         int ret;
9387
9388         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9389         if (ret)
9390                 return ret;
9391
9392         if (val > 100)
9393                 return -EINVAL;
9394
9395         tr->buffer_percent = val;
9396
9397         (*ppos)++;
9398
9399         return cnt;
9400 }
9401
9402 static const struct file_operations buffer_percent_fops = {
9403         .open           = tracing_open_generic_tr,
9404         .read           = buffer_percent_read,
9405         .write          = buffer_percent_write,
9406         .release        = tracing_release_generic_tr,
9407         .llseek         = default_llseek,
9408 };
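
/*
 * A minimal usage sketch: "buffer_percent" sets how full the ring buffer
 * must be before blocked readers (e.g. of trace_pipe_raw) are woken: 0 wakes
 * them on any data, 100 waits until the buffer is effectively full. Assuming
 * tracefs at /sys/kernel/tracing:
 *
 *     echo 50 > /sys/kernel/tracing/buffer_percent
 */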
9409
9410 static ssize_t
9411 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9412 {
9413         struct trace_array *tr = filp->private_data;
9414         size_t size;
9415         char buf[64];
9416         int order;
9417         int r;
9418
9419         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9420         size = (PAGE_SIZE << order) / 1024;
9421
9422         r = sprintf(buf, "%zd\n", size);
9423
9424         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9425 }
9426
9427 static ssize_t
9428 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9429                          size_t cnt, loff_t *ppos)
9430 {
9431         struct trace_array *tr = filp->private_data;
9432         unsigned long val;
9433         int old_order;
9434         int order;
9435         int pages;
9436         int ret;
9437
9438         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9439         if (ret)
9440                 return ret;
9441
9442         val *= 1024; /* value passed in is in KB */
9443
9444         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9445         order = fls(pages - 1);
9446
9447         /* limit between 1 and 128 system pages */
9448         if (order < 0 || order > 7)
9449                 return -EINVAL;
9450
9451         /* Do not allow tracing while changing the order of the ring buffer */
9452         tracing_stop_tr(tr);
9453
9454         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9455         if (old_order == order)
9456                 goto out;
9457
9458         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9459         if (ret)
9460                 goto out;
9461
9462 #ifdef CONFIG_TRACER_MAX_TRACE
9463
9464         if (!tr->allocated_snapshot)
9465                 goto out_max;
9466
9467         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9468         if (ret) {
9469                 /* Put back the old order */
9470                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9471                 if (WARN_ON_ONCE(cnt)) {
9472                         /*
9473                          * AARGH! We are left with different orders!
9474                          * The max buffer is our "snapshot" buffer.
9475                          * When a tracer needs a snapshot (one of the
9476                          * latency tracers), it swaps the max buffer
9477                          * with the saved snapshot. We succeeded in
9478                          * updating the order of the main buffer, but failed
9479                          * to update the order of the max buffer. And when we
9480                          * tried to reset the main buffer to its original
9481                          * order, that failed too. This is very unlikely to
9482                          * happen, but if it does, warn and kill all
9483                          * tracing.
9484                          */
9485                         tracing_disabled = 1;
9486                 }
9487                 goto out;
9488         }
9489  out_max:
9490 #endif
9491         (*ppos)++;
9492  out:
9493         if (ret)
9494                 cnt = ret;
9495         tracing_start_tr(tr);
9496         return cnt;
9497 }
9498
9499 static const struct file_operations buffer_subbuf_size_fops = {
9500         .open           = tracing_open_generic_tr,
9501         .read           = buffer_subbuf_size_read,
9502         .write          = buffer_subbuf_size_write,
9503         .release        = tracing_release_generic_tr,
9504         .llseek         = default_llseek,
9505 };
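
/*
 * Worked example of the order math above, assuming 4K pages: writing "64"
 * asks for 64 KB sub-buffers, which is 16 pages; fls(16 - 1) = 4, so order 4
 * is used and the file reads back as 64. Writing "100" rounds up: 100 KB
 * needs 25 pages, fls(25 - 1) = 5, giving order 5 (32 pages), so the file
 * reads back as 128.
 *
 *     echo 64 > /sys/kernel/tracing/buffer_subbuf_size_kb
 *     cat /sys/kernel/tracing/buffer_subbuf_size_kb    # -> 64
 */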
9506
9507 static struct dentry *trace_instance_dir;
9508
9509 static void
9510 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9511
9512 static int
9513 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9514 {
9515         enum ring_buffer_flags rb_flags;
9516
9517         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9518
9519         buf->tr = tr;
9520
9521         buf->buffer = ring_buffer_alloc(size, rb_flags);
9522         if (!buf->buffer)
9523                 return -ENOMEM;
9524
9525         buf->data = alloc_percpu(struct trace_array_cpu);
9526         if (!buf->data) {
9527                 ring_buffer_free(buf->buffer);
9528                 buf->buffer = NULL;
9529                 return -ENOMEM;
9530         }
9531
9532         /* Allocate the first page for all buffers */
9533         set_buffer_entries(&tr->array_buffer,
9534                            ring_buffer_size(tr->array_buffer.buffer, 0));
9535
9536         return 0;
9537 }
9538
9539 static void free_trace_buffer(struct array_buffer *buf)
9540 {
9541         if (buf->buffer) {
9542                 ring_buffer_free(buf->buffer);
9543                 buf->buffer = NULL;
9544                 free_percpu(buf->data);
9545                 buf->data = NULL;
9546         }
9547 }
9548
9549 static int allocate_trace_buffers(struct trace_array *tr, int size)
9550 {
9551         int ret;
9552
9553         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9554         if (ret)
9555                 return ret;
9556
9557 #ifdef CONFIG_TRACER_MAX_TRACE
9558         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9559                                     allocate_snapshot ? size : 1);
9560         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9561                 free_trace_buffer(&tr->array_buffer);
9562                 return -ENOMEM;
9563         }
9564         tr->allocated_snapshot = allocate_snapshot;
9565
9566         allocate_snapshot = false;
9567 #endif
9568
9569         return 0;
9570 }
9571
9572 static void free_trace_buffers(struct trace_array *tr)
9573 {
9574         if (!tr)
9575                 return;
9576
9577         free_trace_buffer(&tr->array_buffer);
9578
9579 #ifdef CONFIG_TRACER_MAX_TRACE
9580         free_trace_buffer(&tr->max_buffer);
9581 #endif
9582 }
9583
9584 static void init_trace_flags_index(struct trace_array *tr)
9585 {
9586         int i;
9587
9588         /* Used by the trace options files */
9589         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9590                 tr->trace_flags_index[i] = i;
9591 }
9592
9593 static void __update_tracer_options(struct trace_array *tr)
9594 {
9595         struct tracer *t;
9596
9597         for (t = trace_types; t; t = t->next)
9598                 add_tracer_options(tr, t);
9599 }
9600
9601 static void update_tracer_options(struct trace_array *tr)
9602 {
9603         mutex_lock(&trace_types_lock);
9604         tracer_options_updated = true;
9605         __update_tracer_options(tr);
9606         mutex_unlock(&trace_types_lock);
9607 }
9608
9609 /* Must have trace_types_lock held */
9610 struct trace_array *trace_array_find(const char *instance)
9611 {
9612         struct trace_array *tr, *found = NULL;
9613
9614         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9615                 if (tr->name && strcmp(tr->name, instance) == 0) {
9616                         found = tr;
9617                         break;
9618                 }
9619         }
9620
9621         return found;
9622 }
9623
9624 struct trace_array *trace_array_find_get(const char *instance)
9625 {
9626         struct trace_array *tr;
9627
9628         mutex_lock(&trace_types_lock);
9629         tr = trace_array_find(instance);
9630         if (tr)
9631                 tr->ref++;
9632         mutex_unlock(&trace_types_lock);
9633
9634         return tr;
9635 }
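
/*
 * A minimal usage sketch: a kernel caller that looks up an existing instance
 * must drop the reference it was handed once done, e.g.:
 *
 *     struct trace_array *tr;
 *
 *     tr = trace_array_find_get("foo");
 *     if (tr) {
 *             ... use tr ...
 *             trace_array_put(tr);
 *     }
 */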
9636
9637 static int trace_array_create_dir(struct trace_array *tr)
9638 {
9639         int ret;
9640
9641         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9642         if (!tr->dir)
9643                 return -EINVAL;
9644
9645         ret = event_trace_add_tracer(tr->dir, tr);
9646         if (ret) {
9647                 tracefs_remove(tr->dir);
9648                 return ret;
9649         }
9650
9651         init_tracer_tracefs(tr, tr->dir);
9652         __update_tracer_options(tr);
9653
9654         return ret;
9655 }
9656
9657 static struct trace_array *
9658 trace_array_create_systems(const char *name, const char *systems)
9659 {
9660         struct trace_array *tr;
9661         int ret;
9662
9663         ret = -ENOMEM;
9664         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9665         if (!tr)
9666                 return ERR_PTR(ret);
9667
9668         tr->name = kstrdup(name, GFP_KERNEL);
9669         if (!tr->name)
9670                 goto out_free_tr;
9671
9672         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9673                 goto out_free_tr;
9674
9675         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9676                 goto out_free_tr;
9677
9678         if (systems) {
9679                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9680                 if (!tr->system_names)
9681                         goto out_free_tr;
9682         }
9683
9684         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9685
9686         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9687
9688         raw_spin_lock_init(&tr->start_lock);
9689
9690         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9691
9692         tr->current_trace = &nop_trace;
9693
9694         INIT_LIST_HEAD(&tr->systems);
9695         INIT_LIST_HEAD(&tr->events);
9696         INIT_LIST_HEAD(&tr->hist_vars);
9697         INIT_LIST_HEAD(&tr->err_log);
9698
9699         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9700                 goto out_free_tr;
9701
9702         /* The ring buffer is expanded by default */
9703         trace_set_ring_buffer_expanded(tr);
9704
9705         if (ftrace_allocate_ftrace_ops(tr) < 0)
9706                 goto out_free_tr;
9707
9708         ftrace_init_trace_array(tr);
9709
9710         init_trace_flags_index(tr);
9711
9712         if (trace_instance_dir) {
9713                 ret = trace_array_create_dir(tr);
9714                 if (ret)
9715                         goto out_free_tr;
9716         } else
9717                 __trace_early_add_events(tr);
9718
9719         list_add(&tr->list, &ftrace_trace_arrays);
9720
9721         tr->ref++;
9722
9723         return tr;
9724
9725  out_free_tr:
9726         ftrace_free_ftrace_ops(tr);
9727         free_trace_buffers(tr);
9728         free_cpumask_var(tr->pipe_cpumask);
9729         free_cpumask_var(tr->tracing_cpumask);
9730         kfree_const(tr->system_names);
9731         kfree(tr->name);
9732         kfree(tr);
9733
9734         return ERR_PTR(ret);
9735 }
9736
9737 static struct trace_array *trace_array_create(const char *name)
9738 {
9739         return trace_array_create_systems(name, NULL);
9740 }
9741
9742 static int instance_mkdir(const char *name)
9743 {
9744         struct trace_array *tr;
9745         int ret;
9746
9747         mutex_lock(&event_mutex);
9748         mutex_lock(&trace_types_lock);
9749
9750         ret = -EEXIST;
9751         if (trace_array_find(name))
9752                 goto out_unlock;
9753
9754         tr = trace_array_create(name);
9755
9756         ret = PTR_ERR_OR_ZERO(tr);
9757
9758 out_unlock:
9759         mutex_unlock(&trace_types_lock);
9760         mutex_unlock(&event_mutex);
9761         return ret;
9762 }
9763
9764 /**
9765  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9766  * @name: The name of the trace array to be looked up/created.
9767  * @systems: A list of systems to create event directories for (NULL for all)
9768  *
9769  * Returns a pointer to the trace array with the given name, or NULL if
9770  * it cannot be created.
9771  *
9772  * NOTE: This function increments the reference counter associated with the
9773  * trace array returned. This makes sure it cannot be freed while in use.
9774  * Use trace_array_put() once the trace array is no longer needed.
9775  * If the trace_array is to be freed, trace_array_destroy() needs to
9776  * be called after the trace_array_put(), or simply let user space delete
9777  * it from the tracefs instances directory. But until the
9778  * trace_array_put() is called, user space cannot delete it.
9779  */
9781 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9782 {
9783         struct trace_array *tr;
9784
9785         mutex_lock(&event_mutex);
9786         mutex_lock(&trace_types_lock);
9787
9788         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9789                 if (tr->name && strcmp(tr->name, name) == 0)
9790                         goto out_unlock;
9791         }
9792
9793         tr = trace_array_create_systems(name, systems);
9794
9795         if (IS_ERR(tr))
9796                 tr = NULL;
9797 out_unlock:
9798         if (tr)
9799                 tr->ref++;
9800
9801         mutex_unlock(&trace_types_lock);
9802         mutex_unlock(&event_mutex);
9803         return tr;
9804 }
9805 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
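
/*
 * A minimal usage sketch: a module can create (or look up) its own instance,
 * write into it, and then drop its reference so user space may remove the
 * instance later. trace_array_printk() is assumed here as the exported
 * helper for writing into an instance buffer.
 *
 *     struct trace_array *tr;
 *
 *     tr = trace_array_get_by_name("my_instance", NULL);
 *     if (tr) {
 *             trace_array_printk(tr, _THIS_IP_, "hello from my module\n");
 *             trace_array_put(tr);
 *     }
 */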
9806
9807 static int __remove_instance(struct trace_array *tr)
9808 {
9809         int i;
9810
9811         /* Reference counter for a newly created trace array = 1. */
9812         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9813                 return -EBUSY;
9814
9815         list_del(&tr->list);
9816
9817         /* Disable all the flags that were enabled coming in */
9818         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9819                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9820                         set_tracer_flag(tr, 1 << i, 0);
9821         }
9822
9823         tracing_set_nop(tr);
9824         clear_ftrace_function_probes(tr);
9825         event_trace_del_tracer(tr);
9826         ftrace_clear_pids(tr);
9827         ftrace_destroy_function_files(tr);
9828         tracefs_remove(tr->dir);
9829         free_percpu(tr->last_func_repeats);
9830         free_trace_buffers(tr);
9831         clear_tracing_err_log(tr);
9832
9833         for (i = 0; i < tr->nr_topts; i++) {
9834                 kfree(tr->topts[i].topts);
9835         }
9836         kfree(tr->topts);
9837
9838         free_cpumask_var(tr->pipe_cpumask);
9839         free_cpumask_var(tr->tracing_cpumask);
9840         kfree_const(tr->system_names);
9841         kfree(tr->name);
9842         kfree(tr);
9843
9844         return 0;
9845 }
9846
9847 int trace_array_destroy(struct trace_array *this_tr)
9848 {
9849         struct trace_array *tr;
9850         int ret;
9851
9852         if (!this_tr)
9853                 return -EINVAL;
9854
9855         mutex_lock(&event_mutex);
9856         mutex_lock(&trace_types_lock);
9857
9858         ret = -ENODEV;
9859
9860         /* Make sure the trace array exists before destroying it. */
9861         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9862                 if (tr == this_tr) {
9863                         ret = __remove_instance(tr);
9864                         break;
9865                 }
9866         }
9867
9868         mutex_unlock(&trace_types_lock);
9869         mutex_unlock(&event_mutex);
9870
9871         return ret;
9872 }
9873 EXPORT_SYMBOL_GPL(trace_array_destroy);
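
/*
 * A minimal teardown sketch: per the note on trace_array_get_by_name(), a
 * kernel user that wants the instance gone (instead of leaving it for user
 * space to rmdir) drops its reference first and then destroys the array;
 * the destroy fails with -EBUSY while other references are still held.
 *
 *     trace_array_put(tr);
 *     if (trace_array_destroy(tr))
 *             pr_warn("trace instance still busy, leaving it in place\n");
 */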
9874
9875 static int instance_rmdir(const char *name)
9876 {
9877         struct trace_array *tr;
9878         int ret;
9879
9880         mutex_lock(&event_mutex);
9881         mutex_lock(&trace_types_lock);
9882
9883         ret = -ENODEV;
9884         tr = trace_array_find(name);
9885         if (tr)
9886                 ret = __remove_instance(tr);
9887
9888         mutex_unlock(&trace_types_lock);
9889         mutex_unlock(&event_mutex);
9890
9891         return ret;
9892 }
9893
9894 static __init void create_trace_instances(struct dentry *d_tracer)
9895 {
9896         struct trace_array *tr;
9897
9898         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9899                                                          instance_mkdir,
9900                                                          instance_rmdir);
9901         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9902                 return;
9903
9904         mutex_lock(&event_mutex);
9905         mutex_lock(&trace_types_lock);
9906
9907         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9908                 if (!tr->name)
9909                         continue;
9910                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9911                              "Failed to create instance directory\n"))
9912                         break;
9913         }
9914
9915         mutex_unlock(&trace_types_lock);
9916         mutex_unlock(&event_mutex);
9917 }
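
/*
 * A minimal usage sketch: with the "instances" directory registered above,
 * user space creates and removes trace arrays with plain mkdir/rmdir
 * (assuming tracefs at /sys/kernel/tracing):
 *
 *     mkdir /sys/kernel/tracing/instances/foo
 *     echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *     rmdir /sys/kernel/tracing/instances/foo    # fails with EBUSY if in use
 */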
9918
9919 static void
9920 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9921 {
9922         int cpu;
9923
9924         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9925                         tr, &show_traces_fops);
9926
9927         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9928                         tr, &set_tracer_fops);
9929
9930         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9931                           tr, &tracing_cpumask_fops);
9932
9933         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9934                           tr, &tracing_iter_fops);
9935
9936         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9937                           tr, &tracing_fops);
9938
9939         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9940                           tr, &tracing_pipe_fops);
9941
9942         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9943                           tr, &tracing_entries_fops);
9944
9945         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9946                           tr, &tracing_total_entries_fops);
9947
9948         trace_create_file("free_buffer", 0200, d_tracer,
9949                           tr, &tracing_free_buffer_fops);
9950
9951         trace_create_file("trace_marker", 0220, d_tracer,
9952                           tr, &tracing_mark_fops);
9953
9954         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9955
9956         trace_create_file("trace_marker_raw", 0220, d_tracer,
9957                           tr, &tracing_mark_raw_fops);
9958
9959         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9960                           &trace_clock_fops);
9961
9962         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9963                           tr, &rb_simple_fops);
9964
9965         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9966                           &trace_time_stamp_mode_fops);
9967
9968         tr->buffer_percent = 50;
9969
9970         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9971                         tr, &buffer_percent_fops);
9972
9973         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9974                           tr, &buffer_subbuf_size_fops);
9975
9976         create_trace_options_dir(tr);
9977
9978 #ifdef CONFIG_TRACER_MAX_TRACE
9979         trace_create_maxlat_file(tr, d_tracer);
9980 #endif
9981
9982         if (ftrace_create_function_files(tr, d_tracer))
9983                 MEM_FAIL(1, "Could not allocate function filter files");
9984
9985 #ifdef CONFIG_TRACER_SNAPSHOT
9986         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9987                           tr, &snapshot_fops);
9988 #endif
9989
9990         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9991                           tr, &tracing_err_log_fops);
9992
9993         for_each_tracing_cpu(cpu)
9994                 tracing_init_tracefs_percpu(tr, cpu);
9995
9996         ftrace_init_tracefs(tr, d_tracer);
9997 }
9998
9999 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10000 {
10001         struct vfsmount *mnt;
10002         struct file_system_type *type;
10003
10004         /*
10005          * To maintain backward compatibility for tools that mount
10006          * debugfs to get to the tracing facility, tracefs is automatically
10007          * mounted to the debugfs/tracing directory.
10008          */
10009         type = get_fs_type("tracefs");
10010         if (!type)
10011                 return NULL;
10012         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10013         put_filesystem(type);
10014         if (IS_ERR(mnt))
10015                 return NULL;
10016         mntget(mnt);
10017
10018         return mnt;
10019 }
10020
10021 /**
10022  * tracing_init_dentry - initialize top level trace array
10023  *
10024  * This is called when creating files or directories in the tracing
10025  * directory. It is called via fs_initcall() by any of the boot up code
10026  * and returns 0 on success or a negative error code on failure.
10027  */
10028 int tracing_init_dentry(void)
10029 {
10030         struct trace_array *tr = &global_trace;
10031
10032         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10033                 pr_warn("Tracing disabled due to lockdown\n");
10034                 return -EPERM;
10035         }
10036
10037         /* The top level trace array uses NULL as parent */
10038         if (tr->dir)
10039                 return 0;
10040
10041         if (WARN_ON(!tracefs_initialized()))
10042                 return -ENODEV;
10043
10044         /*
10045          * As there may still be users that expect the tracing
10046          * files to exist in debugfs/tracing, we must automount
10047          * the tracefs file system there, so older tools still
10048          * work with the newer kernel.
10049          */
10050         tr->dir = debugfs_create_automount("tracing", NULL,
10051                                            trace_automount, NULL);
10052
10053         return 0;
10054 }
10055
10056 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10057 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10058
10059 static struct workqueue_struct *eval_map_wq __initdata;
10060 static struct work_struct eval_map_work __initdata;
10061 static struct work_struct tracerfs_init_work __initdata;
10062
10063 static void __init eval_map_work_func(struct work_struct *work)
10064 {
10065         int len;
10066
10067         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10068         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10069 }
10070
10071 static int __init trace_eval_init(void)
10072 {
10073         INIT_WORK(&eval_map_work, eval_map_work_func);
10074
10075         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10076         if (!eval_map_wq) {
10077                 pr_err("Unable to allocate eval_map_wq\n");
10078                 /* Do work here */
10079                 eval_map_work_func(&eval_map_work);
10080                 return -ENOMEM;
10081         }
10082
10083         queue_work(eval_map_wq, &eval_map_work);
10084         return 0;
10085 }
10086
10087 subsys_initcall(trace_eval_init);
10088
10089 static int __init trace_eval_sync(void)
10090 {
10091         /* Make sure the eval map updates are finished */
10092         if (eval_map_wq)
10093                 destroy_workqueue(eval_map_wq);
10094         return 0;
10095 }
10096
10097 late_initcall_sync(trace_eval_sync);
10098
10099
10100 #ifdef CONFIG_MODULES
10101 static void trace_module_add_evals(struct module *mod)
10102 {
10103         if (!mod->num_trace_evals)
10104                 return;
10105
10106         /*
10107          * Modules with a bad taint do not have events created, so do
10108          * not bother with their eval maps (enums) either.
10109          */
10110         if (trace_module_has_bad_taint(mod))
10111                 return;
10112
10113         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10114 }
10115
10116 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10117 static void trace_module_remove_evals(struct module *mod)
10118 {
10119         union trace_eval_map_item *map;
10120         union trace_eval_map_item **last = &trace_eval_maps;
10121
10122         if (!mod->num_trace_evals)
10123                 return;
10124
10125         mutex_lock(&trace_eval_mutex);
10126
10127         map = trace_eval_maps;
10128
10129         while (map) {
10130                 if (map->head.mod == mod)
10131                         break;
10132                 map = trace_eval_jmp_to_tail(map);
10133                 last = &map->tail.next;
10134                 map = map->tail.next;
10135         }
10136         if (!map)
10137                 goto out;
10138
10139         *last = trace_eval_jmp_to_tail(map)->tail.next;
10140         kfree(map);
10141  out:
10142         mutex_unlock(&trace_eval_mutex);
10143 }
10144 #else
10145 static inline void trace_module_remove_evals(struct module *mod) { }
10146 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10147
10148 static int trace_module_notify(struct notifier_block *self,
10149                                unsigned long val, void *data)
10150 {
10151         struct module *mod = data;
10152
10153         switch (val) {
10154         case MODULE_STATE_COMING:
10155                 trace_module_add_evals(mod);
10156                 break;
10157         case MODULE_STATE_GOING:
10158                 trace_module_remove_evals(mod);
10159                 break;
10160         }
10161
10162         return NOTIFY_OK;
10163 }
10164
10165 static struct notifier_block trace_module_nb = {
10166         .notifier_call = trace_module_notify,
10167         .priority = 0,
10168 };
10169 #endif /* CONFIG_MODULES */
10170
10171 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10172 {
10173
10174         event_trace_init();
10175
10176         init_tracer_tracefs(&global_trace, NULL);
10177         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10178
10179         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10180                         &global_trace, &tracing_thresh_fops);
10181
10182         trace_create_file("README", TRACE_MODE_READ, NULL,
10183                         NULL, &tracing_readme_fops);
10184
10185         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10186                         NULL, &tracing_saved_cmdlines_fops);
10187
10188         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10189                           NULL, &tracing_saved_cmdlines_size_fops);
10190
10191         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10192                         NULL, &tracing_saved_tgids_fops);
10193
10194         trace_create_eval_file(NULL);
10195
10196 #ifdef CONFIG_MODULES
10197         register_module_notifier(&trace_module_nb);
10198 #endif
10199
10200 #ifdef CONFIG_DYNAMIC_FTRACE
10201         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10202                         NULL, &tracing_dyn_info_fops);
10203 #endif
10204
10205         create_trace_instances(NULL);
10206
10207         update_tracer_options(&global_trace);
10208 }
10209
10210 static __init int tracer_init_tracefs(void)
10211 {
10212         int ret;
10213
10214         trace_access_lock_init();
10215
10216         ret = tracing_init_dentry();
10217         if (ret)
10218                 return 0;
10219
10220         if (eval_map_wq) {
10221                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10222                 queue_work(eval_map_wq, &tracerfs_init_work);
10223         } else {
10224                 tracer_init_tracefs_work_func(NULL);
10225         }
10226
10227         rv_init_interface();
10228
10229         return 0;
10230 }
10231
10232 fs_initcall(tracer_init_tracefs);
10233
10234 static int trace_die_panic_handler(struct notifier_block *self,
10235                                 unsigned long ev, void *unused);
10236
10237 static struct notifier_block trace_panic_notifier = {
10238         .notifier_call = trace_die_panic_handler,
10239         .priority = INT_MAX - 1,
10240 };
10241
10242 static struct notifier_block trace_die_notifier = {
10243         .notifier_call = trace_die_panic_handler,
10244         .priority = INT_MAX - 1,
10245 };
10246
10247 /*
10248  * The idea is to execute the following die/panic callback early, in order
10249  * to avoid showing irrelevant information in the trace (like other panic
10250  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10251  * warnings get disabled (to prevent potential log flooding).
10252  */
10253 static int trace_die_panic_handler(struct notifier_block *self,
10254                                 unsigned long ev, void *unused)
10255 {
10256         if (!ftrace_dump_on_oops)
10257                 return NOTIFY_DONE;
10258
10259         /* The die notifier requires DIE_OOPS to trigger */
10260         if (self == &trace_die_notifier && ev != DIE_OOPS)
10261                 return NOTIFY_DONE;
10262
10263         ftrace_dump(ftrace_dump_on_oops);
10264
10265         return NOTIFY_DONE;
10266 }
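
/*
 * A minimal usage sketch: this handler only dumps when dumping on oops is
 * enabled, either on the kernel command line or at run time via sysctl
 * (option names as documented for ftrace; treat the exact spellings as
 * assumptions here):
 *
 *     ftrace_dump_on_oops              # boot parameter, dump all CPUs
 *     ftrace_dump_on_oops=orig_cpu     # dump only the CPU that oopsed
 *     sysctl kernel.ftrace_dump_on_oops=1
 */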
10267
10268 /*
10269  * printk is limited to a maximum of 1024 characters; we really don't
10270  * need it that big. Nothing should be printing 1000 characters anyway.
10271  */
10272 #define TRACE_MAX_PRINT         1000
10273
10274 /*
10275  * Define here KERN_TRACE so that we have one place to modify
10276  * it if we decide to change what log level the ftrace dump
10277  * should be at.
10278  */
10279 #define KERN_TRACE              KERN_EMERG
10280
10281 void
10282 trace_printk_seq(struct trace_seq *s)
10283 {
10284         /* Probably should print a warning here. */
10285         if (s->seq.len >= TRACE_MAX_PRINT)
10286                 s->seq.len = TRACE_MAX_PRINT;
10287
10288         /*
10289          * More paranoid code. Although the buffer size is set to
10290          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10291          * an extra layer of protection.
10292          */
10293         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10294                 s->seq.len = s->seq.size - 1;
10295
10296         /* Should already be NUL terminated, but we are paranoid. */
10297         s->buffer[s->seq.len] = 0;
10298
10299         printk(KERN_TRACE "%s", s->buffer);
10300
10301         trace_seq_init(s);
10302 }
10303
10304 void trace_init_global_iter(struct trace_iterator *iter)
10305 {
10306         iter->tr = &global_trace;
10307         iter->trace = iter->tr->current_trace;
10308         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10309         iter->array_buffer = &global_trace.array_buffer;
10310
10311         if (iter->trace && iter->trace->open)
10312                 iter->trace->open(iter);
10313
10314         /* Annotate start of buffers if we had overruns */
10315         if (ring_buffer_overruns(iter->array_buffer->buffer))
10316                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10317
10318         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10319         if (trace_clocks[iter->tr->clock_id].in_ns)
10320                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10321
10322         /* Cannot use kmalloc for iter.temp and iter.fmt */
10323         iter->temp = static_temp_buf;
10324         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10325         iter->fmt = static_fmt_buf;
10326         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10327 }
10328
10329 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10330 {
10331         /* use static because iter can be a bit big for the stack */
10332         static struct trace_iterator iter;
10333         static atomic_t dump_running;
10334         struct trace_array *tr = &global_trace;
10335         unsigned int old_userobj;
10336         unsigned long flags;
10337         int cnt = 0, cpu;
10338
10339         /* Only allow one dump user at a time. */
10340         if (atomic_inc_return(&dump_running) != 1) {
10341                 atomic_dec(&dump_running);
10342                 return;
10343         }
10344
10345         /*
10346          * Always turn off tracing when we dump.
10347          * We don't need to show trace output of what happens
10348          * between multiple crashes.
10349          *
10350          * If the user does a sysrq-z, then they can re-enable
10351          * tracing with echo 1 > tracing_on.
10352          */
10353         tracing_off();
10354
10355         local_irq_save(flags);
10356
10357         /* Simulate the iterator */
10358         trace_init_global_iter(&iter);
10359
10360         for_each_tracing_cpu(cpu) {
10361                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10362         }
10363
10364         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10365
10366         /* don't look at user memory in panic mode */
10367         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10368
10369         switch (oops_dump_mode) {
10370         case DUMP_ALL:
10371                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10372                 break;
10373         case DUMP_ORIG:
10374                 iter.cpu_file = raw_smp_processor_id();
10375                 break;
10376         case DUMP_NONE:
10377                 goto out_enable;
10378         default:
10379                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10380                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10381         }
10382
10383         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10384
10385         /* Did function tracer already get disabled? */
10386         if (ftrace_is_dead()) {
10387                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10388                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10389         }
10390
10391         /*
10392          * We need to stop all tracing on all CPUs to read
10393          * the next buffer. This is a bit expensive, but is
10394          * not done often. We print everything we can read,
10395          * and then release the locks again.
10396          */
10397
10398         while (!trace_empty(&iter)) {
10399
10400                 if (!cnt)
10401                         printk(KERN_TRACE "---------------------------------\n");
10402
10403                 cnt++;
10404
10405                 trace_iterator_reset(&iter);
10406                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10407
10408                 if (trace_find_next_entry_inc(&iter) != NULL) {
10409                         int ret;
10410
10411                         ret = print_trace_line(&iter);
10412                         if (ret != TRACE_TYPE_NO_CONSUME)
10413                                 trace_consume(&iter);
10414                 }
10415                 touch_nmi_watchdog();
10416
10417                 trace_printk_seq(&iter.seq);
10418         }
10419
10420         if (!cnt)
10421                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10422         else
10423                 printk(KERN_TRACE "---------------------------------\n");
10424
10425  out_enable:
10426         tr->trace_flags |= old_userobj;
10427
10428         for_each_tracing_cpu(cpu) {
10429                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10430         }
10431         atomic_dec(&dump_running);
10432         local_irq_restore(flags);
10433 }
10434 EXPORT_SYMBOL_GPL(ftrace_dump);
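
/*
 * A minimal usage sketch: besides the die/panic notifiers above, the dump
 * can be triggered by hand, either from kernel code on a fatal error path
 * or from user space via the sysrq-z mentioned earlier:
 *
 *     ftrace_dump(DUMP_ALL);               // from kernel code
 *     echo z > /proc/sysrq-trigger         # from user space
 */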
10435
10436 #define WRITE_BUFSIZE  4096
10437
10438 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10439                                 size_t count, loff_t *ppos,
10440                                 int (*createfn)(const char *))
10441 {
10442         char *kbuf, *buf, *tmp;
10443         int ret = 0;
10444         size_t done = 0;
10445         size_t size;
10446
10447         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10448         if (!kbuf)
10449                 return -ENOMEM;
10450
10451         while (done < count) {
10452                 size = count - done;
10453
10454                 if (size >= WRITE_BUFSIZE)
10455                         size = WRITE_BUFSIZE - 1;
10456
10457                 if (copy_from_user(kbuf, buffer + done, size)) {
10458                         ret = -EFAULT;
10459                         goto out;
10460                 }
10461                 kbuf[size] = '\0';
10462                 buf = kbuf;
10463                 do {
10464                         tmp = strchr(buf, '\n');
10465                         if (tmp) {
10466                                 *tmp = '\0';
10467                                 size = tmp - buf + 1;
10468                         } else {
10469                                 size = strlen(buf);
10470                                 if (done + size < count) {
10471                                         if (buf != kbuf)
10472                                                 break;
10473                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10474                                         pr_warn("Line length is too long: Should be less than %d\n",
10475                                                 WRITE_BUFSIZE - 2);
10476                                         ret = -EINVAL;
10477                                         goto out;
10478                                 }
10479                         }
10480                         done += size;
10481
10482                         /* Remove comments */
10483                         tmp = strchr(buf, '#');
10484
10485                         if (tmp)
10486                                 *tmp = '\0';
10487
10488                         ret = createfn(buf);
10489                         if (ret)
10490                                 goto out;
10491                         buf += size;
10492
10493                 } while (done < count);
10494         }
10495         ret = done;
10496
10497 out:
10498         kfree(kbuf);
10499
10500         return ret;
10501 }
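
/*
 * A hypothetical caller sketch: writers feed user data through this helper
 * and have createfn() invoked once per line, with '#' comments already
 * stripped. The names my_create_cmd/my_write below are illustrative only.
 *
 *     static int my_create_cmd(const char *cmd)
 *     {
 *             pr_info("got command: %s\n", cmd);
 *             return 0;
 *     }
 *
 *     static ssize_t my_write(struct file *file, const char __user *ubuf,
 *                             size_t cnt, loff_t *ppos)
 *     {
 *             return trace_parse_run_command(file, ubuf, cnt, ppos,
 *                                            my_create_cmd);
 *     }
 */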
10502
10503 #ifdef CONFIG_TRACER_MAX_TRACE
10504 __init static bool tr_needs_alloc_snapshot(const char *name)
10505 {
10506         char *test;
10507         int len = strlen(name);
10508         bool ret;
10509
10510         if (!boot_snapshot_index)
10511                 return false;
10512
10513         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10514             boot_snapshot_info[len] == '\t')
10515                 return true;
10516
10517         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10518         if (!test)
10519                 return false;
10520
10521         sprintf(test, "\t%s\t", name);
10522         ret = strstr(boot_snapshot_info, test) == NULL;
10523         kfree(test);
10524         return ret;
10525 }
10526
10527 __init static void do_allocate_snapshot(const char *name)
10528 {
10529         if (!tr_needs_alloc_snapshot(name))
10530                 return;
10531
10532         /*
10533          * When allocate_snapshot is set, the next call to
10534          * allocate_trace_buffers() (called by trace_array_get_by_name())
10535          * will allocate the snapshot buffer. That will also clear
10536          * this flag.
10537          */
10538         allocate_snapshot = true;
10539 }
10540 #else
10541 static inline void do_allocate_snapshot(const char *name) { }
10542 #endif
10543
10544 __init static void enable_instances(void)
10545 {
10546         struct trace_array *tr;
10547         char *curr_str;
10548         char *str;
10549         char *tok;
10550
10551         /* A tab is always appended */
10552         boot_instance_info[boot_instance_index - 1] = '\0';
10553         str = boot_instance_info;
10554
10555         while ((curr_str = strsep(&str, "\t"))) {
10556
10557                 tok = strsep(&curr_str, ",");
10558
10559                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10560                         do_allocate_snapshot(tok);
10561
10562                 tr = trace_array_get_by_name(tok, NULL);
10563                 if (!tr) {
10564                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10565                         continue;
10566                 }
10567                 /* Allow user space to delete it */
10568                 trace_array_put(tr);
10569
10570                 while ((tok = strsep(&curr_str, ","))) {
10571                         early_enable_events(tr, tok, true);
10572                 }
10573         }
10574 }
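
/*
 * A minimal usage sketch: the loop above consumes the "trace_instance="
 * boot parameter, where the first comma-separated token names the instance
 * and the remaining tokens name events to enable in it (syntax per the
 * boot-time tracing documentation; treat the example as an assumption):
 *
 *     trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 */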
10575
10576 __init static int tracer_alloc_buffers(void)
10577 {
10578         int ring_buf_size;
10579         int ret = -ENOMEM;
10580
10581
10582         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10583                 pr_warn("Tracing disabled due to lockdown\n");
10584                 return -EPERM;
10585         }
10586
10587         /*
10588          * Make sure we don't accidentally add more trace options
10589          * than we have bits for.
10590          */
10591         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10592
10593         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10594                 goto out;
10595
10596         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10597                 goto out_free_buffer_mask;
10598
10599         /* Only allocate trace_printk buffers if a trace_printk exists */
10600         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10601                 /* Must be called before global_trace.buffer is allocated */
10602                 trace_printk_init_buffers();
10603
10604         /* To save memory, keep the ring buffer size at its minimum */
10605         if (global_trace.ring_buffer_expanded)
10606                 ring_buf_size = trace_buf_size;
10607         else
10608                 ring_buf_size = 1;
10609
10610         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10611         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10612
10613         raw_spin_lock_init(&global_trace.start_lock);
10614
10615         /*
10616          * The prepare callback allocates some memory for the ring buffer. We
10617          * don't free the buffer if the CPU goes down. If we were to free
10618          * the buffer, then the user would lose any trace that was in the
10619          * buffer. The memory will be removed once the "instance" is removed.
10620          */
10621         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10622                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10623                                       NULL);
10624         if (ret < 0)
10625                 goto out_free_cpumask;
10626         /* Used for event triggers */
10627         ret = -ENOMEM;
10628         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10629         if (!temp_buffer)
10630                 goto out_rm_hp_state;
10631
10632         if (trace_create_savedcmd() < 0)
10633                 goto out_free_temp_buffer;
10634
10635         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10636                 goto out_free_savedcmd;
10637
10638         /* TODO: make the number of buffers hot pluggable with CPUs */
10639         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10640                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10641                 goto out_free_pipe_cpumask;
10642         }
10643         if (global_trace.buffer_disabled)
10644                 tracing_off();
10645
10646         if (trace_boot_clock) {
10647                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10648                 if (ret < 0)
10649                         pr_warn("Trace clock %s not defined, going back to default\n",
10650                                 trace_boot_clock);
10651         }
10652
10653         /*
10654          * register_tracer() might reference current_trace, so it
10655          * needs to be set before we register anything. This is
10656          * just a bootstrap of current_trace anyway.
10657          */
10658         global_trace.current_trace = &nop_trace;
10659
10660         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10661
10662         ftrace_init_global_array_ops(&global_trace);
10663
10664         init_trace_flags_index(&global_trace);
10665
10666         register_tracer(&nop_trace);
10667
10668         /* Function tracing may start here (via kernel command line) */
10669         init_function_trace();
10670
10671         /* All seems OK, enable tracing */
10672         tracing_disabled = 0;
10673
10674         atomic_notifier_chain_register(&panic_notifier_list,
10675                                        &trace_panic_notifier);
10676
10677         register_die_notifier(&trace_die_notifier);
10678
10679         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10680
10681         INIT_LIST_HEAD(&global_trace.systems);
10682         INIT_LIST_HEAD(&global_trace.events);
10683         INIT_LIST_HEAD(&global_trace.hist_vars);
10684         INIT_LIST_HEAD(&global_trace.err_log);
10685         list_add(&global_trace.list, &ftrace_trace_arrays);
10686
10687         apply_trace_boot_options();
10688
10689         register_snapshot_cmd();
10690
10691         test_can_verify();
10692
10693         return 0;
10694
10695 out_free_pipe_cpumask:
10696         free_cpumask_var(global_trace.pipe_cpumask);
10697 out_free_savedcmd:
10698         free_saved_cmdlines_buffer(savedcmd);
10699 out_free_temp_buffer:
10700         ring_buffer_free(temp_buffer);
10701 out_rm_hp_state:
10702         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10703 out_free_cpumask:
10704         free_cpumask_var(global_trace.tracing_cpumask);
10705 out_free_buffer_mask:
10706         free_cpumask_var(tracing_buffer_mask);
10707 out:
10708         return ret;
10709 }
10710
10711 void __init ftrace_boot_snapshot(void)
10712 {
10713 #ifdef CONFIG_TRACER_MAX_TRACE
10714         struct trace_array *tr;
10715
10716         if (!snapshot_at_boot)
10717                 return;
10718
10719         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10720                 if (!tr->allocated_snapshot)
10721                         continue;
10722
10723                 tracing_snapshot_instance(tr);
10724                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10725         }
10726 #endif
10727 }
10728
10729 void __init early_trace_init(void)
10730 {
10731         if (tracepoint_printk) {
10732                 tracepoint_print_iter =
10733                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10734                 if (MEM_FAIL(!tracepoint_print_iter,
10735                              "Failed to allocate trace iterator\n"))
10736                         tracepoint_printk = 0;
10737                 else
10738                         static_key_enable(&tracepoint_printk_key.key);
10739         }
10740         tracer_alloc_buffers();
10741
10742         init_events();
10743 }
10744
10745 void __init trace_init(void)
10746 {
10747         trace_event_init();
10748
10749         if (boot_instance_index)
10750                 enable_instances();
10751 }
10752
10753 __init static void clear_boot_tracer(void)
10754 {
10755         /*
10756          * The default bootup tracer name is stored in an init section
10757          * buffer that is freed after boot. This function is called at
10758          * late_initcall time: if the boot tracer was not found, clear
10759          * the pointer out to prevent a later registration from
10760          * accessing the buffer that is about to be freed.
10761          */
10762         if (!default_bootup_tracer)
10763                 return;
10764
10765         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10766                default_bootup_tracer);
10767         default_bootup_tracer = NULL;
10768 }
10769
10770 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10771 __init static void tracing_set_default_clock(void)
10772 {
10773         /* sched_clock_stable() is determined in late_initcall */
10774         if (!trace_boot_clock && !sched_clock_stable()) {
10775                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10776                         pr_warn("Can not set tracing clock due to lockdown\n");
10777                         return;
10778                 }
10779
10780                 printk(KERN_WARNING
10781                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10782                        "If you want to keep using the local clock, then add:\n"
10783                        "  \"trace_clock=local\"\n"
10784                        "on the kernel command line\n");
10785                 tracing_set_clock(&global_trace, "global");
10786         }
10787 }
10788 #else
10789 static inline void tracing_set_default_clock(void) { }
10790 #endif
10791
10792 __init static int late_trace_init(void)
10793 {
10794         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10795                 static_key_disable(&tracepoint_printk_key.key);
10796                 tracepoint_printk = 0;
10797         }
10798
10799         tracing_set_default_clock();
10800         clear_boot_tracer();
10801         return 0;
10802 }
10803
10804 late_initcall_sync(late_trace_init);