1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
53 #include "trace_output.h"
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
59 bool ring_buffer_expanded;
62 * We need to change this state when a selftest is running.
63 * A selftest will look into the ring-buffer to count the
64 * entries inserted during the selftest, although some concurrent
65 * insertions into the ring-buffer, such as trace_printk, could occur
66 * at the same time, giving false positive or negative results.
68 static bool __read_mostly tracing_selftest_running;
71 * If boot-time tracing (including tracers/events set via the kernel cmdline)
72 * is running, we do not want to run SELFTEST.
74 bool __read_mostly tracing_selftest_disabled;
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event occurred.
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets this back to zero.
115 static int tracing_disabled = 1;
117 cpumask_var_t __read_mostly tracing_buffer_mask;
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
125 * capturing traces that lead to crashes and outputting them to a serial console.
128 * It is off by default, but you can enable it either by specifying
129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
130 * /proc/sys/kernel/ftrace_dump_on_oops
131 * Set 1 if you want to dump buffers of all CPUs
132 * Set 2 if you want to dump the buffer of the CPU that triggered oops
135 enum ftrace_dump_mode ftrace_dump_on_oops;
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
144 unsigned long length;
147 union trace_eval_map_item;
149 struct trace_eval_map_tail {
151 * "end" is first and points to NULL as it must be different
152 * than "mod" or "eval_string"
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
158 static DEFINE_MUTEX(trace_eval_mutex);
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
167 union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned int trace_ctx);
181 #define MAX_TRACER_SIZE 100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
185 static bool allocate_snapshot;
187 static int __init set_cmdline_ftrace(char *str)
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
195 __setup("ftrace=", set_cmdline_ftrace);
197 static int __init set_ftrace_dump_on_oops(char *str)
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213 static int __init stop_trace_on_warning(char *str)
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
219 __setup("traceoff_on_warning", stop_trace_on_warning);
221 static int __init boot_alloc_snapshot(char *str)
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
228 __setup("alloc_snapshot", boot_alloc_snapshot);
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233 static int __init set_trace_boot_options(char *str)
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 __setup("trace_options=", set_trace_boot_options);
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
243 static int __init set_trace_boot_clock(char *str)
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
249 __setup("trace_clock=", set_trace_boot_clock);
251 static int __init set_tracepoint_printk(char *str)
253 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 tracepoint_printk = 1;
257 __setup("tp_printk", set_tracepoint_printk);
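/*
 * Illustrative example of a kernel command line combining the boot
 * parameters handled above (the chosen values are only examples):
 *
 *	ftrace=function_graph ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *	alloc_snapshot trace_options=sym-addr trace_clock=global tp_printk
 *
 * "trace_buf_size=" and "tracing_thresh=" (handled further below) can be
 * appended in the same way.
 */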
259 unsigned long long ns2usecs(u64 nsec)
267 trace_process_export(struct trace_export *export,
268 struct ring_buffer_event *event, int flag)
270 struct trace_entry *entry;
271 unsigned int size = 0;
273 if (export->flags & flag) {
274 entry = ring_buffer_event_data(event);
275 size = ring_buffer_event_length(event);
276 export->write(export, entry, size);
280 static DEFINE_MUTEX(ftrace_export_lock);
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
288 static inline void ftrace_exports_enable(struct trace_export *export)
290 if (export->flags & TRACE_EXPORT_FUNCTION)
291 static_branch_inc(&trace_function_exports_enabled);
293 if (export->flags & TRACE_EXPORT_EVENT)
294 static_branch_inc(&trace_event_exports_enabled);
296 if (export->flags & TRACE_EXPORT_MARKER)
297 static_branch_inc(&trace_marker_exports_enabled);
300 static inline void ftrace_exports_disable(struct trace_export *export)
302 if (export->flags & TRACE_EXPORT_FUNCTION)
303 static_branch_dec(&trace_function_exports_enabled);
305 if (export->flags & TRACE_EXPORT_EVENT)
306 static_branch_dec(&trace_event_exports_enabled);
308 if (export->flags & TRACE_EXPORT_MARKER)
309 static_branch_dec(&trace_marker_exports_enabled);
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
314 struct trace_export *export;
316 preempt_disable_notrace();
318 export = rcu_dereference_raw_check(ftrace_exports_list);
320 trace_process_export(export, event, flag);
321 export = rcu_dereference_raw_check(export->next);
324 preempt_enable_notrace();
328 add_trace_export(struct trace_export **list, struct trace_export *export)
330 rcu_assign_pointer(export->next, *list);
332 * We are entering export into the list but another
333 * CPU might be walking that list. We need to make sure
334 * the export->next pointer is valid before another CPU sees
335 * the export pointer included in the list.
337 rcu_assign_pointer(*list, export);
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
343 struct trace_export **p;
345 for (p = list; *p != NULL; p = &(*p)->next)
352 rcu_assign_pointer(*p, (*p)->next);
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
360 ftrace_exports_enable(export);
362 add_trace_export(list, export);
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
370 ret = rm_trace_export(list, export);
371 ftrace_exports_disable(export);
376 int register_ftrace_export(struct trace_export *export)
378 if (WARN_ON_ONCE(!export->write))
381 mutex_lock(&ftrace_export_lock);
383 add_ftrace_export(&ftrace_exports_list, export);
385 mutex_unlock(&ftrace_export_lock);
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
391 int unregister_ftrace_export(struct trace_export *export)
395 mutex_lock(&ftrace_export_lock);
397 ret = rm_ftrace_export(&ftrace_exports_list, export);
399 mutex_unlock(&ftrace_export_lock);
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
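/*
 * Illustrative sketch (not part of this file): how a built-in user of the
 * export list above could hook it.  The my_write()/my_export names are
 * hypothetical; register_ftrace_export(), unregister_ftrace_export() and
 * the TRACE_EXPORT_* flags come from <linux/trace.h>.
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to an external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
 *	};
 *
 *	// setup:    register_ftrace_export(&my_export);
 *	// teardown: unregister_ftrace_export(&my_export);
 */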
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS \
407 (FUNCTION_DEFAULT_FLAGS | \
408 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
409 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
410 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
411 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
413 /* trace_options that are only supported by global_trace */
414 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
415 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
417 /* trace_flags that are default zero for instances */
418 #define ZEROED_TRACE_FLAGS \
419 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
422 * The global_trace is the descriptor that holds the top-level tracing
423 * buffers for the live tracing.
425 static struct trace_array global_trace = {
426 .trace_flags = TRACE_DEFAULT_FLAGS,
429 LIST_HEAD(ftrace_trace_arrays);
431 int trace_array_get(struct trace_array *this_tr)
433 struct trace_array *tr;
436 mutex_lock(&trace_types_lock);
437 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
444 mutex_unlock(&trace_types_lock);
449 static void __trace_array_put(struct trace_array *this_tr)
451 WARN_ON(!this_tr->ref);
456 * trace_array_put - Decrement the reference counter for this trace array.
457 * @this_tr : pointer to the trace array
459 * NOTE: Use this when we no longer need the trace array returned by
460 * trace_array_get_by_name(). This ensures the trace array can be later destroyed.
464 void trace_array_put(struct trace_array *this_tr)
469 mutex_lock(&trace_types_lock);
470 __trace_array_put(this_tr);
471 mutex_unlock(&trace_types_lock);
473 EXPORT_SYMBOL_GPL(trace_array_put);
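/*
 * Illustrative sketch of the get/put pairing described above, as used from
 * a kernel module ("my_instance" is a hypothetical instance name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	// ... write events into the instance ...
 *	trace_array_put(tr);	// drop the reference so it can be destroyed
 */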
475 int tracing_check_open_get_tr(struct trace_array *tr)
479 ret = security_locked_down(LOCKDOWN_TRACEFS);
483 if (tracing_disabled)
486 if (tr && trace_array_get(tr) < 0)
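/*
 * Typical caller shape for the check above in a tracefs open() handler
 * (sketch only; the function name is hypothetical):
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		filp->private_data = tr;
 *		return 0;
 *	}
 */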
492 int call_filter_check_discard(struct trace_event_call *call, void *rec,
493 struct trace_buffer *buffer,
494 struct ring_buffer_event *event)
496 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
497 !filter_match_preds(call->filter, rec)) {
498 __trace_event_discard_commit(buffer, event);
505 void trace_free_pid_list(struct trace_pid_list *pid_list)
507 vfree(pid_list->pids);
512 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
513 * @filtered_pids: The list of pids to check
514 * @search_pid: The PID to find in @filtered_pids
516 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
519 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
522 * If pid_max changed after filtered_pids was created, we
523 * by default ignore all pids greater than the previous pid_max.
525 if (search_pid >= filtered_pids->pid_max)
528 return test_bit(search_pid, filtered_pids->pids);
532 * trace_ignore_this_task - should a task be ignored for tracing
533 * @filtered_pids: The list of pids to check
534 * @filtered_no_pids: The list of pids not to be traced
535 * @task: The task that should be ignored if not filtered
537 * Checks if @task should be traced or not from @filtered_pids.
538 * Returns true if @task should *NOT* be traced.
539 * Returns false if @task should be traced.
542 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
543 struct trace_pid_list *filtered_no_pids,
544 struct task_struct *task)
547 * If filtered_no_pids is not empty, and the task's pid is listed
548 * in filtered_no_pids, then return true.
549 * Otherwise, if filtered_pids is empty, that means we can
550 * trace all tasks. If it has content, then only trace pids
551 * within filtered_pids.
554 return (filtered_pids &&
555 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557 trace_find_filtered_pid(filtered_no_pids, task->pid));
561 * trace_filter_add_remove_task - Add or remove a task from a pid_list
562 * @pid_list: The list to modify
563 * @self: The current task for fork or NULL for exit
564 * @task: The task to add or remove
566 * If adding a task, if @self is defined, the task is only added if @self
567 * is also included in @pid_list. This happens on fork and tasks should
568 * only be added when the parent is listed. If @self is NULL, then the
569 * @task pid will be removed from the list, which would happen on exit of a task.
572 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
573 struct task_struct *self,
574 struct task_struct *task)
579 /* For forks, we only add if the forking task is listed */
581 if (!trace_find_filtered_pid(pid_list, self->pid))
585 /* Sorry, but we don't support pid_max changing after setting */
586 if (task->pid >= pid_list->pid_max)
589 /* "self" is set for forks, and NULL for exits */
591 set_bit(task->pid, pid_list->pids);
593 clear_bit(task->pid, pid_list->pids);
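/*
 * Illustrative sketch of the intended caller shape for the helper above:
 * the sched_process_fork/exit tracepoint handlers pass the parent as @self
 * on fork and NULL on exit (the handler name below is hypothetical):
 *
 *	static void my_sched_process_fork(void *data,
 *					  struct task_struct *self,
 *					  struct task_struct *task)
 *	{
 *		struct trace_array *tr = data;
 *		struct trace_pid_list *pid_list;
 *
 *		pid_list = rcu_dereference_sched(tr->filtered_pids);
 *		trace_filter_add_remove_task(pid_list, self, task);
 *	}
 */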
597 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
598 * @pid_list: The pid list to show
599 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
600 * @pos: The position of the file
602 * This is used by the seq_file "next" operation to iterate the pids
603 * listed in a trace_pid_list structure.
605 * Returns the pid+1 as we want to display pid of zero, but NULL would
606 * stop the iteration.
608 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
610 unsigned long pid = (unsigned long)v;
614 /* pid already is +1 of the actual previous bit */
615 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
617 /* Return pid + 1 to allow zero to be represented */
618 if (pid < pid_list->pid_max)
619 return (void *)(pid + 1);
625 * trace_pid_start - Used for seq_file to start reading pid lists
626 * @pid_list: The pid list to show
627 * @pos: The position of the file
629 * This is used by the seq_file "start" operation to start the iteration of pids.
632 * Returns the pid+1 as we want to display pid of zero, but NULL would
633 * stop the iteration.
635 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
640 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
641 if (pid >= pid_list->pid_max)
644 /* Return pid + 1 so that zero can be the exit value */
645 for (pid++; pid && l < *pos;
646 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
652 * trace_pid_show - show the current pid in seq_file processing
653 * @m: The seq_file structure to write into
654 * @v: A void pointer of the pid (+1) value to display
656 * Can be directly used by seq_file operations to display the current pid value.
659 int trace_pid_show(struct seq_file *m, void *v)
661 unsigned long pid = (unsigned long)v - 1;
663 seq_printf(m, "%lu\n", pid);
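/*
 * Illustrative sketch of wiring trace_pid_start/next/show into a seq_file
 * (the p_*() wrappers and my_pid_seq_ops are hypothetical; the pid filter
 * files in trace_events.c and ftrace.c follow this shape):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = ...;	// e.g. from m->private
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = ...;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,	// release any locks taken in p_start
 *		.show	= trace_pid_show,
 *	};
 */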
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE 127
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671 struct trace_pid_list **new_pid_list,
672 const char __user *ubuf, size_t cnt)
674 struct trace_pid_list *pid_list;
675 struct trace_parser parser;
683 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
687 * Always recreate a new array. The write is an all or nothing
688 * operation. Always create a new array when adding new pids by
689 * the user. If the operation fails, then the current list is not modified.
692 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
694 trace_parser_put(&parser);
698 pid_list->pid_max = READ_ONCE(pid_max);
700 /* Only truncating will shrink pid_max */
701 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
702 pid_list->pid_max = filtered_pids->pid_max;
704 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
705 if (!pid_list->pids) {
706 trace_parser_put(&parser);
712 /* copy the current bits to the new max */
713 for_each_set_bit(pid, filtered_pids->pids,
714 filtered_pids->pid_max) {
715 set_bit(pid, pid_list->pids);
724 ret = trace_get_user(&parser, ubuf, cnt, &pos);
725 if (ret < 0 || !trace_parser_loaded(&parser))
733 if (kstrtoul(parser.buffer, 0, &val))
735 if (val >= pid_list->pid_max)
740 set_bit(pid, pid_list->pids);
743 trace_parser_clear(&parser);
746 trace_parser_put(&parser);
749 trace_free_pid_list(pid_list);
754 /* Cleared the list of pids */
755 trace_free_pid_list(pid_list);
760 *new_pid_list = pid_list;
765 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
769 /* Early boot up does not have a buffer yet */
771 return trace_clock_local();
773 ts = ring_buffer_time_stamp(buf->buffer, cpu);
774 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
779 u64 ftrace_now(int cpu)
781 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
785 * tracing_is_enabled - Show if global_trace has been enabled
787 * Shows if the global trace has been enabled or not. It uses the
788 * mirror flag "buffer_disabled" to be used in fast paths such as for
789 * the irqsoff tracer. But it may be inaccurate due to races. If you
790 * need to know the accurate state, use tracing_is_on() which is a little
791 * slower, but accurate.
793 int tracing_is_enabled(void)
796 * For quick access (irqsoff uses this in fast path), just
797 * return the mirror variable of the state of the ring buffer.
798 * It's a little racy, but we don't really care.
801 return !global_trace.buffer_disabled;
805 * trace_buf_size is the size in bytes that is allocated
806 * for a buffer. Note, the number of bytes is always rounded
809 * This number is purposely set to a low number of 16384.
810 * If the dump on oops happens, it will be much appreciated
811 * to not have to wait for all that output. Anyway, this can be
812 * configured at boot time and at run time.
814 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
816 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
818 /* trace_types holds a link list of available tracers. */
819 static struct tracer *trace_types __read_mostly;
822 * trace_types_lock is used to protect the trace_types list.
824 DEFINE_MUTEX(trace_types_lock);
827 * serialize the access of the ring buffer
829 * The ring buffer serializes readers, but that is only low level protection.
830 * The validity of the events (which are returned by ring_buffer_peek(), etc.)
831 * is not protected by the ring buffer.
833 * The content of events may become garbage if we allow another process to
834 * consume these events concurrently:
835 * A) the page of the consumed events may become a normal page
836 * (not a reader page) in the ring buffer, and this page will be rewritten
837 * by the event producer.
838 * B) The page of the consumed events may become a page for splice_read,
839 * and this page will be returned to the system.
841 * These primitives allow multi-process access to different cpu ring buffers concurrently.
844 * These primitives don't distinguish read-only and read-consume access.
845 * Multiple read-only accesses are also serialized.
849 static DECLARE_RWSEM(all_cpu_access_lock);
850 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
852 static inline void trace_access_lock(int cpu)
854 if (cpu == RING_BUFFER_ALL_CPUS) {
855 /* gain it for accessing the whole ring buffer. */
856 down_write(&all_cpu_access_lock);
858 /* gain it for accessing a cpu ring buffer. */
860 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
861 down_read(&all_cpu_access_lock);
863 /* Secondly block other access to this @cpu ring buffer. */
864 mutex_lock(&per_cpu(cpu_access_lock, cpu));
868 static inline void trace_access_unlock(int cpu)
870 if (cpu == RING_BUFFER_ALL_CPUS) {
871 up_write(&all_cpu_access_lock);
873 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
874 up_read(&all_cpu_access_lock);
878 static inline void trace_access_lock_init(void)
882 for_each_possible_cpu(cpu)
883 mutex_init(&per_cpu(cpu_access_lock, cpu));
888 static DEFINE_MUTEX(access_lock);
890 static inline void trace_access_lock(int cpu)
893 mutex_lock(&access_lock);
896 static inline void trace_access_unlock(int cpu)
899 mutex_unlock(&access_lock);
902 static inline void trace_access_lock_init(void)
908 #ifdef CONFIG_STACKTRACE
909 static void __ftrace_trace_stack(struct trace_buffer *buffer,
910 unsigned int trace_ctx,
911 int skip, struct pt_regs *regs);
912 static inline void ftrace_trace_stack(struct trace_array *tr,
913 struct trace_buffer *buffer,
914 unsigned int trace_ctx,
915 int skip, struct pt_regs *regs);
918 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
919 unsigned int trace_ctx,
920 int skip, struct pt_regs *regs)
923 static inline void ftrace_trace_stack(struct trace_array *tr,
924 struct trace_buffer *buffer,
925 unsigned long trace_ctx,
926 int skip, struct pt_regs *regs)
932 static __always_inline void
933 trace_event_setup(struct ring_buffer_event *event,
934 int type, unsigned int trace_ctx)
936 struct trace_entry *ent = ring_buffer_event_data(event);
938 tracing_generic_entry_update(ent, type, trace_ctx);
941 static __always_inline struct ring_buffer_event *
942 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 unsigned int trace_ctx)
947 struct ring_buffer_event *event;
949 event = ring_buffer_lock_reserve(buffer, len);
951 trace_event_setup(event, type, trace_ctx);
956 void tracer_tracing_on(struct trace_array *tr)
958 if (tr->array_buffer.buffer)
959 ring_buffer_record_on(tr->array_buffer.buffer);
961 * This flag is looked at when buffers haven't been allocated
962 * yet, or by some tracers (like irqsoff), that just want to
963 * know if the ring buffer has been disabled, but it can handle
964 * races where it gets disabled but we still do a record.
965 * As the check is in the fast path of the tracers, it is more
966 * important to be fast than accurate.
968 tr->buffer_disabled = 0;
969 /* Make the flag seen by readers */
974 * tracing_on - enable tracing buffers
976 * This function enables tracing buffers that may have been
977 * disabled with tracing_off.
979 void tracing_on(void)
981 tracer_tracing_on(&global_trace);
983 EXPORT_SYMBOL_GPL(tracing_on);
986 static __always_inline void
987 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
989 __this_cpu_write(trace_taskinfo_save, true);
991 /* If this is the temp buffer, we need to commit fully */
992 if (this_cpu_read(trace_buffered_event) == event) {
993 /* Length is in event->array[0] */
994 ring_buffer_write(buffer, event->array[0], &event->array[1]);
995 /* Release the temp buffer */
996 this_cpu_dec(trace_buffered_event_cnt);
998 ring_buffer_unlock_commit(buffer, event);
1002 * __trace_puts - write a constant string into the trace buffer.
1003 * @ip: The address of the caller
1004 * @str: The constant string to write
1005 * @size: The size of the string.
1007 int __trace_puts(unsigned long ip, const char *str, int size)
1009 struct ring_buffer_event *event;
1010 struct trace_buffer *buffer;
1011 struct print_entry *entry;
1012 unsigned int trace_ctx;
1015 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1018 if (unlikely(tracing_selftest_running || tracing_disabled))
1021 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 trace_ctx = tracing_gen_ctx();
1024 buffer = global_trace.array_buffer.buffer;
1025 ring_buffer_nest_start(buffer);
1026 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1033 entry = ring_buffer_event_data(event);
1036 memcpy(&entry->buf, str, size);
1038 /* Add a newline if necessary */
1039 if (entry->buf[size - 1] != '\n') {
1040 entry->buf[size] = '\n';
1041 entry->buf[size + 1] = '\0';
1043 entry->buf[size] = '\0';
1045 __buffer_unlock_commit(buffer, event);
1046 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1048 ring_buffer_nest_end(buffer);
1051 EXPORT_SYMBOL_GPL(__trace_puts);
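/*
 * Callers normally do not use __trace_puts() directly: the trace_puts()
 * macro in <linux/kernel.h> expands to this (or to __trace_bputs() below
 * for built-in constant strings), e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */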
1054 * __trace_bputs - write the pointer to a constant string into trace buffer
1055 * @ip: The address of the caller
1056 * @str: The constant string to write to the buffer to
1058 int __trace_bputs(unsigned long ip, const char *str)
1060 struct ring_buffer_event *event;
1061 struct trace_buffer *buffer;
1062 struct bputs_entry *entry;
1063 unsigned int trace_ctx;
1064 int size = sizeof(struct bputs_entry);
1067 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070 if (unlikely(tracing_selftest_running || tracing_disabled))
1073 trace_ctx = tracing_gen_ctx();
1074 buffer = global_trace.array_buffer.buffer;
1076 ring_buffer_nest_start(buffer);
1077 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082 entry = ring_buffer_event_data(event);
1086 __buffer_unlock_commit(buffer, event);
1087 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1091 ring_buffer_nest_end(buffer);
1094 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 #ifdef CONFIG_TRACER_SNAPSHOT
1097 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1100 struct tracer *tracer = tr->current_trace;
1101 unsigned long flags;
1104 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1105 internal_trace_puts("*** snapshot is being ignored ***\n");
1109 if (!tr->allocated_snapshot) {
1110 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1111 internal_trace_puts("*** stopping trace here! ***\n");
1116 /* Note, snapshot can not be used when the tracer uses it */
1117 if (tracer->use_max_tr) {
1118 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1119 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1123 local_irq_save(flags);
1124 update_max_tr(tr, current, smp_processor_id(), cond_data);
1125 local_irq_restore(flags);
1128 void tracing_snapshot_instance(struct trace_array *tr)
1130 tracing_snapshot_instance_cond(tr, NULL);
1134 * tracing_snapshot - take a snapshot of the current buffer.
1136 * This causes a swap between the snapshot buffer and the current live
1137 * tracing buffer. You can use this to take snapshots of the live
1138 * trace when some condition is triggered, but continue to trace.
1140 * Note, make sure to allocate the snapshot with either
1141 * a tracing_snapshot_alloc(), or by doing it manually
1142 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144 * If the snapshot buffer is not allocated, it will stop tracing.
1145 * Basically making a permanent snapshot.
1147 void tracing_snapshot(void)
1149 struct trace_array *tr = &global_trace;
1151 tracing_snapshot_instance(tr);
1153 EXPORT_SYMBOL_GPL(tracing_snapshot);
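/*
 * Illustrative usage sketch for the API above: allocate the snapshot
 * buffer once from a sleepable context, then snapshot from hot paths:
 *
 *	// during setup (may sleep):
 *	if (tracing_alloc_snapshot())
 *		return -ENOMEM;
 *
 *	// later, when the interesting condition hits (any context):
 *	tracing_snapshot();
 *
 * tracing_snapshot_alloc() below combines both steps for sleepable callers.
 */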
1156 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1157 * @tr: The tracing instance to snapshot
1158 * @cond_data: The data to be tested conditionally, and possibly saved
1160 * This is the same as tracing_snapshot() except that the snapshot is
1161 * conditional - the snapshot will only happen if the
1162 * cond_snapshot.update() implementation receiving the cond_data
1163 * returns true, which means that the trace array's cond_snapshot
1164 * update() operation used the cond_data to determine whether the
1165 * snapshot should be taken, and if it was, presumably saved it along
1166 * with the snapshot.
1168 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 tracing_snapshot_instance_cond(tr, cond_data);
1172 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1175 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1176 * @tr: The tracing instance
1178 * When the user enables a conditional snapshot using
1179 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1180 * with the snapshot. This accessor is used to retrieve it.
1182 * Should not be called from cond_snapshot.update(), since it takes
1183 * the tr->max_lock lock, which the code calling
1184 * cond_snapshot.update() has already done.
1186 * Returns the cond_data associated with the trace array's snapshot.
1188 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 void *cond_data = NULL;
1192 arch_spin_lock(&tr->max_lock);
1194 if (tr->cond_snapshot)
1195 cond_data = tr->cond_snapshot->cond_data;
1197 arch_spin_unlock(&tr->max_lock);
1201 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1204 struct array_buffer *size_buf, int cpu_id);
1205 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1207 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1211 if (!tr->allocated_snapshot) {
1213 /* allocate spare buffer */
1214 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1215 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1219 tr->allocated_snapshot = true;
1225 static void free_snapshot(struct trace_array *tr)
1228 * We don't free the ring buffer; instead, we resize it because
1229 * the max_tr ring buffer has some state (e.g. ring->clock) and
1230 * we want to preserve it.
1232 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1233 set_buffer_entries(&tr->max_buffer, 1);
1234 tracing_reset_online_cpus(&tr->max_buffer);
1235 tr->allocated_snapshot = false;
1239 * tracing_alloc_snapshot - allocate snapshot buffer.
1241 * This only allocates the snapshot buffer if it isn't already
1242 * allocated - it doesn't also take a snapshot.
1244 * This is meant to be used in cases where the snapshot buffer needs
1245 * to be set up for events that can't sleep but need to be able to
1246 * trigger a snapshot.
1248 int tracing_alloc_snapshot(void)
1250 struct trace_array *tr = &global_trace;
1253 ret = tracing_alloc_snapshot_instance(tr);
1258 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1261 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1263 * This is similar to tracing_snapshot(), but it will allocate the
1264 * snapshot buffer if it isn't already allocated. Use this only
1265 * where it is safe to sleep, as the allocation may sleep.
1267 * This causes a swap between the snapshot buffer and the current live
1268 * tracing buffer. You can use this to take snapshots of the live
1269 * trace when some condition is triggered, but continue to trace.
1271 void tracing_snapshot_alloc(void)
1275 ret = tracing_alloc_snapshot();
1281 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1284 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1285 * @tr: The tracing instance
1286 * @cond_data: User data to associate with the snapshot
1287 * @update: Implementation of the cond_snapshot update function
1289 * Check whether the conditional snapshot for the given instance has
1290 * already been enabled, or if the current tracer is already using a
1291 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1292 * save the cond_data and update function inside.
1294 * Returns 0 if successful, error otherwise.
1296 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1297 cond_update_fn_t update)
1299 struct cond_snapshot *cond_snapshot;
1302 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1306 cond_snapshot->cond_data = cond_data;
1307 cond_snapshot->update = update;
1309 mutex_lock(&trace_types_lock);
1311 ret = tracing_alloc_snapshot_instance(tr);
1315 if (tr->current_trace->use_max_tr) {
1321 * The cond_snapshot can only change to NULL without the
1322 * trace_types_lock. We don't care if we race with it going
1323 * to NULL, but we want to make sure that it's not set to
1324 * something other than NULL when we get here, which we can
1325 * do safely with only holding the trace_types_lock and not
1326 * having to take the max_lock.
1328 if (tr->cond_snapshot) {
1333 arch_spin_lock(&tr->max_lock);
1334 tr->cond_snapshot = cond_snapshot;
1335 arch_spin_unlock(&tr->max_lock);
1337 mutex_unlock(&trace_types_lock);
1342 mutex_unlock(&trace_types_lock);
1343 kfree(cond_snapshot);
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1349 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1350 * @tr: The tracing instance
1352 * Check whether the conditional snapshot for the given instance is
1353 * enabled; if so, free the cond_snapshot associated with it,
1354 * otherwise return -EINVAL.
1356 * Returns 0 if successful, error otherwise.
1358 int tracing_snapshot_cond_disable(struct trace_array *tr)
1362 arch_spin_lock(&tr->max_lock);
1364 if (!tr->cond_snapshot)
1367 kfree(tr->cond_snapshot);
1368 tr->cond_snapshot = NULL;
1371 arch_spin_unlock(&tr->max_lock);
1375 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
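/*
 * Illustrative sketch of a conditional snapshot user (my_update() and
 * my_data are hypothetical):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_data *d = cond_data;
 *
 *		// take the snapshot only when it is really interesting
 *		return d->bad_latency_seen;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_data);	// snapshots only if my_update() agrees
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */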
1377 void tracing_snapshot(void)
1379 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1381 EXPORT_SYMBOL_GPL(tracing_snapshot);
1382 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1384 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1386 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1387 int tracing_alloc_snapshot(void)
1389 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1392 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1393 void tracing_snapshot_alloc(void)
1398 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1399 void *tracing_cond_snapshot_data(struct trace_array *tr)
1403 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1404 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1409 int tracing_snapshot_cond_disable(struct trace_array *tr)
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1414 #endif /* CONFIG_TRACER_SNAPSHOT */
1416 void tracer_tracing_off(struct trace_array *tr)
1418 if (tr->array_buffer.buffer)
1419 ring_buffer_record_off(tr->array_buffer.buffer);
1421 * This flag is looked at when buffers haven't been allocated
1422 * yet, or by some tracers (like irqsoff), that just want to
1423 * know if the ring buffer has been disabled, but it can handle
1424 * races where it gets disabled but we still do a record.
1425 * As the check is in the fast path of the tracers, it is more
1426 * important to be fast than accurate.
1428 tr->buffer_disabled = 1;
1429 /* Make the flag seen by readers */
1434 * tracing_off - turn off tracing buffers
1436 * This function stops the tracing buffers from recording data.
1437 * It does not disable any overhead the tracers themselves may
1438 * be causing. This function simply causes all recording to
1439 * the ring buffers to fail.
1441 void tracing_off(void)
1443 tracer_tracing_off(&global_trace);
1445 EXPORT_SYMBOL_GPL(tracing_off);
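/*
 * Typical use of tracing_off(): freeze the ring buffer the moment a bug is
 * detected so the events leading up to it are preserved for later reading
 * from tracefs, e.g.:
 *
 *	if (WARN_ON(broken_state))	// broken_state is a hypothetical condition
 *		tracing_off();
 *
 * Recording can be re-enabled afterwards with tracing_on().
 */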
1447 void disable_trace_on_warning(void)
1449 if (__disable_trace_on_warning) {
1450 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1451 "Disabling tracing due to warning\n");
1457 * tracer_tracing_is_on - show real state of ring buffer enabled
1458 * @tr : the trace array to know if ring buffer is enabled
1460 * Shows real state of the ring buffer if it is enabled or not.
1462 bool tracer_tracing_is_on(struct trace_array *tr)
1464 if (tr->array_buffer.buffer)
1465 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1466 return !tr->buffer_disabled;
1470 * tracing_is_on - show state of ring buffers enabled
1472 int tracing_is_on(void)
1474 return tracer_tracing_is_on(&global_trace);
1476 EXPORT_SYMBOL_GPL(tracing_is_on);
1478 static int __init set_buf_size(char *str)
1480 unsigned long buf_size;
1484 buf_size = memparse(str, &str);
1485 /* nr_entries can not be zero */
1488 trace_buf_size = buf_size;
1491 __setup("trace_buf_size=", set_buf_size);
1493 static int __init set_tracing_thresh(char *str)
1495 unsigned long threshold;
1500 ret = kstrtoul(str, 0, &threshold);
1503 tracing_thresh = threshold * 1000;
1506 __setup("tracing_thresh=", set_tracing_thresh);
1508 unsigned long nsecs_to_usecs(unsigned long nsecs)
1510 return nsecs / 1000;
1514 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1515 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1516 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1517 * of strings in the order that the evals (enum) were defined.
1522 /* These must match the bit positions in trace_iterator_flags */
1523 static const char *trace_options[] = {
1531 int in_ns; /* is this clock in nanoseconds? */
1532 } trace_clocks[] = {
1533 { trace_clock_local, "local", 1 },
1534 { trace_clock_global, "global", 1 },
1535 { trace_clock_counter, "counter", 0 },
1536 { trace_clock_jiffies, "uptime", 0 },
1537 { trace_clock, "perf", 1 },
1538 { ktime_get_mono_fast_ns, "mono", 1 },
1539 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1540 { ktime_get_boot_fast_ns, "boot", 1 },
1544 bool trace_clock_in_ns(struct trace_array *tr)
1546 if (trace_clocks[tr->clock_id].in_ns)
1553 * trace_parser_get_init - gets the buffer for trace parser
1555 int trace_parser_get_init(struct trace_parser *parser, int size)
1557 memset(parser, 0, sizeof(*parser));
1559 parser->buffer = kmalloc(size, GFP_KERNEL);
1560 if (!parser->buffer)
1563 parser->size = size;
1568 * trace_parser_put - frees the buffer for trace parser
1570 void trace_parser_put(struct trace_parser *parser)
1572 kfree(parser->buffer);
1573 parser->buffer = NULL;
1577 * trace_get_user - reads the user input string separated by space
1578 * (matched by isspace(ch))
1580 * For each string found the 'struct trace_parser' is updated,
1581 * and the function returns.
1583 * Returns number of bytes read.
1585 * See kernel/trace/trace.h for 'struct trace_parser' details.
1587 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1588 size_t cnt, loff_t *ppos)
1595 trace_parser_clear(parser);
1597 ret = get_user(ch, ubuf++);
1605 * The parser is not finished with the last write,
1606 * continue reading the user input without skipping spaces.
1608 if (!parser->cont) {
1609 /* skip white space */
1610 while (cnt && isspace(ch)) {
1611 ret = get_user(ch, ubuf++);
1620 /* only spaces were written */
1621 if (isspace(ch) || !ch) {
1628 /* read the non-space input */
1629 while (cnt && !isspace(ch) && ch) {
1630 if (parser->idx < parser->size - 1)
1631 parser->buffer[parser->idx++] = ch;
1636 ret = get_user(ch, ubuf++);
1643 /* We either got finished input or we have to wait for another call. */
1644 if (isspace(ch) || !ch) {
1645 parser->buffer[parser->idx] = 0;
1646 parser->cont = false;
1647 } else if (parser->idx < parser->size - 1) {
1648 parser->cont = true;
1649 parser->buffer[parser->idx++] = ch;
1650 /* Make sure the parsed string always terminates with '\0'. */
1651 parser->buffer[parser->idx] = 0;
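/*
 * Illustrative caller sketch for trace_get_user() (trace_pid_write() above
 * and ftrace_regex_write() follow this shape; MY_BUF_SIZE and handle_word()
 * are hypothetical):
 *
 *	struct trace_parser parser;
 *	int read;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser)) {
 *		// parser.buffer now holds one NUL-terminated word
 *		ret = handle_word(parser.buffer);
 *	}
 *
 *	trace_parser_put(&parser);
 */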
1664 /* TODO add a seq_buf_to_buffer() */
1665 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1669 if (trace_seq_used(s) <= s->seq.readpos)
1672 len = trace_seq_used(s) - s->seq.readpos;
1675 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1677 s->seq.readpos += cnt;
1681 unsigned long __read_mostly tracing_thresh;
1682 static const struct file_operations tracing_max_lat_fops;
1684 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1685 defined(CONFIG_FSNOTIFY)
1687 static struct workqueue_struct *fsnotify_wq;
1689 static void latency_fsnotify_workfn(struct work_struct *work)
1691 struct trace_array *tr = container_of(work, struct trace_array,
1693 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1696 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1698 struct trace_array *tr = container_of(iwork, struct trace_array,
1700 queue_work(fsnotify_wq, &tr->fsnotify_work);
1703 static void trace_create_maxlat_file(struct trace_array *tr,
1704 struct dentry *d_tracer)
1706 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1707 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1708 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1709 d_tracer, &tr->max_latency,
1710 &tracing_max_lat_fops);
1713 __init static int latency_fsnotify_init(void)
1715 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1716 WQ_UNBOUND | WQ_HIGHPRI, 0);
1718 pr_err("Unable to allocate tr_max_lat_wq\n");
1724 late_initcall_sync(latency_fsnotify_init);
1726 void latency_fsnotify(struct trace_array *tr)
1731 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1732 * possible that we are called from __schedule() or do_idle(), which
1733 * could cause a deadlock.
1735 irq_work_queue(&tr->fsnotify_irqwork);
1739 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1740 * defined(CONFIG_FSNOTIFY)
1744 #define trace_create_maxlat_file(tr, d_tracer) \
1745 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1746 &tr->max_latency, &tracing_max_lat_fops)
1750 #ifdef CONFIG_TRACER_MAX_TRACE
1752 * Copy the new maximum trace into the separate maximum-trace
1753 * structure. (this way the maximum trace is permanently saved,
1754 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1757 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1759 struct array_buffer *trace_buf = &tr->array_buffer;
1760 struct array_buffer *max_buf = &tr->max_buffer;
1761 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1762 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1765 max_buf->time_start = data->preempt_timestamp;
1767 max_data->saved_latency = tr->max_latency;
1768 max_data->critical_start = data->critical_start;
1769 max_data->critical_end = data->critical_end;
1771 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1772 max_data->pid = tsk->pid;
1774 * If tsk == current, then use current_uid(), as that does not use
1775 * RCU. The irq tracer can be called out of RCU scope.
1778 max_data->uid = current_uid();
1780 max_data->uid = task_uid(tsk);
1782 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1783 max_data->policy = tsk->policy;
1784 max_data->rt_priority = tsk->rt_priority;
1786 /* record this task's comm */
1787 tracing_record_cmdline(tsk);
1788 latency_fsnotify(tr);
1792 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1794 * @tsk: the task with the latency
1795 * @cpu: The cpu that initiated the trace.
1796 * @cond_data: User data associated with a conditional snapshot
1798 * Flip the buffers between the @tr and the max_tr and record information
1799 * about which task was the cause of this latency.
1802 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1808 WARN_ON_ONCE(!irqs_disabled());
1810 if (!tr->allocated_snapshot) {
1811 /* Only the nop tracer should hit this when disabling */
1812 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1816 arch_spin_lock(&tr->max_lock);
1818 /* Inherit the recordable setting from array_buffer */
1819 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1820 ring_buffer_record_on(tr->max_buffer.buffer);
1822 ring_buffer_record_off(tr->max_buffer.buffer);
1824 #ifdef CONFIG_TRACER_SNAPSHOT
1825 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1828 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1830 __update_max_tr(tr, tsk, cpu);
1833 arch_spin_unlock(&tr->max_lock);
1837 * update_max_tr_single - only copy one trace over, and reset the rest
1839 * @tsk: task with the latency
1840 * @cpu: the cpu of the buffer to copy.
1842 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1845 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852 WARN_ON_ONCE(!irqs_disabled());
1853 if (!tr->allocated_snapshot) {
1854 /* Only the nop tracer should hit this when disabling */
1855 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1859 arch_spin_lock(&tr->max_lock);
1861 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1863 if (ret == -EBUSY) {
1865 * We failed to swap the buffer due to a commit taking
1866 * place on this CPU. We fail to record, but we reset
1867 * the max trace buffer (no one writes directly to it)
1868 * and flag that it failed.
1870 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1871 "Failed to swap buffers due to commit in progress\n");
1874 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1876 __update_max_tr(tr, tsk, cpu);
1877 arch_spin_unlock(&tr->max_lock);
1879 #endif /* CONFIG_TRACER_MAX_TRACE */
1881 static int wait_on_pipe(struct trace_iterator *iter, int full)
1883 /* Iterators are static, they should be filled or empty */
1884 if (trace_buffer_iter(iter, iter->cpu_file))
1887 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1891 #ifdef CONFIG_FTRACE_STARTUP_TEST
1892 static bool selftests_can_run;
1894 struct trace_selftests {
1895 struct list_head list;
1896 struct tracer *type;
1899 static LIST_HEAD(postponed_selftests);
1901 static int save_selftest(struct tracer *type)
1903 struct trace_selftests *selftest;
1905 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1909 selftest->type = type;
1910 list_add(&selftest->list, &postponed_selftests);
1914 static int run_tracer_selftest(struct tracer *type)
1916 struct trace_array *tr = &global_trace;
1917 struct tracer *saved_tracer = tr->current_trace;
1920 if (!type->selftest || tracing_selftest_disabled)
1924 * If a tracer registers early in boot up (before scheduling is
1925 * initialized and such), then do not run its selftests yet.
1926 * Instead, run it a little later in the boot process.
1928 if (!selftests_can_run)
1929 return save_selftest(type);
1932 * Run a selftest on this tracer.
1933 * Here we reset the trace buffer, and set the current
1934 * tracer to be this tracer. The tracer can then run some
1935 * internal tracing to verify that everything is in order.
1936 * If we fail, we do not register this tracer.
1938 tracing_reset_online_cpus(&tr->array_buffer);
1940 tr->current_trace = type;
1942 #ifdef CONFIG_TRACER_MAX_TRACE
1943 if (type->use_max_tr) {
1944 /* If we expanded the buffers, make sure the max is expanded too */
1945 if (ring_buffer_expanded)
1946 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1947 RING_BUFFER_ALL_CPUS);
1948 tr->allocated_snapshot = true;
1952 /* the test is responsible for initializing and enabling */
1953 pr_info("Testing tracer %s: ", type->name);
1954 ret = type->selftest(type, tr);
1955 /* the test is responsible for resetting too */
1956 tr->current_trace = saved_tracer;
1958 printk(KERN_CONT "FAILED!\n");
1959 /* Add the warning after printing 'FAILED' */
1963 /* Only reset on passing, to avoid touching corrupted buffers */
1964 tracing_reset_online_cpus(&tr->array_buffer);
1966 #ifdef CONFIG_TRACER_MAX_TRACE
1967 if (type->use_max_tr) {
1968 tr->allocated_snapshot = false;
1970 /* Shrink the max buffer again */
1971 if (ring_buffer_expanded)
1972 ring_buffer_resize(tr->max_buffer.buffer, 1,
1973 RING_BUFFER_ALL_CPUS);
1977 printk(KERN_CONT "PASSED\n");
1981 static __init int init_trace_selftests(void)
1983 struct trace_selftests *p, *n;
1984 struct tracer *t, **last;
1987 selftests_can_run = true;
1989 mutex_lock(&trace_types_lock);
1991 if (list_empty(&postponed_selftests))
1994 pr_info("Running postponed tracer tests:\n");
1996 tracing_selftest_running = true;
1997 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1998 /* This loop can take minutes when sanitizers are enabled, so
1999 * let's make sure we allow RCU processing.
2002 ret = run_tracer_selftest(p->type);
2003 /* If the test fails, then warn and remove from available_tracers */
2005 WARN(1, "tracer: %s failed selftest, disabling\n",
2007 last = &trace_types;
2008 for (t = trace_types; t; t = t->next) {
2019 tracing_selftest_running = false;
2022 mutex_unlock(&trace_types_lock);
2026 core_initcall(init_trace_selftests);
2028 static inline int run_tracer_selftest(struct tracer *type)
2032 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2034 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2036 static void __init apply_trace_boot_options(void);
2039 * register_tracer - register a tracer with the ftrace system.
2040 * @type: the plugin for the tracer
2042 * Register a new plugin tracer.
2044 int __init register_tracer(struct tracer *type)
2050 pr_info("Tracer must have a name\n");
2054 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2055 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2059 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2060 pr_warn("Can not register tracer %s due to lockdown\n",
2065 mutex_lock(&trace_types_lock);
2067 tracing_selftest_running = true;
2069 for (t = trace_types; t; t = t->next) {
2070 if (strcmp(type->name, t->name) == 0) {
2072 pr_info("Tracer %s already registered\n",
2079 if (!type->set_flag)
2080 type->set_flag = &dummy_set_flag;
2082 /* allocate a dummy tracer_flags */
2083 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2088 type->flags->val = 0;
2089 type->flags->opts = dummy_tracer_opt;
2091 if (!type->flags->opts)
2092 type->flags->opts = dummy_tracer_opt;
2094 /* store the tracer for __set_tracer_option */
2095 type->flags->trace = type;
2097 ret = run_tracer_selftest(type);
2101 type->next = trace_types;
2103 add_tracer_options(&global_trace, type);
2106 tracing_selftest_running = false;
2107 mutex_unlock(&trace_types_lock);
2109 if (ret || !default_bootup_tracer)
2112 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2115 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2116 /* Do we want this tracer to start on bootup? */
2117 tracing_set_tracer(&global_trace, type->name);
2118 default_bootup_tracer = NULL;
2120 apply_trace_boot_options();
2122 /* disable other selftests, since this will break it. */
2123 disable_tracing_selftest("running a tracer");
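/*
 * Illustrative sketch of a minimal built-in tracer plugin registration
 * (the mytracer_* names are hypothetical; see trace_nop.c for a real
 * minimal tracer):
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= mytracer_init,	// int (*)(struct trace_array *)
 *		.reset	= mytracer_reset,	// void (*)(struct trace_array *)
 *	};
 *
 *	static __init int init_mytracer(void)
 *	{
 *		return register_tracer(&mytracer);
 *	}
 *	core_initcall(init_mytracer);
 */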
2129 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2131 struct trace_buffer *buffer = buf->buffer;
2136 ring_buffer_record_disable(buffer);
2138 /* Make sure all commits have finished */
2140 ring_buffer_reset_cpu(buffer, cpu);
2142 ring_buffer_record_enable(buffer);
2145 void tracing_reset_online_cpus(struct array_buffer *buf)
2147 struct trace_buffer *buffer = buf->buffer;
2152 ring_buffer_record_disable(buffer);
2154 /* Make sure all commits have finished */
2157 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2159 ring_buffer_reset_online_cpus(buffer);
2161 ring_buffer_record_enable(buffer);
2164 /* Must have trace_types_lock held */
2165 void tracing_reset_all_online_cpus(void)
2167 struct trace_array *tr;
2169 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2170 if (!tr->clear_trace)
2172 tr->clear_trace = false;
2173 tracing_reset_online_cpus(&tr->array_buffer);
2174 #ifdef CONFIG_TRACER_MAX_TRACE
2175 tracing_reset_online_cpus(&tr->max_buffer);
2180 static int *tgid_map;
2182 #define SAVED_CMDLINES_DEFAULT 128
2183 #define NO_CMDLINE_MAP UINT_MAX
2184 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2185 struct saved_cmdlines_buffer {
2186 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2187 unsigned *map_cmdline_to_pid;
2188 unsigned cmdline_num;
2190 char *saved_cmdlines;
2192 static struct saved_cmdlines_buffer *savedcmd;
2194 /* temporarily disable recording */
2195 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2197 static inline char *get_saved_cmdlines(int idx)
2199 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2202 static inline void set_cmdline(int idx, const char *cmdline)
2204 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2207 static int allocate_cmdlines_buffer(unsigned int val,
2208 struct saved_cmdlines_buffer *s)
2210 s->map_cmdline_to_pid = kmalloc_array(val,
2211 sizeof(*s->map_cmdline_to_pid),
2213 if (!s->map_cmdline_to_pid)
2216 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2217 if (!s->saved_cmdlines) {
2218 kfree(s->map_cmdline_to_pid);
2223 s->cmdline_num = val;
2224 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2225 sizeof(s->map_pid_to_cmdline));
2226 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2227 val * sizeof(*s->map_cmdline_to_pid));
2232 static int trace_create_savedcmd(void)
2236 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2240 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2250 int is_tracing_stopped(void)
2252 return global_trace.stop_count;
2256 * tracing_start - quick start of the tracer
2258 * If tracing is enabled but was stopped by tracing_stop,
2259 * this will start the tracer back up.
2261 void tracing_start(void)
2263 struct trace_buffer *buffer;
2264 unsigned long flags;
2266 if (tracing_disabled)
2269 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2270 if (--global_trace.stop_count) {
2271 if (global_trace.stop_count < 0) {
2272 /* Someone screwed up their debugging */
2274 global_trace.stop_count = 0;
2279 /* Prevent the buffers from switching */
2280 arch_spin_lock(&global_trace.max_lock);
2282 buffer = global_trace.array_buffer.buffer;
2284 ring_buffer_record_enable(buffer);
2286 #ifdef CONFIG_TRACER_MAX_TRACE
2287 buffer = global_trace.max_buffer.buffer;
2289 ring_buffer_record_enable(buffer);
2292 arch_spin_unlock(&global_trace.max_lock);
2295 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2298 static void tracing_start_tr(struct trace_array *tr)
2300 struct trace_buffer *buffer;
2301 unsigned long flags;
2303 if (tracing_disabled)
2306 /* If global, we need to also start the max tracer */
2307 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2308 return tracing_start();
2310 raw_spin_lock_irqsave(&tr->start_lock, flags);
2312 if (--tr->stop_count) {
2313 if (tr->stop_count < 0) {
2314 /* Someone screwed up their debugging */
2321 buffer = tr->array_buffer.buffer;
2323 ring_buffer_record_enable(buffer);
2326 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2330 * tracing_stop - quick stop of the tracer
2332 * Lightweight way to stop tracing. Use in conjunction with tracing_start().
2335 void tracing_stop(void)
2337 struct trace_buffer *buffer;
2338 unsigned long flags;
2340 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2341 if (global_trace.stop_count++)
2344 /* Prevent the buffers from switching */
2345 arch_spin_lock(&global_trace.max_lock);
2347 buffer = global_trace.array_buffer.buffer;
2349 ring_buffer_record_disable(buffer);
2351 #ifdef CONFIG_TRACER_MAX_TRACE
2352 buffer = global_trace.max_buffer.buffer;
2354 ring_buffer_record_disable(buffer);
2357 arch_spin_unlock(&global_trace.max_lock);
2360 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2363 static void tracing_stop_tr(struct trace_array *tr)
2365 struct trace_buffer *buffer;
2366 unsigned long flags;
2368 /* If global, we need to also stop the max tracer */
2369 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2370 return tracing_stop();
2372 raw_spin_lock_irqsave(&tr->start_lock, flags);
2373 if (tr->stop_count++)
2376 buffer = tr->array_buffer.buffer;
2378 ring_buffer_record_disable(buffer);
2381 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2384 static int trace_save_cmdline(struct task_struct *tsk)
2388 /* treat recording of idle task as a success */
2392 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2396 * It's not the end of the world if we don't get
2397 * the lock, but we also don't want to spin
2398 * nor do we want to disable interrupts,
2399 * so if we miss here, then better luck next time.
2401 if (!arch_spin_trylock(&trace_cmdline_lock))
2404 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2405 if (idx == NO_CMDLINE_MAP) {
2406 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2409 * Check whether the cmdline buffer at idx has a pid
2410 * mapped. We are going to overwrite that entry so we
2411 * need to clear the map_pid_to_cmdline. Otherwise we
2412 * would read the new comm for the old pid.
2414 pid = savedcmd->map_cmdline_to_pid[idx];
2415 if (pid != NO_CMDLINE_MAP)
2416 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2418 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2421 savedcmd->cmdline_idx = idx;
2424 set_cmdline(idx, tsk->comm);
2426 arch_spin_unlock(&trace_cmdline_lock);
2431 static void __trace_find_cmdline(int pid, char comm[])
2436 strcpy(comm, "<idle>");
2440 if (WARN_ON_ONCE(pid < 0)) {
2441 strcpy(comm, "<XXX>");
2445 if (pid > PID_MAX_DEFAULT) {
2446 strcpy(comm, "<...>");
2450 map = savedcmd->map_pid_to_cmdline[pid];
2451 if (map != NO_CMDLINE_MAP)
2452 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2454 strcpy(comm, "<...>");
2457 void trace_find_cmdline(int pid, char comm[])
2460 arch_spin_lock(&trace_cmdline_lock);
2462 __trace_find_cmdline(pid, comm);
2464 arch_spin_unlock(&trace_cmdline_lock);
2468 int trace_find_tgid(int pid)
2470 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2473 return tgid_map[pid];
2476 static int trace_save_tgid(struct task_struct *tsk)
2478 /* treat recording of idle task as a success */
2482 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2485 tgid_map[tsk->pid] = tsk->tgid;
2489 static bool tracing_record_taskinfo_skip(int flags)
2491 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2493 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2495 if (!__this_cpu_read(trace_taskinfo_save))
2501 * tracing_record_taskinfo - record the task info of a task
2503 * @task: task to record
2504 * @flags: TRACE_RECORD_CMDLINE for recording comm
2505 * TRACE_RECORD_TGID for recording tgid
2507 void tracing_record_taskinfo(struct task_struct *task, int flags)
2511 if (tracing_record_taskinfo_skip(flags))
2515 * Record as much task information as possible. If some fail, continue
2516 * to try to record the others.
2518 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2519 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2521	/* If recording any information failed, retry soon. */
2525 __this_cpu_write(trace_taskinfo_save, false);
2529 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2531 * @prev: previous task during sched_switch
2532 * @next: next task during sched_switch
2533 * @flags: TRACE_RECORD_CMDLINE for recording comm
2534 * TRACE_RECORD_TGID for recording tgid
2536 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2537 struct task_struct *next, int flags)
2541 if (tracing_record_taskinfo_skip(flags))
2545 * Record as much task information as possible. If some fail, continue
2546 * to try to record the others.
2548 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2549 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2550 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2551 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2553	/* If recording any information failed, retry soon. */
2557 __this_cpu_write(trace_taskinfo_save, false);
2560 /* Helpers to record a specific task information */
2561 void tracing_record_cmdline(struct task_struct *task)
2563 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2566 void tracing_record_tgid(struct task_struct *task)
2568 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
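/*
 * Illustrative sketch (not part of this file): the TRACE_RECORD_* flags
 * can be OR'd together to record both the comm and the tgid of a task
 * in a single call:
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */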
2572 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2573 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2574 * simplifies those functions and keeps them in sync.
2576 enum print_line_t trace_handle_return(struct trace_seq *s)
2578 return trace_seq_has_overflowed(s) ?
2579 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2581 EXPORT_SYMBOL_GPL(trace_handle_return);
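/*
 * Illustrative sketch (not part of this file): a trace_event output
 * callback typically writes into iter->seq and lets trace_handle_return()
 * map a possible overflow to TRACE_TYPE_PARTIAL_LINE. The callback name
 * is hypothetical.
 *
 *	static enum print_line_t
 *	my_event_trace(struct trace_iterator *iter, int flags,
 *		       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */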
2583 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2585 unsigned int trace_flags = irqs_status;
2588 pc = preempt_count();
2591 trace_flags |= TRACE_FLAG_NMI;
2592 if (pc & HARDIRQ_MASK)
2593 trace_flags |= TRACE_FLAG_HARDIRQ;
2594 if (in_serving_softirq())
2595 trace_flags |= TRACE_FLAG_SOFTIRQ;
2597 if (tif_need_resched())
2598 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2599 if (test_preempt_need_resched())
2600 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2601 return (trace_flags << 16) | (pc & 0xff);
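/*
 * A minimal decode sketch for the packing above (nothing beyond what
 * this function produces is assumed): the preempt count lives in the
 * low byte and the irq/resched flags in the upper 16 bits.
 *
 *	unsigned int pc    = trace_ctx & 0xff;
 *	unsigned int flags = trace_ctx >> 16;
 */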
2604 struct ring_buffer_event *
2605 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2608 unsigned int trace_ctx)
2610 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2613 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2614 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2615 static int trace_buffered_event_ref;
2618 * trace_buffered_event_enable - enable buffering events
2620 * When events are being filtered, it is quicker to use a temporary
2621 * buffer to write the event data into if there's a likely chance
2622 * that it will not be committed. The discard of the ring buffer
2623 * is not as fast as committing, and is much slower than copying
2626 * When an event is to be filtered, allocate per cpu buffers to
2627 * write the event data into, and if the event is filtered and discarded
2628	 * it is simply dropped; otherwise, the entire data is to be committed
2631 void trace_buffered_event_enable(void)
2633 struct ring_buffer_event *event;
2637 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2639 if (trace_buffered_event_ref++)
2642 for_each_tracing_cpu(cpu) {
2643 page = alloc_pages_node(cpu_to_node(cpu),
2644 GFP_KERNEL | __GFP_NORETRY, 0);
2648 event = page_address(page);
2649 memset(event, 0, sizeof(*event));
2651 per_cpu(trace_buffered_event, cpu) = event;
2654 if (cpu == smp_processor_id() &&
2655 __this_cpu_read(trace_buffered_event) !=
2656 per_cpu(trace_buffered_event, cpu))
2663 trace_buffered_event_disable();
2666 static void enable_trace_buffered_event(void *data)
2668 /* Probably not needed, but do it anyway */
2670 this_cpu_dec(trace_buffered_event_cnt);
2673 static void disable_trace_buffered_event(void *data)
2675 this_cpu_inc(trace_buffered_event_cnt);
2679 * trace_buffered_event_disable - disable buffering events
2681 * When a filter is removed, it is faster to not use the buffered
2682 * events, and to commit directly into the ring buffer. Free up
2683 * the temp buffers when there are no more users. This requires
2684 * special synchronization with current events.
2686 void trace_buffered_event_disable(void)
2690 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2692 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2695 if (--trace_buffered_event_ref)
2699 /* For each CPU, set the buffer as used. */
2700 smp_call_function_many(tracing_buffer_mask,
2701 disable_trace_buffered_event, NULL, 1);
2704 /* Wait for all current users to finish */
2707 for_each_tracing_cpu(cpu) {
2708 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2709 per_cpu(trace_buffered_event, cpu) = NULL;
2712 * Make sure trace_buffered_event is NULL before clearing
2713 * trace_buffered_event_cnt.
2718 /* Do the work on each cpu */
2719 smp_call_function_many(tracing_buffer_mask,
2720 enable_trace_buffered_event, NULL, 1);
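/*
 * Illustrative sketch (not part of this file): the enable/disable calls
 * are reference counted and must be made under event_mutex, typically
 * around attaching and removing a filter on an event file:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// filter added
 *	...
 *	trace_buffered_event_disable();		// filter removed
 *	mutex_unlock(&event_mutex);
 */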
2724 static struct trace_buffer *temp_buffer;
2726 struct ring_buffer_event *
2727 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2728 struct trace_event_file *trace_file,
2729 int type, unsigned long len,
2730 unsigned int trace_ctx)
2732 struct ring_buffer_event *entry;
2735 *current_rb = trace_file->tr->array_buffer.buffer;
2737 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2738 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2739 (entry = this_cpu_read(trace_buffered_event))) {
2740 /* Try to use the per cpu buffer first */
2741 val = this_cpu_inc_return(trace_buffered_event_cnt);
2743 trace_event_setup(entry, type, trace_ctx);
2744 entry->array[0] = len;
2747 this_cpu_dec(trace_buffered_event_cnt);
2750 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2753 * If tracing is off, but we have triggers enabled
2754 * we still need to look at the event data. Use the temp_buffer
2755	 * to store the trace event for the trigger to use. It is
2756	 * recursion-safe and will not be recorded anywhere.
2758 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2759 *current_rb = temp_buffer;
2760 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2765 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2767 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2768 static DEFINE_MUTEX(tracepoint_printk_mutex);
2770 static void output_printk(struct trace_event_buffer *fbuffer)
2772 struct trace_event_call *event_call;
2773 struct trace_event_file *file;
2774 struct trace_event *event;
2775 unsigned long flags;
2776 struct trace_iterator *iter = tracepoint_print_iter;
2778 /* We should never get here if iter is NULL */
2779 if (WARN_ON_ONCE(!iter))
2782 event_call = fbuffer->trace_file->event_call;
2783 if (!event_call || !event_call->event.funcs ||
2784 !event_call->event.funcs->trace)
2787 file = fbuffer->trace_file;
2788 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2789 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2790 !filter_match_preds(file->filter, fbuffer->entry)))
2793 event = &fbuffer->trace_file->event_call->event;
2795 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2796 trace_seq_init(&iter->seq);
2797 iter->ent = fbuffer->entry;
2798 event_call->event.funcs->trace(iter, 0, event);
2799 trace_seq_putc(&iter->seq, 0);
2800 printk("%s", iter->seq.buffer);
2802 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2805 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2806 void *buffer, size_t *lenp,
2809 int save_tracepoint_printk;
2812 mutex_lock(&tracepoint_printk_mutex);
2813 save_tracepoint_printk = tracepoint_printk;
2815 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2818 * This will force exiting early, as tracepoint_printk
2819	 * is always zero when tracepoint_print_iter is not allocated
2821 if (!tracepoint_print_iter)
2822 tracepoint_printk = 0;
2824 if (save_tracepoint_printk == tracepoint_printk)
2827 if (tracepoint_printk)
2828 static_key_enable(&tracepoint_printk_key.key);
2830 static_key_disable(&tracepoint_printk_key.key);
2833 mutex_unlock(&tracepoint_printk_mutex);
2838 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2840 if (static_key_false(&tracepoint_printk_key.key))
2841 output_printk(fbuffer);
2843 if (static_branch_unlikely(&trace_event_exports_enabled))
2844 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2845 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2846 fbuffer->event, fbuffer->entry,
2847 fbuffer->trace_ctx, fbuffer->regs);
2849 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2854 * trace_buffer_unlock_commit_regs()
2855 * trace_event_buffer_commit()
2856 * trace_event_raw_event_xxx()
2858 # define STACK_SKIP 3
2860 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2861 struct trace_buffer *buffer,
2862 struct ring_buffer_event *event,
2863 unsigned int trace_ctx,
2864 struct pt_regs *regs)
2866 __buffer_unlock_commit(buffer, event);
2869 * If regs is not set, then skip the necessary functions.
2870 * Note, we can still get here via blktrace, wakeup tracer
2871 * and mmiotrace, but that's ok if they lose a function or
2872 * two. They are not that meaningful.
2874 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2875 ftrace_trace_userstack(tr, buffer, trace_ctx);
2879 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2882 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2883 struct ring_buffer_event *event)
2885 __buffer_unlock_commit(buffer, event);
2889 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2890 parent_ip, unsigned int trace_ctx)
2892 struct trace_event_call *call = &event_function;
2893 struct trace_buffer *buffer = tr->array_buffer.buffer;
2894 struct ring_buffer_event *event;
2895 struct ftrace_entry *entry;
2897 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2901 entry = ring_buffer_event_data(event);
2903 entry->parent_ip = parent_ip;
2905 if (!call_filter_check_discard(call, entry, buffer, event)) {
2906 if (static_branch_unlikely(&trace_function_exports_enabled))
2907 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2908 __buffer_unlock_commit(buffer, event);
2912 #ifdef CONFIG_STACKTRACE
2914 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2915 #define FTRACE_KSTACK_NESTING 4
2917 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2919 struct ftrace_stack {
2920 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2924 struct ftrace_stacks {
2925 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2928 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2929 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2931 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2932 unsigned int trace_ctx,
2933 int skip, struct pt_regs *regs)
2935 struct trace_event_call *call = &event_kernel_stack;
2936 struct ring_buffer_event *event;
2937 unsigned int size, nr_entries;
2938 struct ftrace_stack *fstack;
2939 struct stack_entry *entry;
2943	 * Add one, for this function and the call to stack_trace_save()
2944 * If regs is set, then these functions will not be in the way.
2946 #ifndef CONFIG_UNWINDER_ORC
2951 preempt_disable_notrace();
2953 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2955 /* This should never happen. If it does, yell once and skip */
2956 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2960 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2961 * interrupt will either see the value pre increment or post
2962 * increment. If the interrupt happens pre increment it will have
2963 * restored the counter when it returns. We just need a barrier to
2964 * keep gcc from moving things around.
2968 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2969 size = ARRAY_SIZE(fstack->calls);
2972 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2975 nr_entries = stack_trace_save(fstack->calls, size, skip);
2978 size = nr_entries * sizeof(unsigned long);
2979 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2980 sizeof(*entry) + size, trace_ctx);
2983 entry = ring_buffer_event_data(event);
2985 memcpy(&entry->caller, fstack->calls, size);
2986 entry->size = nr_entries;
2988 if (!call_filter_check_discard(call, entry, buffer, event))
2989 __buffer_unlock_commit(buffer, event);
2992 /* Again, don't let gcc optimize things here */
2994 __this_cpu_dec(ftrace_stack_reserve);
2995 preempt_enable_notrace();
2999 static inline void ftrace_trace_stack(struct trace_array *tr,
3000 struct trace_buffer *buffer,
3001 unsigned int trace_ctx,
3002 int skip, struct pt_regs *regs)
3004 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3007 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3010 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3013 struct trace_buffer *buffer = tr->array_buffer.buffer;
3015 if (rcu_is_watching()) {
3016 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3021 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3022 * but if the above rcu_is_watching() failed, then the NMI
3023 * triggered someplace critical, and rcu_irq_enter() should
3024 * not be called from NMI.
3026 if (unlikely(in_nmi()))
3029 rcu_irq_enter_irqson();
3030 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3031 rcu_irq_exit_irqson();
3035 * trace_dump_stack - record a stack back trace in the trace buffer
3036 * @skip: Number of functions to skip (helper handlers)
3038 void trace_dump_stack(int skip)
3040 if (tracing_disabled || tracing_selftest_running)
3043 #ifndef CONFIG_UNWINDER_ORC
3044 /* Skip 1 to skip this function. */
3047 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3048 tracing_gen_ctx(), skip, NULL);
3050 EXPORT_SYMBOL_GPL(trace_dump_stack);
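/*
 * Illustrative sketch (not part of this file): a driver chasing a rare
 * code path can record the current kernel stack into the trace buffer.
 * The condition below is hypothetical.
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);	// 0: skip no additional callers
 */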
3052 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3053 static DEFINE_PER_CPU(int, user_stack_count);
3056 ftrace_trace_userstack(struct trace_array *tr,
3057 struct trace_buffer *buffer, unsigned int trace_ctx)
3059 struct trace_event_call *call = &event_user_stack;
3060 struct ring_buffer_event *event;
3061 struct userstack_entry *entry;
3063 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3067	 * NMIs cannot handle page faults, even with fixups.
3068	 * Saving the user stack can (and often does) fault.
3070 if (unlikely(in_nmi()))
3074 * prevent recursion, since the user stack tracing may
3075 * trigger other kernel events.
3078 if (__this_cpu_read(user_stack_count))
3081 __this_cpu_inc(user_stack_count);
3083 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3084 sizeof(*entry), trace_ctx);
3086 goto out_drop_count;
3087 entry = ring_buffer_event_data(event);
3089 entry->tgid = current->tgid;
3090 memset(&entry->caller, 0, sizeof(entry->caller));
3092 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3093 if (!call_filter_check_discard(call, entry, buffer, event))
3094 __buffer_unlock_commit(buffer, event);
3097 __this_cpu_dec(user_stack_count);
3101 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3102 static void ftrace_trace_userstack(struct trace_array *tr,
3103 struct trace_buffer *buffer,
3104 unsigned int trace_ctx)
3107 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3109 #endif /* CONFIG_STACKTRACE */
3111 /* created for use with alloc_percpu */
3112 struct trace_buffer_struct {
3114 char buffer[4][TRACE_BUF_SIZE];
3117 static struct trace_buffer_struct *trace_percpu_buffer;
3120 * This allows for lockless recording. If we're nested too deeply, then
3121 * this returns NULL.
3123 static char *get_trace_buf(void)
3125 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3127 if (!buffer || buffer->nesting >= 4)
3132 /* Interrupts must see nesting incremented before we use the buffer */
3134 return &buffer->buffer[buffer->nesting - 1][0];
3137 static void put_trace_buf(void)
3139 /* Don't let the decrement of nesting leak before this */
3141 this_cpu_dec(trace_percpu_buffer->nesting);
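/*
 * Illustrative sketch (not part of this file): callers such as
 * trace_vbprintk() below pair get_trace_buf()/put_trace_buf() with
 * preemption disabled so the per-cpu nesting counter stays balanced:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// ... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */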
3144 static int alloc_percpu_trace_buffer(void)
3146 struct trace_buffer_struct *buffers;
3148 if (trace_percpu_buffer)
3151 buffers = alloc_percpu(struct trace_buffer_struct);
3152 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3155 trace_percpu_buffer = buffers;
3159 static int buffers_allocated;
3161 void trace_printk_init_buffers(void)
3163 if (buffers_allocated)
3166 if (alloc_percpu_trace_buffer())
3169 /* trace_printk() is for debug use only. Don't use it in production. */
3172 pr_warn("**********************************************************\n");
3173 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3175 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3177 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3178 pr_warn("** unsafe for production use. **\n");
3180 pr_warn("** If you see this message and you are not debugging **\n");
3181 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3183 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3184 pr_warn("**********************************************************\n");
3186 /* Expand the buffers to set size */
3187 tracing_update_buffers();
3189 buffers_allocated = 1;
3192 * trace_printk_init_buffers() can be called by modules.
3193 * If that happens, then we need to start cmdline recording
3194 * directly here. If the global_trace.buffer is already
3195 * allocated here, then this was called by module code.
3197 if (global_trace.array_buffer.buffer)
3198 tracing_start_cmdline_record();
3200 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3202 void trace_printk_start_comm(void)
3204 /* Start tracing comms if trace printk is set */
3205 if (!buffers_allocated)
3207 tracing_start_cmdline_record();
3210 static void trace_printk_start_stop_comm(int enabled)
3212 if (!buffers_allocated)
3216 tracing_start_cmdline_record();
3218 tracing_stop_cmdline_record();
3222 * trace_vbprintk - write binary msg to tracing buffer
3223 * @ip: The address of the caller
3224 * @fmt: The string format to write to the buffer
3225 * @args: Arguments for @fmt
3227 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3229 struct trace_event_call *call = &event_bprint;
3230 struct ring_buffer_event *event;
3231 struct trace_buffer *buffer;
3232 struct trace_array *tr = &global_trace;
3233 struct bprint_entry *entry;
3234 unsigned int trace_ctx;
3238 if (unlikely(tracing_selftest_running || tracing_disabled))
3241 /* Don't pollute graph traces with trace_vprintk internals */
3242 pause_graph_tracing();
3244 trace_ctx = tracing_gen_ctx();
3245 preempt_disable_notrace();
3247 tbuffer = get_trace_buf();
3253 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3255 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3258 size = sizeof(*entry) + sizeof(u32) * len;
3259 buffer = tr->array_buffer.buffer;
3260 ring_buffer_nest_start(buffer);
3261 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3265 entry = ring_buffer_event_data(event);
3269 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3270 if (!call_filter_check_discard(call, entry, buffer, event)) {
3271 __buffer_unlock_commit(buffer, event);
3272 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3276 ring_buffer_nest_end(buffer);
3281 preempt_enable_notrace();
3282 unpause_graph_tracing();
3286 EXPORT_SYMBOL_GPL(trace_vbprintk);
3290 __trace_array_vprintk(struct trace_buffer *buffer,
3291 unsigned long ip, const char *fmt, va_list args)
3293 struct trace_event_call *call = &event_print;
3294 struct ring_buffer_event *event;
3296 struct print_entry *entry;
3297 unsigned int trace_ctx;
3300 if (tracing_disabled || tracing_selftest_running)
3303 /* Don't pollute graph traces with trace_vprintk internals */
3304 pause_graph_tracing();
3306 trace_ctx = tracing_gen_ctx();
3307 preempt_disable_notrace();
3310 tbuffer = get_trace_buf();
3316 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3318 size = sizeof(*entry) + len + 1;
3319 ring_buffer_nest_start(buffer);
3320 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3324 entry = ring_buffer_event_data(event);
3327 memcpy(&entry->buf, tbuffer, len + 1);
3328 if (!call_filter_check_discard(call, entry, buffer, event)) {
3329 __buffer_unlock_commit(buffer, event);
3330 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3334 ring_buffer_nest_end(buffer);
3338 preempt_enable_notrace();
3339 unpause_graph_tracing();
3345 int trace_array_vprintk(struct trace_array *tr,
3346 unsigned long ip, const char *fmt, va_list args)
3348 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3352 * trace_array_printk - Print a message to a specific instance
3353 * @tr: The instance trace_array descriptor
3354 * @ip: The instruction pointer that this is called from.
3355 * @fmt: The format to print (printf format)
3357 * If a subsystem sets up its own instance, they have the right to
3358 * printk strings into their tracing instance buffer using this
3359 * function. Note, this function will not write into the top level
3360	 * buffer (use trace_printk() for that), as the top level
3361	 * buffer should only have events that can be individually disabled.
3362 * trace_printk() is only used for debugging a kernel, and should not
3363	 * ever be incorporated into normal use.
3365 * trace_array_printk() can be used, as it will not add noise to the
3366 * top level tracing buffer.
3368 * Note, trace_array_init_printk() must be called on @tr before this
3372 int trace_array_printk(struct trace_array *tr,
3373 unsigned long ip, const char *fmt, ...)
3381 /* This is only allowed for created instances */
3382 if (tr == &global_trace)
3385 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3389 ret = trace_array_vprintk(tr, ip, fmt, ap);
3393 EXPORT_SYMBOL_GPL(trace_array_printk);
3396 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3397 * @tr: The trace array to initialize the buffers for
3399 * As trace_array_printk() only writes into instances, they are OK to
3400 * have in the kernel (unlike trace_printk()). This needs to be called
3401 * before trace_array_printk() can be used on a trace_array.
3403 int trace_array_init_printk(struct trace_array *tr)
3408 /* This is only allowed for created instances */
3409 if (tr == &global_trace)
3412 return alloc_percpu_trace_buffer();
3414 EXPORT_SYMBOL_GPL(trace_array_init_printk);
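/*
 * Illustrative sketch (not part of this file): a module writing into its
 * own instance. The instance name and "val" are hypothetical, and
 * trace_array_get_by_name() is assumed to be available for creating or
 * looking up the instance.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe hit: %d\n", val);
 */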
3417 int trace_array_printk_buf(struct trace_buffer *buffer,
3418 unsigned long ip, const char *fmt, ...)
3423 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3427 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3433 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3435 return trace_array_vprintk(&global_trace, ip, fmt, args);
3437 EXPORT_SYMBOL_GPL(trace_vprintk);
3439 static void trace_iterator_increment(struct trace_iterator *iter)
3441 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3445 ring_buffer_iter_advance(buf_iter);
3448 static struct trace_entry *
3449 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3450 unsigned long *lost_events)
3452 struct ring_buffer_event *event;
3453 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3456 event = ring_buffer_iter_peek(buf_iter, ts);
3458 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3459 (unsigned long)-1 : 0;
3461 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3466 iter->ent_size = ring_buffer_event_length(event);
3467 return ring_buffer_event_data(event);
3473 static struct trace_entry *
3474 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3475 unsigned long *missing_events, u64 *ent_ts)
3477 struct trace_buffer *buffer = iter->array_buffer->buffer;
3478 struct trace_entry *ent, *next = NULL;
3479 unsigned long lost_events = 0, next_lost = 0;
3480 int cpu_file = iter->cpu_file;
3481 u64 next_ts = 0, ts;
3487	 * If we are in a per_cpu trace file, don't bother iterating over
3488	 * all CPUs; just peek at that CPU directly.
3490 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3491 if (ring_buffer_empty_cpu(buffer, cpu_file))
3493 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3495 *ent_cpu = cpu_file;
3500 for_each_tracing_cpu(cpu) {
3502 if (ring_buffer_empty_cpu(buffer, cpu))
3505 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3508 * Pick the entry with the smallest timestamp:
3510 if (ent && (!next || ts < next_ts)) {
3514 next_lost = lost_events;
3515 next_size = iter->ent_size;
3519 iter->ent_size = next_size;
3522 *ent_cpu = next_cpu;
3528 *missing_events = next_lost;
3533 #define STATIC_TEMP_BUF_SIZE 128
3534 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3536 /* Find the next real entry, without updating the iterator itself */
3537 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3538 int *ent_cpu, u64 *ent_ts)
3540 /* __find_next_entry will reset ent_size */
3541 int ent_size = iter->ent_size;
3542 struct trace_entry *entry;
3545 * If called from ftrace_dump(), then the iter->temp buffer
3546 * will be the static_temp_buf and not created from kmalloc.
3547 * If the entry size is greater than the buffer, we can
3548 * not save it. Just return NULL in that case. This is only
3549 * used to add markers when two consecutive events' time
3550 * stamps have a large delta. See trace_print_lat_context()
3552 if (iter->temp == static_temp_buf &&
3553 STATIC_TEMP_BUF_SIZE < ent_size)
3557 * The __find_next_entry() may call peek_next_entry(), which may
3558 * call ring_buffer_peek() that may make the contents of iter->ent
3559 * undefined. Need to copy iter->ent now.
3561 if (iter->ent && iter->ent != iter->temp) {
3562 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3563 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3565 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3570 iter->temp_size = iter->ent_size;
3572 memcpy(iter->temp, iter->ent, iter->ent_size);
3573 iter->ent = iter->temp;
3575 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3576 /* Put back the original ent_size */
3577 iter->ent_size = ent_size;
3582 /* Find the next real entry, and increment the iterator to the next entry */
3583 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3585 iter->ent = __find_next_entry(iter, &iter->cpu,
3586 &iter->lost_events, &iter->ts);
3589 trace_iterator_increment(iter);
3591 return iter->ent ? iter : NULL;
3594 static void trace_consume(struct trace_iterator *iter)
3596 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3597 &iter->lost_events);
3600 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3602 struct trace_iterator *iter = m->private;
3606 WARN_ON_ONCE(iter->leftover);
3610 /* can't go backwards */
3615 ent = trace_find_next_entry_inc(iter);
3619 while (ent && iter->idx < i)
3620 ent = trace_find_next_entry_inc(iter);
3627 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3629 struct ring_buffer_iter *buf_iter;
3630 unsigned long entries = 0;
3633 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3635 buf_iter = trace_buffer_iter(iter, cpu);
3639 ring_buffer_iter_reset(buf_iter);
3642 * We could have the case with the max latency tracers
3643 * that a reset never took place on a cpu. This is evident
3644 * by the timestamp being before the start of the buffer.
3646 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3647 if (ts >= iter->array_buffer->time_start)
3650 ring_buffer_iter_advance(buf_iter);
3653 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3657 * The current tracer is copied to avoid a global locking
3660 static void *s_start(struct seq_file *m, loff_t *pos)
3662 struct trace_iterator *iter = m->private;
3663 struct trace_array *tr = iter->tr;
3664 int cpu_file = iter->cpu_file;
3670 * copy the tracer to avoid using a global lock all around.
3671 * iter->trace is a copy of current_trace, the pointer to the
3672 * name may be used instead of a strcmp(), as iter->trace->name
3673 * will point to the same string as current_trace->name.
3675 mutex_lock(&trace_types_lock);
3676 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3677 *iter->trace = *tr->current_trace;
3678 mutex_unlock(&trace_types_lock);
3680 #ifdef CONFIG_TRACER_MAX_TRACE
3681 if (iter->snapshot && iter->trace->use_max_tr)
3682 return ERR_PTR(-EBUSY);
3685 if (!iter->snapshot)
3686 atomic_inc(&trace_record_taskinfo_disabled);
3688 if (*pos != iter->pos) {
3693 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3694 for_each_tracing_cpu(cpu)
3695 tracing_iter_reset(iter, cpu);
3697 tracing_iter_reset(iter, cpu_file);
3700 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3705 * If we overflowed the seq_file before, then we want
3706 * to just reuse the trace_seq buffer again.
3712 p = s_next(m, p, &l);
3716 trace_event_read_lock();
3717 trace_access_lock(cpu_file);
3721 static void s_stop(struct seq_file *m, void *p)
3723 struct trace_iterator *iter = m->private;
3725 #ifdef CONFIG_TRACER_MAX_TRACE
3726 if (iter->snapshot && iter->trace->use_max_tr)
3730 if (!iter->snapshot)
3731 atomic_dec(&trace_record_taskinfo_disabled);
3733 trace_access_unlock(iter->cpu_file);
3734 trace_event_read_unlock();
3738 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3739 unsigned long *entries, int cpu)
3741 unsigned long count;
3743 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3745 * If this buffer has skipped entries, then we hold all
3746 * entries for the trace and we need to ignore the
3747 * ones before the time stamp.
3749 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3750 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3751 /* total is the same as the entries */
3755 ring_buffer_overrun_cpu(buf->buffer, cpu);
3760 get_total_entries(struct array_buffer *buf,
3761 unsigned long *total, unsigned long *entries)
3769 for_each_tracing_cpu(cpu) {
3770 get_total_entries_cpu(buf, &t, &e, cpu);
3776 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3778 unsigned long total, entries;
3783 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3788 unsigned long trace_total_entries(struct trace_array *tr)
3790 unsigned long total, entries;
3795 get_total_entries(&tr->array_buffer, &total, &entries);
3800 static void print_lat_help_header(struct seq_file *m)
3802 seq_puts(m, "# _------=> CPU# \n"
3803 "# / _-----=> irqs-off \n"
3804 "# | / _----=> need-resched \n"
3805 "# || / _---=> hardirq/softirq \n"
3806 "# ||| / _--=> preempt-depth \n"
3808 "# cmd pid ||||| time | caller \n"
3809 "# \\ / ||||| \\ | / \n");
3812 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3814 unsigned long total;
3815 unsigned long entries;
3817 get_total_entries(buf, &total, &entries);
3818 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3819 entries, total, num_online_cpus());
3823 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3826 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3828 print_event_info(buf, m);
3830 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3831 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3834 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3837 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3838 const char *space = " ";
3839 int prec = tgid ? 12 : 2;
3841 print_event_info(buf, m);
3843 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3844 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3845 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3846 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3847 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3848 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3849 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3853 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3855 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3856 struct array_buffer *buf = iter->array_buffer;
3857 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3858 struct tracer *type = iter->trace;
3859 unsigned long entries;
3860 unsigned long total;
3861 const char *name = "preemption";
3865 get_total_entries(buf, &total, &entries);
3867 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3869 seq_puts(m, "# -----------------------------------"
3870 "---------------------------------\n");
3871 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3872 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3873 nsecs_to_usecs(data->saved_latency),
3877 #if defined(CONFIG_PREEMPT_NONE)
3879 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3881 #elif defined(CONFIG_PREEMPT)
3883 #elif defined(CONFIG_PREEMPT_RT)
3888 /* These are reserved for later use */
3891 seq_printf(m, " #P:%d)\n", num_online_cpus());
3895 seq_puts(m, "# -----------------\n");
3896 seq_printf(m, "# | task: %.16s-%d "
3897 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3898 data->comm, data->pid,
3899 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3900 data->policy, data->rt_priority);
3901 seq_puts(m, "# -----------------\n");
3903 if (data->critical_start) {
3904 seq_puts(m, "# => started at: ");
3905 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3906 trace_print_seq(m, &iter->seq);
3907 seq_puts(m, "\n# => ended at: ");
3908 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3909 trace_print_seq(m, &iter->seq);
3910 seq_puts(m, "\n#\n");
3916 static void test_cpu_buff_start(struct trace_iterator *iter)
3918 struct trace_seq *s = &iter->seq;
3919 struct trace_array *tr = iter->tr;
3921 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3924 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3927 if (cpumask_available(iter->started) &&
3928 cpumask_test_cpu(iter->cpu, iter->started))
3931 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3934 if (cpumask_available(iter->started))
3935 cpumask_set_cpu(iter->cpu, iter->started);
3937 /* Don't print started cpu buffer for the first entry of the trace */
3939 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3943 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3945 struct trace_array *tr = iter->tr;
3946 struct trace_seq *s = &iter->seq;
3947 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3948 struct trace_entry *entry;
3949 struct trace_event *event;
3953 test_cpu_buff_start(iter);
3955 event = ftrace_find_event(entry->type);
3957 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3958 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3959 trace_print_lat_context(iter);
3961 trace_print_context(iter);
3964 if (trace_seq_has_overflowed(s))
3965 return TRACE_TYPE_PARTIAL_LINE;
3968 return event->funcs->trace(iter, sym_flags, event);
3970 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3972 return trace_handle_return(s);
3975 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3977 struct trace_array *tr = iter->tr;
3978 struct trace_seq *s = &iter->seq;
3979 struct trace_entry *entry;
3980 struct trace_event *event;
3984 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3985 trace_seq_printf(s, "%d %d %llu ",
3986 entry->pid, iter->cpu, iter->ts);
3988 if (trace_seq_has_overflowed(s))
3989 return TRACE_TYPE_PARTIAL_LINE;
3991 event = ftrace_find_event(entry->type);
3993 return event->funcs->raw(iter, 0, event);
3995 trace_seq_printf(s, "%d ?\n", entry->type);
3997 return trace_handle_return(s);
4000 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4002 struct trace_array *tr = iter->tr;
4003 struct trace_seq *s = &iter->seq;
4004 unsigned char newline = '\n';
4005 struct trace_entry *entry;
4006 struct trace_event *event;
4010 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4011 SEQ_PUT_HEX_FIELD(s, entry->pid);
4012 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4013 SEQ_PUT_HEX_FIELD(s, iter->ts);
4014 if (trace_seq_has_overflowed(s))
4015 return TRACE_TYPE_PARTIAL_LINE;
4018 event = ftrace_find_event(entry->type);
4020 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4021 if (ret != TRACE_TYPE_HANDLED)
4025 SEQ_PUT_FIELD(s, newline);
4027 return trace_handle_return(s);
4030 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4032 struct trace_array *tr = iter->tr;
4033 struct trace_seq *s = &iter->seq;
4034 struct trace_entry *entry;
4035 struct trace_event *event;
4039 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4040 SEQ_PUT_FIELD(s, entry->pid);
4041 SEQ_PUT_FIELD(s, iter->cpu);
4042 SEQ_PUT_FIELD(s, iter->ts);
4043 if (trace_seq_has_overflowed(s))
4044 return TRACE_TYPE_PARTIAL_LINE;
4047 event = ftrace_find_event(entry->type);
4048 return event ? event->funcs->binary(iter, 0, event) :
4052 int trace_empty(struct trace_iterator *iter)
4054 struct ring_buffer_iter *buf_iter;
4057 /* If we are looking at one CPU buffer, only check that one */
4058 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4059 cpu = iter->cpu_file;
4060 buf_iter = trace_buffer_iter(iter, cpu);
4062 if (!ring_buffer_iter_empty(buf_iter))
4065 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4071 for_each_tracing_cpu(cpu) {
4072 buf_iter = trace_buffer_iter(iter, cpu);
4074 if (!ring_buffer_iter_empty(buf_iter))
4077 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4085 /* Called with trace_event_read_lock() held. */
4086 enum print_line_t print_trace_line(struct trace_iterator *iter)
4088 struct trace_array *tr = iter->tr;
4089 unsigned long trace_flags = tr->trace_flags;
4090 enum print_line_t ret;
4092 if (iter->lost_events) {
4093 if (iter->lost_events == (unsigned long)-1)
4094 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4097 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4098 iter->cpu, iter->lost_events);
4099 if (trace_seq_has_overflowed(&iter->seq))
4100 return TRACE_TYPE_PARTIAL_LINE;
4103 if (iter->trace && iter->trace->print_line) {
4104 ret = iter->trace->print_line(iter);
4105 if (ret != TRACE_TYPE_UNHANDLED)
4109 if (iter->ent->type == TRACE_BPUTS &&
4110 trace_flags & TRACE_ITER_PRINTK &&
4111 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4112 return trace_print_bputs_msg_only(iter);
4114 if (iter->ent->type == TRACE_BPRINT &&
4115 trace_flags & TRACE_ITER_PRINTK &&
4116 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4117 return trace_print_bprintk_msg_only(iter);
4119 if (iter->ent->type == TRACE_PRINT &&
4120 trace_flags & TRACE_ITER_PRINTK &&
4121 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4122 return trace_print_printk_msg_only(iter);
4124 if (trace_flags & TRACE_ITER_BIN)
4125 return print_bin_fmt(iter);
4127 if (trace_flags & TRACE_ITER_HEX)
4128 return print_hex_fmt(iter);
4130 if (trace_flags & TRACE_ITER_RAW)
4131 return print_raw_fmt(iter);
4133 return print_trace_fmt(iter);
4136 void trace_latency_header(struct seq_file *m)
4138 struct trace_iterator *iter = m->private;
4139 struct trace_array *tr = iter->tr;
4141 /* print nothing if the buffers are empty */
4142 if (trace_empty(iter))
4145 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4146 print_trace_header(m, iter);
4148 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4149 print_lat_help_header(m);
4152 void trace_default_header(struct seq_file *m)
4154 struct trace_iterator *iter = m->private;
4155 struct trace_array *tr = iter->tr;
4156 unsigned long trace_flags = tr->trace_flags;
4158 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4161 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4162 /* print nothing if the buffers are empty */
4163 if (trace_empty(iter))
4165 print_trace_header(m, iter);
4166 if (!(trace_flags & TRACE_ITER_VERBOSE))
4167 print_lat_help_header(m);
4169 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4170 if (trace_flags & TRACE_ITER_IRQ_INFO)
4171 print_func_help_header_irq(iter->array_buffer,
4174 print_func_help_header(iter->array_buffer, m,
4180 static void test_ftrace_alive(struct seq_file *m)
4182 if (!ftrace_is_dead())
4184 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4185 "# MAY BE MISSING FUNCTION EVENTS\n");
4188 #ifdef CONFIG_TRACER_MAX_TRACE
4189 static void show_snapshot_main_help(struct seq_file *m)
4191 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4192 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4193 "# Takes a snapshot of the main buffer.\n"
4194 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4195	 "# (Doesn't have to be '2'; works with any number that\n"
4196 "# is not a '0' or '1')\n");
4199 static void show_snapshot_percpu_help(struct seq_file *m)
4201 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4202 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4203 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4204 "# Takes a snapshot of the main buffer for this cpu.\n");
4206 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4207 "# Must use main snapshot file to allocate.\n");
4209 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4210	 "# (Doesn't have to be '2'; works with any number that\n"
4211 "# is not a '0' or '1')\n");
4214 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4216 if (iter->tr->allocated_snapshot)
4217 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4219 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4221 seq_puts(m, "# Snapshot commands:\n");
4222 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4223 show_snapshot_main_help(m);
4225 show_snapshot_percpu_help(m);
4228 /* Should never be called */
4229 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4232 static int s_show(struct seq_file *m, void *v)
4234 struct trace_iterator *iter = v;
4237 if (iter->ent == NULL) {
4239 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4241 test_ftrace_alive(m);
4243 if (iter->snapshot && trace_empty(iter))
4244 print_snapshot_help(m, iter);
4245 else if (iter->trace && iter->trace->print_header)
4246 iter->trace->print_header(m);
4248 trace_default_header(m);
4250 } else if (iter->leftover) {
4252 * If we filled the seq_file buffer earlier, we
4253 * want to just show it now.
4255 ret = trace_print_seq(m, &iter->seq);
4257 /* ret should this time be zero, but you never know */
4258 iter->leftover = ret;
4261 print_trace_line(iter);
4262 ret = trace_print_seq(m, &iter->seq);
4264 * If we overflow the seq_file buffer, then it will
4265 * ask us for this data again at start up.
4267 * ret is 0 if seq_file write succeeded.
4270 iter->leftover = ret;
4277	 * Should be used after trace_array_get(); trace_types_lock
4278 * ensures that i_cdev was already initialized.
4280 static inline int tracing_get_cpu(struct inode *inode)
4282 if (inode->i_cdev) /* See trace_create_cpu_file() */
4283 return (long)inode->i_cdev - 1;
4284 return RING_BUFFER_ALL_CPUS;
4287 static const struct seq_operations tracer_seq_ops = {
4294 static struct trace_iterator *
4295 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4297 struct trace_array *tr = inode->i_private;
4298 struct trace_iterator *iter;
4301 if (tracing_disabled)
4302 return ERR_PTR(-ENODEV);
4304 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4306 return ERR_PTR(-ENOMEM);
4308 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4310 if (!iter->buffer_iter)
4314 * trace_find_next_entry() may need to save off iter->ent.
4315 * It will place it into the iter->temp buffer. As most
4316	 * events are less than 128 bytes, allocate a buffer of that size.
4317 * If one is greater, then trace_find_next_entry() will
4318 * allocate a new buffer to adjust for the bigger iter->ent.
4319 * It's not critical if it fails to get allocated here.
4321 iter->temp = kmalloc(128, GFP_KERNEL);
4323 iter->temp_size = 128;
4326 * We make a copy of the current tracer to avoid concurrent
4327 * changes on it while we are reading.
4329 mutex_lock(&trace_types_lock);
4330 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4334 *iter->trace = *tr->current_trace;
4336 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4341 #ifdef CONFIG_TRACER_MAX_TRACE
4342 /* Currently only the top directory has a snapshot */
4343 if (tr->current_trace->print_max || snapshot)
4344 iter->array_buffer = &tr->max_buffer;
4347 iter->array_buffer = &tr->array_buffer;
4348 iter->snapshot = snapshot;
4350 iter->cpu_file = tracing_get_cpu(inode);
4351 mutex_init(&iter->mutex);
4353 /* Notify the tracer early; before we stop tracing. */
4354 if (iter->trace->open)
4355 iter->trace->open(iter);
4357 /* Annotate start of buffers if we had overruns */
4358 if (ring_buffer_overruns(iter->array_buffer->buffer))
4359 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4361 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4362 if (trace_clocks[tr->clock_id].in_ns)
4363 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4366 * If pause-on-trace is enabled, then stop the trace while
4367 * dumping, unless this is the "snapshot" file
4369 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4370 tracing_stop_tr(tr);
4372 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4373 for_each_tracing_cpu(cpu) {
4374 iter->buffer_iter[cpu] =
4375 ring_buffer_read_prepare(iter->array_buffer->buffer,
4378 ring_buffer_read_prepare_sync();
4379 for_each_tracing_cpu(cpu) {
4380 ring_buffer_read_start(iter->buffer_iter[cpu]);
4381 tracing_iter_reset(iter, cpu);
4384 cpu = iter->cpu_file;
4385 iter->buffer_iter[cpu] =
4386 ring_buffer_read_prepare(iter->array_buffer->buffer,
4388 ring_buffer_read_prepare_sync();
4389 ring_buffer_read_start(iter->buffer_iter[cpu]);
4390 tracing_iter_reset(iter, cpu);
4393 mutex_unlock(&trace_types_lock);
4398 mutex_unlock(&trace_types_lock);
4401 kfree(iter->buffer_iter);
4403 seq_release_private(inode, file);
4404 return ERR_PTR(-ENOMEM);
4407 int tracing_open_generic(struct inode *inode, struct file *filp)
4411 ret = tracing_check_open_get_tr(NULL);
4415 filp->private_data = inode->i_private;
4419 bool tracing_is_disabled(void)
4421	return tracing_disabled ? true : false;
4425 * Open and update trace_array ref count.
4426 * Must have the current trace_array passed to it.
4428 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4430 struct trace_array *tr = inode->i_private;
4433 ret = tracing_check_open_get_tr(tr);
4437 filp->private_data = inode->i_private;
4442 static int tracing_release(struct inode *inode, struct file *file)
4444 struct trace_array *tr = inode->i_private;
4445 struct seq_file *m = file->private_data;
4446 struct trace_iterator *iter;
4449 if (!(file->f_mode & FMODE_READ)) {
4450 trace_array_put(tr);
4454 /* Writes do not use seq_file */
4456 mutex_lock(&trace_types_lock);
4458 for_each_tracing_cpu(cpu) {
4459 if (iter->buffer_iter[cpu])
4460 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4463 if (iter->trace && iter->trace->close)
4464 iter->trace->close(iter);
4466 if (!iter->snapshot && tr->stop_count)
4467 /* reenable tracing if it was previously enabled */
4468 tracing_start_tr(tr);
4470 __trace_array_put(tr);
4472 mutex_unlock(&trace_types_lock);
4474 mutex_destroy(&iter->mutex);
4475 free_cpumask_var(iter->started);
4478 kfree(iter->buffer_iter);
4479 seq_release_private(inode, file);
4484 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4486 struct trace_array *tr = inode->i_private;
4488 trace_array_put(tr);
4492 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4494 struct trace_array *tr = inode->i_private;
4496 trace_array_put(tr);
4498 return single_release(inode, file);
4501 static int tracing_open(struct inode *inode, struct file *file)
4503 struct trace_array *tr = inode->i_private;
4504 struct trace_iterator *iter;
4507 ret = tracing_check_open_get_tr(tr);
4511 /* If this file was open for write, then erase contents */
4512 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4513 int cpu = tracing_get_cpu(inode);
4514 struct array_buffer *trace_buf = &tr->array_buffer;
4516 #ifdef CONFIG_TRACER_MAX_TRACE
4517 if (tr->current_trace->print_max)
4518 trace_buf = &tr->max_buffer;
4521 if (cpu == RING_BUFFER_ALL_CPUS)
4522 tracing_reset_online_cpus(trace_buf);
4524 tracing_reset_cpu(trace_buf, cpu);
4527 if (file->f_mode & FMODE_READ) {
4528 iter = __tracing_open(inode, file, false);
4530 ret = PTR_ERR(iter);
4531 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4532 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4536 trace_array_put(tr);
4542 * Some tracers are not suitable for instance buffers.
4543 * A tracer is always available for the global array (toplevel)
4544 * or if it explicitly states that it is.
4547 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4549 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4552 /* Find the next tracer that this trace array may use */
4553 static struct tracer *
4554 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4556 while (t && !trace_ok_for_array(t, tr))
4563 t_next(struct seq_file *m, void *v, loff_t *pos)
4565 struct trace_array *tr = m->private;
4566 struct tracer *t = v;
4571 t = get_tracer_for_array(tr, t->next);
4576 static void *t_start(struct seq_file *m, loff_t *pos)
4578 struct trace_array *tr = m->private;
4582 mutex_lock(&trace_types_lock);
4584 t = get_tracer_for_array(tr, trace_types);
4585 for (; t && l < *pos; t = t_next(m, t, &l))
4591 static void t_stop(struct seq_file *m, void *p)
4593 mutex_unlock(&trace_types_lock);
4596 static int t_show(struct seq_file *m, void *v)
4598 struct tracer *t = v;
4603 seq_puts(m, t->name);
4612 static const struct seq_operations show_traces_seq_ops = {
4619 static int show_traces_open(struct inode *inode, struct file *file)
4621 struct trace_array *tr = inode->i_private;
4625 ret = tracing_check_open_get_tr(tr);
4629 ret = seq_open(file, &show_traces_seq_ops);
4631 trace_array_put(tr);
4635 m = file->private_data;
4641 static int show_traces_release(struct inode *inode, struct file *file)
4643 struct trace_array *tr = inode->i_private;
4645 trace_array_put(tr);
4646 return seq_release(inode, file);
4650 tracing_write_stub(struct file *filp, const char __user *ubuf,
4651 size_t count, loff_t *ppos)
4656 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4660 if (file->f_mode & FMODE_READ)
4661 ret = seq_lseek(file, offset, whence);
4663 file->f_pos = ret = 0;
4668 static const struct file_operations tracing_fops = {
4669 .open = tracing_open,
4671 .write = tracing_write_stub,
4672 .llseek = tracing_lseek,
4673 .release = tracing_release,
4676 static const struct file_operations show_traces_fops = {
4677 .open = show_traces_open,
4679 .llseek = seq_lseek,
4680 .release = show_traces_release,
4684 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4685 size_t count, loff_t *ppos)
4687 struct trace_array *tr = file_inode(filp)->i_private;
4691 len = snprintf(NULL, 0, "%*pb\n",
4692 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4693 mask_str = kmalloc(len, GFP_KERNEL);
4697 len = snprintf(mask_str, len, "%*pb\n",
4698 cpumask_pr_args(tr->tracing_cpumask));
4703 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4711 int tracing_set_cpumask(struct trace_array *tr,
4712 cpumask_var_t tracing_cpumask_new)
4719 local_irq_disable();
4720 arch_spin_lock(&tr->max_lock);
4721 for_each_tracing_cpu(cpu) {
4723 * Increase/decrease the disabled counter if we are
4724 * about to flip a bit in the cpumask:
4726 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4727 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4728 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4729 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4731 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4732 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4733 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4734 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4737 arch_spin_unlock(&tr->max_lock);
4740 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4746 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4747 size_t count, loff_t *ppos)
4749 struct trace_array *tr = file_inode(filp)->i_private;
4750 cpumask_var_t tracing_cpumask_new;
4753 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4756 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4760 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4764 free_cpumask_var(tracing_cpumask_new);
4769 free_cpumask_var(tracing_cpumask_new);
4774 static const struct file_operations tracing_cpumask_fops = {
4775 .open = tracing_open_generic_tr,
4776 .read = tracing_cpumask_read,
4777 .write = tracing_cpumask_write,
4778 .release = tracing_release_generic_tr,
4779 .llseek = generic_file_llseek,
4782 static int tracing_trace_options_show(struct seq_file *m, void *v)
4784 struct tracer_opt *trace_opts;
4785 struct trace_array *tr = m->private;
4789 mutex_lock(&trace_types_lock);
4790 tracer_flags = tr->current_trace->flags->val;
4791 trace_opts = tr->current_trace->flags->opts;
4793 for (i = 0; trace_options[i]; i++) {
4794 if (tr->trace_flags & (1 << i))
4795 seq_printf(m, "%s\n", trace_options[i]);
4797 seq_printf(m, "no%s\n", trace_options[i]);
4800 for (i = 0; trace_opts[i].name; i++) {
4801 if (tracer_flags & trace_opts[i].bit)
4802 seq_printf(m, "%s\n", trace_opts[i].name);
4804 seq_printf(m, "no%s\n", trace_opts[i].name);
4806 mutex_unlock(&trace_types_lock);
4811 static int __set_tracer_option(struct trace_array *tr,
4812 struct tracer_flags *tracer_flags,
4813 struct tracer_opt *opts, int neg)
4815 struct tracer *trace = tracer_flags->trace;
4818 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4823 tracer_flags->val &= ~opts->bit;
4825 tracer_flags->val |= opts->bit;
4829 /* Try to assign a tracer specific option */
4830 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4832 struct tracer *trace = tr->current_trace;
4833 struct tracer_flags *tracer_flags = trace->flags;
4834 struct tracer_opt *opts = NULL;
4837 for (i = 0; tracer_flags->opts[i].name; i++) {
4838 opts = &tracer_flags->opts[i];
4840 if (strcmp(cmp, opts->name) == 0)
4841 return __set_tracer_option(tr, trace->flags, opts, neg);
4847 /* Some tracers require overwrite to stay enabled */
4848 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4850 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4856 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4858 if ((mask == TRACE_ITER_RECORD_TGID) ||
4859 (mask == TRACE_ITER_RECORD_CMD))
4860 lockdep_assert_held(&event_mutex);
4862 /* do nothing if flag is already set */
4863 if (!!(tr->trace_flags & mask) == !!enabled)
4866 /* Give the tracer a chance to approve the change */
4867 if (tr->current_trace->flag_changed)
4868 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4872 tr->trace_flags |= mask;
4874 tr->trace_flags &= ~mask;
4876 if (mask == TRACE_ITER_RECORD_CMD)
4877 trace_event_enable_cmd_record(enabled);
4879 if (mask == TRACE_ITER_RECORD_TGID) {
4881 tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4885 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4889 trace_event_enable_tgid_record(enabled);
4892 if (mask == TRACE_ITER_EVENT_FORK)
4893 trace_event_follow_fork(tr, enabled);
4895 if (mask == TRACE_ITER_FUNC_FORK)
4896 ftrace_pid_follow_fork(tr, enabled);
4898 if (mask == TRACE_ITER_OVERWRITE) {
4899 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4900 #ifdef CONFIG_TRACER_MAX_TRACE
4901 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4905 if (mask == TRACE_ITER_PRINTK) {
4906 trace_printk_start_stop_comm(enabled);
4907 trace_printk_control(enabled);
4913 int trace_set_options(struct trace_array *tr, char *option)
4918 size_t orig_len = strlen(option);
4921 cmp = strstrip(option);
4923 len = str_has_prefix(cmp, "no");
4929 mutex_lock(&event_mutex);
4930 mutex_lock(&trace_types_lock);
4932 ret = match_string(trace_options, -1, cmp);
4933 /* If no option could be set, test the specific tracer options */
4935 ret = set_tracer_option(tr, cmp, neg);
4937 ret = set_tracer_flag(tr, 1 << ret, !neg);
4939 mutex_unlock(&trace_types_lock);
4940 mutex_unlock(&event_mutex);
4943 * If the first trailing whitespace is replaced with '\0' by strstrip,
4944 * turn it back into a space.
4946 if (orig_len > strlen(option))
4947 option[strlen(option)] = ' ';
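/*
 * A minimal sketch of setting options programmatically through
 * trace_set_options() above. The strings are the same ones user space echoes
 * into the "trace_options" file; a leading "no" clears the flag. The example
 * function is hypothetical and assumes "print-parent" is one of the entries
 * in trace_options[].
 */
#if 0
static void example_toggle_print_parent(void)
{
	/* writable buffers: trace_set_options() strips them in place */
	char opt_on[]  = "print-parent";
	char opt_off[] = "noprint-parent";

	trace_set_options(&global_trace, opt_on);
	trace_set_options(&global_trace, opt_off);
}
#endif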
4952 static void __init apply_trace_boot_options(void)
4954 char *buf = trace_boot_options_buf;
4958 option = strsep(&buf, ",");
4964 trace_set_options(&global_trace, option);
4966 /* Put back the comma to allow this to be called again */
4973 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4974 size_t cnt, loff_t *ppos)
4976 struct seq_file *m = filp->private_data;
4977 struct trace_array *tr = m->private;
4981 if (cnt >= sizeof(buf))
4984 if (copy_from_user(buf, ubuf, cnt))
4989 ret = trace_set_options(tr, buf);
4998 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5000 struct trace_array *tr = inode->i_private;
5003 ret = tracing_check_open_get_tr(tr);
5007 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5009 trace_array_put(tr);
5014 static const struct file_operations tracing_iter_fops = {
5015 .open = tracing_trace_options_open,
5017 .llseek = seq_lseek,
5018 .release = tracing_single_release_tr,
5019 .write = tracing_trace_options_write,
5022 static const char readme_msg[] =
5023 "tracing mini-HOWTO:\n\n"
5024 "# echo 0 > tracing_on : quick way to disable tracing\n"
5025 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5026 " Important files:\n"
5027 " trace\t\t\t- The static contents of the buffer\n"
5028 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5029 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5030 " current_tracer\t- function and latency tracers\n"
5031 " available_tracers\t- list of configured tracers for current_tracer\n"
5032 " error_log\t- error log for failed commands (that support it)\n"
5033 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5034 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5035 " trace_clock\t\t-change the clock used to order events\n"
5036 " local: Per cpu clock but may not be synced across CPUs\n"
5037 " global: Synced across CPUs but slows tracing down.\n"
5038 " counter: Not a clock, but just an increment\n"
5039 " uptime: Jiffy counter from time of boot\n"
5040 " perf: Same clock that perf events use\n"
5041 #ifdef CONFIG_X86_64
5042 " x86-tsc: TSC cycle counter\n"
5044 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5045 " delta: Delta difference against a buffer-wide timestamp\n"
5046 " absolute: Absolute (standalone) timestamp\n"
5047 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5048 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5049 " tracing_cpumask\t- Limit which CPUs to trace\n"
5050 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5051 "\t\t\t Remove sub-buffer with rmdir\n"
5052 " trace_options\t\t- Set format or modify how tracing happens\n"
5053 "\t\t\t Disable an option by prefixing 'no' to the\n"
5054 "\t\t\t option name\n"
5055 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5056 #ifdef CONFIG_DYNAMIC_FTRACE
5057 "\n available_filter_functions - list of functions that can be filtered on\n"
5058 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5059 "\t\t\t functions\n"
5060 "\t accepts: func_full_name or glob-matching-pattern\n"
5061 "\t modules: Can select a group via module\n"
5062 "\t Format: :mod:<module-name>\n"
5063 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5064 "\t triggers: a command to perform when function is hit\n"
5065 "\t Format: <function>:<trigger>[:count]\n"
5066 "\t trigger: traceon, traceoff\n"
5067 "\t\t enable_event:<system>:<event>\n"
5068 "\t\t disable_event:<system>:<event>\n"
5069 #ifdef CONFIG_STACKTRACE
5072 #ifdef CONFIG_TRACER_SNAPSHOT
5077 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5078 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5079 "\t The first one will disable tracing every time do_fault is hit\n"
5080 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5081 "\t The first time do trap is hit and it disables tracing, the\n"
5082 "\t counter will decrement to 2. If tracing is already disabled,\n"
5083 "\t the counter will not decrement. It only decrements when the\n"
5084 "\t trigger did work\n"
5085 "\t To remove trigger without count:\n"
5086 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5087 "\t To remove trigger with a count:\n"
5088 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5089 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5090 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5091 "\t modules: Can select a group via module command :mod:\n"
5092 "\t Does not accept triggers\n"
5093 #endif /* CONFIG_DYNAMIC_FTRACE */
5094 #ifdef CONFIG_FUNCTION_TRACER
5095 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5097 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5100 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5101 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5102 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5103 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5105 #ifdef CONFIG_TRACER_SNAPSHOT
5106 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5107 "\t\t\t snapshot buffer. Read the contents for more\n"
5108 "\t\t\t information\n"
5110 #ifdef CONFIG_STACK_TRACER
5111 " stack_trace\t\t- Shows the max stack trace when active\n"
5112 " stack_max_size\t- Shows current max stack size that was traced\n"
5113 "\t\t\t Write into this file to reset the max size (trigger a\n"
5114 "\t\t\t new trace)\n"
5115 #ifdef CONFIG_DYNAMIC_FTRACE
5116 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5119 #endif /* CONFIG_STACK_TRACER */
5120 #ifdef CONFIG_DYNAMIC_EVENTS
5121 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5122 "\t\t\t Write into this file to define/undefine new trace events.\n"
5124 #ifdef CONFIG_KPROBE_EVENTS
5125 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5126 "\t\t\t Write into this file to define/undefine new trace events.\n"
5128 #ifdef CONFIG_UPROBE_EVENTS
5129 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5130 "\t\t\t Write into this file to define/undefine new trace events.\n"
5132 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5133 "\t accepts: event-definitions (one definition per line)\n"
5134 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5135 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5136 #ifdef CONFIG_HIST_TRIGGERS
5137 "\t s:[synthetic/]<event> <field> [<field>]\n"
5139 "\t -:[<group>/]<event>\n"
5140 #ifdef CONFIG_KPROBE_EVENTS
5141 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5142 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5144 #ifdef CONFIG_UPROBE_EVENTS
5145 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5147 "\t args: <name>=fetcharg[:type]\n"
5148 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5149 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5150 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5152 "\t $stack<index>, $stack, $retval, $comm,\n"
5154 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5155 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5156 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5157 "\t <type>\\[<array-size>\\]\n"
5158 #ifdef CONFIG_HIST_TRIGGERS
5159 "\t field: <stype> <name>;\n"
5160 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5161 "\t [unsigned] char/int/long\n"
5164 " events/\t\t- Directory containing all trace event subsystems:\n"
5165 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5166 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5167 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5169 " filter\t\t- If set, only events passing filter are traced\n"
5170 " events/<system>/<event>/\t- Directory containing control files for\n"
5172 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5173 " filter\t\t- If set, only events passing filter are traced\n"
5174 " trigger\t\t- If set, a command to perform when event is hit\n"
5175 "\t Format: <trigger>[:count][if <filter>]\n"
5176 "\t trigger: traceon, traceoff\n"
5177 "\t enable_event:<system>:<event>\n"
5178 "\t disable_event:<system>:<event>\n"
5179 #ifdef CONFIG_HIST_TRIGGERS
5180 "\t enable_hist:<system>:<event>\n"
5181 "\t disable_hist:<system>:<event>\n"
5183 #ifdef CONFIG_STACKTRACE
5186 #ifdef CONFIG_TRACER_SNAPSHOT
5189 #ifdef CONFIG_HIST_TRIGGERS
5190 "\t\t hist (see below)\n"
5192 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5193 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5194 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5195 "\t events/block/block_unplug/trigger\n"
5196 "\t The first disables tracing every time block_unplug is hit.\n"
5197 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5198 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5199 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5200 "\t Like function triggers, the counter is only decremented if it\n"
5201 "\t enabled or disabled tracing.\n"
5202 "\t To remove a trigger without a count:\n"
5203 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5204 "\t To remove a trigger with a count:\n"
5205 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5206 "\t Filters can be ignored when removing a trigger.\n"
5207 #ifdef CONFIG_HIST_TRIGGERS
5208 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5209 "\t Format: hist:keys=<field1[,field2,...]>\n"
5210 "\t [:values=<field1[,field2,...]>]\n"
5211 "\t [:sort=<field1[,field2,...]>]\n"
5212 "\t [:size=#entries]\n"
5213 "\t [:pause][:continue][:clear]\n"
5214 "\t [:name=histname1]\n"
5215 "\t [:<handler>.<action>]\n"
5216 "\t [if <filter>]\n\n"
5217 "\t When a matching event is hit, an entry is added to a hash\n"
5218 "\t table using the key(s) and value(s) named, and the value of a\n"
5219 "\t sum called 'hitcount' is incremented. Keys and values\n"
5220 "\t correspond to fields in the event's format description. Keys\n"
5221 "\t can be any field, or the special string 'stacktrace'.\n"
5222 "\t Compound keys consisting of up to two fields can be specified\n"
5223 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5224 "\t fields. Sort keys consisting of up to two fields can be\n"
5225 "\t specified using the 'sort' keyword. The sort direction can\n"
5226 "\t be modified by appending '.descending' or '.ascending' to a\n"
5227 "\t sort field. The 'size' parameter can be used to specify more\n"
5228 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5229 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5230 "\t its histogram data will be shared with other triggers of the\n"
5231 "\t same name, and trigger hits will update this common data.\n\n"
5232 "\t Reading the 'hist' file for the event will dump the hash\n"
5233 "\t table in its entirety to stdout. If there are multiple hist\n"
5234 "\t triggers attached to an event, there will be a table for each\n"
5235 "\t trigger in the output. The table displayed for a named\n"
5236 "\t trigger will be the same as any other instance having the\n"
5237 "\t same name. The default format used to display a given field\n"
5238 "\t can be modified by appending any of the following modifiers\n"
5239 "\t to the field name, as applicable:\n\n"
5240 "\t .hex display a number as a hex value\n"
5241 "\t .sym display an address as a symbol\n"
5242 "\t .sym-offset display an address as a symbol and offset\n"
5243 "\t .execname display a common_pid as a program name\n"
5244 "\t .syscall display a syscall id as a syscall name\n"
5245 "\t .log2 display log2 value rather than raw number\n"
5246 "\t .usecs display a common_timestamp in microseconds\n\n"
5247 "\t The 'pause' parameter can be used to pause an existing hist\n"
5248 "\t trigger or to start a hist trigger but not log any events\n"
5249 "\t until told to do so. 'continue' can be used to start or\n"
5250 "\t restart a paused hist trigger.\n\n"
5251 "\t The 'clear' parameter will clear the contents of a running\n"
5252 "\t hist trigger and leave its current paused/active state\n"
5254 "\t The enable_hist and disable_hist triggers can be used to\n"
5255 "\t have one event conditionally start and stop another event's\n"
5256 "\t already-attached hist trigger. The syntax is analogous to\n"
5257 "\t the enable_event and disable_event triggers.\n\n"
5258 "\t Hist trigger handlers and actions are executed whenever a\n"
5259 "\t a histogram entry is added or updated. They take the form:\n\n"
5260 "\t <handler>.<action>\n\n"
5261 "\t The available handlers are:\n\n"
5262 "\t onmatch(matching.event) - invoke on addition or update\n"
5263 "\t onmax(var) - invoke if var exceeds current max\n"
5264 "\t onchange(var) - invoke action if var changes\n\n"
5265 "\t The available actions are:\n\n"
5266 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5267 "\t save(field,...) - save current event fields\n"
5268 #ifdef CONFIG_TRACER_SNAPSHOT
5269 "\t snapshot() - snapshot the trace buffer\n\n"
5271 #ifdef CONFIG_SYNTH_EVENTS
5272 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5273 "\t Write into this file to define/undefine new synthetic events.\n"
5274 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5280 tracing_readme_read(struct file *filp, char __user *ubuf,
5281 size_t cnt, loff_t *ppos)
5283 return simple_read_from_buffer(ubuf, cnt, ppos,
5284 readme_msg, strlen(readme_msg));
5287 static const struct file_operations tracing_readme_fops = {
5288 .open = tracing_open_generic,
5289 .read = tracing_readme_read,
5290 .llseek = generic_file_llseek,
5293 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5297 if (*pos || m->count)
5302 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5303 if (trace_find_tgid(*ptr))
5310 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5320 v = saved_tgids_next(m, v, &l);
5328 static void saved_tgids_stop(struct seq_file *m, void *v)
5332 static int saved_tgids_show(struct seq_file *m, void *v)
5334 int pid = (int *)v - tgid_map;
5336 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5340 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5341 .start = saved_tgids_start,
5342 .stop = saved_tgids_stop,
5343 .next = saved_tgids_next,
5344 .show = saved_tgids_show,
5347 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5351 ret = tracing_check_open_get_tr(NULL);
5355 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5359 static const struct file_operations tracing_saved_tgids_fops = {
5360 .open = tracing_saved_tgids_open,
5362 .llseek = seq_lseek,
5363 .release = seq_release,
5366 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5368 unsigned int *ptr = v;
5370 if (*pos || m->count)
5375 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5377 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5386 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5392 arch_spin_lock(&trace_cmdline_lock);
5394 v = &savedcmd->map_cmdline_to_pid[0];
5396 v = saved_cmdlines_next(m, v, &l);
5404 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5406 arch_spin_unlock(&trace_cmdline_lock);
5410 static int saved_cmdlines_show(struct seq_file *m, void *v)
5412 char buf[TASK_COMM_LEN];
5413 unsigned int *pid = v;
5415 __trace_find_cmdline(*pid, buf);
5416 seq_printf(m, "%d %s\n", *pid, buf);
5420 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5421 .start = saved_cmdlines_start,
5422 .next = saved_cmdlines_next,
5423 .stop = saved_cmdlines_stop,
5424 .show = saved_cmdlines_show,
5427 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5431 ret = tracing_check_open_get_tr(NULL);
5435 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5438 static const struct file_operations tracing_saved_cmdlines_fops = {
5439 .open = tracing_saved_cmdlines_open,
5441 .llseek = seq_lseek,
5442 .release = seq_release,
5446 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5447 size_t cnt, loff_t *ppos)
5452 arch_spin_lock(&trace_cmdline_lock);
5453 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5454 arch_spin_unlock(&trace_cmdline_lock);
5456 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5459 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5461 kfree(s->saved_cmdlines);
5462 kfree(s->map_cmdline_to_pid);
5466 static int tracing_resize_saved_cmdlines(unsigned int val)
5468 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5470 s = kmalloc(sizeof(*s), GFP_KERNEL);
5474 if (allocate_cmdlines_buffer(val, s) < 0) {
5479 arch_spin_lock(&trace_cmdline_lock);
5480 savedcmd_temp = savedcmd;
5482 arch_spin_unlock(&trace_cmdline_lock);
5483 free_saved_cmdlines_buffer(savedcmd_temp);
5489 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5490 size_t cnt, loff_t *ppos)
5495 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5499 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5500 if (!val || val > PID_MAX_DEFAULT)
5503 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5512 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5513 .open = tracing_open_generic,
5514 .read = tracing_saved_cmdlines_size_read,
5515 .write = tracing_saved_cmdlines_size_write,
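/*
 * Typical user-space usage of the files wired up above, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/saved_cmdlines           # pid -> comm pairs
 *   cat /sys/kernel/tracing/saved_cmdlines_size      # current capacity
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The new size must be between 1 and PID_MAX_DEFAULT, as checked in
 * tracing_saved_cmdlines_size_write() above.
 */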
5518 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5519 static union trace_eval_map_item *
5520 update_eval_map(union trace_eval_map_item *ptr)
5522 if (!ptr->map.eval_string) {
5523 if (ptr->tail.next) {
5524 ptr = ptr->tail.next;
5525 /* Set ptr to the next real item (skip head) */
5533 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5535 union trace_eval_map_item *ptr = v;
5538 * Paranoid! If ptr points to end, we don't want to increment past it.
5539 * This really should never happen.
5542 ptr = update_eval_map(ptr);
5543 if (WARN_ON_ONCE(!ptr))
5547 ptr = update_eval_map(ptr);
5552 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5554 union trace_eval_map_item *v;
5557 mutex_lock(&trace_eval_mutex);
5559 v = trace_eval_maps;
5563 while (v && l < *pos) {
5564 v = eval_map_next(m, v, &l);
5570 static void eval_map_stop(struct seq_file *m, void *v)
5572 mutex_unlock(&trace_eval_mutex);
5575 static int eval_map_show(struct seq_file *m, void *v)
5577 union trace_eval_map_item *ptr = v;
5579 seq_printf(m, "%s %ld (%s)\n",
5580 ptr->map.eval_string, ptr->map.eval_value,
5586 static const struct seq_operations tracing_eval_map_seq_ops = {
5587 .start = eval_map_start,
5588 .next = eval_map_next,
5589 .stop = eval_map_stop,
5590 .show = eval_map_show,
5593 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5597 ret = tracing_check_open_get_tr(NULL);
5601 return seq_open(filp, &tracing_eval_map_seq_ops);
5604 static const struct file_operations tracing_eval_map_fops = {
5605 .open = tracing_eval_map_open,
5607 .llseek = seq_lseek,
5608 .release = seq_release,
5611 static inline union trace_eval_map_item *
5612 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5614 /* Return tail of array given the head */
5615 return ptr + ptr->head.length + 1;
5619 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5622 struct trace_eval_map **stop;
5623 struct trace_eval_map **map;
5624 union trace_eval_map_item *map_array;
5625 union trace_eval_map_item *ptr;
5630 * The trace_eval_maps contains the map plus a head and tail item,
5631 * where the head holds the module and length of array, and the
5632 * tail holds a pointer to the next list.
5634 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5636 pr_warn("Unable to allocate trace eval mapping\n");
5640 mutex_lock(&trace_eval_mutex);
5642 if (!trace_eval_maps)
5643 trace_eval_maps = map_array;
5645 ptr = trace_eval_maps;
5647 ptr = trace_eval_jmp_to_tail(ptr);
5648 if (!ptr->tail.next)
5650 ptr = ptr->tail.next;
5653 ptr->tail.next = map_array;
5655 map_array->head.mod = mod;
5656 map_array->head.length = len;
5659 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5660 map_array->map = **map;
5663 memset(map_array, 0, sizeof(*map_array));
5665 mutex_unlock(&trace_eval_mutex);
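/*
 * Layout of one chunk linked into trace_eval_maps, as built above (a sketch
 * of the structure described by the comment before the allocation):
 *
 *   map_array[0]          head { mod, length = len }
 *   map_array[1 .. len]   map  - one trace_eval_map copied per entry
 *   map_array[len + 1]    tail { next -> head of the next chunk, or NULL }
 *
 * This is why trace_eval_jmp_to_tail() returns ptr + ptr->head.length + 1.
 */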
5668 static void trace_create_eval_file(struct dentry *d_tracer)
5670 trace_create_file("eval_map", 0444, d_tracer,
5671 NULL, &tracing_eval_map_fops);
5674 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5675 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5676 static inline void trace_insert_eval_map_file(struct module *mod,
5677 struct trace_eval_map **start, int len) { }
5678 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5680 static void trace_insert_eval_map(struct module *mod,
5681 struct trace_eval_map **start, int len)
5683 struct trace_eval_map **map;
5690 trace_event_eval_update(map, len);
5692 trace_insert_eval_map_file(mod, start, len);
5696 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5697 size_t cnt, loff_t *ppos)
5699 struct trace_array *tr = filp->private_data;
5700 char buf[MAX_TRACER_SIZE+2];
5703 mutex_lock(&trace_types_lock);
5704 r = sprintf(buf, "%s\n", tr->current_trace->name);
5705 mutex_unlock(&trace_types_lock);
5707 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5710 int tracer_init(struct tracer *t, struct trace_array *tr)
5712 tracing_reset_online_cpus(&tr->array_buffer);
5716 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5720 for_each_tracing_cpu(cpu)
5721 per_cpu_ptr(buf->data, cpu)->entries = val;
5724 #ifdef CONFIG_TRACER_MAX_TRACE
5725 /* resize @tr's buffer to the size of @size_tr's entries */
5726 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5727 struct array_buffer *size_buf, int cpu_id)
5731 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5732 for_each_tracing_cpu(cpu) {
5733 ret = ring_buffer_resize(trace_buf->buffer,
5734 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5737 per_cpu_ptr(trace_buf->data, cpu)->entries =
5738 per_cpu_ptr(size_buf->data, cpu)->entries;
5741 ret = ring_buffer_resize(trace_buf->buffer,
5742 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5744 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5745 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5750 #endif /* CONFIG_TRACER_MAX_TRACE */
5752 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5753 unsigned long size, int cpu)
5758 * If kernel or user changes the size of the ring buffer
5759 * we use the size that was given, and we can forget about
5760 * expanding it later.
5762 ring_buffer_expanded = true;
5764 /* May be called before buffers are initialized */
5765 if (!tr->array_buffer.buffer)
5768 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5772 #ifdef CONFIG_TRACER_MAX_TRACE
5773 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5774 !tr->current_trace->use_max_tr)
5777 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5779 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5780 &tr->array_buffer, cpu);
5783 * AARGH! We are left with different
5784 * size max buffer!!!!
5785 * The max buffer is our "snapshot" buffer.
5786 * When a tracer needs a snapshot (one of the
5787 * latency tracers), it swaps the max buffer
5788 * with the saved snapshot. We succeeded in updating
5789 * the size of the main buffer, but failed to
5790 * update the size of the max buffer. But when we tried
5791 * to reset the main buffer to the original size, we
5792 * failed there too. This is very unlikely to
5793 * happen, but if it does, warn and kill all tracing.
5797 tracing_disabled = 1;
5802 if (cpu == RING_BUFFER_ALL_CPUS)
5803 set_buffer_entries(&tr->max_buffer, size);
5805 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5808 #endif /* CONFIG_TRACER_MAX_TRACE */
5810 if (cpu == RING_BUFFER_ALL_CPUS)
5811 set_buffer_entries(&tr->array_buffer, size);
5813 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5818 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5819 unsigned long size, int cpu_id)
5823 mutex_lock(&trace_types_lock);
5825 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5826 /* make sure this cpu is enabled in the mask */
5827 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5833 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5838 mutex_unlock(&trace_types_lock);
5845 * tracing_update_buffers - used by tracing facility to expand ring buffers
5847 * To save memory when tracing is configured in but never used, the ring
5848 * buffers are initially set to a minimum size. Once a user starts to use
5849 * the tracing facility, they need to grow to their default size.
5852 * This function is to be called when a tracer is about to be used.
5854 int tracing_update_buffers(void)
5858 mutex_lock(&trace_types_lock);
5859 if (!ring_buffer_expanded)
5860 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5861 RING_BUFFER_ALL_CPUS);
5862 mutex_unlock(&trace_types_lock);
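/*
 * A minimal sketch of the calling pattern the kernel-doc above describes:
 * a facility that is about to start tracing first makes sure the ring
 * buffers have been expanded from their boot-time minimum. The example
 * function name is hypothetical.
 */
#if 0
static int example_prepare_to_trace(void)
{
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	/* buffers are now at full size; safe to start writing events */
	return 0;
}
#endif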
5867 struct trace_option_dentry;
5870 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5873 * Used to clear out the tracer before deletion of an instance.
5874 * Must have trace_types_lock held.
5876 static void tracing_set_nop(struct trace_array *tr)
5878 if (tr->current_trace == &nop_trace)
5881 tr->current_trace->enabled--;
5883 if (tr->current_trace->reset)
5884 tr->current_trace->reset(tr);
5886 tr->current_trace = &nop_trace;
5889 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5891 /* Only enable if the directory has been created already. */
5895 create_trace_option_files(tr, t);
5898 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5901 #ifdef CONFIG_TRACER_MAX_TRACE
5906 mutex_lock(&trace_types_lock);
5908 if (!ring_buffer_expanded) {
5909 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5910 RING_BUFFER_ALL_CPUS);
5916 for (t = trace_types; t; t = t->next) {
5917 if (strcmp(t->name, buf) == 0)
5924 if (t == tr->current_trace)
5927 #ifdef CONFIG_TRACER_SNAPSHOT
5928 if (t->use_max_tr) {
5929 arch_spin_lock(&tr->max_lock);
5930 if (tr->cond_snapshot)
5932 arch_spin_unlock(&tr->max_lock);
5937 /* Some tracers won't work on kernel command line */
5938 if (system_state < SYSTEM_RUNNING && t->noboot) {
5939 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5944 /* Some tracers are only allowed for the top level buffer */
5945 if (!trace_ok_for_array(t, tr)) {
5950 /* If trace pipe files are being read, we can't change the tracer */
5951 if (tr->trace_ref) {
5956 trace_branch_disable();
5958 tr->current_trace->enabled--;
5960 if (tr->current_trace->reset)
5961 tr->current_trace->reset(tr);
5963 /* Current trace needs to be nop_trace before synchronize_rcu */
5964 tr->current_trace = &nop_trace;
5966 #ifdef CONFIG_TRACER_MAX_TRACE
5967 had_max_tr = tr->allocated_snapshot;
5969 if (had_max_tr && !t->use_max_tr) {
5971 * We need to make sure that the update_max_tr sees that
5972 * current_trace changed to nop_trace to keep it from
5973 * swapping the buffers after we resize it.
5974 * The update_max_tr is called with interrupts disabled,
5975 * so a synchronize_rcu() is sufficient.
5982 #ifdef CONFIG_TRACER_MAX_TRACE
5983 if (t->use_max_tr && !had_max_tr) {
5984 ret = tracing_alloc_snapshot_instance(tr);
5991 ret = tracer_init(t, tr);
5996 tr->current_trace = t;
5997 tr->current_trace->enabled++;
5998 trace_branch_enable(tr);
6000 mutex_unlock(&trace_types_lock);
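/*
 * tracing_set_tracer() above backs the "current_tracer" file; from user
 * space the same switch looks like this (assuming tracefs is mounted at
 * /sys/kernel/tracing and the function tracer is configured in):
 *
 *   cat /sys/kernel/tracing/available_tracers
 *   echo function > /sys/kernel/tracing/current_tracer
 *   echo nop > /sys/kernel/tracing/current_tracer     # detach the tracer
 */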
6006 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6007 size_t cnt, loff_t *ppos)
6009 struct trace_array *tr = filp->private_data;
6010 char buf[MAX_TRACER_SIZE+1];
6017 if (cnt > MAX_TRACER_SIZE)
6018 cnt = MAX_TRACER_SIZE;
6020 if (copy_from_user(buf, ubuf, cnt))
6025 /* strip trailing whitespace. */
6026 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6029 err = tracing_set_tracer(tr, buf);
6039 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6040 size_t cnt, loff_t *ppos)
6045 r = snprintf(buf, sizeof(buf), "%ld\n",
6046 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6047 if (r > sizeof(buf))
6049 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6053 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6054 size_t cnt, loff_t *ppos)
6059 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6069 tracing_thresh_read(struct file *filp, char __user *ubuf,
6070 size_t cnt, loff_t *ppos)
6072 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6076 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6077 size_t cnt, loff_t *ppos)
6079 struct trace_array *tr = filp->private_data;
6082 mutex_lock(&trace_types_lock);
6083 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6087 if (tr->current_trace->update_thresh) {
6088 ret = tr->current_trace->update_thresh(tr);
6095 mutex_unlock(&trace_types_lock);
6100 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6103 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6104 size_t cnt, loff_t *ppos)
6106 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6110 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6111 size_t cnt, loff_t *ppos)
6113 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6118 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6120 struct trace_array *tr = inode->i_private;
6121 struct trace_iterator *iter;
6124 ret = tracing_check_open_get_tr(tr);
6128 mutex_lock(&trace_types_lock);
6130 /* create a buffer to store the information to pass to userspace */
6131 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6134 __trace_array_put(tr);
6138 trace_seq_init(&iter->seq);
6139 iter->trace = tr->current_trace;
6141 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6146 /* trace pipe does not show start of buffer */
6147 cpumask_setall(iter->started);
6149 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6150 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6152 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6153 if (trace_clocks[tr->clock_id].in_ns)
6154 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6157 iter->array_buffer = &tr->array_buffer;
6158 iter->cpu_file = tracing_get_cpu(inode);
6159 mutex_init(&iter->mutex);
6160 filp->private_data = iter;
6162 if (iter->trace->pipe_open)
6163 iter->trace->pipe_open(iter);
6165 nonseekable_open(inode, filp);
6169 mutex_unlock(&trace_types_lock);
6174 __trace_array_put(tr);
6175 mutex_unlock(&trace_types_lock);
6179 static int tracing_release_pipe(struct inode *inode, struct file *file)
6181 struct trace_iterator *iter = file->private_data;
6182 struct trace_array *tr = inode->i_private;
6184 mutex_lock(&trace_types_lock);
6188 if (iter->trace->pipe_close)
6189 iter->trace->pipe_close(iter);
6191 mutex_unlock(&trace_types_lock);
6193 free_cpumask_var(iter->started);
6194 mutex_destroy(&iter->mutex);
6197 trace_array_put(tr);
6203 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6205 struct trace_array *tr = iter->tr;
6207 /* Iterators are static, they should be filled or empty */
6208 if (trace_buffer_iter(iter, iter->cpu_file))
6209 return EPOLLIN | EPOLLRDNORM;
6211 if (tr->trace_flags & TRACE_ITER_BLOCK)
6213 * Always select as readable when in blocking mode
6215 return EPOLLIN | EPOLLRDNORM;
6217 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6222 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6224 struct trace_iterator *iter = filp->private_data;
6226 return trace_poll(iter, filp, poll_table);
6229 /* Must be called with iter->mutex held. */
6230 static int tracing_wait_pipe(struct file *filp)
6232 struct trace_iterator *iter = filp->private_data;
6235 while (trace_empty(iter)) {
6237 if ((filp->f_flags & O_NONBLOCK)) {
6242 * We block until we read something and tracing is disabled.
6243 * We still block if tracing is disabled, but we have never
6244 * read anything. This allows a user to cat this file, and
6245 * then enable tracing. But after we have read something,
6246 * we give an EOF when tracing is again disabled.
6248 * iter->pos will be 0 if we haven't read anything.
6250 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6253 mutex_unlock(&iter->mutex);
6255 ret = wait_on_pipe(iter, 0);
6257 mutex_lock(&iter->mutex);
6270 tracing_read_pipe(struct file *filp, char __user *ubuf,
6271 size_t cnt, loff_t *ppos)
6273 struct trace_iterator *iter = filp->private_data;
6277 * Avoid more than one consumer on a single file descriptor
6278 * This is just a matter of trace coherency; the ring buffer itself
6281 mutex_lock(&iter->mutex);
6283 /* return any leftover data */
6284 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6288 trace_seq_init(&iter->seq);
6290 if (iter->trace->read) {
6291 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6297 sret = tracing_wait_pipe(filp);
6301 /* stop when tracing is finished */
6302 if (trace_empty(iter)) {
6307 if (cnt >= PAGE_SIZE)
6308 cnt = PAGE_SIZE - 1;
6310 /* reset all but tr, trace, and overruns */
6311 memset(&iter->seq, 0,
6312 sizeof(struct trace_iterator) -
6313 offsetof(struct trace_iterator, seq));
6314 cpumask_clear(iter->started);
6315 trace_seq_init(&iter->seq);
6318 trace_event_read_lock();
6319 trace_access_lock(iter->cpu_file);
6320 while (trace_find_next_entry_inc(iter) != NULL) {
6321 enum print_line_t ret;
6322 int save_len = iter->seq.seq.len;
6324 ret = print_trace_line(iter);
6325 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6326 /* don't print partial lines */
6327 iter->seq.seq.len = save_len;
6330 if (ret != TRACE_TYPE_NO_CONSUME)
6331 trace_consume(iter);
6333 if (trace_seq_used(&iter->seq) >= cnt)
6337 * Setting the full flag means we reached the trace_seq buffer
6338 * size and we should have left via the partial output condition above.
6339 * One of the trace_seq_* functions is not used properly.
6341 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6344 trace_access_unlock(iter->cpu_file);
6345 trace_event_read_unlock();
6347 /* Now copy what we have to the user */
6348 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6349 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6350 trace_seq_init(&iter->seq);
6353 * If there was nothing to send to user, in spite of consuming trace
6354 * entries, go back to wait for more entries.
6360 mutex_unlock(&iter->mutex);
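/*
 * trace_pipe is a consuming read, unlike the "trace" file which leaves the
 * buffer contents in place. Typical usage, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/trace_pipe          # blocks until data arrives
 *   cat /sys/kernel/tracing/trace_pipe > log &  # drain continuously
 *
 * Opening with O_NONBLOCK makes tracing_wait_pipe() above return -EAGAIN
 * instead of sleeping when the buffer is empty.
 */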
6365 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6368 __free_page(spd->pages[idx]);
6372 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6378 /* Seq buffer is page-sized, exactly what we need. */
6380 save_len = iter->seq.seq.len;
6381 ret = print_trace_line(iter);
6383 if (trace_seq_has_overflowed(&iter->seq)) {
6384 iter->seq.seq.len = save_len;
6389 * This should not be hit, because it should only
6390 * be set if the iter->seq overflowed. But check it
6391 * anyway to be safe.
6393 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6394 iter->seq.seq.len = save_len;
6398 count = trace_seq_used(&iter->seq) - save_len;
6401 iter->seq.seq.len = save_len;
6405 if (ret != TRACE_TYPE_NO_CONSUME)
6406 trace_consume(iter);
6408 if (!trace_find_next_entry_inc(iter)) {
6418 static ssize_t tracing_splice_read_pipe(struct file *filp,
6420 struct pipe_inode_info *pipe,
6424 struct page *pages_def[PIPE_DEF_BUFFERS];
6425 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6426 struct trace_iterator *iter = filp->private_data;
6427 struct splice_pipe_desc spd = {
6429 .partial = partial_def,
6430 .nr_pages = 0, /* This gets updated below. */
6431 .nr_pages_max = PIPE_DEF_BUFFERS,
6432 .ops = &default_pipe_buf_ops,
6433 .spd_release = tracing_spd_release_pipe,
6439 if (splice_grow_spd(pipe, &spd))
6442 mutex_lock(&iter->mutex);
6444 if (iter->trace->splice_read) {
6445 ret = iter->trace->splice_read(iter, filp,
6446 ppos, pipe, len, flags);
6451 ret = tracing_wait_pipe(filp);
6455 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6460 trace_event_read_lock();
6461 trace_access_lock(iter->cpu_file);
6463 /* Fill as many pages as possible. */
6464 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6465 spd.pages[i] = alloc_page(GFP_KERNEL);
6469 rem = tracing_fill_pipe_page(rem, iter);
6471 /* Copy the data into the page, so we can start over. */
6472 ret = trace_seq_to_buffer(&iter->seq,
6473 page_address(spd.pages[i]),
6474 trace_seq_used(&iter->seq));
6476 __free_page(spd.pages[i]);
6479 spd.partial[i].offset = 0;
6480 spd.partial[i].len = trace_seq_used(&iter->seq);
6482 trace_seq_init(&iter->seq);
6485 trace_access_unlock(iter->cpu_file);
6486 trace_event_read_unlock();
6487 mutex_unlock(&iter->mutex);
6492 ret = splice_to_pipe(pipe, &spd);
6496 splice_shrink_spd(&spd);
6500 mutex_unlock(&iter->mutex);
6505 tracing_entries_read(struct file *filp, char __user *ubuf,
6506 size_t cnt, loff_t *ppos)
6508 struct inode *inode = file_inode(filp);
6509 struct trace_array *tr = inode->i_private;
6510 int cpu = tracing_get_cpu(inode);
6515 mutex_lock(&trace_types_lock);
6517 if (cpu == RING_BUFFER_ALL_CPUS) {
6518 int cpu, buf_size_same;
6523 /* check if all cpu sizes are same */
6524 for_each_tracing_cpu(cpu) {
6525 /* fill in the size from first enabled cpu */
6527 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6528 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6534 if (buf_size_same) {
6535 if (!ring_buffer_expanded)
6536 r = sprintf(buf, "%lu (expanded: %lu)\n",
6538 trace_buf_size >> 10);
6540 r = sprintf(buf, "%lu\n", size >> 10);
6542 r = sprintf(buf, "X\n");
6544 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6546 mutex_unlock(&trace_types_lock);
6548 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6553 tracing_entries_write(struct file *filp, const char __user *ubuf,
6554 size_t cnt, loff_t *ppos)
6556 struct inode *inode = file_inode(filp);
6557 struct trace_array *tr = inode->i_private;
6561 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6565 /* must have at least 1 entry */
6569 /* value is in KB */
6571 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6581 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6582 size_t cnt, loff_t *ppos)
6584 struct trace_array *tr = filp->private_data;
6587 unsigned long size = 0, expanded_size = 0;
6589 mutex_lock(&trace_types_lock);
6590 for_each_tracing_cpu(cpu) {
6591 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6592 if (!ring_buffer_expanded)
6593 expanded_size += trace_buf_size >> 10;
6595 if (ring_buffer_expanded)
6596 r = sprintf(buf, "%lu\n", size);
6598 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6599 mutex_unlock(&trace_types_lock);
6601 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6605 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6606 size_t cnt, loff_t *ppos)
6609 * There is no need to read what the user has written; this function
6610 * only exists so that using "echo" on this file does not return an error.
6619 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6621 struct trace_array *tr = inode->i_private;
6623 /* disable tracing ? */
6624 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6625 tracer_tracing_off(tr);
6626 /* resize the ring buffer to 0 */
6627 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6629 trace_array_put(tr);
6635 tracing_mark_write(struct file *filp, const char __user *ubuf,
6636 size_t cnt, loff_t *fpos)
6638 struct trace_array *tr = filp->private_data;
6639 struct ring_buffer_event *event;
6640 enum event_trigger_type tt = ETT_NONE;
6641 struct trace_buffer *buffer;
6642 struct print_entry *entry;
6647 /* Used in tracing_mark_raw_write() as well */
6648 #define FAULTED_STR "<faulted>"
6649 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6651 if (tracing_disabled)
6654 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6657 if (cnt > TRACE_BUF_SIZE)
6658 cnt = TRACE_BUF_SIZE;
6660 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6662 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6664 /* If less than "<faulted>", then make sure we can still add that */
6665 if (cnt < FAULTED_SIZE)
6666 size += FAULTED_SIZE - cnt;
6668 buffer = tr->array_buffer.buffer;
6669 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6671 if (unlikely(!event))
6672 /* Ring buffer disabled, return as if not open for write */
6675 entry = ring_buffer_event_data(event);
6676 entry->ip = _THIS_IP_;
6678 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6680 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6686 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6687 /* do not add \n before testing triggers, but add \0 */
6688 entry->buf[cnt] = '\0';
6689 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6692 if (entry->buf[cnt - 1] != '\n') {
6693 entry->buf[cnt] = '\n';
6694 entry->buf[cnt + 1] = '\0';
6696 entry->buf[cnt] = '\0';
6698 if (static_branch_unlikely(&trace_marker_exports_enabled))
6699 ftrace_exports(event, TRACE_EXPORT_MARKER);
6700 __buffer_unlock_commit(buffer, event);
6703 event_triggers_post_call(tr->trace_marker_file, tt);
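/*
 * trace_marker lets user space drop annotations into the ring buffer,
 * e.g. (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo "hit checkpoint A" > /sys/kernel/tracing/trace_marker
 *
 * Writes longer than TRACE_BUF_SIZE are truncated, a '\n' is appended when
 * the message does not already end with one, and a copy that faults is
 * replaced with the FAULTED_STR marker defined above.
 */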
6711 /* Limit it for now to 3K (including tag) */
6712 #define RAW_DATA_MAX_SIZE (1024*3)
6715 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6716 size_t cnt, loff_t *fpos)
6718 struct trace_array *tr = filp->private_data;
6719 struct ring_buffer_event *event;
6720 struct trace_buffer *buffer;
6721 struct raw_data_entry *entry;
6726 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6728 if (tracing_disabled)
6731 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6734 /* The marker must at least have a tag id */
6735 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6738 if (cnt > TRACE_BUF_SIZE)
6739 cnt = TRACE_BUF_SIZE;
6741 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6743 size = sizeof(*entry) + cnt;
6744 if (cnt < FAULT_SIZE_ID)
6745 size += FAULT_SIZE_ID - cnt;
6747 buffer = tr->array_buffer.buffer;
6748 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6751 /* Ring buffer disabled, return as if not open for write */
6754 entry = ring_buffer_event_data(event);
6756 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6759 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6764 __buffer_unlock_commit(buffer, event);
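/*
 * trace_marker_raw is the binary counterpart of trace_marker: the payload
 * must start with a sizeof(int) tag id (checked above) and is capped at
 * RAW_DATA_MAX_SIZE. It is intended for tools that read the ring buffer in
 * binary form rather than through the text "trace" output.
 */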
6772 static int tracing_clock_show(struct seq_file *m, void *v)
6774 struct trace_array *tr = m->private;
6777 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6779 "%s%s%s%s", i ? " " : "",
6780 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6781 i == tr->clock_id ? "]" : "");
6787 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6791 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6792 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6795 if (i == ARRAY_SIZE(trace_clocks))
6798 mutex_lock(&trace_types_lock);
6802 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6805 * New clock may not be consistent with the previous clock.
6806 * Reset the buffer so that it doesn't have incomparable timestamps.
6808 tracing_reset_online_cpus(&tr->array_buffer);
6810 #ifdef CONFIG_TRACER_MAX_TRACE
6811 if (tr->max_buffer.buffer)
6812 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6813 tracing_reset_online_cpus(&tr->max_buffer);
6816 mutex_unlock(&trace_types_lock);
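/*
 * Because tracing_set_clock() above resets the buffers, the clock should be
 * chosen before collecting data. From user space:
 *
 *   cat /sys/kernel/tracing/trace_clock      # current clock shown in [...]
 *   echo global > /sys/kernel/tracing/trace_clock
 */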
6821 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6822 size_t cnt, loff_t *fpos)
6824 struct seq_file *m = filp->private_data;
6825 struct trace_array *tr = m->private;
6827 const char *clockstr;
6830 if (cnt >= sizeof(buf))
6833 if (copy_from_user(buf, ubuf, cnt))
6838 clockstr = strstrip(buf);
6840 ret = tracing_set_clock(tr, clockstr);
6849 static int tracing_clock_open(struct inode *inode, struct file *file)
6851 struct trace_array *tr = inode->i_private;
6854 ret = tracing_check_open_get_tr(tr);
6858 ret = single_open(file, tracing_clock_show, inode->i_private);
6860 trace_array_put(tr);
6865 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6867 struct trace_array *tr = m->private;
6869 mutex_lock(&trace_types_lock);
6871 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6872 seq_puts(m, "delta [absolute]\n");
6874 seq_puts(m, "[delta] absolute\n");
6876 mutex_unlock(&trace_types_lock);
6881 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6883 struct trace_array *tr = inode->i_private;
6886 ret = tracing_check_open_get_tr(tr);
6890 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6892 trace_array_put(tr);
6897 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6901 mutex_lock(&trace_types_lock);
6903 if (abs && tr->time_stamp_abs_ref++)
6907 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6912 if (--tr->time_stamp_abs_ref)
6916 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6918 #ifdef CONFIG_TRACER_MAX_TRACE
6919 if (tr->max_buffer.buffer)
6920 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6923 mutex_unlock(&trace_types_lock);
6928 struct ftrace_buffer_info {
6929 struct trace_iterator iter;
6931 unsigned int spare_cpu;
6935 #ifdef CONFIG_TRACER_SNAPSHOT
6936 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6938 struct trace_array *tr = inode->i_private;
6939 struct trace_iterator *iter;
6943 ret = tracing_check_open_get_tr(tr);
6947 if (file->f_mode & FMODE_READ) {
6948 iter = __tracing_open(inode, file, true);
6950 ret = PTR_ERR(iter);
6952 /* Writes still need the seq_file to hold the private data */
6954 m = kzalloc(sizeof(*m), GFP_KERNEL);
6957 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6965 iter->array_buffer = &tr->max_buffer;
6966 iter->cpu_file = tracing_get_cpu(inode);
6968 file->private_data = m;
6972 trace_array_put(tr);
6978 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6981 struct seq_file *m = filp->private_data;
6982 struct trace_iterator *iter = m->private;
6983 struct trace_array *tr = iter->tr;
6987 ret = tracing_update_buffers();
6991 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6995 mutex_lock(&trace_types_lock);
6997 if (tr->current_trace->use_max_tr) {
7002 arch_spin_lock(&tr->max_lock);
7003 if (tr->cond_snapshot)
7005 arch_spin_unlock(&tr->max_lock);
7011 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7015 if (tr->allocated_snapshot)
7019 /* Only allow per-cpu swap if the ring buffer supports it */
7020 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7021 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7026 if (tr->allocated_snapshot)
7027 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7028 &tr->array_buffer, iter->cpu_file);
7030 ret = tracing_alloc_snapshot_instance(tr);
7033 local_irq_disable();
7034 /* Now, we're going to swap */
7035 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7036 update_max_tr(tr, current, smp_processor_id(), NULL);
7038 update_max_tr_single(tr, current, iter->cpu_file);
7042 if (tr->allocated_snapshot) {
7043 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7044 tracing_reset_online_cpus(&tr->max_buffer);
7046 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7056 mutex_unlock(&trace_types_lock);
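/*
 * User-space view of the snapshot file handled above (the 0/1/2 convention
 * is the one documented in Documentation/trace/ftrace.rst):
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, then swap
 *   echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear it without freeing
 *   cat /sys/kernel/tracing/snapshot        # read the swapped-out data
 */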
7060 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7062 struct seq_file *m = file->private_data;
7065 ret = tracing_release(inode, file);
7067 if (file->f_mode & FMODE_READ)
7070 /* If write only, the seq_file is just a stub */
7078 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7079 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7080 size_t count, loff_t *ppos);
7081 static int tracing_buffers_release(struct inode *inode, struct file *file);
7082 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7083 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7085 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7087 struct ftrace_buffer_info *info;
7090 /* The following checks for tracefs lockdown */
7091 ret = tracing_buffers_open(inode, filp);
7095 info = filp->private_data;
7097 if (info->iter.trace->use_max_tr) {
7098 tracing_buffers_release(inode, filp);
7102 info->iter.snapshot = true;
7103 info->iter.array_buffer = &info->iter.tr->max_buffer;
7108 #endif /* CONFIG_TRACER_SNAPSHOT */
7111 static const struct file_operations tracing_thresh_fops = {
7112 .open = tracing_open_generic,
7113 .read = tracing_thresh_read,
7114 .write = tracing_thresh_write,
7115 .llseek = generic_file_llseek,
7118 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7119 static const struct file_operations tracing_max_lat_fops = {
7120 .open = tracing_open_generic,
7121 .read = tracing_max_lat_read,
7122 .write = tracing_max_lat_write,
7123 .llseek = generic_file_llseek,
7127 static const struct file_operations set_tracer_fops = {
7128 .open = tracing_open_generic,
7129 .read = tracing_set_trace_read,
7130 .write = tracing_set_trace_write,
7131 .llseek = generic_file_llseek,
7134 static const struct file_operations tracing_pipe_fops = {
7135 .open = tracing_open_pipe,
7136 .poll = tracing_poll_pipe,
7137 .read = tracing_read_pipe,
7138 .splice_read = tracing_splice_read_pipe,
7139 .release = tracing_release_pipe,
7140 .llseek = no_llseek,
7143 static const struct file_operations tracing_entries_fops = {
7144 .open = tracing_open_generic_tr,
7145 .read = tracing_entries_read,
7146 .write = tracing_entries_write,
7147 .llseek = generic_file_llseek,
7148 .release = tracing_release_generic_tr,
7151 static const struct file_operations tracing_total_entries_fops = {
7152 .open = tracing_open_generic_tr,
7153 .read = tracing_total_entries_read,
7154 .llseek = generic_file_llseek,
7155 .release = tracing_release_generic_tr,
7158 static const struct file_operations tracing_free_buffer_fops = {
7159 .open = tracing_open_generic_tr,
7160 .write = tracing_free_buffer_write,
7161 .release = tracing_free_buffer_release,
7164 static const struct file_operations tracing_mark_fops = {
7165 .open = tracing_open_generic_tr,
7166 .write = tracing_mark_write,
7167 .llseek = generic_file_llseek,
7168 .release = tracing_release_generic_tr,
7171 static const struct file_operations tracing_mark_raw_fops = {
7172 .open = tracing_open_generic_tr,
7173 .write = tracing_mark_raw_write,
7174 .llseek = generic_file_llseek,
7175 .release = tracing_release_generic_tr,
7178 static const struct file_operations trace_clock_fops = {
7179 .open = tracing_clock_open,
7181 .llseek = seq_lseek,
7182 .release = tracing_single_release_tr,
7183 .write = tracing_clock_write,
7186 static const struct file_operations trace_time_stamp_mode_fops = {
7187 .open = tracing_time_stamp_mode_open,
7189 .llseek = seq_lseek,
7190 .release = tracing_single_release_tr,
7193 #ifdef CONFIG_TRACER_SNAPSHOT
7194 static const struct file_operations snapshot_fops = {
7195 .open = tracing_snapshot_open,
7197 .write = tracing_snapshot_write,
7198 .llseek = tracing_lseek,
7199 .release = tracing_snapshot_release,
7202 static const struct file_operations snapshot_raw_fops = {
7203 .open = snapshot_raw_open,
7204 .read = tracing_buffers_read,
7205 .release = tracing_buffers_release,
7206 .splice_read = tracing_buffers_splice_read,
7207 .llseek = no_llseek,
7210 #endif /* CONFIG_TRACER_SNAPSHOT */
7212 #define TRACING_LOG_ERRS_MAX 8
7213 #define TRACING_LOG_LOC_MAX 128
7215 #define CMD_PREFIX " Command: "
7218 const char **errs; /* ptr to loc-specific array of err strings */
7219 u8 type; /* index into errs -> specific err string */
7220 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7224 struct tracing_log_err {
7225 struct list_head list;
7226 struct err_info info;
7227 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7228 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7231 static DEFINE_MUTEX(tracing_err_log_lock);
7233 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7235 struct tracing_log_err *err;
7237 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7238 err = kzalloc(sizeof(*err), GFP_KERNEL);
7240 err = ERR_PTR(-ENOMEM);
7241 tr->n_err_log_entries++;
7246 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7247 list_del(&err->list);
7253 * err_pos - find the position of a string within a command for error careting
7254 * @cmd: The tracing command that caused the error
7255 * @str: The string to position the caret at within @cmd
7257 * Finds the position of the first occurrence of @str within @cmd. The
7258 * return value can be passed to tracing_log_err() for caret placement
7261 * Returns the index within @cmd of the first occurrence of @str or 0
7262 * if @str was not found.
7264 unsigned int err_pos(char *cmd, const char *str)
7268 if (WARN_ON(!strlen(cmd)))
7271 found = strstr(cmd, str);
7279 * tracing_log_err - write an error to the tracing error log
7280 * @tr: The associated trace array for the error (NULL for top level array)
7281 * @loc: A string describing where the error occurred
7282 * @cmd: The tracing command that caused the error
7283 * @errs: The array of loc-specific static error strings
7284 * @type: The index into errs[], which produces the specific static err string
7285 * @pos: The position the caret should be placed in the cmd
7287 * Writes an error into tracing/error_log of the form:
7289 * <loc>: error: <text>
7293 * tracing/error_log is a small log file containing the last
7294 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7295 * unless there has been a tracing error, and the error log can be
7296 * cleared and have its memory freed by writing the empty string in
7297 * truncation mode to it, i.e. echo > tracing/error_log.
7299 * NOTE: the @errs array along with the @type param are used to
7300 * produce a static error string - this string is not copied and saved
7301 * when the error is logged - only a pointer to it is saved. See
7302 * existing callers for examples of how static strings are typically
7303 * defined for use with tracing_log_err().
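 *
 * A minimal, hypothetical caller sketch (the array, location string and
 * command below are illustrative only and not defined in this file):
 *
 *	static const char *sample_errs[] = { "Field not found", "Bad key" };
 *	...
 *	tracing_log_err(tr, "hist:sample", cmd, sample_errs,
 *			1, err_pos(cmd, "badkey"));
 *
 * would append an entry to tracing/error_log roughly of the form:
 *
 *	[  123.456789] hist:sample: error: Bad key
 *	  Command: keys=badkey
 *	                ^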
7305 void tracing_log_err(struct trace_array *tr,
7306 const char *loc, const char *cmd,
7307 const char **errs, u8 type, u8 pos)
7309 struct tracing_log_err *err;
7314 mutex_lock(&tracing_err_log_lock);
7315 err = get_tracing_log_err(tr);
7316 if (PTR_ERR(err) == -ENOMEM) {
7317 mutex_unlock(&tracing_err_log_lock);
7321 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7322 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7324 err->info.errs = errs;
7325 err->info.type = type;
7326 err->info.pos = pos;
7327 err->info.ts = local_clock();
7329 list_add_tail(&err->list, &tr->err_log);
7330 mutex_unlock(&tracing_err_log_lock);
7333 static void clear_tracing_err_log(struct trace_array *tr)
7335 struct tracing_log_err *err, *next;
7337 mutex_lock(&tracing_err_log_lock);
7338 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7339 list_del(&err->list);
7343 tr->n_err_log_entries = 0;
7344 mutex_unlock(&tracing_err_log_lock);
7347 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7349 struct trace_array *tr = m->private;
7351 mutex_lock(&tracing_err_log_lock);
7353 return seq_list_start(&tr->err_log, *pos);
7356 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7358 struct trace_array *tr = m->private;
7360 return seq_list_next(v, &tr->err_log, pos);
7363 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7365 mutex_unlock(&tracing_err_log_lock);
7368 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7372 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7374 for (i = 0; i < pos; i++)
7379 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7381 struct tracing_log_err *err = v;
7384 const char *err_text = err->info.errs[err->info.type];
7385 u64 sec = err->info.ts;
7388 nsec = do_div(sec, NSEC_PER_SEC);
7389 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7390 err->loc, err_text);
7391 seq_printf(m, "%s", err->cmd);
7392 tracing_err_log_show_pos(m, err->info.pos);
7398 static const struct seq_operations tracing_err_log_seq_ops = {
7399 .start = tracing_err_log_seq_start,
7400 .next = tracing_err_log_seq_next,
7401 .stop = tracing_err_log_seq_stop,
7402 .show = tracing_err_log_seq_show
7405 static int tracing_err_log_open(struct inode *inode, struct file *file)
7407 struct trace_array *tr = inode->i_private;
7410 ret = tracing_check_open_get_tr(tr);
7414 /* If this file was opened for write, then erase contents */
7415 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7416 clear_tracing_err_log(tr);
7418 if (file->f_mode & FMODE_READ) {
7419 ret = seq_open(file, &tracing_err_log_seq_ops);
7421 struct seq_file *m = file->private_data;
7424 trace_array_put(tr);
7430 static ssize_t tracing_err_log_write(struct file *file,
7431 const char __user *buffer,
7432 size_t count, loff_t *ppos)
7437 static int tracing_err_log_release(struct inode *inode, struct file *file)
7439 struct trace_array *tr = inode->i_private;
7441 trace_array_put(tr);
7443 if (file->f_mode & FMODE_READ)
7444 seq_release(inode, file);
7449 static const struct file_operations tracing_err_log_fops = {
7450 .open = tracing_err_log_open,
7451 .write = tracing_err_log_write,
7453 .llseek = seq_lseek,
7454 .release = tracing_err_log_release,
7457 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7459 struct trace_array *tr = inode->i_private;
7460 struct ftrace_buffer_info *info;
7463 ret = tracing_check_open_get_tr(tr);
7467 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7469 trace_array_put(tr);
7473 mutex_lock(&trace_types_lock);
7476 info->iter.cpu_file = tracing_get_cpu(inode);
7477 info->iter.trace = tr->current_trace;
7478 info->iter.array_buffer = &tr->array_buffer;
7480 /* Force reading ring buffer for first read */
7481 info->read = (unsigned int)-1;
7483 filp->private_data = info;
7487 mutex_unlock(&trace_types_lock);
7489 ret = nonseekable_open(inode, filp);
7491 trace_array_put(tr);
7497 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7499 struct ftrace_buffer_info *info = filp->private_data;
7500 struct trace_iterator *iter = &info->iter;
7502 return trace_poll(iter, filp, poll_table);
7506 tracing_buffers_read(struct file *filp, char __user *ubuf,
7507 size_t count, loff_t *ppos)
7509 struct ftrace_buffer_info *info = filp->private_data;
7510 struct trace_iterator *iter = &info->iter;
7517 #ifdef CONFIG_TRACER_MAX_TRACE
7518 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7523 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7525 if (IS_ERR(info->spare)) {
7526 ret = PTR_ERR(info->spare);
7529 info->spare_cpu = iter->cpu_file;
7535 /* Do we have previous read data to read? */
7536 if (info->read < PAGE_SIZE)
7540 trace_access_lock(iter->cpu_file);
7541 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7545 trace_access_unlock(iter->cpu_file);
7548 if (trace_empty(iter)) {
7549 if ((filp->f_flags & O_NONBLOCK))
7552 ret = wait_on_pipe(iter, 0);
7563 size = PAGE_SIZE - info->read;
7567 ret = copy_to_user(ubuf, info->spare + info->read, size);
7579 static int tracing_buffers_release(struct inode *inode, struct file *file)
7581 struct ftrace_buffer_info *info = file->private_data;
7582 struct trace_iterator *iter = &info->iter;
7584 mutex_lock(&trace_types_lock);
7586 iter->tr->trace_ref--;
7588 __trace_array_put(iter->tr);
7591 ring_buffer_free_read_page(iter->array_buffer->buffer,
7592 info->spare_cpu, info->spare);
7595 mutex_unlock(&trace_types_lock);
7601 struct trace_buffer *buffer;
7604 refcount_t refcount;
7607 static void buffer_ref_release(struct buffer_ref *ref)
7609 if (!refcount_dec_and_test(&ref->refcount))
7611 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7615 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7616 struct pipe_buffer *buf)
7618 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7620 buffer_ref_release(ref);
7624 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7625 struct pipe_buffer *buf)
7627 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7629 if (refcount_read(&ref->refcount) > INT_MAX/2)
7632 refcount_inc(&ref->refcount);
7636 /* Pipe buffer operations for a buffer. */
7637 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7638 .release = buffer_pipe_buf_release,
7639 .get = buffer_pipe_buf_get,
7643 * Callback from splice_to_pipe(), if we need to release some pages
7644 * at the end of the spd in case we errored out while filling the pipe.
7646 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7648 struct buffer_ref *ref =
7649 (struct buffer_ref *)spd->partial[i].private;
7651 buffer_ref_release(ref);
7652 spd->partial[i].private = 0;
7656 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7657 struct pipe_inode_info *pipe, size_t len,
7660 struct ftrace_buffer_info *info = file->private_data;
7661 struct trace_iterator *iter = &info->iter;
7662 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7663 struct page *pages_def[PIPE_DEF_BUFFERS];
7664 struct splice_pipe_desc spd = {
7666 .partial = partial_def,
7667 .nr_pages_max = PIPE_DEF_BUFFERS,
7668 .ops = &buffer_pipe_buf_ops,
7669 .spd_release = buffer_spd_release,
7671 struct buffer_ref *ref;
7675 #ifdef CONFIG_TRACER_MAX_TRACE
7676 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7680 if (*ppos & (PAGE_SIZE - 1))
7683 if (len & (PAGE_SIZE - 1)) {
7684 if (len < PAGE_SIZE)
7689 if (splice_grow_spd(pipe, &spd))
7693 trace_access_lock(iter->cpu_file);
7694 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7696 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7700 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7706 refcount_set(&ref->refcount, 1);
7707 ref->buffer = iter->array_buffer->buffer;
7708 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7709 if (IS_ERR(ref->page)) {
7710 ret = PTR_ERR(ref->page);
7715 ref->cpu = iter->cpu_file;
7717 r = ring_buffer_read_page(ref->buffer, &ref->page,
7718 len, iter->cpu_file, 1);
7720 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7726 page = virt_to_page(ref->page);
7728 spd.pages[i] = page;
7729 spd.partial[i].len = PAGE_SIZE;
7730 spd.partial[i].offset = 0;
7731 spd.partial[i].private = (unsigned long)ref;
7735 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7738 trace_access_unlock(iter->cpu_file);
7741 /* did we read anything? */
7742 if (!spd.nr_pages) {
7747 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7750 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7757 ret = splice_to_pipe(pipe, &spd);
7759 splice_shrink_spd(&spd);
7764 static const struct file_operations tracing_buffers_fops = {
7765 .open = tracing_buffers_open,
7766 .read = tracing_buffers_read,
7767 .poll = tracing_buffers_poll,
7768 .release = tracing_buffers_release,
7769 .splice_read = tracing_buffers_splice_read,
7770 .llseek = no_llseek,
7774 tracing_stats_read(struct file *filp, char __user *ubuf,
7775 size_t count, loff_t *ppos)
7777 struct inode *inode = file_inode(filp);
7778 struct trace_array *tr = inode->i_private;
7779 struct array_buffer *trace_buf = &tr->array_buffer;
7780 int cpu = tracing_get_cpu(inode);
7781 struct trace_seq *s;
7783 unsigned long long t;
7784 unsigned long usec_rem;
7786 s = kmalloc(sizeof(*s), GFP_KERNEL);
7792 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7793 trace_seq_printf(s, "entries: %ld\n", cnt);
7795 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7796 trace_seq_printf(s, "overrun: %ld\n", cnt);
7798 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7799 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7801 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7802 trace_seq_printf(s, "bytes: %ld\n", cnt);
7804 if (trace_clocks[tr->clock_id].in_ns) {
7805 /* local or global for trace_clock */
7806 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7807 usec_rem = do_div(t, USEC_PER_SEC);
7808 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7811 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7812 usec_rem = do_div(t, USEC_PER_SEC);
7813 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7815 /* counter or tsc mode for trace_clock */
7816 trace_seq_printf(s, "oldest event ts: %llu\n",
7817 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7819 trace_seq_printf(s, "now ts: %llu\n",
7820 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7823 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7824 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7826 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7827 trace_seq_printf(s, "read events: %ld\n", cnt);
7829 count = simple_read_from_buffer(ubuf, count, ppos,
7830 s->buffer, trace_seq_used(s));
7837 static const struct file_operations tracing_stats_fops = {
7838 .open = tracing_open_generic_tr,
7839 .read = tracing_stats_read,
7840 .llseek = generic_file_llseek,
7841 .release = tracing_release_generic_tr,
7844 #ifdef CONFIG_DYNAMIC_FTRACE
7847 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7848 size_t cnt, loff_t *ppos)
7854 /* 256 should be plenty to hold the amount needed */
7855 buf = kmalloc(256, GFP_KERNEL);
7859 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7860 ftrace_update_tot_cnt,
7861 ftrace_number_of_pages,
7862 ftrace_number_of_groups);
7864 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7869 static const struct file_operations tracing_dyn_info_fops = {
7870 .open = tracing_open_generic,
7871 .read = tracing_read_dyn_info,
7872 .llseek = generic_file_llseek,
7874 #endif /* CONFIG_DYNAMIC_FTRACE */
7876 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
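/*
 * The probe ops below implement the "snapshot" command that can be attached
 * to functions via set_ftrace_filter. Illustrative usage (the function name
 * is only an example):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	  snapshot on every call
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	  snapshot only 5 times
 *	echo '!schedule:snapshot' > set_ftrace_filter	  remove the probe
 */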
7878 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7879 struct trace_array *tr, struct ftrace_probe_ops *ops,
7882 tracing_snapshot_instance(tr);
7886 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7887 struct trace_array *tr, struct ftrace_probe_ops *ops,
7890 struct ftrace_func_mapper *mapper = data;
7894 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7904 tracing_snapshot_instance(tr);
7908 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7909 struct ftrace_probe_ops *ops, void *data)
7911 struct ftrace_func_mapper *mapper = data;
7914 seq_printf(m, "%ps:", (void *)ip);
7916 seq_puts(m, "snapshot");
7919 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7922 seq_printf(m, ":count=%ld\n", *count);
7924 seq_puts(m, ":unlimited\n");
7930 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7931 unsigned long ip, void *init_data, void **data)
7933 struct ftrace_func_mapper *mapper = *data;
7936 mapper = allocate_ftrace_func_mapper();
7942 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7946 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7947 unsigned long ip, void *data)
7949 struct ftrace_func_mapper *mapper = data;
7954 free_ftrace_func_mapper(mapper, NULL);
7958 ftrace_func_mapper_remove_ip(mapper, ip);
7961 static struct ftrace_probe_ops snapshot_probe_ops = {
7962 .func = ftrace_snapshot,
7963 .print = ftrace_snapshot_print,
7966 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7967 .func = ftrace_count_snapshot,
7968 .print = ftrace_snapshot_print,
7969 .init = ftrace_snapshot_init,
7970 .free = ftrace_snapshot_free,
7974 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7975 char *glob, char *cmd, char *param, int enable)
7977 struct ftrace_probe_ops *ops;
7978 void *count = (void *)-1;
7985 /* hash funcs only work with set_ftrace_filter */
7989 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7992 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7997 number = strsep(¶m, ":");
7999 if (!strlen(number))
8003 * We use the callback data field (which is a pointer) as our counter.
8006 ret = kstrtoul(number, 0, (unsigned long *)&count);
8011 ret = tracing_alloc_snapshot_instance(tr);
8015 ret = register_ftrace_function_probe(glob, tr, ops, count);
8018 return ret < 0 ? ret : 0;
8021 static struct ftrace_func_command ftrace_snapshot_cmd = {
8023 .func = ftrace_trace_snapshot_callback,
8026 static __init int register_snapshot_cmd(void)
8028 return register_ftrace_command(&ftrace_snapshot_cmd);
8031 static inline __init int register_snapshot_cmd(void) { return 0; }
8032 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8034 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8036 if (WARN_ON(!tr->dir))
8037 return ERR_PTR(-ENODEV);
8039 /* Top directory uses NULL as the parent */
8040 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8043 /* All sub buffers have a descriptor */
8047 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8049 struct dentry *d_tracer;
8052 return tr->percpu_dir;
8054 d_tracer = tracing_get_dentry(tr);
8055 if (IS_ERR(d_tracer))
8058 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8060 MEM_FAIL(!tr->percpu_dir,
8061 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8063 return tr->percpu_dir;
8066 static struct dentry *
8067 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8068 void *data, long cpu, const struct file_operations *fops)
8070 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8072 if (ret) /* See tracing_get_cpu() */
8073 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8078 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8080 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8081 struct dentry *d_cpu;
8082 char cpu_dir[30]; /* 30 characters should be more than enough */
8087 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8088 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8090 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8094 /* per cpu trace_pipe */
8095 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8096 tr, cpu, &tracing_pipe_fops);
8099 trace_create_cpu_file("trace", 0644, d_cpu,
8100 tr, cpu, &tracing_fops);
8102 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8103 tr, cpu, &tracing_buffers_fops);
8105 trace_create_cpu_file("stats", 0444, d_cpu,
8106 tr, cpu, &tracing_stats_fops);
8108 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8109 tr, cpu, &tracing_entries_fops);
8111 #ifdef CONFIG_TRACER_SNAPSHOT
8112 trace_create_cpu_file("snapshot", 0644, d_cpu,
8113 tr, cpu, &snapshot_fops);
8115 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8116 tr, cpu, &snapshot_raw_fops);
8120 #ifdef CONFIG_FTRACE_SELFTEST
8121 /* Let selftest have access to static functions in this file */
8122 #include "trace_selftest.c"
8126 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8129 struct trace_option_dentry *topt = filp->private_data;
8132 if (topt->flags->val & topt->opt->bit)
8137 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8141 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8144 struct trace_option_dentry *topt = filp->private_data;
8148 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8152 if (val != 0 && val != 1)
8155 if (!!(topt->flags->val & topt->opt->bit) != val) {
8156 mutex_lock(&trace_types_lock);
8157 ret = __set_tracer_option(topt->tr, topt->flags,
8159 mutex_unlock(&trace_types_lock);
8170 static const struct file_operations trace_options_fops = {
8171 .open = tracing_open_generic,
8172 .read = trace_options_read,
8173 .write = trace_options_write,
8174 .llseek = generic_file_llseek,
8178 * In order to pass in both the trace_array descriptor as well as the index
8179 * to the flag that the trace option file represents, the trace_array
8180 * has a character array of trace_flags_index[], which holds the index
8181 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8182 * The address of this character array is passed to the flag option file
8183 * read/write callbacks.
8185 * In order to extract both the index and the trace_array descriptor,
8186 * get_tr_index() uses the following algorithm.
8190 * As the pointer itself contains the address of the index (remember that
8193 * index[n] == n), dereferencing it yields the flag's bit index. Subtracting
8194 * that index from the pointer then gets us back to the start of the array:
8196 * ptr - idx == &index[0]
8198 * Then a simple container_of() from that pointer gets us to the
8199 * trace_array descriptor.
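 *
 * A concrete (illustrative) walk-through: if an option file was created with
 * data == &tr->trace_flags_index[3], then *(unsigned char *)data == 3, so
 * data - 3 == &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing struct trace_array.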
8201 static void get_tr_index(void *data, struct trace_array **ptr,
8202 unsigned int *pindex)
8204 *pindex = *(unsigned char *)data;
8206 *ptr = container_of(data - *pindex, struct trace_array,
8211 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8214 void *tr_index = filp->private_data;
8215 struct trace_array *tr;
8219 get_tr_index(tr_index, &tr, &index);
8221 if (tr->trace_flags & (1 << index))
8226 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8230 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8233 void *tr_index = filp->private_data;
8234 struct trace_array *tr;
8239 get_tr_index(tr_index, &tr, &index);
8241 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8245 if (val != 0 && val != 1)
8248 mutex_lock(&event_mutex);
8249 mutex_lock(&trace_types_lock);
8250 ret = set_tracer_flag(tr, 1 << index, val);
8251 mutex_unlock(&trace_types_lock);
8252 mutex_unlock(&event_mutex);
8262 static const struct file_operations trace_options_core_fops = {
8263 .open = tracing_open_generic,
8264 .read = trace_options_core_read,
8265 .write = trace_options_core_write,
8266 .llseek = generic_file_llseek,
8269 struct dentry *trace_create_file(const char *name,
8271 struct dentry *parent,
8273 const struct file_operations *fops)
8277 ret = tracefs_create_file(name, mode, parent, data, fops);
8279 pr_warn("Could not create tracefs '%s' entry\n", name);
8285 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8287 struct dentry *d_tracer;
8292 d_tracer = tracing_get_dentry(tr);
8293 if (IS_ERR(d_tracer))
8296 tr->options = tracefs_create_dir("options", d_tracer);
8298 pr_warn("Could not create tracefs directory 'options'\n");
8306 create_trace_option_file(struct trace_array *tr,
8307 struct trace_option_dentry *topt,
8308 struct tracer_flags *flags,
8309 struct tracer_opt *opt)
8311 struct dentry *t_options;
8313 t_options = trace_options_init_dentry(tr);
8317 topt->flags = flags;
8321 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8322 &trace_options_fops);
8327 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8329 struct trace_option_dentry *topts;
8330 struct trace_options *tr_topts;
8331 struct tracer_flags *flags;
8332 struct tracer_opt *opts;
8339 flags = tracer->flags;
8341 if (!flags || !flags->opts)
8345 * If this is an instance, only create flags for tracers
8346 * the instance may have.
8348 if (!trace_ok_for_array(tracer, tr))
8351 for (i = 0; i < tr->nr_topts; i++) {
8352 /* Make sure there are no duplicate flags. */
8353 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8359 for (cnt = 0; opts[cnt].name; cnt++)
8362 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8366 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8373 tr->topts = tr_topts;
8374 tr->topts[tr->nr_topts].tracer = tracer;
8375 tr->topts[tr->nr_topts].topts = topts;
8378 for (cnt = 0; opts[cnt].name; cnt++) {
8379 create_trace_option_file(tr, &topts[cnt], flags,
8381 MEM_FAIL(topts[cnt].entry == NULL,
8382 "Failed to create trace option: %s",
8387 static struct dentry *
8388 create_trace_option_core_file(struct trace_array *tr,
8389 const char *option, long index)
8391 struct dentry *t_options;
8393 t_options = trace_options_init_dentry(tr);
8397 return trace_create_file(option, 0644, t_options,
8398 (void *)&tr->trace_flags_index[index],
8399 &trace_options_core_fops);
8402 static void create_trace_options_dir(struct trace_array *tr)
8404 struct dentry *t_options;
8405 bool top_level = tr == &global_trace;
8408 t_options = trace_options_init_dentry(tr);
8412 for (i = 0; trace_options[i]; i++) {
8414 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8415 create_trace_option_core_file(tr, trace_options[i], i);
8420 rb_simple_read(struct file *filp, char __user *ubuf,
8421 size_t cnt, loff_t *ppos)
8423 struct trace_array *tr = filp->private_data;
8427 r = tracer_tracing_is_on(tr);
8428 r = sprintf(buf, "%d\n", r);
8430 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8434 rb_simple_write(struct file *filp, const char __user *ubuf,
8435 size_t cnt, loff_t *ppos)
8437 struct trace_array *tr = filp->private_data;
8438 struct trace_buffer *buffer = tr->array_buffer.buffer;
8442 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8447 mutex_lock(&trace_types_lock);
8448 if (!!val == tracer_tracing_is_on(tr)) {
8449 val = 0; /* do nothing */
8451 tracer_tracing_on(tr);
8452 if (tr->current_trace->start)
8453 tr->current_trace->start(tr);
8455 tracer_tracing_off(tr);
8456 if (tr->current_trace->stop)
8457 tr->current_trace->stop(tr);
8459 mutex_unlock(&trace_types_lock);
8467 static const struct file_operations rb_simple_fops = {
8468 .open = tracing_open_generic_tr,
8469 .read = rb_simple_read,
8470 .write = rb_simple_write,
8471 .release = tracing_release_generic_tr,
8472 .llseek = default_llseek,
8476 buffer_percent_read(struct file *filp, char __user *ubuf,
8477 size_t cnt, loff_t *ppos)
8479 struct trace_array *tr = filp->private_data;
8483 r = tr->buffer_percent;
8484 r = sprintf(buf, "%d\n", r);
8486 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8490 buffer_percent_write(struct file *filp, const char __user *ubuf,
8491 size_t cnt, loff_t *ppos)
8493 struct trace_array *tr = filp->private_data;
8497 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8507 tr->buffer_percent = val;
8514 static const struct file_operations buffer_percent_fops = {
8515 .open = tracing_open_generic_tr,
8516 .read = buffer_percent_read,
8517 .write = buffer_percent_write,
8518 .release = tracing_release_generic_tr,
8519 .llseek = default_llseek,
8522 static struct dentry *trace_instance_dir;
8525 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8528 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8530 enum ring_buffer_flags rb_flags;
8532 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8536 buf->buffer = ring_buffer_alloc(size, rb_flags);
8540 buf->data = alloc_percpu(struct trace_array_cpu);
8542 ring_buffer_free(buf->buffer);
8547 /* Allocate the first page for all buffers */
8548 set_buffer_entries(&tr->array_buffer,
8549 ring_buffer_size(tr->array_buffer.buffer, 0));
8554 static int allocate_trace_buffers(struct trace_array *tr, int size)
8558 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8562 #ifdef CONFIG_TRACER_MAX_TRACE
8563 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8564 allocate_snapshot ? size : 1);
8565 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8566 ring_buffer_free(tr->array_buffer.buffer);
8567 tr->array_buffer.buffer = NULL;
8568 free_percpu(tr->array_buffer.data);
8569 tr->array_buffer.data = NULL;
8572 tr->allocated_snapshot = allocate_snapshot;
8575 * Only the top level trace array gets its snapshot allocated
8576 * from the kernel command line.
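 * (e.g. by booting with the "alloc_snapshot" kernel parameter).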
8578 allocate_snapshot = false;
8584 static void free_trace_buffer(struct array_buffer *buf)
8587 ring_buffer_free(buf->buffer);
8589 free_percpu(buf->data);
8594 static void free_trace_buffers(struct trace_array *tr)
8599 free_trace_buffer(&tr->array_buffer);
8601 #ifdef CONFIG_TRACER_MAX_TRACE
8602 free_trace_buffer(&tr->max_buffer);
8606 static void init_trace_flags_index(struct trace_array *tr)
8610 /* Used by the trace options files */
8611 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8612 tr->trace_flags_index[i] = i;
8615 static void __update_tracer_options(struct trace_array *tr)
8619 for (t = trace_types; t; t = t->next)
8620 add_tracer_options(tr, t);
8623 static void update_tracer_options(struct trace_array *tr)
8625 mutex_lock(&trace_types_lock);
8626 __update_tracer_options(tr);
8627 mutex_unlock(&trace_types_lock);
8630 /* Must have trace_types_lock held */
8631 struct trace_array *trace_array_find(const char *instance)
8633 struct trace_array *tr, *found = NULL;
8635 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8636 if (tr->name && strcmp(tr->name, instance) == 0) {
8645 struct trace_array *trace_array_find_get(const char *instance)
8647 struct trace_array *tr;
8649 mutex_lock(&trace_types_lock);
8650 tr = trace_array_find(instance);
8653 mutex_unlock(&trace_types_lock);
8658 static int trace_array_create_dir(struct trace_array *tr)
8662 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8666 ret = event_trace_add_tracer(tr->dir, tr);
8668 tracefs_remove(tr->dir);
8670 init_tracer_tracefs(tr, tr->dir);
8671 __update_tracer_options(tr);
8676 static struct trace_array *trace_array_create(const char *name)
8678 struct trace_array *tr;
8682 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8684 return ERR_PTR(ret);
8686 tr->name = kstrdup(name, GFP_KERNEL);
8690 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8693 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8695 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8697 raw_spin_lock_init(&tr->start_lock);
8699 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8701 tr->current_trace = &nop_trace;
8703 INIT_LIST_HEAD(&tr->systems);
8704 INIT_LIST_HEAD(&tr->events);
8705 INIT_LIST_HEAD(&tr->hist_vars);
8706 INIT_LIST_HEAD(&tr->err_log);
8708 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8711 if (ftrace_allocate_ftrace_ops(tr) < 0)
8714 ftrace_init_trace_array(tr);
8716 init_trace_flags_index(tr);
8718 if (trace_instance_dir) {
8719 ret = trace_array_create_dir(tr);
8723 __trace_early_add_events(tr);
8725 list_add(&tr->list, &ftrace_trace_arrays);
8732 ftrace_free_ftrace_ops(tr);
8733 free_trace_buffers(tr);
8734 free_cpumask_var(tr->tracing_cpumask);
8738 return ERR_PTR(ret);
8741 static int instance_mkdir(const char *name)
8743 struct trace_array *tr;
8746 mutex_lock(&event_mutex);
8747 mutex_lock(&trace_types_lock);
8750 if (trace_array_find(name))
8753 tr = trace_array_create(name);
8755 ret = PTR_ERR_OR_ZERO(tr);
8758 mutex_unlock(&trace_types_lock);
8759 mutex_unlock(&event_mutex);
8764 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8765 * @name: The name of the trace array to be looked up/created.
8767 * Returns a pointer to the trace array with the given name, or
8768 * NULL if it cannot be created.
8770 * NOTE: This function increments the reference counter associated with the
8771 * trace array returned. This makes sure it cannot be freed while in use.
8772 * Use trace_array_put() once the trace array is no longer needed.
8773 * If the trace_array is to be freed, trace_array_destroy() needs to
8774 * be called after the trace_array_put(), or simply let user space delete
8775 * it from the tracefs instances directory. But until the
8776 * trace_array_put() is called, user space can not delete it.
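 *
 * A hypothetical usage sketch (error handling trimmed):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...use the instance...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	<- only if the instance should go away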
8779 struct trace_array *trace_array_get_by_name(const char *name)
8781 struct trace_array *tr;
8783 mutex_lock(&event_mutex);
8784 mutex_lock(&trace_types_lock);
8786 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8787 if (tr->name && strcmp(tr->name, name) == 0)
8791 tr = trace_array_create(name);
8799 mutex_unlock(&trace_types_lock);
8800 mutex_unlock(&event_mutex);
8803 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8805 static int __remove_instance(struct trace_array *tr)
8809 /* Reference counter for a newly created trace array = 1. */
8810 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8813 list_del(&tr->list);
8815 /* Disable all the flags that were enabled coming in */
8816 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8817 if ((1 << i) & ZEROED_TRACE_FLAGS)
8818 set_tracer_flag(tr, 1 << i, 0);
8821 tracing_set_nop(tr);
8822 clear_ftrace_function_probes(tr);
8823 event_trace_del_tracer(tr);
8824 ftrace_clear_pids(tr);
8825 ftrace_destroy_function_files(tr);
8826 tracefs_remove(tr->dir);
8827 free_trace_buffers(tr);
8829 for (i = 0; i < tr->nr_topts; i++) {
8830 kfree(tr->topts[i].topts);
8834 free_cpumask_var(tr->tracing_cpumask);
8841 int trace_array_destroy(struct trace_array *this_tr)
8843 struct trace_array *tr;
8849 mutex_lock(&event_mutex);
8850 mutex_lock(&trace_types_lock);
8854 /* Making sure trace array exists before destroying it. */
8855 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8856 if (tr == this_tr) {
8857 ret = __remove_instance(tr);
8862 mutex_unlock(&trace_types_lock);
8863 mutex_unlock(&event_mutex);
8867 EXPORT_SYMBOL_GPL(trace_array_destroy);
8869 static int instance_rmdir(const char *name)
8871 struct trace_array *tr;
8874 mutex_lock(&event_mutex);
8875 mutex_lock(&trace_types_lock);
8878 tr = trace_array_find(name);
8880 ret = __remove_instance(tr);
8882 mutex_unlock(&trace_types_lock);
8883 mutex_unlock(&event_mutex);
8888 static __init void create_trace_instances(struct dentry *d_tracer)
8890 struct trace_array *tr;
8892 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8895 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8898 mutex_lock(&event_mutex);
8899 mutex_lock(&trace_types_lock);
8901 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8904 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8905 "Failed to create instance directory\n"))
8909 mutex_unlock(&trace_types_lock);
8910 mutex_unlock(&event_mutex);
8914 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8916 struct trace_event_file *file;
8919 trace_create_file("available_tracers", 0444, d_tracer,
8920 tr, &show_traces_fops);
8922 trace_create_file("current_tracer", 0644, d_tracer,
8923 tr, &set_tracer_fops);
8925 trace_create_file("tracing_cpumask", 0644, d_tracer,
8926 tr, &tracing_cpumask_fops);
8928 trace_create_file("trace_options", 0644, d_tracer,
8929 tr, &tracing_iter_fops);
8931 trace_create_file("trace", 0644, d_tracer,
8934 trace_create_file("trace_pipe", 0444, d_tracer,
8935 tr, &tracing_pipe_fops);
8937 trace_create_file("buffer_size_kb", 0644, d_tracer,
8938 tr, &tracing_entries_fops);
8940 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8941 tr, &tracing_total_entries_fops);
8943 trace_create_file("free_buffer", 0200, d_tracer,
8944 tr, &tracing_free_buffer_fops);
8946 trace_create_file("trace_marker", 0220, d_tracer,
8947 tr, &tracing_mark_fops);
8949 file = __find_event_file(tr, "ftrace", "print");
8950 if (file && file->dir)
8951 trace_create_file("trigger", 0644, file->dir, file,
8952 &event_trigger_fops);
8953 tr->trace_marker_file = file;
8955 trace_create_file("trace_marker_raw", 0220, d_tracer,
8956 tr, &tracing_mark_raw_fops);
8958 trace_create_file("trace_clock", 0644, d_tracer, tr,
8961 trace_create_file("tracing_on", 0644, d_tracer,
8962 tr, &rb_simple_fops);
8964 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8965 &trace_time_stamp_mode_fops);
8967 tr->buffer_percent = 50;
8969 trace_create_file("buffer_percent", 0444, d_tracer,
8970 tr, &buffer_percent_fops);
8972 create_trace_options_dir(tr);
8974 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8975 trace_create_maxlat_file(tr, d_tracer);
8978 if (ftrace_create_function_files(tr, d_tracer))
8979 MEM_FAIL(1, "Could not allocate function filter files");
8981 #ifdef CONFIG_TRACER_SNAPSHOT
8982 trace_create_file("snapshot", 0644, d_tracer,
8983 tr, &snapshot_fops);
8986 trace_create_file("error_log", 0644, d_tracer,
8987 tr, &tracing_err_log_fops);
8989 for_each_tracing_cpu(cpu)
8990 tracing_init_tracefs_percpu(tr, cpu);
8992 ftrace_init_tracefs(tr, d_tracer);
8995 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8997 struct vfsmount *mnt;
8998 struct file_system_type *type;
9001 * To maintain backward compatibility for tools that mount
9002 * debugfs to get to the tracing facility, tracefs is automatically
9003 * mounted to the debugfs/tracing directory.
9005 type = get_fs_type("tracefs");
9008 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9009 put_filesystem(type);
9018 * tracing_init_dentry - initialize top level trace array
9020 * This is called when creating files or directories in the tracing
9021 * directory. It is called via fs_initcall() by any of the boot up code
9022 * and returns 0 if the top level tracing directory is available, or a negative value if it is not.
9024 int tracing_init_dentry(void)
9026 struct trace_array *tr = &global_trace;
9028 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9029 pr_warn("Tracing disabled due to lockdown\n");
9033 /* The top level trace array uses NULL as parent */
9037 if (WARN_ON(!tracefs_initialized()))
9041 * As there may still be users that expect the tracing
9042 * files to exist in debugfs/tracing, we must automount
9043 * the tracefs file system there, so older tools still
9044 * work with the newer kernel.
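 *
 * For example, even with tracefs never mounted explicitly:
 *
 *	# mount -t debugfs nodev /sys/kernel/debug
 *	# ls /sys/kernel/debug/tracing        (triggers the automount set up below)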
9046 tr->dir = debugfs_create_automount("tracing", NULL,
9047 trace_automount, NULL);
9052 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9053 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9055 static struct workqueue_struct *eval_map_wq __initdata;
9056 static struct work_struct eval_map_work __initdata;
9058 static void __init eval_map_work_func(struct work_struct *work)
9062 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9063 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9066 static int __init trace_eval_init(void)
9068 INIT_WORK(&eval_map_work, eval_map_work_func);
9070 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9072 pr_err("Unable to allocate eval_map_wq\n");
9074 eval_map_work_func(&eval_map_work);
9078 queue_work(eval_map_wq, &eval_map_work);
9082 static int __init trace_eval_sync(void)
9084 /* Make sure the eval map updates are finished */
9086 destroy_workqueue(eval_map_wq);
9090 late_initcall_sync(trace_eval_sync);
9093 #ifdef CONFIG_MODULES
9094 static void trace_module_add_evals(struct module *mod)
9096 if (!mod->num_trace_evals)
9100 * Modules with bad taint do not have events created, do
9101 * not bother with enums either.
9103 if (trace_module_has_bad_taint(mod))
9106 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9109 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9110 static void trace_module_remove_evals(struct module *mod)
9112 union trace_eval_map_item *map;
9113 union trace_eval_map_item **last = &trace_eval_maps;
9115 if (!mod->num_trace_evals)
9118 mutex_lock(&trace_eval_mutex);
9120 map = trace_eval_maps;
9123 if (map->head.mod == mod)
9125 map = trace_eval_jmp_to_tail(map);
9126 last = &map->tail.next;
9127 map = map->tail.next;
9132 *last = trace_eval_jmp_to_tail(map)->tail.next;
9135 mutex_unlock(&trace_eval_mutex);
9138 static inline void trace_module_remove_evals(struct module *mod) { }
9139 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9141 static int trace_module_notify(struct notifier_block *self,
9142 unsigned long val, void *data)
9144 struct module *mod = data;
9147 case MODULE_STATE_COMING:
9148 trace_module_add_evals(mod);
9150 case MODULE_STATE_GOING:
9151 trace_module_remove_evals(mod);
9158 static struct notifier_block trace_module_nb = {
9159 .notifier_call = trace_module_notify,
9162 #endif /* CONFIG_MODULES */
9164 static __init int tracer_init_tracefs(void)
9168 trace_access_lock_init();
9170 ret = tracing_init_dentry();
9176 init_tracer_tracefs(&global_trace, NULL);
9177 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9179 trace_create_file("tracing_thresh", 0644, NULL,
9180 &global_trace, &tracing_thresh_fops);
9182 trace_create_file("README", 0444, NULL,
9183 NULL, &tracing_readme_fops);
9185 trace_create_file("saved_cmdlines", 0444, NULL,
9186 NULL, &tracing_saved_cmdlines_fops);
9188 trace_create_file("saved_cmdlines_size", 0644, NULL,
9189 NULL, &tracing_saved_cmdlines_size_fops);
9191 trace_create_file("saved_tgids", 0444, NULL,
9192 NULL, &tracing_saved_tgids_fops);
9196 trace_create_eval_file(NULL);
9198 #ifdef CONFIG_MODULES
9199 register_module_notifier(&trace_module_nb);
9202 #ifdef CONFIG_DYNAMIC_FTRACE
9203 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9204 NULL, &tracing_dyn_info_fops);
9207 create_trace_instances(NULL);
9209 update_tracer_options(&global_trace);
9214 static int trace_panic_handler(struct notifier_block *this,
9215 unsigned long event, void *unused)
9217 if (ftrace_dump_on_oops)
9218 ftrace_dump(ftrace_dump_on_oops);
9222 static struct notifier_block trace_panic_notifier = {
9223 .notifier_call = trace_panic_handler,
9225 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9228 static int trace_die_handler(struct notifier_block *self,
9234 if (ftrace_dump_on_oops)
9235 ftrace_dump(ftrace_dump_on_oops);
9243 static struct notifier_block trace_die_notifier = {
9244 .notifier_call = trace_die_handler,
9249 * printk is set to a max of 1024; we really don't need it that big.
9250 * Nothing should be printing 1000 characters anyway.
9252 #define TRACE_MAX_PRINT 1000
9255 * Define here KERN_TRACE so that we have one place to modify
9256 * it if we decide to change what log level the ftrace dump should be printed at.
9259 #define KERN_TRACE KERN_EMERG
9262 trace_printk_seq(struct trace_seq *s)
9264 /* Probably should print a warning here. */
9265 if (s->seq.len >= TRACE_MAX_PRINT)
9266 s->seq.len = TRACE_MAX_PRINT;
9269 * More paranoid code. Although the buffer size is set to
9270 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9271 * an extra layer of protection.
9273 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9274 s->seq.len = s->seq.size - 1;
9276 /* should be NUL terminated, but we are paranoid. */
9277 s->buffer[s->seq.len] = 0;
9279 printk(KERN_TRACE "%s", s->buffer);
9284 void trace_init_global_iter(struct trace_iterator *iter)
9286 iter->tr = &global_trace;
9287 iter->trace = iter->tr->current_trace;
9288 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9289 iter->array_buffer = &global_trace.array_buffer;
9291 if (iter->trace && iter->trace->open)
9292 iter->trace->open(iter);
9294 /* Annotate start of buffers if we had overruns */
9295 if (ring_buffer_overruns(iter->array_buffer->buffer))
9296 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9298 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9299 if (trace_clocks[iter->tr->clock_id].in_ns)
9300 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9303 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9305 /* use static because iter can be a bit big for the stack */
9306 static struct trace_iterator iter;
9307 static atomic_t dump_running;
9308 struct trace_array *tr = &global_trace;
9309 unsigned int old_userobj;
9310 unsigned long flags;
9313 /* Only allow one dump user at a time. */
9314 if (atomic_inc_return(&dump_running) != 1) {
9315 atomic_dec(&dump_running);
9320 * Always turn off tracing when we dump.
9321 * We don't need to show trace output of what happens
9322 * between multiple crashes.
9324 * If the user does a sysrq-z, then they can re-enable
9325 * tracing with echo 1 > tracing_on.
9329 local_irq_save(flags);
9330 printk_nmi_direct_enter();
9332 /* Simulate the iterator */
9333 trace_init_global_iter(&iter);
9334 /* Can not use kmalloc for iter.temp */
9335 iter.temp = static_temp_buf;
9336 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9338 for_each_tracing_cpu(cpu) {
9339 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9342 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9344 /* don't look at user memory in panic mode */
9345 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9347 switch (oops_dump_mode) {
9349 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9352 iter.cpu_file = raw_smp_processor_id();
9357 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9358 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9361 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9363 /* Did function tracer already get disabled? */
9364 if (ftrace_is_dead()) {
9365 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9366 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9370 * We need to stop all tracing on all CPUs to read
9371 * the next buffer. This is a bit expensive, but is
9372 * not done often. We fill all that we can read,
9373 * and then release the locks again.
9376 while (!trace_empty(&iter)) {
9379 printk(KERN_TRACE "---------------------------------\n");
9383 trace_iterator_reset(&iter);
9384 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9386 if (trace_find_next_entry_inc(&iter) != NULL) {
9389 ret = print_trace_line(&iter);
9390 if (ret != TRACE_TYPE_NO_CONSUME)
9391 trace_consume(&iter);
9393 touch_nmi_watchdog();
9395 trace_printk_seq(&iter.seq);
9399 printk(KERN_TRACE " (ftrace buffer empty)\n");
9401 printk(KERN_TRACE "---------------------------------\n");
9404 tr->trace_flags |= old_userobj;
9406 for_each_tracing_cpu(cpu) {
9407 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9409 atomic_dec(&dump_running);
9410 printk_nmi_direct_exit();
9411 local_irq_restore(flags);
9413 EXPORT_SYMBOL_GPL(ftrace_dump);
9415 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9422 argv = argv_split(GFP_KERNEL, buf, &argc);
9427 ret = createfn(argc, argv);
9434 #define WRITE_BUFSIZE 4096
9436 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9437 size_t count, loff_t *ppos,
9438 int (*createfn)(int, char **))
9440 char *kbuf, *buf, *tmp;
9445 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9449 while (done < count) {
9450 size = count - done;
9452 if (size >= WRITE_BUFSIZE)
9453 size = WRITE_BUFSIZE - 1;
9455 if (copy_from_user(kbuf, buffer + done, size)) {
9462 tmp = strchr(buf, '\n');
9465 size = tmp - buf + 1;
9468 if (done + size < count) {
9471 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9472 pr_warn("Line length is too long: Should be less than %d\n",
9480 /* Remove comments */
9481 tmp = strchr(buf, '#');
9486 ret = trace_run_command(buf, createfn);
9491 } while (done < count);
9501 __init static int tracer_alloc_buffers(void)
9507 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9508 pr_warn("Tracing disabled due to lockdown\n");
9513 * Make sure we don't accidentally add more trace options
9514 * than we have bits for.
9516 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9518 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9521 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9522 goto out_free_buffer_mask;
9524 /* Only allocate trace_printk buffers if a trace_printk exists */
9525 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9526 /* Must be called before global_trace.buffer is allocated */
9527 trace_printk_init_buffers();
9529 /* To save memory, keep the ring buffer size to its minimum */
9530 if (ring_buffer_expanded)
9531 ring_buf_size = trace_buf_size;
9535 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9536 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9538 raw_spin_lock_init(&global_trace.start_lock);
9541 * The prepare callback allocates some memory for the ring buffer. We
9542 * don't free the buffer if the CPU goes down. If we were to free
9543 * the buffer, then the user would lose any trace that was in the
9544 * buffer. The memory will be removed once the "instance" is removed.
9546 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9547 "trace/RB:preapre", trace_rb_cpu_prepare,
9550 goto out_free_cpumask;
9551 /* Used for event triggers */
9553 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9555 goto out_rm_hp_state;
9557 if (trace_create_savedcmd() < 0)
9558 goto out_free_temp_buffer;
9560 /* TODO: make the number of buffers hot pluggable with CPUs */
9561 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9562 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9563 goto out_free_savedcmd;
9566 if (global_trace.buffer_disabled)
9569 if (trace_boot_clock) {
9570 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9572 pr_warn("Trace clock %s not defined, going back to default\n",
9577 * register_tracer() might reference current_trace, so it
9578 * needs to be set before we register anything. This is
9579 * just a bootstrap of current_trace anyway.
9581 global_trace.current_trace = &nop_trace;
9583 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9585 ftrace_init_global_array_ops(&global_trace);
9587 init_trace_flags_index(&global_trace);
9589 register_tracer(&nop_trace);
9591 /* Function tracing may start here (via kernel command line) */
9592 init_function_trace();
9594 /* All seems OK, enable tracing */
9595 tracing_disabled = 0;
9597 atomic_notifier_chain_register(&panic_notifier_list,
9598 &trace_panic_notifier);
9600 register_die_notifier(&trace_die_notifier);
9602 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9604 INIT_LIST_HEAD(&global_trace.systems);
9605 INIT_LIST_HEAD(&global_trace.events);
9606 INIT_LIST_HEAD(&global_trace.hist_vars);
9607 INIT_LIST_HEAD(&global_trace.err_log);
9608 list_add(&global_trace.list, &ftrace_trace_arrays);
9610 apply_trace_boot_options();
9612 register_snapshot_cmd();
9617 free_saved_cmdlines_buffer(savedcmd);
9618 out_free_temp_buffer:
9619 ring_buffer_free(temp_buffer);
9621 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9623 free_cpumask_var(global_trace.tracing_cpumask);
9624 out_free_buffer_mask:
9625 free_cpumask_var(tracing_buffer_mask);
9630 void __init early_trace_init(void)
9632 if (tracepoint_printk) {
9633 tracepoint_print_iter =
9634 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9635 if (MEM_FAIL(!tracepoint_print_iter,
9636 "Failed to allocate trace iterator\n"))
9637 tracepoint_printk = 0;
9639 static_key_enable(&tracepoint_printk_key.key);
9641 tracer_alloc_buffers();
9644 void __init trace_init(void)
9649 __init static int clear_boot_tracer(void)
9652 * The default bootup tracer string lives in an init section.
9653 * This function is called at late_initcall time. If we did not
9654 * find the boot tracer, then clear it out, to prevent
9655 * later registration from accessing the buffer that is
9656 * about to be freed.
9658 if (!default_bootup_tracer)
9661 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9662 default_bootup_tracer);
9663 default_bootup_tracer = NULL;
9668 fs_initcall(tracer_init_tracefs);
9669 late_initcall_sync(clear_boot_tracer);
9671 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9672 __init static int tracing_set_default_clock(void)
9674 /* sched_clock_stable() is determined in late_initcall */
9675 if (!trace_boot_clock && !sched_clock_stable()) {
9676 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9677 pr_warn("Can not set tracing clock due to lockdown\n");
9682 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9683 "If you want to keep using the local clock, then add:\n"
9684 " \"trace_clock=local\"\n"
9685 "on the kernel command line\n");
9686 tracing_set_clock(&global_trace, "global");
9691 late_initcall_sync(tracing_set_default_clock);