1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
53 #include "trace_output.h"
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
59 bool ring_buffer_expanded;
62 * We need to change this state when a selftest is running.
63 * A selftest will look into the ring buffer to count the
64 * entries inserted during the selftest, although some concurrent
65 * insertions into the ring buffer, such as trace_printk, could occur
66 * at the same time, giving false positive or negative results.
68 static bool __read_mostly tracing_selftest_running;
71 * If boot-time tracing (including tracers/events set via the kernel
72 * cmdline) is running, we do not want to run the selftest.
74 bool __read_mostly tracing_selftest_disabled;
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event occurred.
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets it back to zero.
115 static int tracing_disabled = 1;
117 cpumask_var_t __read_mostly tracing_buffer_mask;
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
125 * capturing traces that lead to crashes and outputting them to a serial console.
128 * It is off by default, but you can enable it either by specifying
129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
130 * /proc/sys/kernel/ftrace_dump_on_oops.
131 * Set it to 1 if you want to dump the buffers of all CPUs.
132 * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops.
135 enum ftrace_dump_mode ftrace_dump_on_oops;
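/*
 * Illustrative sketch (not part of trace.c): the same dump can also be
 * requested from code via ftrace_dump(); example_fatal_path() is a
 * hypothetical caller.
 */
static void example_fatal_path(void)
{
        /* Dump the ftrace buffers of every CPU to the console. */
        ftrace_dump(DUMP_ALL);
}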
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
144 unsigned long length;
147 union trace_eval_map_item;
149 struct trace_eval_map_tail {
151 * "end" is first and points to NULL as it must be different
152 * than "mod" or "eval_string"
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
158 static DEFINE_MUTEX(trace_eval_mutex);
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
167 union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
173 static union trace_eval_map_item *trace_eval_maps;
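/*
 * Illustrative layout sketch (not part of trace.c), based on the comment
 * above, of one saved array of trace_eval_map_item elements:
 *
 *   item[0]      head  (head.mod = owning module or NULL, head.length = N)
 *   item[1..N]   map   (N trace_eval_map entries)
 *   item[N+1]    tail  (tail.next = the next saved array, or NULL)
 */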
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned int trace_ctx);
181 #define MAX_TRACER_SIZE 100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
185 static bool allocate_snapshot;
187 static int __init set_cmdline_ftrace(char *str)
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
195 __setup("ftrace=", set_cmdline_ftrace);
197 static int __init set_ftrace_dump_on_oops(char *str)
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
213 static int __init stop_trace_on_warning(char *str)
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
219 __setup("traceoff_on_warning", stop_trace_on_warning);
221 static int __init boot_alloc_snapshot(char *str)
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
228 __setup("alloc_snapshot", boot_alloc_snapshot);
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
233 static int __init set_trace_boot_options(char *str)
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238 __setup("trace_options=", set_trace_boot_options);
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
243 static int __init set_trace_boot_clock(char *str)
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
249 __setup("trace_clock=", set_trace_boot_clock);
251 static int __init set_tracepoint_printk(char *str)
253 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 tracepoint_printk = 1;
257 __setup("tp_printk", set_tracepoint_printk);
259 unsigned long long ns2usecs(u64 nsec)
267 trace_process_export(struct trace_export *export,
268 struct ring_buffer_event *event, int flag)
270 struct trace_entry *entry;
271 unsigned int size = 0;
273 if (export->flags & flag) {
274 entry = ring_buffer_event_data(event);
275 size = ring_buffer_event_length(event);
276 export->write(export, entry, size);
280 static DEFINE_MUTEX(ftrace_export_lock);
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
288 static inline void ftrace_exports_enable(struct trace_export *export)
290 if (export->flags & TRACE_EXPORT_FUNCTION)
291 static_branch_inc(&trace_function_exports_enabled);
293 if (export->flags & TRACE_EXPORT_EVENT)
294 static_branch_inc(&trace_event_exports_enabled);
296 if (export->flags & TRACE_EXPORT_MARKER)
297 static_branch_inc(&trace_marker_exports_enabled);
300 static inline void ftrace_exports_disable(struct trace_export *export)
302 if (export->flags & TRACE_EXPORT_FUNCTION)
303 static_branch_dec(&trace_function_exports_enabled);
305 if (export->flags & TRACE_EXPORT_EVENT)
306 static_branch_dec(&trace_event_exports_enabled);
308 if (export->flags & TRACE_EXPORT_MARKER)
309 static_branch_dec(&trace_marker_exports_enabled);
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
314 struct trace_export *export;
316 preempt_disable_notrace();
318 export = rcu_dereference_raw_check(ftrace_exports_list);
320 trace_process_export(export, event, flag);
321 export = rcu_dereference_raw_check(export->next);
324 preempt_enable_notrace();
328 add_trace_export(struct trace_export **list, struct trace_export *export)
330 rcu_assign_pointer(export->next, *list);
332 * We are entering export into the list but another
333 * CPU might be walking that list. We need to make sure
334 * the export->next pointer is valid before another CPU sees
335 * the export pointer included into the list.
337 rcu_assign_pointer(*list, export);
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
343 struct trace_export **p;
345 for (p = list; *p != NULL; p = &(*p)->next)
352 rcu_assign_pointer(*p, (*p)->next);
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
360 ftrace_exports_enable(export);
362 add_trace_export(list, export);
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
370 ret = rm_trace_export(list, export);
371 ftrace_exports_disable(export);
376 int register_ftrace_export(struct trace_export *export)
378 if (WARN_ON_ONCE(!export->write))
381 mutex_lock(&ftrace_export_lock);
383 add_ftrace_export(&ftrace_exports_list, export);
385 mutex_unlock(&ftrace_export_lock);
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
391 int unregister_ftrace_export(struct trace_export *export)
395 mutex_lock(&ftrace_export_lock);
397 ret = rm_ftrace_export(&ftrace_exports_list, export);
399 mutex_unlock(&ftrace_export_lock);
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
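/*
 * Illustrative sketch (not part of trace.c): a minimal user of the export
 * API above. It assumes the struct trace_export definition from
 * <linux/trace.h>; the example_* names are hypothetical.
 */
static void example_export_write(struct trace_export *export,
                                 const void *entry, unsigned int size)
{
        /* Called for every exported event; @entry is the raw trace data. */
        pr_debug("exported %u bytes of trace data\n", size);
}

static struct trace_export example_export = {
        .write  = example_export_write,
        .flags  = TRACE_EXPORT_EVENT,
};

static int __init example_export_init(void)
{
        /* From now on, every trace event is also fed to example_export_write(). */
        return register_ftrace_export(&example_export);
}

static void example_export_exit(void)
{
        unregister_ftrace_export(&example_export);
}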
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS \
407 (FUNCTION_DEFAULT_FLAGS | \
408 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
409 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
410 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
411 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
416 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
423 * The global_trace is the descriptor that holds the top-level tracing
424 * buffers for the live tracing.
426 static struct trace_array global_trace = {
427 .trace_flags = TRACE_DEFAULT_FLAGS,
430 LIST_HEAD(ftrace_trace_arrays);
432 int trace_array_get(struct trace_array *this_tr)
434 struct trace_array *tr;
437 mutex_lock(&trace_types_lock);
438 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
445 mutex_unlock(&trace_types_lock);
450 static void __trace_array_put(struct trace_array *this_tr)
452 WARN_ON(!this_tr->ref);
457 * trace_array_put - Decrement the reference counter for this trace array.
458 * @this_tr : pointer to the trace array
460 * NOTE: Use this when we no longer need the trace array returned by
461 * trace_array_get_by_name(). This ensures the trace array can be later
465 void trace_array_put(struct trace_array *this_tr)
470 mutex_lock(&trace_types_lock);
471 __trace_array_put(this_tr);
472 mutex_unlock(&trace_types_lock);
474 EXPORT_SYMBOL_GPL(trace_array_put);
476 int tracing_check_open_get_tr(struct trace_array *tr)
480 ret = security_locked_down(LOCKDOWN_TRACEFS);
484 if (tracing_disabled)
487 if (tr && trace_array_get(tr) < 0)
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 struct trace_buffer *buffer,
495 struct ring_buffer_event *event)
497 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 !filter_match_preds(call->filter, rec)) {
499 __trace_event_discard_commit(buffer, event);
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
508 vfree(pid_list->pids);
513 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514 * @filtered_pids: The list of pids to check
515 * @search_pid: The PID to find in @filtered_pids
517 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 * If pid_max changed after filtered_pids was created, we
524 * by default ignore all pids greater than the previous pid_max.
526 if (search_pid >= filtered_pids->pid_max)
529 return test_bit(search_pid, filtered_pids->pids);
533 * trace_ignore_this_task - should a task be ignored for tracing
534 * @filtered_pids: The list of pids to check
535 * @filtered_no_pids: The list of pids not to be traced
536 * @task: The task that should be ignored if not filtered
538 * Checks if @task should be traced or not from @filtered_pids.
539 * Returns true if @task should *NOT* be traced.
540 * Returns false if @task should be traced.
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 struct trace_pid_list *filtered_no_pids,
545 struct task_struct *task)
548 * If filtered_no_pids is not empty, and the task's pid is listed
549 * in filtered_no_pids, then return true.
550 * Otherwise, if filtered_pids is empty, that means we can
551 * trace all tasks. If it has content, then only trace pids
552 * within filtered_pids.
555 return (filtered_pids &&
556 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 trace_find_filtered_pid(filtered_no_pids, task->pid));
562 * trace_filter_add_remove_task - Add or remove a task from a pid_list
563 * @pid_list: The list to modify
564 * @self: The current task for fork or NULL for exit
565 * @task: The task to add or remove
567 * If adding a task, if @self is defined, the task is only added if @self
568 * is also included in @pid_list. This happens on fork and tasks should
569 * only be added when the parent is listed. If @self is NULL, then the
570 * @task pid will be removed from the list, which would happen on exit of a task.
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 struct task_struct *self,
575 struct task_struct *task)
580 /* For forks, we only add if the forking task is listed */
582 if (!trace_find_filtered_pid(pid_list, self->pid))
586 /* Sorry, but we don't support pid_max changing after setting */
587 if (task->pid >= pid_list->pid_max)
590 /* "self" is set for forks, and NULL for exits */
592 set_bit(task->pid, pid_list->pids);
594 clear_bit(task->pid, pid_list->pids);
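/*
 * Illustrative sketch (not part of trace.c): how fork/exit handlers would
 * call the helper above; example_filtered_pids is a hypothetical list.
 */
static struct trace_pid_list *example_filtered_pids;

static void example_fork_hook(struct task_struct *self, struct task_struct *task)
{
        /* The child is added only if the forking parent is in the list. */
        trace_filter_add_remove_task(example_filtered_pids, self, task);
}

static void example_exit_hook(struct task_struct *task)
{
        /* A NULL @self means: remove @task from the list on exit. */
        trace_filter_add_remove_task(example_filtered_pids, NULL, task);
}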
598 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599 * @pid_list: The pid list to show
600 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601 * @pos: The position of the file
603 * This is used by the seq_file "next" operation to iterate the pids
604 * listed in a trace_pid_list structure.
606 * Returns the pid+1 as we want to display pid of zero, but NULL would
607 * stop the iteration.
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 unsigned long pid = (unsigned long)v;
615 /* pid already is +1 of the actual previous bit */
616 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618 /* Return pid + 1 to allow zero to be represented */
619 if (pid < pid_list->pid_max)
620 return (void *)(pid + 1);
626 * trace_pid_start - Used for seq_file to start reading pid lists
627 * @pid_list: The pid list to show
628 * @pos: The position of the file
630 * This is used by seq_file "start" operation to start the iteration of listed pids.
633 * Returns the pid+1 as we want to display pid of zero, but NULL would
634 * stop the iteration.
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
641 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 if (pid >= pid_list->pid_max)
645 /* Return pid + 1 so that zero can be the exit value */
646 for (pid++; pid && l < *pos;
647 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
653 * trace_pid_show - show the current pid in seq_file processing
654 * @m: The seq_file structure to write into
655 * @v: A void pointer of the pid (+1) value to display
657 * Can be directly used by seq_file operations to display the current pid value.
660 int trace_pid_show(struct seq_file *m, void *v)
662 unsigned long pid = (unsigned long)v - 1;
664 seq_printf(m, "%lu\n", pid);
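/*
 * Illustrative sketch (not part of trace.c): wiring the three helpers above
 * into seq_operations. example_shown_pids and the example_* wrappers are
 * hypothetical; real users also hold the proper locks/RCU around the list.
 */
static struct trace_pid_list *example_shown_pids;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
        return trace_pid_start(example_shown_pids, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
        return trace_pid_next(example_shown_pids, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
        .start  = example_pids_start,
        .next   = example_pids_next,
        .stop   = example_pids_stop,
        .show   = trace_pid_show,       /* prints one pid per line */
};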
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE 127
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 struct trace_pid_list **new_pid_list,
673 const char __user *ubuf, size_t cnt)
675 struct trace_pid_list *pid_list;
676 struct trace_parser parser;
684 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
688 * Always recreate a new array. The write is an all or nothing
689 * operation. Always create a new array when adding new pids by
690 * the user. If the operation fails, then the current list is used.
693 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 trace_parser_put(&parser);
699 pid_list->pid_max = READ_ONCE(pid_max);
701 /* Only truncating will shrink pid_max */
702 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 pid_list->pid_max = filtered_pids->pid_max;
705 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 if (!pid_list->pids) {
707 trace_parser_put(&parser);
713 /* copy the current bits to the new max */
714 for_each_set_bit(pid, filtered_pids->pids,
715 filtered_pids->pid_max) {
716 set_bit(pid, pid_list->pids);
725 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 if (ret < 0 || !trace_parser_loaded(&parser))
734 if (kstrtoul(parser.buffer, 0, &val))
736 if (val >= pid_list->pid_max)
741 set_bit(pid, pid_list->pids);
744 trace_parser_clear(&parser);
747 trace_parser_put(&parser);
750 trace_free_pid_list(pid_list);
755 /* Cleared the list of pids */
756 trace_free_pid_list(pid_list);
761 *new_pid_list = pid_list;
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
770 /* Early boot up does not have a buffer yet */
772 return trace_clock_local();
774 ts = ring_buffer_time_stamp(buf->buffer, cpu);
775 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
780 u64 ftrace_now(int cpu)
782 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
786 * tracing_is_enabled - Show if global_trace has been enabled
788 * Shows if the global trace has been enabled or not. It uses the
789 * mirror flag "buffer_disabled" to be used in fast paths such as for
790 * the irqsoff tracer. But it may be inaccurate due to races. If you
791 * need to know the accurate state, use tracing_is_on() which is a little
792 * slower, but accurate.
794 int tracing_is_enabled(void)
797 * For quick access (irqsoff uses this in fast path), just
798 * return the mirror variable of the state of the ring buffer.
799 * It's a little racy, but we don't really care.
802 return !global_trace.buffer_disabled;
806 * trace_buf_size is the size in bytes that is allocated
807 * for a buffer. Note, the number of bytes is always rounded up to page size.
810 * This number is purposely set to a low number of 16384.
811 * If the dump on oops happens, it will be much appreciated
812 * to not have to wait for all that output. Anyway this can be
813 * boot time and run time configurable.
815 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
817 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 /* trace_types holds a link list of available tracers. */
820 static struct tracer *trace_types __read_mostly;
823 * trace_types_lock is used to protect the trace_types list.
825 DEFINE_MUTEX(trace_types_lock);
828 * serialize the access of the ring buffer
830 * The ring buffer serializes readers, but that is only low-level protection.
831 * The validity of the events (as returned by ring_buffer_peek() etc.)
832 * is not protected by the ring buffer.
834 * The content of events may become garbage if we allow other processes
835 * to consume these events concurrently:
836 * A) the page of the consumed events may become a normal page
837 * (not a reader page) in the ring buffer, and this page will be rewritten
838 * by the event producer.
839 * B) the page of the consumed events may become a page for splice_read,
840 * and this page will be returned to the system.
842 * These primitives allow multiple processes to access different CPU ring buffers simultaneously.
845 * These primitives don't distinguish read-only and read-consume access.
846 * Multiple read-only accesses are also serialized.
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 static inline void trace_access_lock(int cpu)
855 if (cpu == RING_BUFFER_ALL_CPUS) {
856 /* gain it for accessing the whole ring buffer. */
857 down_write(&all_cpu_access_lock);
859 /* gain it for accessing a cpu ring buffer. */
861 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 down_read(&all_cpu_access_lock);
864 /* Secondly block other access to this @cpu ring buffer. */
865 mutex_lock(&per_cpu(cpu_access_lock, cpu));
869 static inline void trace_access_unlock(int cpu)
871 if (cpu == RING_BUFFER_ALL_CPUS) {
872 up_write(&all_cpu_access_lock);
874 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 up_read(&all_cpu_access_lock);
879 static inline void trace_access_lock_init(void)
883 for_each_possible_cpu(cpu)
884 mutex_init(&per_cpu(cpu_access_lock, cpu));
889 static DEFINE_MUTEX(access_lock);
891 static inline void trace_access_lock(int cpu)
894 mutex_lock(&access_lock);
897 static inline void trace_access_unlock(int cpu)
900 mutex_unlock(&access_lock);
903 static inline void trace_access_lock_init(void)
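/*
 * Illustrative sketch (not part of trace.c): the pattern a reader of a single
 * CPU buffer follows with the helpers above; example_consume() is hypothetical.
 */
static void example_read_cpu_buffer(int cpu)
{
        trace_access_lock(cpu);
        /* example_consume(cpu): read/consume events of this CPU buffer here. */
        trace_access_unlock(cpu);
}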
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 unsigned int trace_ctx,
912 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 struct trace_buffer *buffer,
915 unsigned int trace_ctx,
916 int skip, struct pt_regs *regs);
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 unsigned int trace_ctx,
921 int skip, struct pt_regs *regs)
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 struct trace_buffer *buffer,
926 unsigned long trace_ctx,
927 int skip, struct pt_regs *regs)
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 int type, unsigned int trace_ctx)
937 struct trace_entry *ent = ring_buffer_event_data(event);
939 tracing_generic_entry_update(ent, type, trace_ctx);
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
946 unsigned int trace_ctx)
948 struct ring_buffer_event *event;
950 event = ring_buffer_lock_reserve(buffer, len);
952 trace_event_setup(event, type, trace_ctx);
957 void tracer_tracing_on(struct trace_array *tr)
959 if (tr->array_buffer.buffer)
960 ring_buffer_record_on(tr->array_buffer.buffer);
962 * This flag is looked at when buffers haven't been allocated
963 * yet, or by some tracers (like irqsoff) that just want to
964 * know if the ring buffer has been disabled, but can handle
965 * races where it gets disabled while we still do a record.
966 * As the check is in the fast path of the tracers, it is more
967 * important to be fast than accurate.
969 tr->buffer_disabled = 0;
970 /* Make the flag seen by readers */
975 * tracing_on - enable tracing buffers
977 * This function enables tracing buffers that may have been
978 * disabled with tracing_off.
980 void tracing_on(void)
982 tracer_tracing_on(&global_trace);
984 EXPORT_SYMBOL_GPL(tracing_on);
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 __this_cpu_write(trace_taskinfo_save, true);
992 /* If this is the temp buffer, we need to commit fully */
993 if (this_cpu_read(trace_buffered_event) == event) {
994 /* Length is in event->array[0] */
995 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 /* Release the temp buffer */
997 this_cpu_dec(trace_buffered_event_cnt);
999 ring_buffer_unlock_commit(buffer, event);
1003 * __trace_puts - write a constant string into the trace buffer.
1004 * @ip: The address of the caller
1005 * @str: The constant string to write
1006 * @size: The size of the string.
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1010 struct ring_buffer_event *event;
1011 struct trace_buffer *buffer;
1012 struct print_entry *entry;
1013 unsigned int trace_ctx;
1016 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 if (unlikely(tracing_selftest_running || tracing_disabled))
1022 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024 trace_ctx = tracing_gen_ctx();
1025 buffer = global_trace.array_buffer.buffer;
1026 ring_buffer_nest_start(buffer);
1027 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1034 entry = ring_buffer_event_data(event);
1037 memcpy(&entry->buf, str, size);
1039 /* Add a newline if necessary */
1040 if (entry->buf[size - 1] != '\n') {
1041 entry->buf[size] = '\n';
1042 entry->buf[size + 1] = '\0';
1044 entry->buf[size] = '\0';
1046 __buffer_unlock_commit(buffer, event);
1047 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049 ring_buffer_nest_end(buffer);
1052 EXPORT_SYMBOL_GPL(__trace_puts);
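/*
 * Illustrative sketch (not part of trace.c): callers normally go through the
 * trace_puts() macro, which picks __trace_puts() or __trace_bputs() depending
 * on whether the string is a compile-time constant; example_mark() is hypothetical.
 */
static void example_mark(void)
{
        /* Writes a single TRACE_PRINT entry into the global trace buffer. */
        trace_puts("example: reached the interesting point\n");
}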
1055 * __trace_bputs - write the pointer to a constant string into trace buffer
1056 * @ip: The address of the caller
1057 * @str: The constant string to write to the buffer to
1059 int __trace_bputs(unsigned long ip, const char *str)
1061 struct ring_buffer_event *event;
1062 struct trace_buffer *buffer;
1063 struct bputs_entry *entry;
1064 unsigned int trace_ctx;
1065 int size = sizeof(struct bputs_entry);
1068 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1071 if (unlikely(tracing_selftest_running || tracing_disabled))
1074 trace_ctx = tracing_gen_ctx();
1075 buffer = global_trace.array_buffer.buffer;
1077 ring_buffer_nest_start(buffer);
1078 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1083 entry = ring_buffer_event_data(event);
1087 __buffer_unlock_commit(buffer, event);
1088 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 ring_buffer_nest_end(buffer);
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1101 struct tracer *tracer = tr->current_trace;
1102 unsigned long flags;
1105 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 internal_trace_puts("*** snapshot is being ignored ***\n");
1110 if (!tr->allocated_snapshot) {
1111 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 internal_trace_puts("*** stopping trace here! ***\n");
1117 /* Note, snapshot can not be used when the tracer uses it */
1118 if (tracer->use_max_tr) {
1119 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 local_irq_save(flags);
1125 update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 local_irq_restore(flags);
1129 void tracing_snapshot_instance(struct trace_array *tr)
1131 tracing_snapshot_instance_cond(tr, NULL);
1135 * tracing_snapshot - take a snapshot of the current buffer.
1137 * This causes a swap between the snapshot buffer and the current live
1138 * tracing buffer. You can use this to take snapshots of the live
1139 * trace when some condition is triggered, but continue to trace.
1141 * Note, make sure to allocate the snapshot with either
1142 * a tracing_snapshot_alloc(), or by doing it manually
1143 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1145 * If the snapshot buffer is not allocated, it will stop tracing.
1146 * Basically making a permanent snapshot.
1148 void tracing_snapshot(void)
1150 struct trace_array *tr = &global_trace;
1152 tracing_snapshot_instance(tr);
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1157 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158 * @tr: The tracing instance to snapshot
1159 * @cond_data: The data to be tested conditionally, and possibly saved
1161 * This is the same as tracing_snapshot() except that the snapshot is
1162 * conditional - the snapshot will only happen if the
1163 * cond_snapshot.update() implementation receiving the cond_data
1164 * returns true, which means that the trace array's cond_snapshot
1165 * update() operation used the cond_data to determine whether the
1166 * snapshot should be taken, and if it was, presumably saved it along
1167 * with the snapshot.
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 tracing_snapshot_instance_cond(tr, cond_data);
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1176 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177 * @tr: The tracing instance
1179 * When the user enables a conditional snapshot using
1180 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181 * with the snapshot. This accessor is used to retrieve it.
1183 * Should not be called from cond_snapshot.update(), since it takes
1184 * the tr->max_lock lock, which the code calling
1185 * cond_snapshot.update() has already done.
1187 * Returns the cond_data associated with the trace array's snapshot.
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 void *cond_data = NULL;
1193 arch_spin_lock(&tr->max_lock);
1195 if (tr->cond_snapshot)
1196 cond_data = tr->cond_snapshot->cond_data;
1198 arch_spin_unlock(&tr->max_lock);
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 if (!tr->allocated_snapshot) {
1214 /* allocate spare buffer */
1215 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220 tr->allocated_snapshot = true;
1226 static void free_snapshot(struct trace_array *tr)
1229 * We don't free the ring buffer; instead, we resize it because
1230 * the max_tr ring buffer has some state (e.g. ring->clock) and
1231 * we want to preserve it.
1233 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1234 set_buffer_entries(&tr->max_buffer, 1);
1235 tracing_reset_online_cpus(&tr->max_buffer);
1236 tr->allocated_snapshot = false;
1240 * tracing_alloc_snapshot - allocate snapshot buffer.
1242 * This only allocates the snapshot buffer if it isn't already
1243 * allocated - it doesn't also take a snapshot.
1245 * This is meant to be used in cases where the snapshot buffer needs
1246 * to be set up for events that can't sleep but need to be able to
1247 * trigger a snapshot.
1249 int tracing_alloc_snapshot(void)
1251 struct trace_array *tr = &global_trace;
1254 ret = tracing_alloc_snapshot_instance(tr);
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264 * This is similar to tracing_snapshot(), but it will allocate the
1265 * snapshot buffer if it isn't already allocated. Use this only
1266 * where it is safe to sleep, as the allocation may sleep.
1268 * This causes a swap between the snapshot buffer and the current live
1269 * tracing buffer. You can use this to take snapshots of the live
1270 * trace when some condition is triggered, but continue to trace.
1272 void tracing_snapshot_alloc(void)
1276 ret = tracing_alloc_snapshot();
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
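/*
 * Illustrative sketch (not part of trace.c): a typical split between the
 * sleepable setup and the atomic trigger; the example_* names are hypothetical.
 */
static int example_snapshot_setup(void)
{
        /* Allocate the spare buffer up front, from a context that may sleep. */
        return tracing_alloc_snapshot();
}

static void example_snapshot_trigger(void)
{
        /*
         * Once allocated, this may be called from atomic context: it swaps
         * the live buffer with the snapshot buffer and tracing continues.
         */
        tracing_snapshot();
}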
1285 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286 * @tr: The tracing instance
1287 * @cond_data: User data to associate with the snapshot
1288 * @update: Implementation of the cond_snapshot update function
1290 * Check whether the conditional snapshot for the given instance has
1291 * already been enabled, or if the current tracer is already using a
1292 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293 * save the cond_data and update function inside.
1295 * Returns 0 if successful, error otherwise.
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 cond_update_fn_t update)
1300 struct cond_snapshot *cond_snapshot;
1303 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307 cond_snapshot->cond_data = cond_data;
1308 cond_snapshot->update = update;
1310 mutex_lock(&trace_types_lock);
1312 ret = tracing_alloc_snapshot_instance(tr);
1316 if (tr->current_trace->use_max_tr) {
1322 * The cond_snapshot can only change to NULL without the
1323 * trace_types_lock. We don't care if we race with it going
1324 * to NULL, but we want to make sure that it's not set to
1325 * something other than NULL when we get here, which we can
1326 * do safely with only holding the trace_types_lock and not
1327 * having to take the max_lock.
1329 if (tr->cond_snapshot) {
1334 arch_spin_lock(&tr->max_lock);
1335 tr->cond_snapshot = cond_snapshot;
1336 arch_spin_unlock(&tr->max_lock);
1338 mutex_unlock(&trace_types_lock);
1343 mutex_unlock(&trace_types_lock);
1344 kfree(cond_snapshot);
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1350 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351 * @tr: The tracing instance
1353 * Check whether the conditional snapshot for the given instance is
1354 * enabled; if so, free the cond_snapshot associated with it,
1355 * otherwise return -EINVAL.
1357 * Returns 0 if successful, error otherwise.
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 arch_spin_lock(&tr->max_lock);
1365 if (!tr->cond_snapshot)
1368 kfree(tr->cond_snapshot);
1369 tr->cond_snapshot = NULL;
1372 arch_spin_unlock(&tr->max_lock);
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
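/*
 * Illustrative sketch (not part of trace.c): a conditional-snapshot user.
 * It assumes cond_update_fn_t is bool (*)(struct trace_array *, void *);
 * struct example_cond and the example_* functions are hypothetical.
 */
struct example_cond {
        unsigned long threshold;
        unsigned long seen;
};

static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
        struct example_cond *cond = cond_data;

        /* Only allow the snapshot once the stored threshold is exceeded. */
        return cond->seen > cond->threshold;
}

static int example_cond_setup(struct trace_array *tr, struct example_cond *cond)
{
        /* Returns -EBUSY if a conditional snapshot is already enabled on @tr. */
        return tracing_snapshot_cond_enable(tr, cond, example_cond_update);
}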
1378 void tracing_snapshot(void)
1380 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1390 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1417 void tracer_tracing_off(struct trace_array *tr)
1419 if (tr->array_buffer.buffer)
1420 ring_buffer_record_off(tr->array_buffer.buffer);
1422 * This flag is looked at when buffers haven't been allocated
1423 * yet, or by some tracers (like irqsoff) that just want to
1424 * know if the ring buffer has been disabled, but can handle
1425 * races where it gets disabled while we still do a record.
1426 * As the check is in the fast path of the tracers, it is more
1427 * important to be fast than accurate.
1429 tr->buffer_disabled = 1;
1430 /* Make the flag seen by readers */
1435 * tracing_off - turn off tracing buffers
1437 * This function stops the tracing buffers from recording data.
1438 * It does not disable any overhead the tracers themselves may
1439 * be causing. This function simply causes all recording to
1440 * the ring buffers to fail.
1442 void tracing_off(void)
1444 tracer_tracing_off(&global_trace);
1446 EXPORT_SYMBOL_GPL(tracing_off);
1448 void disable_trace_on_warning(void)
1450 if (__disable_trace_on_warning) {
1451 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 "Disabling tracing due to warning\n");
1458 * tracer_tracing_is_on - show real state of ring buffer enabled
1459 * @tr : the trace array to know if ring buffer is enabled
1461 * Shows real state of the ring buffer if it is enabled or not.
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1465 if (tr->array_buffer.buffer)
1466 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 return !tr->buffer_disabled;
1471 * tracing_is_on - show state of ring buffers enabled
1473 int tracing_is_on(void)
1475 return tracer_tracing_is_on(&global_trace);
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
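/*
 * Illustrative sketch (not part of trace.c): a common debugging pattern built
 * on the calls above; example_state_looks_wrong() is hypothetical.
 */
static void example_check_state(bool example_state_looks_wrong)
{
        /*
         * Freeze the ring buffers the moment a bad condition is seen, so the
         * events that led up to it can still be read from the trace file.
         */
        if (example_state_looks_wrong && tracing_is_on())
                tracing_off();
}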
1479 static int __init set_buf_size(char *str)
1481 unsigned long buf_size;
1485 buf_size = memparse(str, &str);
1486 /* nr_entries can not be zero */
1489 trace_buf_size = buf_size;
1492 __setup("trace_buf_size=", set_buf_size);
1494 static int __init set_tracing_thresh(char *str)
1496 unsigned long threshold;
1501 ret = kstrtoul(str, 0, &threshold);
1504 tracing_thresh = threshold * 1000;
1507 __setup("tracing_thresh=", set_tracing_thresh);
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1511 return nsecs / 1000;
1515 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518 * of strings in the order that the evals (enum) were defined.
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1532 int in_ns; /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 { trace_clock_local, "local", 1 },
1535 { trace_clock_global, "global", 1 },
1536 { trace_clock_counter, "counter", 0 },
1537 { trace_clock_jiffies, "uptime", 0 },
1538 { trace_clock, "perf", 1 },
1539 { ktime_get_mono_fast_ns, "mono", 1 },
1540 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1541 { ktime_get_boot_fast_ns, "boot", 1 },
1545 bool trace_clock_in_ns(struct trace_array *tr)
1547 if (trace_clocks[tr->clock_id].in_ns)
1554 * trace_parser_get_init - gets the buffer for trace parser
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1558 memset(parser, 0, sizeof(*parser));
1560 parser->buffer = kmalloc(size, GFP_KERNEL);
1561 if (!parser->buffer)
1564 parser->size = size;
1569 * trace_parser_put - frees the buffer for trace parser
1571 void trace_parser_put(struct trace_parser *parser)
1573 kfree(parser->buffer);
1574 parser->buffer = NULL;
1578 * trace_get_user - reads the user input string separated by space
1579 * (matched by isspace(ch))
1581 * For each string found the 'struct trace_parser' is updated,
1582 * and the function returns.
1584 * Returns number of bytes read.
1586 * See kernel/trace/trace.h for 'struct trace_parser' details.
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 size_t cnt, loff_t *ppos)
1596 trace_parser_clear(parser);
1598 ret = get_user(ch, ubuf++);
1606 * The parser is not finished with the last write,
1607 * continue reading the user input without skipping spaces.
1609 if (!parser->cont) {
1610 /* skip white space */
1611 while (cnt && isspace(ch)) {
1612 ret = get_user(ch, ubuf++);
1621 /* only spaces were written */
1622 if (isspace(ch) || !ch) {
1629 /* read the non-space input */
1630 while (cnt && !isspace(ch) && ch) {
1631 if (parser->idx < parser->size - 1)
1632 parser->buffer[parser->idx++] = ch;
1637 ret = get_user(ch, ubuf++);
1644 /* We either got finished input or we have to wait for another call. */
1645 if (isspace(ch) || !ch) {
1646 parser->buffer[parser->idx] = 0;
1647 parser->cont = false;
1648 } else if (parser->idx < parser->size - 1) {
1649 parser->cont = true;
1650 parser->buffer[parser->idx++] = ch;
1651 /* Make sure the parsed string always terminates with '\0'. */
1652 parser->buffer[parser->idx] = 0;
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 if (trace_seq_used(s) <= s->seq.readpos)
1673 len = trace_seq_used(s) - s->seq.readpos;
1676 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1678 s->seq.readpos += cnt;
1682 unsigned long __read_mostly tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 defined(CONFIG_FSNOTIFY)
1688 static struct workqueue_struct *fsnotify_wq;
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1692 struct trace_array *tr = container_of(work, struct trace_array,
1694 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1699 struct trace_array *tr = container_of(iwork, struct trace_array,
1701 queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 struct dentry *d_tracer)
1707 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 d_tracer, &tr->max_latency,
1711 &tracing_max_lat_fops);
1714 __init static int latency_fsnotify_init(void)
1716 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 WQ_UNBOUND | WQ_HIGHPRI, 0);
1719 pr_err("Unable to allocate tr_max_lat_wq\n");
1725 late_initcall_sync(latency_fsnotify_init);
1727 void latency_fsnotify(struct trace_array *tr)
1732 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 * possible that we are called from __schedule() or do_idle(), which
1734 * could cause a deadlock.
1736 irq_work_queue(&tr->fsnotify_irqwork);
1740 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741 * defined(CONFIG_FSNOTIFY)
1745 #define trace_create_maxlat_file(tr, d_tracer) \
1746 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1747 &tr->max_latency, &tracing_max_lat_fops)
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1753 * Copy the new maximum trace into the separate maximum-trace
1754 * structure. (this way the maximum trace is permanently saved,
1755 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1760 struct array_buffer *trace_buf = &tr->array_buffer;
1761 struct array_buffer *max_buf = &tr->max_buffer;
1762 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766 max_buf->time_start = data->preempt_timestamp;
1768 max_data->saved_latency = tr->max_latency;
1769 max_data->critical_start = data->critical_start;
1770 max_data->critical_end = data->critical_end;
1772 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 max_data->pid = tsk->pid;
1775 * If tsk == current, then use current_uid(), as that does not use
1776 * RCU. The irq tracer can be called out of RCU scope.
1779 max_data->uid = current_uid();
1781 max_data->uid = task_uid(tsk);
1783 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 max_data->policy = tsk->policy;
1785 max_data->rt_priority = tsk->rt_priority;
1787 /* record this task's comm */
1788 tracing_record_cmdline(tsk);
1789 latency_fsnotify(tr);
1793 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1795 * @tsk: the task with the latency
1796 * @cpu: The cpu that initiated the trace.
1797 * @cond_data: User data associated with a conditional snapshot
1799 * Flip the buffers between the @tr and the max_tr and record information
1800 * about which task was the cause of this latency.
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809 WARN_ON_ONCE(!irqs_disabled());
1811 if (!tr->allocated_snapshot) {
1812 /* Only the nop tracer should hit this when disabling */
1813 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1817 arch_spin_lock(&tr->max_lock);
1819 /* Inherit the recordable setting from array_buffer */
1820 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 ring_buffer_record_on(tr->max_buffer.buffer);
1823 ring_buffer_record_off(tr->max_buffer.buffer);
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1831 __update_max_tr(tr, tsk, cpu);
1834 arch_spin_unlock(&tr->max_lock);
1838 * update_max_tr_single - only copy one trace over, and reset the rest
1840 * @tsk: task with the latency
1841 * @cpu: the cpu of the buffer to copy.
1843 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1853 WARN_ON_ONCE(!irqs_disabled());
1854 if (!tr->allocated_snapshot) {
1855 /* Only the nop tracer should hit this when disabling */
1856 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1860 arch_spin_lock(&tr->max_lock);
1862 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1864 if (ret == -EBUSY) {
1866 * We failed to swap the buffer due to a commit taking
1867 * place on this CPU. We fail to record, but we reset
1868 * the max trace buffer (no one writes directly to it)
1869 * and flag that it failed.
1871 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 "Failed to swap buffers due to commit in progress\n");
1875 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1877 __update_max_tr(tr, tsk, cpu);
1878 arch_spin_unlock(&tr->max_lock);
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1884 /* Iterators are static, they should be filled or empty */
1885 if (trace_buffer_iter(iter, iter->cpu_file))
1888 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1895 struct trace_selftests {
1896 struct list_head list;
1897 struct tracer *type;
1900 static LIST_HEAD(postponed_selftests);
1902 static int save_selftest(struct tracer *type)
1904 struct trace_selftests *selftest;
1906 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1910 selftest->type = type;
1911 list_add(&selftest->list, &postponed_selftests);
1915 static int run_tracer_selftest(struct tracer *type)
1917 struct trace_array *tr = &global_trace;
1918 struct tracer *saved_tracer = tr->current_trace;
1921 if (!type->selftest || tracing_selftest_disabled)
1925 * If a tracer registers early in boot up (before scheduling is
1926 * initialized and such), then do not run its selftests yet.
1927 * Instead, run it a little later in the boot process.
1929 if (!selftests_can_run)
1930 return save_selftest(type);
1933 * Run a selftest on this tracer.
1934 * Here we reset the trace buffer, and set the current
1935 * tracer to be this tracer. The tracer can then run some
1936 * internal tracing to verify that everything is in order.
1937 * If we fail, we do not register this tracer.
1939 tracing_reset_online_cpus(&tr->array_buffer);
1941 tr->current_trace = type;
1943 #ifdef CONFIG_TRACER_MAX_TRACE
1944 if (type->use_max_tr) {
1945 /* If we expanded the buffers, make sure the max is expanded too */
1946 if (ring_buffer_expanded)
1947 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1948 RING_BUFFER_ALL_CPUS);
1949 tr->allocated_snapshot = true;
1953 /* the test is responsible for initializing and enabling */
1954 pr_info("Testing tracer %s: ", type->name);
1955 ret = type->selftest(type, tr);
1956 /* the test is responsible for resetting too */
1957 tr->current_trace = saved_tracer;
1959 printk(KERN_CONT "FAILED!\n");
1960 /* Add the warning after printing 'FAILED' */
1964 /* Only reset on passing, to avoid touching corrupted buffers */
1965 tracing_reset_online_cpus(&tr->array_buffer);
1967 #ifdef CONFIG_TRACER_MAX_TRACE
1968 if (type->use_max_tr) {
1969 tr->allocated_snapshot = false;
1971 /* Shrink the max buffer again */
1972 if (ring_buffer_expanded)
1973 ring_buffer_resize(tr->max_buffer.buffer, 1,
1974 RING_BUFFER_ALL_CPUS);
1978 printk(KERN_CONT "PASSED\n");
1982 static __init int init_trace_selftests(void)
1984 struct trace_selftests *p, *n;
1985 struct tracer *t, **last;
1988 selftests_can_run = true;
1990 mutex_lock(&trace_types_lock);
1992 if (list_empty(&postponed_selftests))
1995 pr_info("Running postponed tracer tests:\n");
1997 tracing_selftest_running = true;
1998 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1999 /* This loop can take minutes when sanitizers are enabled, so
2000 * let's make sure we allow RCU processing.
2003 ret = run_tracer_selftest(p->type);
2004 /* If the test fails, then warn and remove from available_tracers */
2006 WARN(1, "tracer: %s failed selftest, disabling\n",
2008 last = &trace_types;
2009 for (t = trace_types; t; t = t->next) {
2020 tracing_selftest_running = false;
2023 mutex_unlock(&trace_types_lock);
2027 core_initcall(init_trace_selftests);
2029 static inline int run_tracer_selftest(struct tracer *type)
2033 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2035 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2037 static void __init apply_trace_boot_options(void);
2040 * register_tracer - register a tracer with the ftrace system.
2041 * @type: the plugin for the tracer
2043 * Register a new plugin tracer.
2045 int __init register_tracer(struct tracer *type)
2051 pr_info("Tracer must have a name\n");
2055 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2056 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2060 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2061 pr_warn("Can not register tracer %s due to lockdown\n",
2066 mutex_lock(&trace_types_lock);
2068 tracing_selftest_running = true;
2070 for (t = trace_types; t; t = t->next) {
2071 if (strcmp(type->name, t->name) == 0) {
2073 pr_info("Tracer %s already registered\n",
2080 if (!type->set_flag)
2081 type->set_flag = &dummy_set_flag;
2083 /* allocate a dummy tracer_flags */
2084 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2089 type->flags->val = 0;
2090 type->flags->opts = dummy_tracer_opt;
2092 if (!type->flags->opts)
2093 type->flags->opts = dummy_tracer_opt;
2095 /* store the tracer for __set_tracer_option */
2096 type->flags->trace = type;
2098 ret = run_tracer_selftest(type);
2102 type->next = trace_types;
2104 add_tracer_options(&global_trace, type);
2107 tracing_selftest_running = false;
2108 mutex_unlock(&trace_types_lock);
2110 if (ret || !default_bootup_tracer)
2113 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2116 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2117 /* Do we want this tracer to start on bootup? */
2118 tracing_set_tracer(&global_trace, type->name);
2119 default_bootup_tracer = NULL;
2121 apply_trace_boot_options();
2123 /* disable other selftests, since this will break it. */
2124 disable_tracing_selftest("running a tracer");
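/*
 * Illustrative sketch (not part of trace.c): the smallest plugin that
 * register_tracer() above would accept; the example_* names are hypothetical
 * and real tracers live in kernel/trace/trace_*.c.
 */
static int example_tracer_init(struct trace_array *tr)
{
        /* Start writing events into tr->array_buffer here. */
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
        /* Stop tracing and undo whatever init set up. */
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static int __init example_tracer_register(void)
{
        return register_tracer(&example_tracer);
}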
2130 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2132 struct trace_buffer *buffer = buf->buffer;
2137 ring_buffer_record_disable(buffer);
2139 /* Make sure all commits have finished */
2141 ring_buffer_reset_cpu(buffer, cpu);
2143 ring_buffer_record_enable(buffer);
2146 void tracing_reset_online_cpus(struct array_buffer *buf)
2148 struct trace_buffer *buffer = buf->buffer;
2153 ring_buffer_record_disable(buffer);
2155 /* Make sure all commits have finished */
2158 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2160 ring_buffer_reset_online_cpus(buffer);
2162 ring_buffer_record_enable(buffer);
2165 /* Must have trace_types_lock held */
2166 void tracing_reset_all_online_cpus(void)
2168 struct trace_array *tr;
2170 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2171 if (!tr->clear_trace)
2173 tr->clear_trace = false;
2174 tracing_reset_online_cpus(&tr->array_buffer);
2175 #ifdef CONFIG_TRACER_MAX_TRACE
2176 tracing_reset_online_cpus(&tr->max_buffer);
2181 static int *tgid_map;
2183 #define SAVED_CMDLINES_DEFAULT 128
2184 #define NO_CMDLINE_MAP UINT_MAX
2185 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2186 struct saved_cmdlines_buffer {
2187 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2188 unsigned *map_cmdline_to_pid;
2189 unsigned cmdline_num;
2191 char *saved_cmdlines;
2193 static struct saved_cmdlines_buffer *savedcmd;
2195 /* temporarily disable recording */
2196 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2198 static inline char *get_saved_cmdlines(int idx)
2200 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2203 static inline void set_cmdline(int idx, const char *cmdline)
2205 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2208 static int allocate_cmdlines_buffer(unsigned int val,
2209 struct saved_cmdlines_buffer *s)
2211 s->map_cmdline_to_pid = kmalloc_array(val,
2212 sizeof(*s->map_cmdline_to_pid),
2214 if (!s->map_cmdline_to_pid)
2217 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2218 if (!s->saved_cmdlines) {
2219 kfree(s->map_cmdline_to_pid);
2224 s->cmdline_num = val;
2225 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2226 sizeof(s->map_pid_to_cmdline));
2227 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2228 val * sizeof(*s->map_cmdline_to_pid));
2233 static int trace_create_savedcmd(void)
2237 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2241 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2251 int is_tracing_stopped(void)
2253 return global_trace.stop_count;
2257 * tracing_start - quick start of the tracer
2259 * If tracing is enabled but was stopped by tracing_stop,
2260 * this will start the tracer back up.
2262 void tracing_start(void)
2264 struct trace_buffer *buffer;
2265 unsigned long flags;
2267 if (tracing_disabled)
2270 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2271 if (--global_trace.stop_count) {
2272 if (global_trace.stop_count < 0) {
2273 /* Someone screwed up their debugging */
2275 global_trace.stop_count = 0;
2280 /* Prevent the buffers from switching */
2281 arch_spin_lock(&global_trace.max_lock);
2283 buffer = global_trace.array_buffer.buffer;
2285 ring_buffer_record_enable(buffer);
2287 #ifdef CONFIG_TRACER_MAX_TRACE
2288 buffer = global_trace.max_buffer.buffer;
2290 ring_buffer_record_enable(buffer);
2293 arch_spin_unlock(&global_trace.max_lock);
2296 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2299 static void tracing_start_tr(struct trace_array *tr)
2301 struct trace_buffer *buffer;
2302 unsigned long flags;
2304 if (tracing_disabled)
2307 /* If global, we need to also start the max tracer */
2308 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2309 return tracing_start();
2311 raw_spin_lock_irqsave(&tr->start_lock, flags);
2313 if (--tr->stop_count) {
2314 if (tr->stop_count < 0) {
2315 /* Someone screwed up their debugging */
2322 buffer = tr->array_buffer.buffer;
2324 ring_buffer_record_enable(buffer);
2327 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2331 * tracing_stop - quick stop of the tracer
2333 * Light weight way to stop tracing. Use in conjunction with
2336 void tracing_stop(void)
2338 struct trace_buffer *buffer;
2339 unsigned long flags;
2341 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2342 if (global_trace.stop_count++)
2345 /* Prevent the buffers from switching */
2346 arch_spin_lock(&global_trace.max_lock);
2348 buffer = global_trace.array_buffer.buffer;
2350 ring_buffer_record_disable(buffer);
2352 #ifdef CONFIG_TRACER_MAX_TRACE
2353 buffer = global_trace.max_buffer.buffer;
2355 ring_buffer_record_disable(buffer);
2358 arch_spin_unlock(&global_trace.max_lock);
2361 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2364 static void tracing_stop_tr(struct trace_array *tr)
2366 struct trace_buffer *buffer;
2367 unsigned long flags;
2369 /* If global, we need to also stop the max tracer */
2370 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2371 return tracing_stop();
2373 raw_spin_lock_irqsave(&tr->start_lock, flags);
2374 if (tr->stop_count++)
2377 buffer = tr->array_buffer.buffer;
2379 ring_buffer_record_disable(buffer);
2382 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2385 static int trace_save_cmdline(struct task_struct *tsk)
2389 /* treat recording of idle task as a success */
2393 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2397 * It's not the end of the world if we don't get
2398 * the lock, but we also don't want to spin
2399 * nor do we want to disable interrupts,
2400 * so if we miss here, then better luck next time.
2402 if (!arch_spin_trylock(&trace_cmdline_lock))
2405 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2406 if (idx == NO_CMDLINE_MAP) {
2407 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2410 * Check whether the cmdline buffer at idx has a pid
2411 * mapped. We are going to overwrite that entry so we
2412 * need to clear the map_pid_to_cmdline. Otherwise we
2413 * would read the new comm for the old pid.
2415 pid = savedcmd->map_cmdline_to_pid[idx];
2416 if (pid != NO_CMDLINE_MAP)
2417 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2419 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2420 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2422 savedcmd->cmdline_idx = idx;
2425 set_cmdline(idx, tsk->comm);
2427 arch_spin_unlock(&trace_cmdline_lock);
2432 static void __trace_find_cmdline(int pid, char comm[])
2437 strcpy(comm, "<idle>");
2441 if (WARN_ON_ONCE(pid < 0)) {
2442 strcpy(comm, "<XXX>");
2446 if (pid > PID_MAX_DEFAULT) {
2447 strcpy(comm, "<...>");
2451 map = savedcmd->map_pid_to_cmdline[pid];
2452 if (map != NO_CMDLINE_MAP)
2453 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2455 strcpy(comm, "<...>");
2458 void trace_find_cmdline(int pid, char comm[])
2461 arch_spin_lock(&trace_cmdline_lock);
2463 __trace_find_cmdline(pid, comm);
2465 arch_spin_unlock(&trace_cmdline_lock);
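/*
 * Example: resolving a recorded pid back to a comm via the saved
 * cmdlines cache above. Illustrative sketch; "ent" and "s" stand for
 * whatever trace_entry and trace_seq the caller already holds (the
 * output code in trace_output.c follows this pattern):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(ent->pid, comm);	// "<...>" when not cached
 *	trace_seq_printf(s, "%s-%d", comm, ent->pid);
 */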
2469 int trace_find_tgid(int pid)
2471 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2474 return tgid_map[pid];
2477 static int trace_save_tgid(struct task_struct *tsk)
2479 /* treat recording of idle task as a success */
2483 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2486 tgid_map[tsk->pid] = tsk->tgid;
2490 static bool tracing_record_taskinfo_skip(int flags)
2492 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2494 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2496 if (!__this_cpu_read(trace_taskinfo_save))
2502 * tracing_record_taskinfo - record the task info of a task
2504 * @task: task to record
2505 * @flags: TRACE_RECORD_CMDLINE for recording comm
2506 * TRACE_RECORD_TGID for recording tgid
2508 void tracing_record_taskinfo(struct task_struct *task, int flags)
2512 if (tracing_record_taskinfo_skip(flags))
2516 * Record as much task information as possible. If some fail, continue
2517 * to try to record the others.
2519 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2520 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2522 /* If recording any information failed, retry again soon. */
2526 __this_cpu_write(trace_taskinfo_save, false);
2530 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2532 * @prev: previous task during sched_switch
2533 * @next: next task during sched_switch
2534 * @flags: TRACE_RECORD_CMDLINE for recording comm
2535 * TRACE_RECORD_TGID for recording tgid
2537 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2538 struct task_struct *next, int flags)
2542 if (tracing_record_taskinfo_skip(flags))
2546 * Record as much task information as possible. If some fail, continue
2547 * to try to record the others.
2549 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2550 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2551 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2552 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2554 /* If recording any information failed, retry again soon. */
2558 __this_cpu_write(trace_taskinfo_save, false);
2561 /* Helpers to record a specific task information */
2562 void tracing_record_cmdline(struct task_struct *task)
2564 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2567 void tracing_record_tgid(struct task_struct *task)
2569 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
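/*
 * Example: both kinds of task info can be requested in one call, which
 * is effectively what the sched_switch hook above does for prev/next
 * (sketch only):
 *
 *	tracing_record_taskinfo(current,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */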
2573 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2574 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2575 * simplifies those functions and keeps them in sync.
2577 enum print_line_t trace_handle_return(struct trace_seq *s)
2579 return trace_seq_has_overflowed(s) ?
2580 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2582 EXPORT_SYMBOL_GPL(trace_handle_return);
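/*
 * Example: a trace_event output callback typically funnels its result
 * through trace_handle_return() so the overflow check lives in one
 * place. Sketch modeled on the callbacks in trace_output.c;
 * my_event_trace() is a hypothetical handler:
 *
 *	static enum print_line_t
 *	my_event_trace(struct trace_iterator *iter, int flags,
 *		       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */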
2584 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2586 unsigned int trace_flags = irqs_status;
2589 pc = preempt_count();
2592 trace_flags |= TRACE_FLAG_NMI;
2593 if (pc & HARDIRQ_MASK)
2594 trace_flags |= TRACE_FLAG_HARDIRQ;
2595 if (in_serving_softirq())
2596 trace_flags |= TRACE_FLAG_SOFTIRQ;
2598 if (tif_need_resched())
2599 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2600 if (test_preempt_need_resched())
2601 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2602 return (trace_flags << 16) | (pc & 0xff);
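/*
 * The value returned above packs the TRACE_FLAG_* bits in the upper
 * half-word and the raw preempt count in the low byte; the entry
 * update helper in trace.h unpacks it the same way, roughly:
 *
 *	entry->flags	     = trace_ctx >> 16;	// TRACE_FLAG_* bits
 *	entry->preempt_count = trace_ctx & 0xff;
 */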
2605 struct ring_buffer_event *
2606 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2609 unsigned int trace_ctx)
2611 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2614 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2615 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2616 static int trace_buffered_event_ref;
2619 * trace_buffered_event_enable - enable buffering events
2621 * When events are being filtered, it is quicker to use a temporary
2622 * buffer to write the event data into if there's a likely chance
2623 * that it will not be committed. The discard of the ring buffer
2624 * is not as fast as committing, and is much slower than copying
2627 * When an event is to be filtered, allocate per cpu buffers to
2628 * write the event data into, and if the event is filtered and discarded
2629 * it is simply dropped, otherwise, the entire data is to be committed
2632 void trace_buffered_event_enable(void)
2634 struct ring_buffer_event *event;
2638 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2640 if (trace_buffered_event_ref++)
2643 for_each_tracing_cpu(cpu) {
2644 page = alloc_pages_node(cpu_to_node(cpu),
2645 GFP_KERNEL | __GFP_NORETRY, 0);
2649 event = page_address(page);
2650 memset(event, 0, sizeof(*event));
2652 per_cpu(trace_buffered_event, cpu) = event;
2655 if (cpu == smp_processor_id() &&
2656 __this_cpu_read(trace_buffered_event) !=
2657 per_cpu(trace_buffered_event, cpu))
2664 trace_buffered_event_disable();
2667 static void enable_trace_buffered_event(void *data)
2669 /* Probably not needed, but do it anyway */
2671 this_cpu_dec(trace_buffered_event_cnt);
2674 static void disable_trace_buffered_event(void *data)
2676 this_cpu_inc(trace_buffered_event_cnt);
2680 * trace_buffered_event_disable - disable buffering events
2682 * When a filter is removed, it is faster to not use the buffered
2683 * events, and to commit directly into the ring buffer. Free up
2684 * the temp buffers when there are no more users. This requires
2685 * special synchronization with current events.
2687 void trace_buffered_event_disable(void)
2691 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2693 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2696 if (--trace_buffered_event_ref)
2700 /* For each CPU, set the buffer as used. */
2701 smp_call_function_many(tracing_buffer_mask,
2702 disable_trace_buffered_event, NULL, 1);
2705 /* Wait for all current users to finish */
2708 for_each_tracing_cpu(cpu) {
2709 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2710 per_cpu(trace_buffered_event, cpu) = NULL;
2713 * Make sure trace_buffered_event is NULL before clearing
2714 * trace_buffered_event_cnt.
2719 /* Do the work on each cpu */
2720 smp_call_function_many(tracing_buffer_mask,
2721 enable_trace_buffered_event, NULL, 1);
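/*
 * Example: both helpers above expect event_mutex to be held (see the
 * WARN_ON_ONCE() checks), and a caller installing an event filter
 * would bracket the work roughly like this (sketch only;
 * install_filter() is a hypothetical helper):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();	// allocate per-cpu staging pages
 *	install_filter(file);
 *	...
 *	trace_buffered_event_disable();	// drop the ref, free when unused
 *	mutex_unlock(&event_mutex);
 */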
2725 static struct trace_buffer *temp_buffer;
2727 struct ring_buffer_event *
2728 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2729 struct trace_event_file *trace_file,
2730 int type, unsigned long len,
2731 unsigned int trace_ctx)
2733 struct ring_buffer_event *entry;
2736 *current_rb = trace_file->tr->array_buffer.buffer;
2738 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2739 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2740 (entry = this_cpu_read(trace_buffered_event))) {
2741 /* Try to use the per cpu buffer first */
2742 val = this_cpu_inc_return(trace_buffered_event_cnt);
2743 if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
2744 trace_event_setup(entry, type, trace_ctx);
2745 entry->array[0] = len;
2748 this_cpu_dec(trace_buffered_event_cnt);
2751 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2754 * If tracing is off, but we have triggers enabled
2755 * we still need to look at the event data. Use the temp_buffer
2756 * to store the trace event for the trigger to use. It's recursion
2757 * safe and will not be recorded anywhere.
2759 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2760 *current_rb = temp_buffer;
2761 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2766 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2768 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2769 static DEFINE_MUTEX(tracepoint_printk_mutex);
2771 static void output_printk(struct trace_event_buffer *fbuffer)
2773 struct trace_event_call *event_call;
2774 struct trace_event_file *file;
2775 struct trace_event *event;
2776 unsigned long flags;
2777 struct trace_iterator *iter = tracepoint_print_iter;
2779 /* We should never get here if iter is NULL */
2780 if (WARN_ON_ONCE(!iter))
2783 event_call = fbuffer->trace_file->event_call;
2784 if (!event_call || !event_call->event.funcs ||
2785 !event_call->event.funcs->trace)
2788 file = fbuffer->trace_file;
2789 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2790 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2791 !filter_match_preds(file->filter, fbuffer->entry)))
2794 event = &fbuffer->trace_file->event_call->event;
2796 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2797 trace_seq_init(&iter->seq);
2798 iter->ent = fbuffer->entry;
2799 event_call->event.funcs->trace(iter, 0, event);
2800 trace_seq_putc(&iter->seq, 0);
2801 printk("%s", iter->seq.buffer);
2803 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2806 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2807 void *buffer, size_t *lenp,
2810 int save_tracepoint_printk;
2813 mutex_lock(&tracepoint_printk_mutex);
2814 save_tracepoint_printk = tracepoint_printk;
2816 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2819 * This will force exiting early, as tracepoint_printk
2820 * is always zero when tracepoint_print_iter is not allocated
2822 if (!tracepoint_print_iter)
2823 tracepoint_printk = 0;
2825 if (save_tracepoint_printk == tracepoint_printk)
2828 if (tracepoint_printk)
2829 static_key_enable(&tracepoint_printk_key.key);
2831 static_key_disable(&tracepoint_printk_key.key);
2834 mutex_unlock(&tracepoint_printk_mutex);
2839 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2841 if (static_key_false(&tracepoint_printk_key.key))
2842 output_printk(fbuffer);
2844 if (static_branch_unlikely(&trace_event_exports_enabled))
2845 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2846 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2847 fbuffer->event, fbuffer->entry,
2848 fbuffer->trace_ctx, fbuffer->regs);
2850 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
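/*
 * Example: the reserve/fill/commit pattern that the generated
 * trace_event_raw_event_*() functions follow around the helpers above.
 * Simplified sketch; struct my_entry and the field assignment are
 * placeholders:
 *
 *	struct trace_event_buffer fbuffer;
 *	struct my_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->field = value;
 *	trace_event_buffer_commit(&fbuffer);
 */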
2855 * trace_buffer_unlock_commit_regs()
2856 * trace_event_buffer_commit()
2857 * trace_event_raw_event_xxx()
2859 # define STACK_SKIP 3
2861 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2862 struct trace_buffer *buffer,
2863 struct ring_buffer_event *event,
2864 unsigned int trace_ctx,
2865 struct pt_regs *regs)
2867 __buffer_unlock_commit(buffer, event);
2870 * If regs is not set, then skip the necessary functions.
2871 * Note, we can still get here via blktrace, wakeup tracer
2872 * and mmiotrace, but that's ok if they lose a function or
2873 * two. They are not that meaningful.
2875 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2876 ftrace_trace_userstack(tr, buffer, trace_ctx);
2880 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2883 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2884 struct ring_buffer_event *event)
2886 __buffer_unlock_commit(buffer, event);
2890 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2891 parent_ip, unsigned int trace_ctx)
2893 struct trace_event_call *call = &event_function;
2894 struct trace_buffer *buffer = tr->array_buffer.buffer;
2895 struct ring_buffer_event *event;
2896 struct ftrace_entry *entry;
2898 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2902 entry = ring_buffer_event_data(event);
2904 entry->parent_ip = parent_ip;
2906 if (!call_filter_check_discard(call, entry, buffer, event)) {
2907 if (static_branch_unlikely(&trace_function_exports_enabled))
2908 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2909 __buffer_unlock_commit(buffer, event);
2913 #ifdef CONFIG_STACKTRACE
2915 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2916 #define FTRACE_KSTACK_NESTING 4
2918 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2920 struct ftrace_stack {
2921 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2925 struct ftrace_stacks {
2926 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2929 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2930 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2932 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2933 unsigned int trace_ctx,
2934 int skip, struct pt_regs *regs)
2936 struct trace_event_call *call = &event_kernel_stack;
2937 struct ring_buffer_event *event;
2938 unsigned int size, nr_entries;
2939 struct ftrace_stack *fstack;
2940 struct stack_entry *entry;
2944 * Add one, for this function and the call to save_stack_trace().
2945 * If regs is set, then these functions will not be in the way.
2947 #ifndef CONFIG_UNWINDER_ORC
2952 preempt_disable_notrace();
2954 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2956 /* This should never happen. If it does, yell once and skip */
2957 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2961 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2962 * interrupt will either see the value pre increment or post
2963 * increment. If the interrupt happens pre increment it will have
2964 * restored the counter when it returns. We just need a barrier to
2965 * keep gcc from moving things around.
2969 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2970 size = ARRAY_SIZE(fstack->calls);
2973 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2976 nr_entries = stack_trace_save(fstack->calls, size, skip);
2979 size = nr_entries * sizeof(unsigned long);
2980 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2981 sizeof(*entry) + size, trace_ctx);
2984 entry = ring_buffer_event_data(event);
2986 memcpy(&entry->caller, fstack->calls, size);
2987 entry->size = nr_entries;
2989 if (!call_filter_check_discard(call, entry, buffer, event))
2990 __buffer_unlock_commit(buffer, event);
2993 /* Again, don't let gcc optimize things here */
2995 __this_cpu_dec(ftrace_stack_reserve);
2996 preempt_enable_notrace();
3000 static inline void ftrace_trace_stack(struct trace_array *tr,
3001 struct trace_buffer *buffer,
3002 unsigned int trace_ctx,
3003 int skip, struct pt_regs *regs)
3005 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3008 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3011 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3014 struct trace_buffer *buffer = tr->array_buffer.buffer;
3016 if (rcu_is_watching()) {
3017 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3022 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3023 * but if the above rcu_is_watching() failed, then the NMI
3024 * triggered someplace critical, and rcu_irq_enter() should
3025 * not be called from NMI.
3027 if (unlikely(in_nmi()))
3030 rcu_irq_enter_irqson();
3031 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3032 rcu_irq_exit_irqson();
3036 * trace_dump_stack - record a stack back trace in the trace buffer
3037 * @skip: Number of functions to skip (helper handlers)
3039 void trace_dump_stack(int skip)
3041 if (tracing_disabled || tracing_selftest_running)
3044 #ifndef CONFIG_UNWINDER_ORC
3045 /* Skip 1 to skip this function. */
3048 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3049 tracing_gen_ctx(), skip, NULL);
3051 EXPORT_SYMBOL_GPL(trace_dump_stack);
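/*
 * Example: dumping the current kernel stack into the trace buffer from
 * a suspect code path (sketch; skip == 0 means do not hide any extra
 * callers, and suspicious_condition is a hypothetical predicate):
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);
 */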
3053 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3054 static DEFINE_PER_CPU(int, user_stack_count);
3057 ftrace_trace_userstack(struct trace_array *tr,
3058 struct trace_buffer *buffer, unsigned int trace_ctx)
3060 struct trace_event_call *call = &event_user_stack;
3061 struct ring_buffer_event *event;
3062 struct userstack_entry *entry;
3064 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3068 * NMIs can not handle page faults, even with fixups.
3069 * Saving the user stack can (and often does) fault.
3071 if (unlikely(in_nmi()))
3075 * prevent recursion, since the user stack tracing may
3076 * trigger other kernel events.
3079 if (__this_cpu_read(user_stack_count))
3082 __this_cpu_inc(user_stack_count);
3084 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3085 sizeof(*entry), trace_ctx);
3087 goto out_drop_count;
3088 entry = ring_buffer_event_data(event);
3090 entry->tgid = current->tgid;
3091 memset(&entry->caller, 0, sizeof(entry->caller));
3093 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3094 if (!call_filter_check_discard(call, entry, buffer, event))
3095 __buffer_unlock_commit(buffer, event);
3098 __this_cpu_dec(user_stack_count);
3102 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3103 static void ftrace_trace_userstack(struct trace_array *tr,
3104 struct trace_buffer *buffer,
3105 unsigned int trace_ctx)
3108 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3110 #endif /* CONFIG_STACKTRACE */
3112 /* created for use with alloc_percpu */
3113 struct trace_buffer_struct {
3115 char buffer[4][TRACE_BUF_SIZE];
3118 static struct trace_buffer_struct *trace_percpu_buffer;
3121 * This allows for lockless recording. If we're nested too deeply, then
3122 * this returns NULL.
3124 static char *get_trace_buf(void)
3126 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3128 if (!buffer || buffer->nesting >= 4)
3133 /* Interrupts must see nesting incremented before we use the buffer */
3135 return &buffer->buffer[buffer->nesting - 1][0];
3138 static void put_trace_buf(void)
3140 /* Don't let the decrement of nesting leak before this */
3142 this_cpu_dec(trace_percpu_buffer->nesting);
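/*
 * Example: how trace_vbprintk()/__trace_array_vprintk() below borrow a
 * per-cpu scratch line. Each of the four nesting slots handles one
 * context level, so no locking is needed, but preemption must be
 * disabled around the pair (sketch only):
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		// ... format up to TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */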
3145 static int alloc_percpu_trace_buffer(void)
3147 struct trace_buffer_struct *buffers;
3149 if (trace_percpu_buffer)
3152 buffers = alloc_percpu(struct trace_buffer_struct);
3153 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3156 trace_percpu_buffer = buffers;
3160 static int buffers_allocated;
3162 void trace_printk_init_buffers(void)
3164 if (buffers_allocated)
3167 if (alloc_percpu_trace_buffer())
3170 /* trace_printk() is for debug use only. Don't use it in production. */
3173 pr_warn("**********************************************************\n");
3174 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3176 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3178 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3179 pr_warn("** unsafe for production use. **\n");
3181 pr_warn("** If you see this message and you are not debugging **\n");
3182 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3184 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3185 pr_warn("**********************************************************\n");
3187 /* Expand the buffers to set size */
3188 tracing_update_buffers();
3190 buffers_allocated = 1;
3193 * trace_printk_init_buffers() can be called by modules.
3194 * If that happens, then we need to start cmdline recording
3195 * directly here. If the global_trace.buffer is already
3196 * allocated here, then this was called by module code.
3198 if (global_trace.array_buffer.buffer)
3199 tracing_start_cmdline_record();
3201 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
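/*
 * Example: trace_printk() is the debug-only producer these buffers
 * serve (per the banner above, never for production code); the format
 * and arguments follow printk() conventions (sketch):
 *
 *	trace_printk("my_dev: irq=%d status=%#x\n", irq, status);
 */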
3203 void trace_printk_start_comm(void)
3205 /* Start tracing comms if trace printk is set */
3206 if (!buffers_allocated)
3208 tracing_start_cmdline_record();
3211 static void trace_printk_start_stop_comm(int enabled)
3213 if (!buffers_allocated)
3217 tracing_start_cmdline_record();
3219 tracing_stop_cmdline_record();
3223 * trace_vbprintk - write binary msg to tracing buffer
3224 * @ip: The address of the caller
3225 * @fmt: The string format to write to the buffer
3226 * @args: Arguments for @fmt
3228 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3230 struct trace_event_call *call = &event_bprint;
3231 struct ring_buffer_event *event;
3232 struct trace_buffer *buffer;
3233 struct trace_array *tr = &global_trace;
3234 struct bprint_entry *entry;
3235 unsigned int trace_ctx;
3239 if (unlikely(tracing_selftest_running || tracing_disabled))
3242 /* Don't pollute graph traces with trace_vprintk internals */
3243 pause_graph_tracing();
3245 trace_ctx = tracing_gen_ctx();
3246 preempt_disable_notrace();
3248 tbuffer = get_trace_buf();
3254 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3256 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3259 size = sizeof(*entry) + sizeof(u32) * len;
3260 buffer = tr->array_buffer.buffer;
3261 ring_buffer_nest_start(buffer);
3262 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3266 entry = ring_buffer_event_data(event);
3270 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3271 if (!call_filter_check_discard(call, entry, buffer, event)) {
3272 __buffer_unlock_commit(buffer, event);
3273 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3277 ring_buffer_nest_end(buffer);
3282 preempt_enable_notrace();
3283 unpause_graph_tracing();
3287 EXPORT_SYMBOL_GPL(trace_vbprintk);
3291 __trace_array_vprintk(struct trace_buffer *buffer,
3292 unsigned long ip, const char *fmt, va_list args)
3294 struct trace_event_call *call = &event_print;
3295 struct ring_buffer_event *event;
3297 struct print_entry *entry;
3298 unsigned int trace_ctx;
3301 if (tracing_disabled || tracing_selftest_running)
3304 /* Don't pollute graph traces with trace_vprintk internals */
3305 pause_graph_tracing();
3307 trace_ctx = tracing_gen_ctx();
3308 preempt_disable_notrace();
3311 tbuffer = get_trace_buf();
3317 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3319 size = sizeof(*entry) + len + 1;
3320 ring_buffer_nest_start(buffer);
3321 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3325 entry = ring_buffer_event_data(event);
3328 memcpy(&entry->buf, tbuffer, len + 1);
3329 if (!call_filter_check_discard(call, entry, buffer, event)) {
3330 __buffer_unlock_commit(buffer, event);
3331 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3335 ring_buffer_nest_end(buffer);
3339 preempt_enable_notrace();
3340 unpause_graph_tracing();
3346 int trace_array_vprintk(struct trace_array *tr,
3347 unsigned long ip, const char *fmt, va_list args)
3349 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3353 * trace_array_printk - Print a message to a specific instance
3354 * @tr: The instance trace_array descriptor
3355 * @ip: The instruction pointer that this is called from.
3356 * @fmt: The format to print (printf format)
3358 * If a subsystem sets up its own instance, they have the right to
3359 * printk strings into their tracing instance buffer using this
3360 * function. Note, this function will not write into the top level
3361 * buffer (use trace_printk() for that), as writing into the top level
3362 * buffer should only have events that can be individually disabled.
3363 * trace_printk() is only used for debugging a kernel, and should not
3364 * ever be incorporated into normal use.
3366 * trace_array_printk() can be used, as it will not add noise to the
3367 * top level tracing buffer.
3369 * Note, trace_array_init_printk() must be called on @tr before this
3373 int trace_array_printk(struct trace_array *tr,
3374 unsigned long ip, const char *fmt, ...)
3382 /* This is only allowed for created instances */
3383 if (tr == &global_trace)
3386 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3390 ret = trace_array_vprintk(tr, ip, fmt, ap);
3394 EXPORT_SYMBOL_GPL(trace_array_printk);
3397 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3398 * @tr: The trace array to initialize the buffers for
3400 * As trace_array_printk() only writes into instances, they are OK to
3401 * have in the kernel (unlike trace_printk()). This needs to be called
3402 * before trace_array_printk() can be used on a trace_array.
3404 int trace_array_init_printk(struct trace_array *tr)
3409 /* This is only allowed for created instances */
3410 if (tr == &global_trace)
3413 return alloc_percpu_trace_buffer();
3415 EXPORT_SYMBOL_GPL(trace_array_init_printk);
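/*
 * Example: a subsystem printk-ing into its own instance instead of the
 * top level buffer. Sketch only; "my_subsys" is a placeholder, error
 * handling is trimmed, and trace_array_get_by_name() is the usual way
 * to create or look up the instance:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr || trace_array_init_printk(tr))
 *		return -ENOMEM;
 *
 *	trace_array_printk(tr, _THIS_IP_, "probe done: %d\n", ret);
 *	...
 *	trace_array_put(tr);		// drop the reference when finished
 */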
3418 int trace_array_printk_buf(struct trace_buffer *buffer,
3419 unsigned long ip, const char *fmt, ...)
3424 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3428 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3434 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3436 return trace_array_vprintk(&global_trace, ip, fmt, args);
3438 EXPORT_SYMBOL_GPL(trace_vprintk);
3440 static void trace_iterator_increment(struct trace_iterator *iter)
3442 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3446 ring_buffer_iter_advance(buf_iter);
3449 static struct trace_entry *
3450 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3451 unsigned long *lost_events)
3453 struct ring_buffer_event *event;
3454 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3457 event = ring_buffer_iter_peek(buf_iter, ts);
3459 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3460 (unsigned long)-1 : 0;
3462 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3467 iter->ent_size = ring_buffer_event_length(event);
3468 return ring_buffer_event_data(event);
3474 static struct trace_entry *
3475 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3476 unsigned long *missing_events, u64 *ent_ts)
3478 struct trace_buffer *buffer = iter->array_buffer->buffer;
3479 struct trace_entry *ent, *next = NULL;
3480 unsigned long lost_events = 0, next_lost = 0;
3481 int cpu_file = iter->cpu_file;
3482 u64 next_ts = 0, ts;
3488 * If we are in a per_cpu trace file, don't bother by iterating over
3489 * all cpu and peek directly.
3491 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3492 if (ring_buffer_empty_cpu(buffer, cpu_file))
3494 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3496 *ent_cpu = cpu_file;
3501 for_each_tracing_cpu(cpu) {
3503 if (ring_buffer_empty_cpu(buffer, cpu))
3506 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3509 * Pick the entry with the smallest timestamp:
3511 if (ent && (!next || ts < next_ts)) {
3515 next_lost = lost_events;
3516 next_size = iter->ent_size;
3520 iter->ent_size = next_size;
3523 *ent_cpu = next_cpu;
3529 *missing_events = next_lost;
3534 #define STATIC_FMT_BUF_SIZE 128
3535 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3537 static char *trace_iter_expand_format(struct trace_iterator *iter)
3541 if (iter->fmt == static_fmt_buf)
3544 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3547 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3554 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3556 const char *p, *new_fmt;
3559 if (WARN_ON_ONCE(!fmt))
3562 if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3566 new_fmt = q = iter->fmt;
3568 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3569 if (!trace_iter_expand_format(iter))
3572 q += iter->fmt - new_fmt;
3573 new_fmt = iter->fmt;
3578 /* Replace %p with %px */
3582 } else if (p[0] == 'p' && !isalnum(p[1])) {
3593 #define STATIC_TEMP_BUF_SIZE 128
3594 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3596 /* Find the next real entry, without updating the iterator itself */
3597 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3598 int *ent_cpu, u64 *ent_ts)
3600 /* __find_next_entry will reset ent_size */
3601 int ent_size = iter->ent_size;
3602 struct trace_entry *entry;
3605 * If called from ftrace_dump(), then the iter->temp buffer
3606 * will be the static_temp_buf and not created from kmalloc.
3607 * If the entry size is greater than the buffer, we can
3608 * not save it. Just return NULL in that case. This is only
3609 * used to add markers when two consecutive events' time
3610 * stamps have a large delta. See trace_print_lat_context()
3612 if (iter->temp == static_temp_buf &&
3613 STATIC_TEMP_BUF_SIZE < ent_size)
3617 * The __find_next_entry() may call peek_next_entry(), which may
3618 * call ring_buffer_peek() that may make the contents of iter->ent
3619 * undefined. Need to copy iter->ent now.
3621 if (iter->ent && iter->ent != iter->temp) {
3622 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3623 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3625 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3630 iter->temp_size = iter->ent_size;
3632 memcpy(iter->temp, iter->ent, iter->ent_size);
3633 iter->ent = iter->temp;
3635 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3636 /* Put back the original ent_size */
3637 iter->ent_size = ent_size;
3642 /* Find the next real entry, and increment the iterator to the next entry */
3643 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3645 iter->ent = __find_next_entry(iter, &iter->cpu,
3646 &iter->lost_events, &iter->ts);
3649 trace_iterator_increment(iter);
3651 return iter->ent ? iter : NULL;
3654 static void trace_consume(struct trace_iterator *iter)
3656 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3657 &iter->lost_events);
3660 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3662 struct trace_iterator *iter = m->private;
3666 WARN_ON_ONCE(iter->leftover);
3670 /* can't go backwards */
3675 ent = trace_find_next_entry_inc(iter);
3679 while (ent && iter->idx < i)
3680 ent = trace_find_next_entry_inc(iter);
3687 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3689 struct ring_buffer_iter *buf_iter;
3690 unsigned long entries = 0;
3693 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3695 buf_iter = trace_buffer_iter(iter, cpu);
3699 ring_buffer_iter_reset(buf_iter);
3702 * We could have the case with the max latency tracers
3703 * that a reset never took place on a cpu. This is evident
3704 * by the timestamp being before the start of the buffer.
3706 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3707 if (ts >= iter->array_buffer->time_start)
3710 ring_buffer_iter_advance(buf_iter);
3713 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3717 * The current tracer is copied to avoid a global locking
3720 static void *s_start(struct seq_file *m, loff_t *pos)
3722 struct trace_iterator *iter = m->private;
3723 struct trace_array *tr = iter->tr;
3724 int cpu_file = iter->cpu_file;
3730 * copy the tracer to avoid using a global lock all around.
3731 * iter->trace is a copy of current_trace, the pointer to the
3732 * name may be used instead of a strcmp(), as iter->trace->name
3733 * will point to the same string as current_trace->name.
3735 mutex_lock(&trace_types_lock);
3736 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3737 *iter->trace = *tr->current_trace;
3738 mutex_unlock(&trace_types_lock);
3740 #ifdef CONFIG_TRACER_MAX_TRACE
3741 if (iter->snapshot && iter->trace->use_max_tr)
3742 return ERR_PTR(-EBUSY);
3745 if (!iter->snapshot)
3746 atomic_inc(&trace_record_taskinfo_disabled);
3748 if (*pos != iter->pos) {
3753 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3754 for_each_tracing_cpu(cpu)
3755 tracing_iter_reset(iter, cpu);
3757 tracing_iter_reset(iter, cpu_file);
3760 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3765 * If we overflowed the seq_file before, then we want
3766 * to just reuse the trace_seq buffer again.
3772 p = s_next(m, p, &l);
3776 trace_event_read_lock();
3777 trace_access_lock(cpu_file);
3781 static void s_stop(struct seq_file *m, void *p)
3783 struct trace_iterator *iter = m->private;
3785 #ifdef CONFIG_TRACER_MAX_TRACE
3786 if (iter->snapshot && iter->trace->use_max_tr)
3790 if (!iter->snapshot)
3791 atomic_dec(&trace_record_taskinfo_disabled);
3793 trace_access_unlock(iter->cpu_file);
3794 trace_event_read_unlock();
3798 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3799 unsigned long *entries, int cpu)
3801 unsigned long count;
3803 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3805 * If this buffer has skipped entries, then we hold all
3806 * entries for the trace and we need to ignore the
3807 * ones before the time stamp.
3809 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3810 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3811 /* total is the same as the entries */
3815 ring_buffer_overrun_cpu(buf->buffer, cpu);
3820 get_total_entries(struct array_buffer *buf,
3821 unsigned long *total, unsigned long *entries)
3829 for_each_tracing_cpu(cpu) {
3830 get_total_entries_cpu(buf, &t, &e, cpu);
3836 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3838 unsigned long total, entries;
3843 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3848 unsigned long trace_total_entries(struct trace_array *tr)
3850 unsigned long total, entries;
3855 get_total_entries(&tr->array_buffer, &total, &entries);
3860 static void print_lat_help_header(struct seq_file *m)
3862 seq_puts(m, "# _------=> CPU# \n"
3863 "# / _-----=> irqs-off \n"
3864 "# | / _----=> need-resched \n"
3865 "# || / _---=> hardirq/softirq \n"
3866 "# ||| / _--=> preempt-depth \n"
3868 "# cmd pid ||||| time | caller \n"
3869 "# \\ / ||||| \\ | / \n");
3872 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3874 unsigned long total;
3875 unsigned long entries;
3877 get_total_entries(buf, &total, &entries);
3878 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3879 entries, total, num_online_cpus());
3883 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3886 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3888 print_event_info(buf, m);
3890 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3891 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3894 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3897 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3898 const char *space = " ";
3899 int prec = tgid ? 12 : 2;
3901 print_event_info(buf, m);
3903 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3904 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3905 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3906 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3907 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3908 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3909 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3913 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3915 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3916 struct array_buffer *buf = iter->array_buffer;
3917 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3918 struct tracer *type = iter->trace;
3919 unsigned long entries;
3920 unsigned long total;
3921 const char *name = "preemption";
3925 get_total_entries(buf, &total, &entries);
3927 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3929 seq_puts(m, "# -----------------------------------"
3930 "---------------------------------\n");
3931 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3932 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3933 nsecs_to_usecs(data->saved_latency),
3937 #if defined(CONFIG_PREEMPT_NONE)
3939 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3941 #elif defined(CONFIG_PREEMPT)
3943 #elif defined(CONFIG_PREEMPT_RT)
3948 /* These are reserved for later use */
3951 seq_printf(m, " #P:%d)\n", num_online_cpus());
3955 seq_puts(m, "# -----------------\n");
3956 seq_printf(m, "# | task: %.16s-%d "
3957 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3958 data->comm, data->pid,
3959 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3960 data->policy, data->rt_priority);
3961 seq_puts(m, "# -----------------\n");
3963 if (data->critical_start) {
3964 seq_puts(m, "# => started at: ");
3965 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3966 trace_print_seq(m, &iter->seq);
3967 seq_puts(m, "\n# => ended at: ");
3968 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3969 trace_print_seq(m, &iter->seq);
3970 seq_puts(m, "\n#\n");
3976 static void test_cpu_buff_start(struct trace_iterator *iter)
3978 struct trace_seq *s = &iter->seq;
3979 struct trace_array *tr = iter->tr;
3981 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3984 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3987 if (cpumask_available(iter->started) &&
3988 cpumask_test_cpu(iter->cpu, iter->started))
3991 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3994 if (cpumask_available(iter->started))
3995 cpumask_set_cpu(iter->cpu, iter->started);
3997 /* Don't print started cpu buffer for the first entry of the trace */
3999 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4003 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4005 struct trace_array *tr = iter->tr;
4006 struct trace_seq *s = &iter->seq;
4007 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4008 struct trace_entry *entry;
4009 struct trace_event *event;
4013 test_cpu_buff_start(iter);
4015 event = ftrace_find_event(entry->type);
4017 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4018 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4019 trace_print_lat_context(iter);
4021 trace_print_context(iter);
4024 if (trace_seq_has_overflowed(s))
4025 return TRACE_TYPE_PARTIAL_LINE;
4028 return event->funcs->trace(iter, sym_flags, event);
4030 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4032 return trace_handle_return(s);
4035 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4037 struct trace_array *tr = iter->tr;
4038 struct trace_seq *s = &iter->seq;
4039 struct trace_entry *entry;
4040 struct trace_event *event;
4044 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4045 trace_seq_printf(s, "%d %d %llu ",
4046 entry->pid, iter->cpu, iter->ts);
4048 if (trace_seq_has_overflowed(s))
4049 return TRACE_TYPE_PARTIAL_LINE;
4051 event = ftrace_find_event(entry->type);
4053 return event->funcs->raw(iter, 0, event);
4055 trace_seq_printf(s, "%d ?\n", entry->type);
4057 return trace_handle_return(s);
4060 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4062 struct trace_array *tr = iter->tr;
4063 struct trace_seq *s = &iter->seq;
4064 unsigned char newline = '\n';
4065 struct trace_entry *entry;
4066 struct trace_event *event;
4070 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4071 SEQ_PUT_HEX_FIELD(s, entry->pid);
4072 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4073 SEQ_PUT_HEX_FIELD(s, iter->ts);
4074 if (trace_seq_has_overflowed(s))
4075 return TRACE_TYPE_PARTIAL_LINE;
4078 event = ftrace_find_event(entry->type);
4080 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4081 if (ret != TRACE_TYPE_HANDLED)
4085 SEQ_PUT_FIELD(s, newline);
4087 return trace_handle_return(s);
4090 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4092 struct trace_array *tr = iter->tr;
4093 struct trace_seq *s = &iter->seq;
4094 struct trace_entry *entry;
4095 struct trace_event *event;
4099 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4100 SEQ_PUT_FIELD(s, entry->pid);
4101 SEQ_PUT_FIELD(s, iter->cpu);
4102 SEQ_PUT_FIELD(s, iter->ts);
4103 if (trace_seq_has_overflowed(s))
4104 return TRACE_TYPE_PARTIAL_LINE;
4107 event = ftrace_find_event(entry->type);
4108 return event ? event->funcs->binary(iter, 0, event) :
4112 int trace_empty(struct trace_iterator *iter)
4114 struct ring_buffer_iter *buf_iter;
4117 /* If we are looking at one CPU buffer, only check that one */
4118 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4119 cpu = iter->cpu_file;
4120 buf_iter = trace_buffer_iter(iter, cpu);
4122 if (!ring_buffer_iter_empty(buf_iter))
4125 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4131 for_each_tracing_cpu(cpu) {
4132 buf_iter = trace_buffer_iter(iter, cpu);
4134 if (!ring_buffer_iter_empty(buf_iter))
4137 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4145 /* Called with trace_event_read_lock() held. */
4146 enum print_line_t print_trace_line(struct trace_iterator *iter)
4148 struct trace_array *tr = iter->tr;
4149 unsigned long trace_flags = tr->trace_flags;
4150 enum print_line_t ret;
4152 if (iter->lost_events) {
4153 if (iter->lost_events == (unsigned long)-1)
4154 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4157 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4158 iter->cpu, iter->lost_events);
4159 if (trace_seq_has_overflowed(&iter->seq))
4160 return TRACE_TYPE_PARTIAL_LINE;
4163 if (iter->trace && iter->trace->print_line) {
4164 ret = iter->trace->print_line(iter);
4165 if (ret != TRACE_TYPE_UNHANDLED)
4169 if (iter->ent->type == TRACE_BPUTS &&
4170 trace_flags & TRACE_ITER_PRINTK &&
4171 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4172 return trace_print_bputs_msg_only(iter);
4174 if (iter->ent->type == TRACE_BPRINT &&
4175 trace_flags & TRACE_ITER_PRINTK &&
4176 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4177 return trace_print_bprintk_msg_only(iter);
4179 if (iter->ent->type == TRACE_PRINT &&
4180 trace_flags & TRACE_ITER_PRINTK &&
4181 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4182 return trace_print_printk_msg_only(iter);
4184 if (trace_flags & TRACE_ITER_BIN)
4185 return print_bin_fmt(iter);
4187 if (trace_flags & TRACE_ITER_HEX)
4188 return print_hex_fmt(iter);
4190 if (trace_flags & TRACE_ITER_RAW)
4191 return print_raw_fmt(iter);
4193 return print_trace_fmt(iter);
4196 void trace_latency_header(struct seq_file *m)
4198 struct trace_iterator *iter = m->private;
4199 struct trace_array *tr = iter->tr;
4201 /* print nothing if the buffers are empty */
4202 if (trace_empty(iter))
4205 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4206 print_trace_header(m, iter);
4208 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4209 print_lat_help_header(m);
4212 void trace_default_header(struct seq_file *m)
4214 struct trace_iterator *iter = m->private;
4215 struct trace_array *tr = iter->tr;
4216 unsigned long trace_flags = tr->trace_flags;
4218 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4221 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4222 /* print nothing if the buffers are empty */
4223 if (trace_empty(iter))
4225 print_trace_header(m, iter);
4226 if (!(trace_flags & TRACE_ITER_VERBOSE))
4227 print_lat_help_header(m);
4229 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4230 if (trace_flags & TRACE_ITER_IRQ_INFO)
4231 print_func_help_header_irq(iter->array_buffer,
4234 print_func_help_header(iter->array_buffer, m,
4240 static void test_ftrace_alive(struct seq_file *m)
4242 if (!ftrace_is_dead())
4244 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4245 "# MAY BE MISSING FUNCTION EVENTS\n");
4248 #ifdef CONFIG_TRACER_MAX_TRACE
4249 static void show_snapshot_main_help(struct seq_file *m)
4251 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4252 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4253 "# Takes a snapshot of the main buffer.\n"
4254 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4255 "# (Doesn't have to be '2' works with any number that\n"
4256 "# is not a '0' or '1')\n");
4259 static void show_snapshot_percpu_help(struct seq_file *m)
4261 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4262 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4263 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4264 "# Takes a snapshot of the main buffer for this cpu.\n");
4266 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4267 "# Must use main snapshot file to allocate.\n");
4269 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4270 "# (Doesn't have to be '2' works with any number that\n"
4271 "# is not a '0' or '1')\n");
4274 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4276 if (iter->tr->allocated_snapshot)
4277 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4279 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4281 seq_puts(m, "# Snapshot commands:\n");
4282 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4283 show_snapshot_main_help(m);
4285 show_snapshot_percpu_help(m);
4288 /* Should never be called */
4289 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4292 static int s_show(struct seq_file *m, void *v)
4294 struct trace_iterator *iter = v;
4297 if (iter->ent == NULL) {
4299 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4301 test_ftrace_alive(m);
4303 if (iter->snapshot && trace_empty(iter))
4304 print_snapshot_help(m, iter);
4305 else if (iter->trace && iter->trace->print_header)
4306 iter->trace->print_header(m);
4308 trace_default_header(m);
4310 } else if (iter->leftover) {
4312 * If we filled the seq_file buffer earlier, we
4313 * want to just show it now.
4315 ret = trace_print_seq(m, &iter->seq);
4317 /* ret should this time be zero, but you never know */
4318 iter->leftover = ret;
4321 print_trace_line(iter);
4322 ret = trace_print_seq(m, &iter->seq);
4324 * If we overflow the seq_file buffer, then it will
4325 * ask us for this data again at start up.
4327 * ret is 0 if seq_file write succeeded.
4330 iter->leftover = ret;
4337 * Should be used after trace_array_get(), trace_types_lock
4338 * ensures that i_cdev was already initialized.
4340 static inline int tracing_get_cpu(struct inode *inode)
4342 if (inode->i_cdev) /* See trace_create_cpu_file() */
4343 return (long)inode->i_cdev - 1;
4344 return RING_BUFFER_ALL_CPUS;
4347 static const struct seq_operations tracer_seq_ops = {
4354 static struct trace_iterator *
4355 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4357 struct trace_array *tr = inode->i_private;
4358 struct trace_iterator *iter;
4361 if (tracing_disabled)
4362 return ERR_PTR(-ENODEV);
4364 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4366 return ERR_PTR(-ENOMEM);
4368 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4370 if (!iter->buffer_iter)
4374 * trace_find_next_entry() may need to save off iter->ent.
4375 * It will place it into the iter->temp buffer. As most
4376 * events are less than 128, allocate a buffer of that size.
4377 * If one is greater, then trace_find_next_entry() will
4378 * allocate a new buffer to adjust for the bigger iter->ent.
4379 * It's not critical if it fails to get allocated here.
4381 iter->temp = kmalloc(128, GFP_KERNEL);
4383 iter->temp_size = 128;
4386 * trace_event_printf() may need to modify the given format
4387 * string to replace %p with %px so that it shows the real address
4388 * instead of a hashed value. However, that is only needed for event
4389 * tracing; other tracers may not need it. Defer the allocation
4390 * until it is needed.
4396 * We make a copy of the current tracer to avoid concurrent
4397 * changes on it while we are reading.
4399 mutex_lock(&trace_types_lock);
4400 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4404 *iter->trace = *tr->current_trace;
4406 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4411 #ifdef CONFIG_TRACER_MAX_TRACE
4412 /* Currently only the top directory has a snapshot */
4413 if (tr->current_trace->print_max || snapshot)
4414 iter->array_buffer = &tr->max_buffer;
4417 iter->array_buffer = &tr->array_buffer;
4418 iter->snapshot = snapshot;
4420 iter->cpu_file = tracing_get_cpu(inode);
4421 mutex_init(&iter->mutex);
4423 /* Notify the tracer early; before we stop tracing. */
4424 if (iter->trace->open)
4425 iter->trace->open(iter);
4427 /* Annotate start of buffers if we had overruns */
4428 if (ring_buffer_overruns(iter->array_buffer->buffer))
4429 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4431 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4432 if (trace_clocks[tr->clock_id].in_ns)
4433 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4436 * If pause-on-trace is enabled, then stop the trace while
4437 * dumping, unless this is the "snapshot" file
4439 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4440 tracing_stop_tr(tr);
4442 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4443 for_each_tracing_cpu(cpu) {
4444 iter->buffer_iter[cpu] =
4445 ring_buffer_read_prepare(iter->array_buffer->buffer,
4448 ring_buffer_read_prepare_sync();
4449 for_each_tracing_cpu(cpu) {
4450 ring_buffer_read_start(iter->buffer_iter[cpu]);
4451 tracing_iter_reset(iter, cpu);
4454 cpu = iter->cpu_file;
4455 iter->buffer_iter[cpu] =
4456 ring_buffer_read_prepare(iter->array_buffer->buffer,
4458 ring_buffer_read_prepare_sync();
4459 ring_buffer_read_start(iter->buffer_iter[cpu]);
4460 tracing_iter_reset(iter, cpu);
4463 mutex_unlock(&trace_types_lock);
4468 mutex_unlock(&trace_types_lock);
4471 kfree(iter->buffer_iter);
4473 seq_release_private(inode, file);
4474 return ERR_PTR(-ENOMEM);
4477 int tracing_open_generic(struct inode *inode, struct file *filp)
4481 ret = tracing_check_open_get_tr(NULL);
4485 filp->private_data = inode->i_private;
4489 bool tracing_is_disabled(void)
4491 return tracing_disabled ? true : false;
4495 * Open and update trace_array ref count.
4496 * Must have the current trace_array passed to it.
4498 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4500 struct trace_array *tr = inode->i_private;
4503 ret = tracing_check_open_get_tr(tr);
4507 filp->private_data = inode->i_private;
4512 static int tracing_release(struct inode *inode, struct file *file)
4514 struct trace_array *tr = inode->i_private;
4515 struct seq_file *m = file->private_data;
4516 struct trace_iterator *iter;
4519 if (!(file->f_mode & FMODE_READ)) {
4520 trace_array_put(tr);
4524 /* Writes do not use seq_file */
4526 mutex_lock(&trace_types_lock);
4528 for_each_tracing_cpu(cpu) {
4529 if (iter->buffer_iter[cpu])
4530 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4533 if (iter->trace && iter->trace->close)
4534 iter->trace->close(iter);
4536 if (!iter->snapshot && tr->stop_count)
4537 /* reenable tracing if it was previously enabled */
4538 tracing_start_tr(tr);
4540 __trace_array_put(tr);
4542 mutex_unlock(&trace_types_lock);
4544 mutex_destroy(&iter->mutex);
4545 free_cpumask_var(iter->started);
4549 kfree(iter->buffer_iter);
4550 seq_release_private(inode, file);
4555 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4557 struct trace_array *tr = inode->i_private;
4559 trace_array_put(tr);
4563 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4565 struct trace_array *tr = inode->i_private;
4567 trace_array_put(tr);
4569 return single_release(inode, file);
4572 static int tracing_open(struct inode *inode, struct file *file)
4574 struct trace_array *tr = inode->i_private;
4575 struct trace_iterator *iter;
4578 ret = tracing_check_open_get_tr(tr);
4582 /* If this file was open for write, then erase contents */
4583 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4584 int cpu = tracing_get_cpu(inode);
4585 struct array_buffer *trace_buf = &tr->array_buffer;
4587 #ifdef CONFIG_TRACER_MAX_TRACE
4588 if (tr->current_trace->print_max)
4589 trace_buf = &tr->max_buffer;
4592 if (cpu == RING_BUFFER_ALL_CPUS)
4593 tracing_reset_online_cpus(trace_buf);
4595 tracing_reset_cpu(trace_buf, cpu);
4598 if (file->f_mode & FMODE_READ) {
4599 iter = __tracing_open(inode, file, false);
4601 ret = PTR_ERR(iter);
4602 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4603 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4607 trace_array_put(tr);
4613 * Some tracers are not suitable for instance buffers.
4614 * A tracer is always available for the global array (toplevel)
4615 * or if it explicitly states that it is.
4618 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4620 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4623 /* Find the next tracer that this trace array may use */
4624 static struct tracer *
4625 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4627 while (t && !trace_ok_for_array(t, tr))
4634 t_next(struct seq_file *m, void *v, loff_t *pos)
4636 struct trace_array *tr = m->private;
4637 struct tracer *t = v;
4642 t = get_tracer_for_array(tr, t->next);
4647 static void *t_start(struct seq_file *m, loff_t *pos)
4649 struct trace_array *tr = m->private;
4653 mutex_lock(&trace_types_lock);
4655 t = get_tracer_for_array(tr, trace_types);
4656 for (; t && l < *pos; t = t_next(m, t, &l))
4662 static void t_stop(struct seq_file *m, void *p)
4664 mutex_unlock(&trace_types_lock);
4667 static int t_show(struct seq_file *m, void *v)
4669 struct tracer *t = v;
4674 seq_puts(m, t->name);
4683 static const struct seq_operations show_traces_seq_ops = {
4690 static int show_traces_open(struct inode *inode, struct file *file)
4692 struct trace_array *tr = inode->i_private;
4696 ret = tracing_check_open_get_tr(tr);
4700 ret = seq_open(file, &show_traces_seq_ops);
4702 trace_array_put(tr);
4706 m = file->private_data;
4712 static int show_traces_release(struct inode *inode, struct file *file)
4714 struct trace_array *tr = inode->i_private;
4716 trace_array_put(tr);
4717 return seq_release(inode, file);
4721 tracing_write_stub(struct file *filp, const char __user *ubuf,
4722 size_t count, loff_t *ppos)
4727 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4731 if (file->f_mode & FMODE_READ)
4732 ret = seq_lseek(file, offset, whence);
4734 file->f_pos = ret = 0;
4739 static const struct file_operations tracing_fops = {
4740 .open = tracing_open,
4742 .write = tracing_write_stub,
4743 .llseek = tracing_lseek,
4744 .release = tracing_release,
4747 static const struct file_operations show_traces_fops = {
4748 .open = show_traces_open,
4750 .llseek = seq_lseek,
4751 .release = show_traces_release,
4755 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4756 size_t count, loff_t *ppos)
4758 struct trace_array *tr = file_inode(filp)->i_private;
4762 len = snprintf(NULL, 0, "%*pb\n",
4763 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4764 mask_str = kmalloc(len, GFP_KERNEL);
4768 len = snprintf(mask_str, len, "%*pb\n",
4769 cpumask_pr_args(tr->tracing_cpumask));
4774 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4782 int tracing_set_cpumask(struct trace_array *tr,
4783 cpumask_var_t tracing_cpumask_new)
4790 local_irq_disable();
4791 arch_spin_lock(&tr->max_lock);
4792 for_each_tracing_cpu(cpu) {
4794 * Increase/decrease the disabled counter if we are
4795 * about to flip a bit in the cpumask:
4797 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4798 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4799 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4800 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4802 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4803 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4804 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4805 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4808 arch_spin_unlock(&tr->max_lock);
4811 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4817 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4818 size_t count, loff_t *ppos)
4820 struct trace_array *tr = file_inode(filp)->i_private;
4821 cpumask_var_t tracing_cpumask_new;
4824 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4827 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4831 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4835 free_cpumask_var(tracing_cpumask_new);
4840 free_cpumask_var(tracing_cpumask_new);
4845 static const struct file_operations tracing_cpumask_fops = {
4846 .open = tracing_open_generic_tr,
4847 .read = tracing_cpumask_read,
4848 .write = tracing_cpumask_write,
4849 .release = tracing_release_generic_tr,
4850 .llseek = generic_file_llseek,
4853 static int tracing_trace_options_show(struct seq_file *m, void *v)
4855 struct tracer_opt *trace_opts;
4856 struct trace_array *tr = m->private;
4860 mutex_lock(&trace_types_lock);
4861 tracer_flags = tr->current_trace->flags->val;
4862 trace_opts = tr->current_trace->flags->opts;
4864 for (i = 0; trace_options[i]; i++) {
4865 if (tr->trace_flags & (1 << i))
4866 seq_printf(m, "%s\n", trace_options[i]);
4868 seq_printf(m, "no%s\n", trace_options[i]);
4871 for (i = 0; trace_opts[i].name; i++) {
4872 if (tracer_flags & trace_opts[i].bit)
4873 seq_printf(m, "%s\n", trace_opts[i].name);
4875 seq_printf(m, "no%s\n", trace_opts[i].name);
4877 mutex_unlock(&trace_types_lock);
4882 static int __set_tracer_option(struct trace_array *tr,
4883 struct tracer_flags *tracer_flags,
4884 struct tracer_opt *opts, int neg)
4886 struct tracer *trace = tracer_flags->trace;
4889 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4894 tracer_flags->val &= ~opts->bit;
4896 tracer_flags->val |= opts->bit;
4900 /* Try to assign a tracer specific option */
4901 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4903 struct tracer *trace = tr->current_trace;
4904 struct tracer_flags *tracer_flags = trace->flags;
4905 struct tracer_opt *opts = NULL;
4908 for (i = 0; tracer_flags->opts[i].name; i++) {
4909 opts = &tracer_flags->opts[i];
4911 if (strcmp(cmp, opts->name) == 0)
4912 return __set_tracer_option(tr, trace->flags, opts, neg);
4918 /* Some tracers require overwrite to stay enabled */
4919 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4921 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
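/*
 * set_tracer_flag - set or clear one TRACE_ITER_* flag for @tr.
 *
 * Besides updating tr->trace_flags, a few flags have side effects that
 * are dispatched here: RECORD_CMD/RECORD_TGID toggle comm/tgid saving,
 * OVERWRITE switches the ring buffer between overwrite and
 * stop-when-full mode, and PRINTK gates trace_printk() output.
 */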
4927 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4929 if ((mask == TRACE_ITER_RECORD_TGID) ||
4930 (mask == TRACE_ITER_RECORD_CMD))
4931 lockdep_assert_held(&event_mutex);
4933 /* do nothing if flag is already set */
4934 if (!!(tr->trace_flags & mask) == !!enabled)
4937 /* Give the tracer a chance to approve the change */
4938 if (tr->current_trace->flag_changed)
4939 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4943 tr->trace_flags |= mask;
4945 tr->trace_flags &= ~mask;
4947 if (mask == TRACE_ITER_RECORD_CMD)
4948 trace_event_enable_cmd_record(enabled);
4950 if (mask == TRACE_ITER_RECORD_TGID) {
4952 tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4956 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4960 trace_event_enable_tgid_record(enabled);
4963 if (mask == TRACE_ITER_EVENT_FORK)
4964 trace_event_follow_fork(tr, enabled);
4966 if (mask == TRACE_ITER_FUNC_FORK)
4967 ftrace_pid_follow_fork(tr, enabled);
4969 if (mask == TRACE_ITER_OVERWRITE) {
4970 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4971 #ifdef CONFIG_TRACER_MAX_TRACE
4972 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4976 if (mask == TRACE_ITER_PRINTK) {
4977 trace_printk_start_stop_comm(enabled);
4978 trace_printk_control(enabled);
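/*
 * trace_set_options - parse a single option name (for example
 * "print-parent") and apply it; a leading "no" clears the flag instead.
 * The name is matched against the global trace_options[] table first
 * and, failing that, against the current tracer's private options.
 */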
4984 int trace_set_options(struct trace_array *tr, char *option)
4989 size_t orig_len = strlen(option);
4992 cmp = strstrip(option);
4994 len = str_has_prefix(cmp, "no");
5000 mutex_lock(&event_mutex);
5001 mutex_lock(&trace_types_lock);
5003 ret = match_string(trace_options, -1, cmp);
5004 /* If no option could be set, test the specific tracer options */
5006 ret = set_tracer_option(tr, cmp, neg);
5008 ret = set_tracer_flag(tr, 1 << ret, !neg);
5010 mutex_unlock(&trace_types_lock);
5011 mutex_unlock(&event_mutex);
5014 * If the first trailing whitespace is replaced with '\0' by strstrip,
5015 * turn it back into a space.
5017 if (orig_len > strlen(option))
5018 option[strlen(option)] = ' ';
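/*
 * Apply the comma-separated list handed in through the "trace_options="
 * kernel command line parameter, one option at a time, to the global
 * trace array.
 */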
5023 static void __init apply_trace_boot_options(void)
5025 char *buf = trace_boot_options_buf;
5029 option = strsep(&buf, ",");
5035 trace_set_options(&global_trace, option);
5037 /* Put back the comma to allow this to be called again */
5044 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5045 size_t cnt, loff_t *ppos)
5047 struct seq_file *m = filp->private_data;
5048 struct trace_array *tr = m->private;
5052 if (cnt >= sizeof(buf))
5055 if (copy_from_user(buf, ubuf, cnt))
5060 ret = trace_set_options(tr, buf);
5069 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5071 struct trace_array *tr = inode->i_private;
5074 ret = tracing_check_open_get_tr(tr);
5078 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5080 trace_array_put(tr);
5085 static const struct file_operations tracing_iter_fops = {
5086 .open = tracing_trace_options_open,
5088 .llseek = seq_lseek,
5089 .release = tracing_single_release_tr,
5090 .write = tracing_trace_options_write,
5093 static const char readme_msg[] =
5094 "tracing mini-HOWTO:\n\n"
5095 "# echo 0 > tracing_on : quick way to disable tracing\n"
5096 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5097 " Important files:\n"
5098 " trace\t\t\t- The static contents of the buffer\n"
5099 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5100 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5101 " current_tracer\t- function and latency tracers\n"
5102 " available_tracers\t- list of configured tracers for current_tracer\n"
5103 " error_log\t- error log for failed commands (that support it)\n"
5104 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5105 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5106 " trace_clock\t\t-change the clock used to order events\n"
5107 " local: Per cpu clock but may not be synced across CPUs\n"
5108 " global: Synced across CPUs but slows tracing down.\n"
5109 " counter: Not a clock, but just an increment\n"
5110 " uptime: Jiffy counter from time of boot\n"
5111 " perf: Same clock that perf events use\n"
5112 #ifdef CONFIG_X86_64
5113 " x86-tsc: TSC cycle counter\n"
5115 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5116 " delta: Delta difference against a buffer-wide timestamp\n"
5117 " absolute: Absolute (standalone) timestamp\n"
5118 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5119 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5120 " tracing_cpumask\t- Limit which CPUs to trace\n"
5121 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5122 "\t\t\t Remove sub-buffer with rmdir\n"
5123 " trace_options\t\t- Set format or modify how tracing happens\n"
5124 "\t\t\t Disable an option by prefixing 'no' to the\n"
5125 "\t\t\t option name\n"
5126 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5127 #ifdef CONFIG_DYNAMIC_FTRACE
5128 "\n available_filter_functions - list of functions that can be filtered on\n"
5129 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5130 "\t\t\t functions\n"
5131 "\t accepts: func_full_name or glob-matching-pattern\n"
5132 "\t modules: Can select a group via module\n"
5133 "\t Format: :mod:<module-name>\n"
5134 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5135 "\t triggers: a command to perform when function is hit\n"
5136 "\t Format: <function>:<trigger>[:count]\n"
5137 "\t trigger: traceon, traceoff\n"
5138 "\t\t enable_event:<system>:<event>\n"
5139 "\t\t disable_event:<system>:<event>\n"
5140 #ifdef CONFIG_STACKTRACE
5143 #ifdef CONFIG_TRACER_SNAPSHOT
5148 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5149 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5150 "\t The first one will disable tracing every time do_fault is hit\n"
5151 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5152 "\t The first time do trap is hit and it disables tracing, the\n"
5153 "\t counter will decrement to 2. If tracing is already disabled,\n"
5154 "\t the counter will not decrement. It only decrements when the\n"
5155 "\t trigger did work\n"
5156 "\t To remove trigger without count:\n"
5157 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5158 "\t To remove trigger with a count:\n"
5159 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5160 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5161 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5162 "\t modules: Can select a group via module command :mod:\n"
5163 "\t Does not accept triggers\n"
5164 #endif /* CONFIG_DYNAMIC_FTRACE */
5165 #ifdef CONFIG_FUNCTION_TRACER
5166 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5168 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5171 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5172 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5173 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5174 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5176 #ifdef CONFIG_TRACER_SNAPSHOT
5177 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5178 "\t\t\t snapshot buffer. Read the contents for more\n"
5179 "\t\t\t information\n"
5181 #ifdef CONFIG_STACK_TRACER
5182 " stack_trace\t\t- Shows the max stack trace when active\n"
5183 " stack_max_size\t- Shows current max stack size that was traced\n"
5184 "\t\t\t Write into this file to reset the max size (trigger a\n"
5185 "\t\t\t new trace)\n"
5186 #ifdef CONFIG_DYNAMIC_FTRACE
5187 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5190 #endif /* CONFIG_STACK_TRACER */
5191 #ifdef CONFIG_DYNAMIC_EVENTS
5192 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5193 "\t\t\t Write into this file to define/undefine new trace events.\n"
5195 #ifdef CONFIG_KPROBE_EVENTS
5196 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5197 "\t\t\t Write into this file to define/undefine new trace events.\n"
5199 #ifdef CONFIG_UPROBE_EVENTS
5200 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5201 "\t\t\t Write into this file to define/undefine new trace events.\n"
5203 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5204 "\t accepts: event-definitions (one definition per line)\n"
5205 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5206 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5207 #ifdef CONFIG_HIST_TRIGGERS
5208 "\t s:[synthetic/]<event> <field> [<field>]\n"
5210 "\t -:[<group>/]<event>\n"
5211 #ifdef CONFIG_KPROBE_EVENTS
5212 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5213 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5215 #ifdef CONFIG_UPROBE_EVENTS
5216 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5218 "\t args: <name>=fetcharg[:type]\n"
5219 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5220 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5221 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5223 "\t $stack<index>, $stack, $retval, $comm,\n"
5225 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5226 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5227 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5228 "\t <type>\\[<array-size>\\]\n"
5229 #ifdef CONFIG_HIST_TRIGGERS
5230 "\t field: <stype> <name>;\n"
5231 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5232 "\t [unsigned] char/int/long\n"
5235 " events/\t\t- Directory containing all trace event subsystems:\n"
5236 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5237 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5238 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5240 " filter\t\t- If set, only events passing filter are traced\n"
5241 " events/<system>/<event>/\t- Directory containing control files for\n"
5243 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5244 " filter\t\t- If set, only events passing filter are traced\n"
5245 " trigger\t\t- If set, a command to perform when event is hit\n"
5246 "\t Format: <trigger>[:count][if <filter>]\n"
5247 "\t trigger: traceon, traceoff\n"
5248 "\t enable_event:<system>:<event>\n"
5249 "\t disable_event:<system>:<event>\n"
5250 #ifdef CONFIG_HIST_TRIGGERS
5251 "\t enable_hist:<system>:<event>\n"
5252 "\t disable_hist:<system>:<event>\n"
5254 #ifdef CONFIG_STACKTRACE
5257 #ifdef CONFIG_TRACER_SNAPSHOT
5260 #ifdef CONFIG_HIST_TRIGGERS
5261 "\t\t hist (see below)\n"
5263 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5264 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5265 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5266 "\t events/block/block_unplug/trigger\n"
5267 "\t The first disables tracing every time block_unplug is hit.\n"
5268 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5269 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5270 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5271 "\t Like function triggers, the counter is only decremented if it\n"
5272 "\t enabled or disabled tracing.\n"
5273 "\t To remove a trigger without a count:\n"
5274 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5275 "\t To remove a trigger with a count:\n"
5276 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5277 "\t Filters can be ignored when removing a trigger.\n"
5278 #ifdef CONFIG_HIST_TRIGGERS
5279 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5280 "\t Format: hist:keys=<field1[,field2,...]>\n"
5281 "\t [:values=<field1[,field2,...]>]\n"
5282 "\t [:sort=<field1[,field2,...]>]\n"
5283 "\t [:size=#entries]\n"
5284 "\t [:pause][:continue][:clear]\n"
5285 "\t [:name=histname1]\n"
5286 "\t [:<handler>.<action>]\n"
5287 "\t [if <filter>]\n\n"
5288 "\t When a matching event is hit, an entry is added to a hash\n"
5289 "\t table using the key(s) and value(s) named, and the value of a\n"
5290 "\t sum called 'hitcount' is incremented. Keys and values\n"
5291 "\t correspond to fields in the event's format description. Keys\n"
5292 "\t can be any field, or the special string 'stacktrace'.\n"
5293 "\t Compound keys consisting of up to two fields can be specified\n"
5294 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5295 "\t fields. Sort keys consisting of up to two fields can be\n"
5296 "\t specified using the 'sort' keyword. The sort direction can\n"
5297 "\t be modified by appending '.descending' or '.ascending' to a\n"
5298 "\t sort field. The 'size' parameter can be used to specify more\n"
5299 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5300 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5301 "\t its histogram data will be shared with other triggers of the\n"
5302 "\t same name, and trigger hits will update this common data.\n\n"
5303 "\t Reading the 'hist' file for the event will dump the hash\n"
5304 "\t table in its entirety to stdout. If there are multiple hist\n"
5305 "\t triggers attached to an event, there will be a table for each\n"
5306 "\t trigger in the output. The table displayed for a named\n"
5307 "\t trigger will be the same as any other instance having the\n"
5308 "\t same name. The default format used to display a given field\n"
5309 "\t can be modified by appending any of the following modifiers\n"
5310 "\t to the field name, as applicable:\n\n"
5311 "\t .hex display a number as a hex value\n"
5312 "\t .sym display an address as a symbol\n"
5313 "\t .sym-offset display an address as a symbol and offset\n"
5314 "\t .execname display a common_pid as a program name\n"
5315 "\t .syscall display a syscall id as a syscall name\n"
5316 "\t .log2 display log2 value rather than raw number\n"
5317 "\t .usecs display a common_timestamp in microseconds\n\n"
5318 "\t The 'pause' parameter can be used to pause an existing hist\n"
5319 "\t trigger or to start a hist trigger but not log any events\n"
5320 "\t until told to do so. 'continue' can be used to start or\n"
5321 "\t restart a paused hist trigger.\n\n"
5322 "\t The 'clear' parameter will clear the contents of a running\n"
5323 "\t hist trigger and leave its current paused/active state\n"
5325 "\t The enable_hist and disable_hist triggers can be used to\n"
5326 "\t have one event conditionally start and stop another event's\n"
5327 "\t already-attached hist trigger. The syntax is analogous to\n"
5328 "\t the enable_event and disable_event triggers.\n\n"
5329 "\t Hist trigger handlers and actions are executed whenever a\n"
5330 "\t a histogram entry is added or updated. They take the form:\n\n"
5331 "\t <handler>.<action>\n\n"
5332 "\t The available handlers are:\n\n"
5333 "\t onmatch(matching.event) - invoke on addition or update\n"
5334 "\t onmax(var) - invoke if var exceeds current max\n"
5335 "\t onchange(var) - invoke action if var changes\n\n"
5336 "\t The available actions are:\n\n"
5337 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5338 "\t save(field,...) - save current event fields\n"
5339 #ifdef CONFIG_TRACER_SNAPSHOT
5340 "\t snapshot() - snapshot the trace buffer\n\n"
5342 #ifdef CONFIG_SYNTH_EVENTS
5343 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5344 "\t Write into this file to define/undefine new synthetic events.\n"
5345 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5351 tracing_readme_read(struct file *filp, char __user *ubuf,
5352 size_t cnt, loff_t *ppos)
5354 return simple_read_from_buffer(ubuf, cnt, ppos,
5355 readme_msg, strlen(readme_msg));
5358 static const struct file_operations tracing_readme_fops = {
5359 .open = tracing_open_generic,
5360 .read = tracing_readme_read,
5361 .llseek = generic_file_llseek,
5364 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5368 if (*pos || m->count)
5373 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5374 if (trace_find_tgid(*ptr))
5381 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5391 v = saved_tgids_next(m, v, &l);
5399 static void saved_tgids_stop(struct seq_file *m, void *v)
5403 static int saved_tgids_show(struct seq_file *m, void *v)
5405 int pid = (int *)v - tgid_map;
5407 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5411 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5412 .start = saved_tgids_start,
5413 .stop = saved_tgids_stop,
5414 .next = saved_tgids_next,
5415 .show = saved_tgids_show,
5418 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5422 ret = tracing_check_open_get_tr(NULL);
5426 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5430 static const struct file_operations tracing_saved_tgids_fops = {
5431 .open = tracing_saved_tgids_open,
5433 .llseek = seq_lseek,
5434 .release = seq_release,
5437 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5439 unsigned int *ptr = v;
5441 if (*pos || m->count)
5446 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5448 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5457 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5463 arch_spin_lock(&trace_cmdline_lock);
5465 v = &savedcmd->map_cmdline_to_pid[0];
5467 v = saved_cmdlines_next(m, v, &l);
5475 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5477 arch_spin_unlock(&trace_cmdline_lock);
5481 static int saved_cmdlines_show(struct seq_file *m, void *v)
5483 char buf[TASK_COMM_LEN];
5484 unsigned int *pid = v;
5486 __trace_find_cmdline(*pid, buf);
5487 seq_printf(m, "%d %s\n", *pid, buf);
5491 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5492 .start = saved_cmdlines_start,
5493 .next = saved_cmdlines_next,
5494 .stop = saved_cmdlines_stop,
5495 .show = saved_cmdlines_show,
5498 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5502 ret = tracing_check_open_get_tr(NULL);
5506 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5509 static const struct file_operations tracing_saved_cmdlines_fops = {
5510 .open = tracing_saved_cmdlines_open,
5512 .llseek = seq_lseek,
5513 .release = seq_release,
5517 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5518 size_t cnt, loff_t *ppos)
5523 arch_spin_lock(&trace_cmdline_lock);
5524 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5525 arch_spin_unlock(&trace_cmdline_lock);
5527 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5530 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5532 kfree(s->saved_cmdlines);
5533 kfree(s->map_cmdline_to_pid);
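/*
 * Resize the saved-cmdlines cache: allocate a new buffer of @val entries,
 * swap it in under trace_cmdline_lock, and free the old buffer along with
 * the entries it cached.
 */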
5537 static int tracing_resize_saved_cmdlines(unsigned int val)
5539 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5541 s = kmalloc(sizeof(*s), GFP_KERNEL);
5545 if (allocate_cmdlines_buffer(val, s) < 0) {
5550 arch_spin_lock(&trace_cmdline_lock);
5551 savedcmd_temp = savedcmd;
5553 arch_spin_unlock(&trace_cmdline_lock);
5554 free_saved_cmdlines_buffer(savedcmd_temp);
5560 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5561 size_t cnt, loff_t *ppos)
5566 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5570 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5571 if (!val || val > PID_MAX_DEFAULT)
5574 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5583 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5584 .open = tracing_open_generic,
5585 .read = tracing_saved_cmdlines_size_read,
5586 .write = tracing_saved_cmdlines_size_write,
5589 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5590 static union trace_eval_map_item *
5591 update_eval_map(union trace_eval_map_item *ptr)
5593 if (!ptr->map.eval_string) {
5594 if (ptr->tail.next) {
5595 ptr = ptr->tail.next;
5596 /* Set ptr to the next real item (skip head) */
5604 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5606 union trace_eval_map_item *ptr = v;
5609 * Paranoid! If ptr points to end, we don't want to increment past it.
5610 * This really should never happen.
5613 ptr = update_eval_map(ptr);
5614 if (WARN_ON_ONCE(!ptr))
5618 ptr = update_eval_map(ptr);
5623 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5625 union trace_eval_map_item *v;
5628 mutex_lock(&trace_eval_mutex);
5630 v = trace_eval_maps;
5634 while (v && l < *pos) {
5635 v = eval_map_next(m, v, &l);
5641 static void eval_map_stop(struct seq_file *m, void *v)
5643 mutex_unlock(&trace_eval_mutex);
5646 static int eval_map_show(struct seq_file *m, void *v)
5648 union trace_eval_map_item *ptr = v;
5650 seq_printf(m, "%s %ld (%s)\n",
5651 ptr->map.eval_string, ptr->map.eval_value,
5657 static const struct seq_operations tracing_eval_map_seq_ops = {
5658 .start = eval_map_start,
5659 .next = eval_map_next,
5660 .stop = eval_map_stop,
5661 .show = eval_map_show,
5664 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5668 ret = tracing_check_open_get_tr(NULL);
5672 return seq_open(filp, &tracing_eval_map_seq_ops);
5675 static const struct file_operations tracing_eval_map_fops = {
5676 .open = tracing_eval_map_open,
5678 .llseek = seq_lseek,
5679 .release = seq_release,
5682 static inline union trace_eval_map_item *
5683 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5685 /* Return tail of array given the head */
5686 return ptr + ptr->head.length + 1;
5690 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5693 struct trace_eval_map **stop;
5694 struct trace_eval_map **map;
5695 union trace_eval_map_item *map_array;
5696 union trace_eval_map_item *ptr;
5701 * The trace_eval_maps contains the map plus a head and tail item,
5702 * where the head holds the module and length of array, and the
5703 * tail holds a pointer to the next list.
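 *
 * Laid out in memory, one chunk looks like:
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * which is why len + 2 items are allocated below, and why
 * trace_eval_jmp_to_tail() skips head.length + 1 entries.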
5705 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5707 pr_warn("Unable to allocate trace eval mapping\n");
5711 mutex_lock(&trace_eval_mutex);
5713 if (!trace_eval_maps)
5714 trace_eval_maps = map_array;
5716 ptr = trace_eval_maps;
5718 ptr = trace_eval_jmp_to_tail(ptr);
5719 if (!ptr->tail.next)
5721 ptr = ptr->tail.next;
5724 ptr->tail.next = map_array;
5726 map_array->head.mod = mod;
5727 map_array->head.length = len;
5730 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5731 map_array->map = **map;
5734 memset(map_array, 0, sizeof(*map_array));
5736 mutex_unlock(&trace_eval_mutex);
5739 static void trace_create_eval_file(struct dentry *d_tracer)
5741 trace_create_file("eval_map", 0444, d_tracer,
5742 NULL, &tracing_eval_map_fops);
5745 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5746 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5747 static inline void trace_insert_eval_map_file(struct module *mod,
5748 struct trace_eval_map **start, int len) { }
5749 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5751 static void trace_insert_eval_map(struct module *mod,
5752 struct trace_eval_map **start, int len)
5754 struct trace_eval_map **map;
5761 trace_event_eval_update(map, len);
5763 trace_insert_eval_map_file(mod, start, len);
5767 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5768 size_t cnt, loff_t *ppos)
5770 struct trace_array *tr = filp->private_data;
5771 char buf[MAX_TRACER_SIZE+2];
5774 mutex_lock(&trace_types_lock);
5775 r = sprintf(buf, "%s\n", tr->current_trace->name);
5776 mutex_unlock(&trace_types_lock);
5778 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5781 int tracer_init(struct tracer *t, struct trace_array *tr)
5783 tracing_reset_online_cpus(&tr->array_buffer);
5787 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5791 for_each_tracing_cpu(cpu)
5792 per_cpu_ptr(buf->data, cpu)->entries = val;
5795 #ifdef CONFIG_TRACER_MAX_TRACE
5796 /* resize @tr's buffer to the size of @size_tr's entries */
5797 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5798 struct array_buffer *size_buf, int cpu_id)
5802 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5803 for_each_tracing_cpu(cpu) {
5804 ret = ring_buffer_resize(trace_buf->buffer,
5805 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5808 per_cpu_ptr(trace_buf->data, cpu)->entries =
5809 per_cpu_ptr(size_buf->data, cpu)->entries;
5812 ret = ring_buffer_resize(trace_buf->buffer,
5813 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5815 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5816 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5821 #endif /* CONFIG_TRACER_MAX_TRACE */
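/*
 * Resize the main ring buffer for @tr (a single CPU or all of them).
 * When the top-level array is running a tracer that uses the
 * max/snapshot buffer, that buffer is resized as well; if resizing the
 * max buffer fails, the main buffer is put back to its previous size.
 */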
5823 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5824 unsigned long size, int cpu)
5829 * If kernel or user changes the size of the ring buffer
5830 * we use the size that was given, and we can forget about
5831 * expanding it later.
5833 ring_buffer_expanded = true;
5835 /* May be called before buffers are initialized */
5836 if (!tr->array_buffer.buffer)
5839 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5843 #ifdef CONFIG_TRACER_MAX_TRACE
5844 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5845 !tr->current_trace->use_max_tr)
5848 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5850 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5851 &tr->array_buffer, cpu);
5854 * AARGH! We are left with different
5855 * size max buffer!!!!
5856 * The max buffer is our "snapshot" buffer.
5857 * When a tracer needs a snapshot (one of the
5858 * latency tracers), it swaps the max buffer
5859 * with the saved snapshot. We succeeded in
5860 * updating the size of the main buffer, but failed to
5861 * update the size of the max buffer. But when we tried
5862 * to reset the main buffer to the original size, we
5863 * failed there too. This is very unlikely to
5864 * happen, but if it does, warn and kill all
5868 tracing_disabled = 1;
5873 if (cpu == RING_BUFFER_ALL_CPUS)
5874 set_buffer_entries(&tr->max_buffer, size);
5876 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5879 #endif /* CONFIG_TRACER_MAX_TRACE */
5881 if (cpu == RING_BUFFER_ALL_CPUS)
5882 set_buffer_entries(&tr->array_buffer, size);
5884 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5889 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5890 unsigned long size, int cpu_id)
5894 mutex_lock(&trace_types_lock);
5896 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5897 /* make sure, this cpu is enabled in the mask */
5898 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5904 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5909 mutex_unlock(&trace_types_lock);
5916 * tracing_update_buffers - used by tracing facility to expand ring buffers
5918 * To save memory when tracing is never used on a system that has it
5919 * configured in, the ring buffers are set to a minimum size. Once a
5920 * user starts to use the tracing facility, the buffers need to grow
5921 * to their default size.
5923 * This function is to be called when a tracer is about to be used.
5925 int tracing_update_buffers(void)
5929 mutex_lock(&trace_types_lock);
5930 if (!ring_buffer_expanded)
5931 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5932 RING_BUFFER_ALL_CPUS);
5933 mutex_unlock(&trace_types_lock);
5938 struct trace_option_dentry;
5941 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5944 * Used to clear out the tracer before deletion of an instance.
5945 * Must have trace_types_lock held.
5947 static void tracing_set_nop(struct trace_array *tr)
5949 if (tr->current_trace == &nop_trace)
5952 tr->current_trace->enabled--;
5954 if (tr->current_trace->reset)
5955 tr->current_trace->reset(tr);
5957 tr->current_trace = &nop_trace;
5960 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5962 /* Only enable if the directory has been created already. */
5966 create_trace_option_files(tr, t);
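/*
 * tracing_set_tracer - switch @tr to the tracer named @buf.
 *
 * The ring buffer is expanded to its default size if that has not
 * happened yet, the old tracer is reset, and the new one is initialized.
 * Switching is refused while trace_pipe readers are active. This is what
 * runs when a tracer name is written to "current_tracer", e.g.
 * (assuming the function tracer is configured in):
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 */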
5969 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5972 #ifdef CONFIG_TRACER_MAX_TRACE
5977 mutex_lock(&trace_types_lock);
5979 if (!ring_buffer_expanded) {
5980 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5981 RING_BUFFER_ALL_CPUS);
5987 for (t = trace_types; t; t = t->next) {
5988 if (strcmp(t->name, buf) == 0)
5995 if (t == tr->current_trace)
5998 #ifdef CONFIG_TRACER_SNAPSHOT
5999 if (t->use_max_tr) {
6000 arch_spin_lock(&tr->max_lock);
6001 if (tr->cond_snapshot)
6003 arch_spin_unlock(&tr->max_lock);
6008 /* Some tracers won't work on kernel command line */
6009 if (system_state < SYSTEM_RUNNING && t->noboot) {
6010 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6015 /* Some tracers are only allowed for the top level buffer */
6016 if (!trace_ok_for_array(t, tr)) {
6021 /* If trace pipe files are being read, we can't change the tracer */
6022 if (tr->trace_ref) {
6027 trace_branch_disable();
6029 tr->current_trace->enabled--;
6031 if (tr->current_trace->reset)
6032 tr->current_trace->reset(tr);
6034 /* Current trace needs to be nop_trace before synchronize_rcu */
6035 tr->current_trace = &nop_trace;
6037 #ifdef CONFIG_TRACER_MAX_TRACE
6038 had_max_tr = tr->allocated_snapshot;
6040 if (had_max_tr && !t->use_max_tr) {
6042 * We need to make sure that the update_max_tr sees that
6043 * current_trace changed to nop_trace to keep it from
6044 * swapping the buffers after we resize it.
6045 * The update_max_tr is called with interrupts disabled,
6046 * so a synchronize_rcu() is sufficient.
6053 #ifdef CONFIG_TRACER_MAX_TRACE
6054 if (t->use_max_tr && !had_max_tr) {
6055 ret = tracing_alloc_snapshot_instance(tr);
6062 ret = tracer_init(t, tr);
6067 tr->current_trace = t;
6068 tr->current_trace->enabled++;
6069 trace_branch_enable(tr);
6071 mutex_unlock(&trace_types_lock);
6077 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6078 size_t cnt, loff_t *ppos)
6080 struct trace_array *tr = filp->private_data;
6081 char buf[MAX_TRACER_SIZE+1];
6088 if (cnt > MAX_TRACER_SIZE)
6089 cnt = MAX_TRACER_SIZE;
6091 if (copy_from_user(buf, ubuf, cnt))
6096 /* strip ending whitespace. */
6097 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6100 err = tracing_set_tracer(tr, buf);
6110 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6111 size_t cnt, loff_t *ppos)
6116 r = snprintf(buf, sizeof(buf), "%ld\n",
6117 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6118 if (r > sizeof(buf))
6120 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6124 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6125 size_t cnt, loff_t *ppos)
6130 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6140 tracing_thresh_read(struct file *filp, char __user *ubuf,
6141 size_t cnt, loff_t *ppos)
6143 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6147 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6148 size_t cnt, loff_t *ppos)
6150 struct trace_array *tr = filp->private_data;
6153 mutex_lock(&trace_types_lock);
6154 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6158 if (tr->current_trace->update_thresh) {
6159 ret = tr->current_trace->update_thresh(tr);
6166 mutex_unlock(&trace_types_lock);
6171 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6174 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6175 size_t cnt, loff_t *ppos)
6177 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6181 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6182 size_t cnt, loff_t *ppos)
6184 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6189 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6191 struct trace_array *tr = inode->i_private;
6192 struct trace_iterator *iter;
6195 ret = tracing_check_open_get_tr(tr);
6199 mutex_lock(&trace_types_lock);
6201 /* create a buffer to store the information to pass to userspace */
6202 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6205 __trace_array_put(tr);
6209 trace_seq_init(&iter->seq);
6210 iter->trace = tr->current_trace;
6212 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6217 /* trace pipe does not show start of buffer */
6218 cpumask_setall(iter->started);
6220 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6221 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6223 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6224 if (trace_clocks[tr->clock_id].in_ns)
6225 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6228 iter->array_buffer = &tr->array_buffer;
6229 iter->cpu_file = tracing_get_cpu(inode);
6230 mutex_init(&iter->mutex);
6231 filp->private_data = iter;
6233 if (iter->trace->pipe_open)
6234 iter->trace->pipe_open(iter);
6236 nonseekable_open(inode, filp);
6240 mutex_unlock(&trace_types_lock);
6245 __trace_array_put(tr);
6246 mutex_unlock(&trace_types_lock);
6250 static int tracing_release_pipe(struct inode *inode, struct file *file)
6252 struct trace_iterator *iter = file->private_data;
6253 struct trace_array *tr = inode->i_private;
6255 mutex_lock(&trace_types_lock);
6259 if (iter->trace->pipe_close)
6260 iter->trace->pipe_close(iter);
6262 mutex_unlock(&trace_types_lock);
6264 free_cpumask_var(iter->started);
6265 mutex_destroy(&iter->mutex);
6268 trace_array_put(tr);
6274 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6276 struct trace_array *tr = iter->tr;
6278 /* Iterators are static, they should be filled or empty */
6279 if (trace_buffer_iter(iter, iter->cpu_file))
6280 return EPOLLIN | EPOLLRDNORM;
6282 if (tr->trace_flags & TRACE_ITER_BLOCK)
6284 * Always select as readable when in blocking mode
6286 return EPOLLIN | EPOLLRDNORM;
6288 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6293 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6295 struct trace_iterator *iter = filp->private_data;
6297 return trace_poll(iter, filp, poll_table);
6300 /* Must be called with iter->mutex held. */
6301 static int tracing_wait_pipe(struct file *filp)
6303 struct trace_iterator *iter = filp->private_data;
6306 while (trace_empty(iter)) {
6308 if ((filp->f_flags & O_NONBLOCK)) {
6313 * We block until we read something and tracing is disabled.
6314 * We still block if tracing is disabled, but we have never
6315 * read anything. This allows a user to cat this file, and
6316 * then enable tracing. But after we have read something,
6317 * we give an EOF when tracing is again disabled.
6319 * iter->pos will be 0 if we haven't read anything.
6321 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6324 mutex_unlock(&iter->mutex);
6326 ret = wait_on_pipe(iter, 0);
6328 mutex_lock(&iter->mutex);
6341 tracing_read_pipe(struct file *filp, char __user *ubuf,
6342 size_t cnt, loff_t *ppos)
6344 struct trace_iterator *iter = filp->private_data;
6348 * Avoid more than one consumer on a single file descriptor
6349 * This is just a matter of trace coherency; the ring buffer itself is protected.
6352 mutex_lock(&iter->mutex);
6354 /* return any leftover data */
6355 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6359 trace_seq_init(&iter->seq);
6361 if (iter->trace->read) {
6362 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6368 sret = tracing_wait_pipe(filp);
6372 /* stop when tracing is finished */
6373 if (trace_empty(iter)) {
6378 if (cnt >= PAGE_SIZE)
6379 cnt = PAGE_SIZE - 1;
6381 /* reset all but tr, trace, and overruns */
6382 memset(&iter->seq, 0,
6383 sizeof(struct trace_iterator) -
6384 offsetof(struct trace_iterator, seq));
6385 cpumask_clear(iter->started);
6386 trace_seq_init(&iter->seq);
6389 trace_event_read_lock();
6390 trace_access_lock(iter->cpu_file);
6391 while (trace_find_next_entry_inc(iter) != NULL) {
6392 enum print_line_t ret;
6393 int save_len = iter->seq.seq.len;
6395 ret = print_trace_line(iter);
6396 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6397 /* don't print partial lines */
6398 iter->seq.seq.len = save_len;
6401 if (ret != TRACE_TYPE_NO_CONSUME)
6402 trace_consume(iter);
6404 if (trace_seq_used(&iter->seq) >= cnt)
6408 * Setting the full flag means we reached the trace_seq buffer
6409 * size and we should leave by partial output condition above.
6410 * One of the trace_seq_* functions is not used properly.
6412 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6415 trace_access_unlock(iter->cpu_file);
6416 trace_event_read_unlock();
6418 /* Now copy what we have to the user */
6419 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6420 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6421 trace_seq_init(&iter->seq);
6424 * If there was nothing to send to user, in spite of consuming trace
6425 * entries, go back to wait for more entries.
6431 mutex_unlock(&iter->mutex);
6436 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6439 __free_page(spd->pages[idx]);
6443 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6449 /* Seq buffer is page-sized, exactly what we need. */
6451 save_len = iter->seq.seq.len;
6452 ret = print_trace_line(iter);
6454 if (trace_seq_has_overflowed(&iter->seq)) {
6455 iter->seq.seq.len = save_len;
6460 * This should not be hit, because it should only
6461 * be set if the iter->seq overflowed. But check it
6462 * anyway to be safe.
6464 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6465 iter->seq.seq.len = save_len;
6469 count = trace_seq_used(&iter->seq) - save_len;
6472 iter->seq.seq.len = save_len;
6476 if (ret != TRACE_TYPE_NO_CONSUME)
6477 trace_consume(iter);
6479 if (!trace_find_next_entry_inc(iter)) {
6489 static ssize_t tracing_splice_read_pipe(struct file *filp,
6491 struct pipe_inode_info *pipe,
6495 struct page *pages_def[PIPE_DEF_BUFFERS];
6496 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6497 struct trace_iterator *iter = filp->private_data;
6498 struct splice_pipe_desc spd = {
6500 .partial = partial_def,
6501 .nr_pages = 0, /* This gets updated below. */
6502 .nr_pages_max = PIPE_DEF_BUFFERS,
6503 .ops = &default_pipe_buf_ops,
6504 .spd_release = tracing_spd_release_pipe,
6510 if (splice_grow_spd(pipe, &spd))
6513 mutex_lock(&iter->mutex);
6515 if (iter->trace->splice_read) {
6516 ret = iter->trace->splice_read(iter, filp,
6517 ppos, pipe, len, flags);
6522 ret = tracing_wait_pipe(filp);
6526 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6531 trace_event_read_lock();
6532 trace_access_lock(iter->cpu_file);
6534 /* Fill as many pages as possible. */
6535 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6536 spd.pages[i] = alloc_page(GFP_KERNEL);
6540 rem = tracing_fill_pipe_page(rem, iter);
6542 /* Copy the data into the page, so we can start over. */
6543 ret = trace_seq_to_buffer(&iter->seq,
6544 page_address(spd.pages[i]),
6545 trace_seq_used(&iter->seq));
6547 __free_page(spd.pages[i]);
6550 spd.partial[i].offset = 0;
6551 spd.partial[i].len = trace_seq_used(&iter->seq);
6553 trace_seq_init(&iter->seq);
6556 trace_access_unlock(iter->cpu_file);
6557 trace_event_read_unlock();
6558 mutex_unlock(&iter->mutex);
6563 ret = splice_to_pipe(pipe, &spd);
6567 splice_shrink_spd(&spd);
6571 mutex_unlock(&iter->mutex);
6576 tracing_entries_read(struct file *filp, char __user *ubuf,
6577 size_t cnt, loff_t *ppos)
6579 struct inode *inode = file_inode(filp);
6580 struct trace_array *tr = inode->i_private;
6581 int cpu = tracing_get_cpu(inode);
6586 mutex_lock(&trace_types_lock);
6588 if (cpu == RING_BUFFER_ALL_CPUS) {
6589 int cpu, buf_size_same;
6594 /* check if all cpu sizes are same */
6595 for_each_tracing_cpu(cpu) {
6596 /* fill in the size from first enabled cpu */
6598 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6599 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6605 if (buf_size_same) {
6606 if (!ring_buffer_expanded)
6607 r = sprintf(buf, "%lu (expanded: %lu)\n",
6609 trace_buf_size >> 10);
6611 r = sprintf(buf, "%lu\n", size >> 10);
6613 r = sprintf(buf, "X\n");
6615 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6617 mutex_unlock(&trace_types_lock);
6619 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6624 tracing_entries_write(struct file *filp, const char __user *ubuf,
6625 size_t cnt, loff_t *ppos)
6627 struct inode *inode = file_inode(filp);
6628 struct trace_array *tr = inode->i_private;
6632 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6636 /* must have at least 1 entry */
6640 /* value is in KB */
6642 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6652 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6653 size_t cnt, loff_t *ppos)
6655 struct trace_array *tr = filp->private_data;
6658 unsigned long size = 0, expanded_size = 0;
6660 mutex_lock(&trace_types_lock);
6661 for_each_tracing_cpu(cpu) {
6662 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6663 if (!ring_buffer_expanded)
6664 expanded_size += trace_buf_size >> 10;
6666 if (ring_buffer_expanded)
6667 r = sprintf(buf, "%lu\n", size);
6669 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6670 mutex_unlock(&trace_types_lock);
6672 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6676 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6677 size_t cnt, loff_t *ppos)
6680 * There is no need to read what the user has written, this function
6681 * is just to make sure that there is no error when "echo" is used
6690 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6692 struct trace_array *tr = inode->i_private;
6694 /* disable tracing ? */
6695 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6696 tracer_tracing_off(tr);
6697 /* resize the ring buffer to 0 */
6698 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6700 trace_array_put(tr);
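/*
 * Writes to the trace_marker file land in the ring buffer as TRACE_PRINT
 * events, so user space can annotate a trace, e.g. (illustrative path):
 *
 *	# echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 *
 * Any trigger attached to the trace_marker event is run as well.
 */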
6706 tracing_mark_write(struct file *filp, const char __user *ubuf,
6707 size_t cnt, loff_t *fpos)
6709 struct trace_array *tr = filp->private_data;
6710 struct ring_buffer_event *event;
6711 enum event_trigger_type tt = ETT_NONE;
6712 struct trace_buffer *buffer;
6713 struct print_entry *entry;
6718 /* Used in tracing_mark_raw_write() as well */
6719 #define FAULTED_STR "<faulted>"
6720 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6722 if (tracing_disabled)
6725 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6728 if (cnt > TRACE_BUF_SIZE)
6729 cnt = TRACE_BUF_SIZE;
6731 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6733 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6735 /* If less than "<faulted>", then make sure we can still add that */
6736 if (cnt < FAULTED_SIZE)
6737 size += FAULTED_SIZE - cnt;
6739 buffer = tr->array_buffer.buffer;
6740 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6742 if (unlikely(!event))
6743 /* Ring buffer disabled, return as if not open for write */
6746 entry = ring_buffer_event_data(event);
6747 entry->ip = _THIS_IP_;
6749 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6751 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6757 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6758 /* do not add \n before testing triggers, but add \0 */
6759 entry->buf[cnt] = '\0';
6760 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6763 if (entry->buf[cnt - 1] != '\n') {
6764 entry->buf[cnt] = '\n';
6765 entry->buf[cnt + 1] = '\0';
6767 entry->buf[cnt] = '\0';
6769 if (static_branch_unlikely(&trace_marker_exports_enabled))
6770 ftrace_exports(event, TRACE_EXPORT_MARKER);
6771 __buffer_unlock_commit(buffer, event);
6774 event_triggers_post_call(tr->trace_marker_file, tt);
6782 /* Limit it for now to 3K (including tag) */
6783 #define RAW_DATA_MAX_SIZE (1024*3)
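/*
 * The raw marker variant: the payload must begin with a 4-byte tag id
 * followed by opaque binary data (the whole write is capped at
 * RAW_DATA_MAX_SIZE), and is recorded as a TRACE_RAW_DATA event.
 */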
6786 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6787 size_t cnt, loff_t *fpos)
6789 struct trace_array *tr = filp->private_data;
6790 struct ring_buffer_event *event;
6791 struct trace_buffer *buffer;
6792 struct raw_data_entry *entry;
6797 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6799 if (tracing_disabled)
6802 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6805 /* The marker must at least have a tag id */
6806 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6809 if (cnt > TRACE_BUF_SIZE)
6810 cnt = TRACE_BUF_SIZE;
6812 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6814 size = sizeof(*entry) + cnt;
6815 if (cnt < FAULT_SIZE_ID)
6816 size += FAULT_SIZE_ID - cnt;
6818 buffer = tr->array_buffer.buffer;
6819 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6822 /* Ring buffer disabled, return as if not open for write */
6825 entry = ring_buffer_event_data(event);
6827 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6830 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6835 __buffer_unlock_commit(buffer, event);
6843 static int tracing_clock_show(struct seq_file *m, void *v)
6845 struct trace_array *tr = m->private;
6848 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6850 "%s%s%s%s", i ? " " : "",
6851 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6852 i == tr->clock_id ? "]" : "");
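/*
 * tracing_set_clock - switch @tr to the trace clock named @clockstr
 * (one of the entries in trace_clocks[], e.g. "local" or "global").
 * The buffers are reset afterwards because timestamps taken with
 * different clocks cannot be compared. Illustrative use:
 *
 *	# echo global > /sys/kernel/tracing/trace_clock
 */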
6858 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6862 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6863 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6866 if (i == ARRAY_SIZE(trace_clocks))
6869 mutex_lock(&trace_types_lock);
6873 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6876 * New clock may not be consistent with the previous clock.
6877 * Reset the buffer so that it doesn't have incomparable timestamps.
6879 tracing_reset_online_cpus(&tr->array_buffer);
6881 #ifdef CONFIG_TRACER_MAX_TRACE
6882 if (tr->max_buffer.buffer)
6883 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6884 tracing_reset_online_cpus(&tr->max_buffer);
6887 mutex_unlock(&trace_types_lock);
6892 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6893 size_t cnt, loff_t *fpos)
6895 struct seq_file *m = filp->private_data;
6896 struct trace_array *tr = m->private;
6898 const char *clockstr;
6901 if (cnt >= sizeof(buf))
6904 if (copy_from_user(buf, ubuf, cnt))
6909 clockstr = strstrip(buf);
6911 ret = tracing_set_clock(tr, clockstr);
6920 static int tracing_clock_open(struct inode *inode, struct file *file)
6922 struct trace_array *tr = inode->i_private;
6925 ret = tracing_check_open_get_tr(tr);
6929 ret = single_open(file, tracing_clock_show, inode->i_private);
6931 trace_array_put(tr);
6936 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6938 struct trace_array *tr = m->private;
6940 mutex_lock(&trace_types_lock);
6942 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6943 seq_puts(m, "delta [absolute]\n");
6945 seq_puts(m, "[delta] absolute\n");
6947 mutex_unlock(&trace_types_lock);
6952 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6954 struct trace_array *tr = inode->i_private;
6957 ret = tracing_check_open_get_tr(tr);
6961 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6963 trace_array_put(tr);
6968 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6972 mutex_lock(&trace_types_lock);
6974 if (abs && tr->time_stamp_abs_ref++)
6978 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6983 if (--tr->time_stamp_abs_ref)
6987 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6989 #ifdef CONFIG_TRACER_MAX_TRACE
6990 if (tr->max_buffer.buffer)
6991 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6994 mutex_unlock(&trace_types_lock);
6999 struct ftrace_buffer_info {
7000 struct trace_iterator iter;
7002 unsigned int spare_cpu;
7006 #ifdef CONFIG_TRACER_SNAPSHOT
7007 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7009 struct trace_array *tr = inode->i_private;
7010 struct trace_iterator *iter;
7014 ret = tracing_check_open_get_tr(tr);
7018 if (file->f_mode & FMODE_READ) {
7019 iter = __tracing_open(inode, file, true);
7021 ret = PTR_ERR(iter);
7023 /* Writes still need the seq_file to hold the private data */
7025 m = kzalloc(sizeof(*m), GFP_KERNEL);
7028 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7036 iter->array_buffer = &tr->max_buffer;
7037 iter->cpu_file = tracing_get_cpu(inode);
7039 file->private_data = m;
7043 trace_array_put(tr);
7049 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7052 struct seq_file *m = filp->private_data;
7053 struct trace_iterator *iter = m->private;
7054 struct trace_array *tr = iter->tr;
7058 ret = tracing_update_buffers();
7062 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7066 mutex_lock(&trace_types_lock);
7068 if (tr->current_trace->use_max_tr) {
7073 arch_spin_lock(&tr->max_lock);
7074 if (tr->cond_snapshot)
7076 arch_spin_unlock(&tr->max_lock);
7082 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7086 if (tr->allocated_snapshot)
7090 /* Only allow per-cpu swap if the ring buffer supports it */
7091 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7092 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7097 if (tr->allocated_snapshot)
7098 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7099 &tr->array_buffer, iter->cpu_file);
7101 ret = tracing_alloc_snapshot_instance(tr);
7104 local_irq_disable();
7105 /* Now, we're going to swap */
7106 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7107 update_max_tr(tr, current, smp_processor_id(), NULL);
7109 update_max_tr_single(tr, current, iter->cpu_file);
7113 if (tr->allocated_snapshot) {
7114 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7115 tracing_reset_online_cpus(&tr->max_buffer);
7117 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7127 mutex_unlock(&trace_types_lock);
7131 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7133 struct seq_file *m = file->private_data;
7136 ret = tracing_release(inode, file);
7138 if (file->f_mode & FMODE_READ)
7141 /* If write only, the seq_file is just a stub */
7149 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7150 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7151 size_t count, loff_t *ppos);
7152 static int tracing_buffers_release(struct inode *inode, struct file *file);
7153 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7154 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7156 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7158 struct ftrace_buffer_info *info;
7161 /* The following checks for tracefs lockdown */
7162 ret = tracing_buffers_open(inode, filp);
7166 info = filp->private_data;
7168 if (info->iter.trace->use_max_tr) {
7169 tracing_buffers_release(inode, filp);
7173 info->iter.snapshot = true;
7174 info->iter.array_buffer = &info->iter.tr->max_buffer;
7179 #endif /* CONFIG_TRACER_SNAPSHOT */
7182 static const struct file_operations tracing_thresh_fops = {
7183 .open = tracing_open_generic,
7184 .read = tracing_thresh_read,
7185 .write = tracing_thresh_write,
7186 .llseek = generic_file_llseek,
7189 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7190 static const struct file_operations tracing_max_lat_fops = {
7191 .open = tracing_open_generic,
7192 .read = tracing_max_lat_read,
7193 .write = tracing_max_lat_write,
7194 .llseek = generic_file_llseek,
7198 static const struct file_operations set_tracer_fops = {
7199 .open = tracing_open_generic,
7200 .read = tracing_set_trace_read,
7201 .write = tracing_set_trace_write,
7202 .llseek = generic_file_llseek,
7205 static const struct file_operations tracing_pipe_fops = {
7206 .open = tracing_open_pipe,
7207 .poll = tracing_poll_pipe,
7208 .read = tracing_read_pipe,
7209 .splice_read = tracing_splice_read_pipe,
7210 .release = tracing_release_pipe,
7211 .llseek = no_llseek,
7214 static const struct file_operations tracing_entries_fops = {
7215 .open = tracing_open_generic_tr,
7216 .read = tracing_entries_read,
7217 .write = tracing_entries_write,
7218 .llseek = generic_file_llseek,
7219 .release = tracing_release_generic_tr,
7222 static const struct file_operations tracing_total_entries_fops = {
7223 .open = tracing_open_generic_tr,
7224 .read = tracing_total_entries_read,
7225 .llseek = generic_file_llseek,
7226 .release = tracing_release_generic_tr,
7229 static const struct file_operations tracing_free_buffer_fops = {
7230 .open = tracing_open_generic_tr,
7231 .write = tracing_free_buffer_write,
7232 .release = tracing_free_buffer_release,
7235 static const struct file_operations tracing_mark_fops = {
7236 .open = tracing_open_generic_tr,
7237 .write = tracing_mark_write,
7238 .llseek = generic_file_llseek,
7239 .release = tracing_release_generic_tr,
7242 static const struct file_operations tracing_mark_raw_fops = {
7243 .open = tracing_open_generic_tr,
7244 .write = tracing_mark_raw_write,
7245 .llseek = generic_file_llseek,
7246 .release = tracing_release_generic_tr,
7249 static const struct file_operations trace_clock_fops = {
7250 .open = tracing_clock_open,
7252 .llseek = seq_lseek,
7253 .release = tracing_single_release_tr,
7254 .write = tracing_clock_write,
7257 static const struct file_operations trace_time_stamp_mode_fops = {
7258 .open = tracing_time_stamp_mode_open,
7260 .llseek = seq_lseek,
7261 .release = tracing_single_release_tr,
7264 #ifdef CONFIG_TRACER_SNAPSHOT
7265 static const struct file_operations snapshot_fops = {
7266 .open = tracing_snapshot_open,
7268 .write = tracing_snapshot_write,
7269 .llseek = tracing_lseek,
7270 .release = tracing_snapshot_release,
7273 static const struct file_operations snapshot_raw_fops = {
7274 .open = snapshot_raw_open,
7275 .read = tracing_buffers_read,
7276 .release = tracing_buffers_release,
7277 .splice_read = tracing_buffers_splice_read,
7278 .llseek = no_llseek,
7281 #endif /* CONFIG_TRACER_SNAPSHOT */
7283 #define TRACING_LOG_ERRS_MAX 8
7284 #define TRACING_LOG_LOC_MAX 128
7286 #define CMD_PREFIX " Command: "
7289 const char **errs; /* ptr to loc-specific array of err strings */
7290 u8 type; /* index into errs -> specific err string */
7291 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7295 struct tracing_log_err {
7296 struct list_head list;
7297 struct err_info info;
7298 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7299 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7302 static DEFINE_MUTEX(tracing_err_log_lock);
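/*
 * Get a free error-log slot: allocate a new one until TRACING_LOG_ERRS_MAX
 * entries exist, then recycle the oldest entry on tr->err_log.
 */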
7304 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7306 struct tracing_log_err *err;
7308 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7309 err = kzalloc(sizeof(*err), GFP_KERNEL);
7311 err = ERR_PTR(-ENOMEM);
7312 tr->n_err_log_entries++;
7317 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7318 list_del(&err->list);
7324 * err_pos - find the position of a string within a command for error careting
7325 * @cmd: The tracing command that caused the error
7326 * @str: The string to position the caret at within @cmd
7328 * Finds the position of the first occurrence of @str within @cmd. The
7329 * return value can be passed to tracing_log_err() for caret placement
7332  * Returns the index within @cmd of the first occurrence of @str or 0
7333 * if @str was not found.
7335 unsigned int err_pos(char *cmd, const char *str)
7339 if (WARN_ON(!strlen(cmd)))
7342 found = strstr(cmd, str);
7350 * tracing_log_err - write an error to the tracing error log
7351 * @tr: The associated trace array for the error (NULL for top level array)
7352 * @loc: A string describing where the error occurred
7353 * @cmd: The tracing command that caused the error
7354 * @errs: The array of loc-specific static error strings
7355 * @type: The index into errs[], which produces the specific static err string
7356 * @pos: The position the caret should be placed in the cmd
7358 * Writes an error into tracing/error_log of the form:
7360 * <loc>: error: <text>
7364 * tracing/error_log is a small log file containing the last
7365 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7366 * unless there has been a tracing error, and the error log can be
7367 * cleared and have its memory freed by writing the empty string in
7368  * truncation mode to it, i.e. echo > tracing/error_log.
7370 * NOTE: the @errs array along with the @type param are used to
7371 * produce a static error string - this string is not copied and saved
7372 * when the error is logged - only a pointer to it is saved. See
7373 * existing callers for examples of how static strings are typically
7374 * defined for use with tracing_log_err().
7376 void tracing_log_err(struct trace_array *tr,
7377 const char *loc, const char *cmd,
7378 const char **errs, u8 type, u8 pos)
7380 struct tracing_log_err *err;
7385 mutex_lock(&tracing_err_log_lock);
7386 err = get_tracing_log_err(tr);
7387 if (PTR_ERR(err) == -ENOMEM) {
7388 mutex_unlock(&tracing_err_log_lock);
7392 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7393 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7395 err->info.errs = errs;
7396 err->info.type = type;
7397 err->info.pos = pos;
7398 err->info.ts = local_clock();
7400 list_add_tail(&err->list, &tr->err_log);
7401 mutex_unlock(&tracing_err_log_lock);
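/*
 * Illustrative sketch (not part of the original file, hence guarded out):
 * a hypothetical caller of tracing_log_err().  The error-string array, the
 * ERR_* index named in the comment and the command text are made up for
 * illustration; real callers (e.g. the hist trigger and kprobe event
 * parsers) follow the same pattern with their own static string tables.
 */
#if 0
static const char *example_errs[] = {
	"Field not found",	/* index 0, hypothetical ERR_FIELD_NOT_FOUND */
	"Too many arguments",	/* index 1 */
};

static void example_report_parse_error(struct trace_array *tr, char *cmd,
				       const char *bad_token)
{
	/* Place the caret under the first occurrence of @bad_token in @cmd */
	tracing_log_err(tr, "example: parser", cmd, example_errs,
			0 /* "Field not found" */, err_pos(cmd, bad_token));
}
#endif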
7404 static void clear_tracing_err_log(struct trace_array *tr)
7406 struct tracing_log_err *err, *next;
7408 mutex_lock(&tracing_err_log_lock);
7409 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7410 list_del(&err->list);
7414 tr->n_err_log_entries = 0;
7415 mutex_unlock(&tracing_err_log_lock);
7418 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7420 struct trace_array *tr = m->private;
7422 mutex_lock(&tracing_err_log_lock);
7424 return seq_list_start(&tr->err_log, *pos);
7427 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7429 struct trace_array *tr = m->private;
7431 return seq_list_next(v, &tr->err_log, pos);
7434 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7436 mutex_unlock(&tracing_err_log_lock);
7439 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7443 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7445 for (i = 0; i < pos; i++)
7450 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7452 struct tracing_log_err *err = v;
7455 const char *err_text = err->info.errs[err->info.type];
7456 u64 sec = err->info.ts;
7459 nsec = do_div(sec, NSEC_PER_SEC);
7460 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7461 err->loc, err_text);
7462 seq_printf(m, "%s", err->cmd);
7463 tracing_err_log_show_pos(m, err->info.pos);
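/*
 * For illustration only (hypothetical location, message and command): an
 * entry rendered by the seq_show callback above looks roughly like
 *
 *   [  123.456789] example: parser: error: Field not found
 *     Command: hist:keys=bogus_field
 *                        ^
 *
 * The timestamp comes from err->info.ts, the "<loc>: error: " prefix from
 * err->loc, the message from errs[type], and the caret line is emitted by
 * tracing_err_log_show_pos() at offset err->info.pos into the command.
 */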
7469 static const struct seq_operations tracing_err_log_seq_ops = {
7470 .start = tracing_err_log_seq_start,
7471 .next = tracing_err_log_seq_next,
7472 .stop = tracing_err_log_seq_stop,
7473 .show = tracing_err_log_seq_show
7476 static int tracing_err_log_open(struct inode *inode, struct file *file)
7478 struct trace_array *tr = inode->i_private;
7481 ret = tracing_check_open_get_tr(tr);
7485 /* If this file was opened for write, then erase contents */
7486 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7487 clear_tracing_err_log(tr);
7489 if (file->f_mode & FMODE_READ) {
7490 ret = seq_open(file, &tracing_err_log_seq_ops);
7492 struct seq_file *m = file->private_data;
7495 trace_array_put(tr);
7501 static ssize_t tracing_err_log_write(struct file *file,
7502 const char __user *buffer,
7503 size_t count, loff_t *ppos)
7508 static int tracing_err_log_release(struct inode *inode, struct file *file)
7510 struct trace_array *tr = inode->i_private;
7512 trace_array_put(tr);
7514 if (file->f_mode & FMODE_READ)
7515 seq_release(inode, file);
7520 static const struct file_operations tracing_err_log_fops = {
7521 .open = tracing_err_log_open,
7522 .write = tracing_err_log_write,
7524 .llseek = seq_lseek,
7525 .release = tracing_err_log_release,
7528 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7530 struct trace_array *tr = inode->i_private;
7531 struct ftrace_buffer_info *info;
7534 ret = tracing_check_open_get_tr(tr);
7538 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7540 trace_array_put(tr);
7544 mutex_lock(&trace_types_lock);
7547 info->iter.cpu_file = tracing_get_cpu(inode);
7548 info->iter.trace = tr->current_trace;
7549 info->iter.array_buffer = &tr->array_buffer;
7551 /* Force reading ring buffer for first read */
7552 info->read = (unsigned int)-1;
7554 filp->private_data = info;
7558 mutex_unlock(&trace_types_lock);
7560 ret = nonseekable_open(inode, filp);
7562 trace_array_put(tr);
7568 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7570 struct ftrace_buffer_info *info = filp->private_data;
7571 struct trace_iterator *iter = &info->iter;
7573 return trace_poll(iter, filp, poll_table);
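/*
 * Note on the read path below: info->spare is a single page borrowed from
 * the ring buffer and info->read is the offset within it that has already
 * been copied to user space.  info->read is set to (unsigned int)-1 in
 * tracing_buffers_open() so the very first read() pulls a page; a page
 * that is only partially consumed keeps being handed out on subsequent
 * reads until info->read reaches PAGE_SIZE, at which point a fresh page is
 * read from the ring buffer.
 */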
7577 tracing_buffers_read(struct file *filp, char __user *ubuf,
7578 size_t count, loff_t *ppos)
7580 struct ftrace_buffer_info *info = filp->private_data;
7581 struct trace_iterator *iter = &info->iter;
7588 #ifdef CONFIG_TRACER_MAX_TRACE
7589 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7594 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7596 if (IS_ERR(info->spare)) {
7597 ret = PTR_ERR(info->spare);
7600 info->spare_cpu = iter->cpu_file;
7606 /* Do we have previous read data to read? */
7607 if (info->read < PAGE_SIZE)
7611 trace_access_lock(iter->cpu_file);
7612 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7616 trace_access_unlock(iter->cpu_file);
7619 if (trace_empty(iter)) {
7620 if ((filp->f_flags & O_NONBLOCK))
7623 ret = wait_on_pipe(iter, 0);
7634 size = PAGE_SIZE - info->read;
7638 ret = copy_to_user(ubuf, info->spare + info->read, size);
7650 static int tracing_buffers_release(struct inode *inode, struct file *file)
7652 struct ftrace_buffer_info *info = file->private_data;
7653 struct trace_iterator *iter = &info->iter;
7655 mutex_lock(&trace_types_lock);
7657 iter->tr->trace_ref--;
7659 __trace_array_put(iter->tr);
7662 ring_buffer_free_read_page(iter->array_buffer->buffer,
7663 info->spare_cpu, info->spare);
7666 mutex_unlock(&trace_types_lock);
7672 struct trace_buffer *buffer;
7675 refcount_t refcount;
7678 static void buffer_ref_release(struct buffer_ref *ref)
7680 if (!refcount_dec_and_test(&ref->refcount))
7682 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7686 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7687 struct pipe_buffer *buf)
7689 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7691 buffer_ref_release(ref);
7695 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7696 struct pipe_buffer *buf)
7698 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7700 if (refcount_read(&ref->refcount) > INT_MAX/2)
7703 refcount_inc(&ref->refcount);
7707 /* Pipe buffer operations for a buffer. */
7708 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7709 .release = buffer_pipe_buf_release,
7710 .get = buffer_pipe_buf_get,
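/*
 * Note on lifetime: each buffer_ref wraps one ring-buffer page handed to
 * the pipe by tracing_buffers_splice_read().  Every pipe_buffer pointing at
 * the page holds a reference; the page is returned to the ring buffer via
 * ring_buffer_free_read_page() only when the last reference is dropped,
 * either from buffer_pipe_buf_release() when the reader is done or from
 * buffer_spd_release() on a splice error path.
 */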
7714 * Callback from splice_to_pipe(), if we need to release some pages
7715  * at the end of the spd in case we errored out in filling the pipe.
7717 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7719 struct buffer_ref *ref =
7720 (struct buffer_ref *)spd->partial[i].private;
7722 buffer_ref_release(ref);
7723 spd->partial[i].private = 0;
7727 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7728 struct pipe_inode_info *pipe, size_t len,
7731 struct ftrace_buffer_info *info = file->private_data;
7732 struct trace_iterator *iter = &info->iter;
7733 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7734 struct page *pages_def[PIPE_DEF_BUFFERS];
7735 struct splice_pipe_desc spd = {
7737 .partial = partial_def,
7738 .nr_pages_max = PIPE_DEF_BUFFERS,
7739 .ops = &buffer_pipe_buf_ops,
7740 .spd_release = buffer_spd_release,
7742 struct buffer_ref *ref;
7746 #ifdef CONFIG_TRACER_MAX_TRACE
7747 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7751 if (*ppos & (PAGE_SIZE - 1))
7754 if (len & (PAGE_SIZE - 1)) {
7755 if (len < PAGE_SIZE)
7760 if (splice_grow_spd(pipe, &spd))
7764 trace_access_lock(iter->cpu_file);
7765 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7767 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7771 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7777 refcount_set(&ref->refcount, 1);
7778 ref->buffer = iter->array_buffer->buffer;
7779 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7780 if (IS_ERR(ref->page)) {
7781 ret = PTR_ERR(ref->page);
7786 ref->cpu = iter->cpu_file;
7788 r = ring_buffer_read_page(ref->buffer, &ref->page,
7789 len, iter->cpu_file, 1);
7791 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7797 page = virt_to_page(ref->page);
7799 spd.pages[i] = page;
7800 spd.partial[i].len = PAGE_SIZE;
7801 spd.partial[i].offset = 0;
7802 spd.partial[i].private = (unsigned long)ref;
7806 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7809 trace_access_unlock(iter->cpu_file);
7812 /* did we read anything? */
7813 if (!spd.nr_pages) {
7818 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7821 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7828 ret = splice_to_pipe(pipe, &spd);
7830 splice_shrink_spd(&spd);
7835 static const struct file_operations tracing_buffers_fops = {
7836 .open = tracing_buffers_open,
7837 .read = tracing_buffers_read,
7838 .poll = tracing_buffers_poll,
7839 .release = tracing_buffers_release,
7840 .splice_read = tracing_buffers_splice_read,
7841 .llseek = no_llseek,
7845 tracing_stats_read(struct file *filp, char __user *ubuf,
7846 size_t count, loff_t *ppos)
7848 struct inode *inode = file_inode(filp);
7849 struct trace_array *tr = inode->i_private;
7850 struct array_buffer *trace_buf = &tr->array_buffer;
7851 int cpu = tracing_get_cpu(inode);
7852 struct trace_seq *s;
7854 unsigned long long t;
7855 unsigned long usec_rem;
7857 s = kmalloc(sizeof(*s), GFP_KERNEL);
7863 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7864 trace_seq_printf(s, "entries: %ld\n", cnt);
7866 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7867 trace_seq_printf(s, "overrun: %ld\n", cnt);
7869 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7870 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7872 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7873 trace_seq_printf(s, "bytes: %ld\n", cnt);
7875 if (trace_clocks[tr->clock_id].in_ns) {
7876 /* local or global for trace_clock */
7877 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7878 usec_rem = do_div(t, USEC_PER_SEC);
7879 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7882 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7883 usec_rem = do_div(t, USEC_PER_SEC);
7884 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7886 /* counter or tsc mode for trace_clock */
7887 trace_seq_printf(s, "oldest event ts: %llu\n",
7888 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7890 trace_seq_printf(s, "now ts: %llu\n",
7891 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7894 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7895 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7897 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7898 trace_seq_printf(s, "read events: %ld\n", cnt);
7900 count = simple_read_from_buffer(ubuf, count, ppos,
7901 s->buffer, trace_seq_used(s));
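/*
 * For illustration (hypothetical numbers): reading per_cpu/cpuN/stats
 * produces something like
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 7332
 *   oldest event ts:  2296.168369
 *   now ts:  2296.180452
 *   dropped events: 0
 *   read events: 129
 *
 * with the two timestamp lines printed as raw counter values instead when
 * a non-nanosecond trace clock (e.g. "counter") is selected.
 */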
7908 static const struct file_operations tracing_stats_fops = {
7909 .open = tracing_open_generic_tr,
7910 .read = tracing_stats_read,
7911 .llseek = generic_file_llseek,
7912 .release = tracing_release_generic_tr,
7915 #ifdef CONFIG_DYNAMIC_FTRACE
7918 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7919 size_t cnt, loff_t *ppos)
7925 /* 256 should be plenty to hold the amount needed */
7926 buf = kmalloc(256, GFP_KERNEL);
7930 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7931 ftrace_update_tot_cnt,
7932 ftrace_number_of_pages,
7933 ftrace_number_of_groups);
7935 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7940 static const struct file_operations tracing_dyn_info_fops = {
7941 .open = tracing_open_generic,
7942 .read = tracing_read_dyn_info,
7943 .llseek = generic_file_llseek,
7945 #endif /* CONFIG_DYNAMIC_FTRACE */
7947 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7949 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7950 struct trace_array *tr, struct ftrace_probe_ops *ops,
7953 tracing_snapshot_instance(tr);
7957 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7958 struct trace_array *tr, struct ftrace_probe_ops *ops,
7961 struct ftrace_func_mapper *mapper = data;
7965 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7975 tracing_snapshot_instance(tr);
7979 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7980 struct ftrace_probe_ops *ops, void *data)
7982 struct ftrace_func_mapper *mapper = data;
7985 seq_printf(m, "%ps:", (void *)ip);
7987 seq_puts(m, "snapshot");
7990 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7993 seq_printf(m, ":count=%ld\n", *count);
7995 seq_puts(m, ":unlimited\n");
8001 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8002 unsigned long ip, void *init_data, void **data)
8004 struct ftrace_func_mapper *mapper = *data;
8007 mapper = allocate_ftrace_func_mapper();
8013 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8017 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8018 unsigned long ip, void *data)
8020 struct ftrace_func_mapper *mapper = data;
8025 free_ftrace_func_mapper(mapper, NULL);
8029 ftrace_func_mapper_remove_ip(mapper, ip);
8032 static struct ftrace_probe_ops snapshot_probe_ops = {
8033 .func = ftrace_snapshot,
8034 .print = ftrace_snapshot_print,
8037 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8038 .func = ftrace_count_snapshot,
8039 .print = ftrace_snapshot_print,
8040 .init = ftrace_snapshot_init,
8041 .free = ftrace_snapshot_free,
8045 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8046 char *glob, char *cmd, char *param, int enable)
8048 struct ftrace_probe_ops *ops;
8049 void *count = (void *)-1;
8056 /* hash funcs only work with set_ftrace_filter */
8060 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8063 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8068 number = strsep(&param, ":");
8070 if (!strlen(number))
8074  * We use the callback data field (which is a pointer) as our counter.
8077 ret = kstrtoul(number, 0, (unsigned long *)&count);
8082 ret = tracing_alloc_snapshot_instance(tr);
8086 ret = register_ftrace_function_probe(glob, tr, ops, count);
8089 return ret < 0 ? ret : 0;
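/*
 * Illustrative usage (command spelling assumed from the "snapshot" probe
 * registered below): the callback above runs when a user writes e.g.
 *
 *   echo 'do_sys_open:snapshot' > set_ftrace_filter
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *   echo '!do_sys_open:snapshot' > set_ftrace_filter
 *
 * i.e. take a snapshot every time (or at most :count times) the matched
 * function is hit; the '!' form removes the probe again.
 */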
8092 static struct ftrace_func_command ftrace_snapshot_cmd = {
8094 .func = ftrace_trace_snapshot_callback,
8097 static __init int register_snapshot_cmd(void)
8099 return register_ftrace_command(&ftrace_snapshot_cmd);
8102 static inline __init int register_snapshot_cmd(void) { return 0; }
8103 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8105 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8107 if (WARN_ON(!tr->dir))
8108 return ERR_PTR(-ENODEV);
8110 /* Top directory uses NULL as the parent */
8111 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8114 /* All sub buffers have a descriptor */
8118 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8120 struct dentry *d_tracer;
8123 return tr->percpu_dir;
8125 d_tracer = tracing_get_dentry(tr);
8126 if (IS_ERR(d_tracer))
8129 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8131 MEM_FAIL(!tr->percpu_dir,
8132 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8134 return tr->percpu_dir;
8137 static struct dentry *
8138 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8139 void *data, long cpu, const struct file_operations *fops)
8141 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8143 if (ret) /* See tracing_get_cpu() */
8144 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8149 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8151 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8152 struct dentry *d_cpu;
8153 char cpu_dir[30]; /* 30 characters should be more than enough */
8158 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8159 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8161 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8165 /* per cpu trace_pipe */
8166 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8167 tr, cpu, &tracing_pipe_fops);
8170 trace_create_cpu_file("trace", 0644, d_cpu,
8171 tr, cpu, &tracing_fops);
8173 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8174 tr, cpu, &tracing_buffers_fops);
8176 trace_create_cpu_file("stats", 0444, d_cpu,
8177 tr, cpu, &tracing_stats_fops);
8179 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8180 tr, cpu, &tracing_entries_fops);
8182 #ifdef CONFIG_TRACER_SNAPSHOT
8183 trace_create_cpu_file("snapshot", 0644, d_cpu,
8184 tr, cpu, &snapshot_fops);
8186 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8187 tr, cpu, &snapshot_raw_fops);
8191 #ifdef CONFIG_FTRACE_SELFTEST
8192 /* Let selftest have access to static functions in this file */
8193 #include "trace_selftest.c"
8197 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8200 struct trace_option_dentry *topt = filp->private_data;
8203 if (topt->flags->val & topt->opt->bit)
8208 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8212 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8215 struct trace_option_dentry *topt = filp->private_data;
8219 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8223 if (val != 0 && val != 1)
8226 if (!!(topt->flags->val & topt->opt->bit) != val) {
8227 mutex_lock(&trace_types_lock);
8228 ret = __set_tracer_option(topt->tr, topt->flags,
8230 mutex_unlock(&trace_types_lock);
8241 static const struct file_operations trace_options_fops = {
8242 .open = tracing_open_generic,
8243 .read = trace_options_read,
8244 .write = trace_options_write,
8245 .llseek = generic_file_llseek,
8249 * In order to pass in both the trace_array descriptor as well as the index
8250 * to the flag that the trace option file represents, the trace_array
8251 * has a character array of trace_flags_index[], which holds the index
8252 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8253 * The address of this character array is passed to the flag option file
8254 * read/write callbacks.
8256 * In order to extract both the index and the trace_array descriptor,
8257 * get_tr_index() uses the following algorithm.
8261  * As the pointer itself contains the address of the index (and index[i] == i), dereferencing it yields the index:  idx = *ptr.
8264  * Then, to get the trace_array descriptor, we subtract that index
8265  * from the ptr, which brings us to the start of the index array itself:
8267 * ptr - idx == &index[0]
8269 * Then a simple container_of() from that pointer gets us to the
8270 * trace_array descriptor.
8272 static void get_tr_index(void *data, struct trace_array **ptr,
8273 unsigned int *pindex)
8275 *pindex = *(unsigned char *)data;
8277 *ptr = container_of(data - *pindex, struct trace_array,
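/*
 * Worked example (hypothetical values): if the option file's private data
 * is &tr->trace_flags_index[3], then *pindex becomes trace_flags_index[3],
 * i.e. 3, and data - 3 == &tr->trace_flags_index[0], from which
 * container_of() recovers the enclosing trace_array.
 */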
8282 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8285 void *tr_index = filp->private_data;
8286 struct trace_array *tr;
8290 get_tr_index(tr_index, &tr, &index);
8292 if (tr->trace_flags & (1 << index))
8297 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8301 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8304 void *tr_index = filp->private_data;
8305 struct trace_array *tr;
8310 get_tr_index(tr_index, &tr, &index);
8312 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8316 if (val != 0 && val != 1)
8319 mutex_lock(&event_mutex);
8320 mutex_lock(&trace_types_lock);
8321 ret = set_tracer_flag(tr, 1 << index, val);
8322 mutex_unlock(&trace_types_lock);
8323 mutex_unlock(&event_mutex);
8333 static const struct file_operations trace_options_core_fops = {
8334 .open = tracing_open_generic,
8335 .read = trace_options_core_read,
8336 .write = trace_options_core_write,
8337 .llseek = generic_file_llseek,
8340 struct dentry *trace_create_file(const char *name,
8342 struct dentry *parent,
8344 const struct file_operations *fops)
8348 ret = tracefs_create_file(name, mode, parent, data, fops);
8350 pr_warn("Could not create tracefs '%s' entry\n", name);
8356 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8358 struct dentry *d_tracer;
8363 d_tracer = tracing_get_dentry(tr);
8364 if (IS_ERR(d_tracer))
8367 tr->options = tracefs_create_dir("options", d_tracer);
8369 pr_warn("Could not create tracefs directory 'options'\n");
8377 create_trace_option_file(struct trace_array *tr,
8378 struct trace_option_dentry *topt,
8379 struct tracer_flags *flags,
8380 struct tracer_opt *opt)
8382 struct dentry *t_options;
8384 t_options = trace_options_init_dentry(tr);
8388 topt->flags = flags;
8392 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8393 &trace_options_fops);
8398 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8400 struct trace_option_dentry *topts;
8401 struct trace_options *tr_topts;
8402 struct tracer_flags *flags;
8403 struct tracer_opt *opts;
8410 flags = tracer->flags;
8412 if (!flags || !flags->opts)
8416 * If this is an instance, only create flags for tracers
8417 * the instance may have.
8419 if (!trace_ok_for_array(tracer, tr))
8422 for (i = 0; i < tr->nr_topts; i++) {
8423 /* Make sure there are no duplicate flags. */
8424 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8430 for (cnt = 0; opts[cnt].name; cnt++)
8433 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8437 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8444 tr->topts = tr_topts;
8445 tr->topts[tr->nr_topts].tracer = tracer;
8446 tr->topts[tr->nr_topts].topts = topts;
8449 for (cnt = 0; opts[cnt].name; cnt++) {
8450 create_trace_option_file(tr, &topts[cnt], flags,
8452 MEM_FAIL(topts[cnt].entry == NULL,
8453 "Failed to create trace option: %s",
8458 static struct dentry *
8459 create_trace_option_core_file(struct trace_array *tr,
8460 const char *option, long index)
8462 struct dentry *t_options;
8464 t_options = trace_options_init_dentry(tr);
8468 return trace_create_file(option, 0644, t_options,
8469 (void *)&tr->trace_flags_index[index],
8470 &trace_options_core_fops);
8473 static void create_trace_options_dir(struct trace_array *tr)
8475 struct dentry *t_options;
8476 bool top_level = tr == &global_trace;
8479 t_options = trace_options_init_dentry(tr);
8483 for (i = 0; trace_options[i]; i++) {
8485 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8486 create_trace_option_core_file(tr, trace_options[i], i);
8491 rb_simple_read(struct file *filp, char __user *ubuf,
8492 size_t cnt, loff_t *ppos)
8494 struct trace_array *tr = filp->private_data;
8498 r = tracer_tracing_is_on(tr);
8499 r = sprintf(buf, "%d\n", r);
8501 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8505 rb_simple_write(struct file *filp, const char __user *ubuf,
8506 size_t cnt, loff_t *ppos)
8508 struct trace_array *tr = filp->private_data;
8509 struct trace_buffer *buffer = tr->array_buffer.buffer;
8513 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8518 mutex_lock(&trace_types_lock);
8519 if (!!val == tracer_tracing_is_on(tr)) {
8520 val = 0; /* do nothing */
8522 tracer_tracing_on(tr);
8523 if (tr->current_trace->start)
8524 tr->current_trace->start(tr);
8526 tracer_tracing_off(tr);
8527 if (tr->current_trace->stop)
8528 tr->current_trace->stop(tr);
8530 mutex_unlock(&trace_types_lock);
8538 static const struct file_operations rb_simple_fops = {
8539 .open = tracing_open_generic_tr,
8540 .read = rb_simple_read,
8541 .write = rb_simple_write,
8542 .release = tracing_release_generic_tr,
8543 .llseek = default_llseek,
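/*
 * Note on the buffer_percent file handled below (semantics as documented
 * for tracefs' buffer_percent): the value is used as the wakeup watermark
 * for readers that block on the ring buffer -- 0 wakes a waiter as soon as
 * any data is present, 100 only once the buffer is full, and values in
 * between once that percentage has filled; see the
 * wait_on_pipe(iter, iter->tr->buffer_percent) call in
 * tracing_buffers_splice_read().
 */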
8547 buffer_percent_read(struct file *filp, char __user *ubuf,
8548 size_t cnt, loff_t *ppos)
8550 struct trace_array *tr = filp->private_data;
8554 r = tr->buffer_percent;
8555 r = sprintf(buf, "%d\n", r);
8557 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8561 buffer_percent_write(struct file *filp, const char __user *ubuf,
8562 size_t cnt, loff_t *ppos)
8564 struct trace_array *tr = filp->private_data;
8568 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8578 tr->buffer_percent = val;
8585 static const struct file_operations buffer_percent_fops = {
8586 .open = tracing_open_generic_tr,
8587 .read = buffer_percent_read,
8588 .write = buffer_percent_write,
8589 .release = tracing_release_generic_tr,
8590 .llseek = default_llseek,
8593 static struct dentry *trace_instance_dir;
8596 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8599 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8601 enum ring_buffer_flags rb_flags;
8603 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8607 buf->buffer = ring_buffer_alloc(size, rb_flags);
8611 buf->data = alloc_percpu(struct trace_array_cpu);
8613 ring_buffer_free(buf->buffer);
8618 /* Allocate the first page for all buffers */
8619 set_buffer_entries(&tr->array_buffer,
8620 ring_buffer_size(tr->array_buffer.buffer, 0));
8625 static int allocate_trace_buffers(struct trace_array *tr, int size)
8629 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8633 #ifdef CONFIG_TRACER_MAX_TRACE
8634 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8635 allocate_snapshot ? size : 1);
8636 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8637 ring_buffer_free(tr->array_buffer.buffer);
8638 tr->array_buffer.buffer = NULL;
8639 free_percpu(tr->array_buffer.data);
8640 tr->array_buffer.data = NULL;
8643 tr->allocated_snapshot = allocate_snapshot;
8646 * Only the top level trace array gets its snapshot allocated
8647 * from the kernel command line.
8649 allocate_snapshot = false;
8655 static void free_trace_buffer(struct array_buffer *buf)
8658 ring_buffer_free(buf->buffer);
8660 free_percpu(buf->data);
8665 static void free_trace_buffers(struct trace_array *tr)
8670 free_trace_buffer(&tr->array_buffer);
8672 #ifdef CONFIG_TRACER_MAX_TRACE
8673 free_trace_buffer(&tr->max_buffer);
8677 static void init_trace_flags_index(struct trace_array *tr)
8681 /* Used by the trace options files */
8682 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8683 tr->trace_flags_index[i] = i;
8686 static void __update_tracer_options(struct trace_array *tr)
8690 for (t = trace_types; t; t = t->next)
8691 add_tracer_options(tr, t);
8694 static void update_tracer_options(struct trace_array *tr)
8696 mutex_lock(&trace_types_lock);
8697 __update_tracer_options(tr);
8698 mutex_unlock(&trace_types_lock);
8701 /* Must have trace_types_lock held */
8702 struct trace_array *trace_array_find(const char *instance)
8704 struct trace_array *tr, *found = NULL;
8706 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8707 if (tr->name && strcmp(tr->name, instance) == 0) {
8716 struct trace_array *trace_array_find_get(const char *instance)
8718 struct trace_array *tr;
8720 mutex_lock(&trace_types_lock);
8721 tr = trace_array_find(instance);
8724 mutex_unlock(&trace_types_lock);
8729 static int trace_array_create_dir(struct trace_array *tr)
8733 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8737 ret = event_trace_add_tracer(tr->dir, tr);
8739 tracefs_remove(tr->dir);
8741 init_tracer_tracefs(tr, tr->dir);
8742 __update_tracer_options(tr);
8747 static struct trace_array *trace_array_create(const char *name)
8749 struct trace_array *tr;
8753 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8755 return ERR_PTR(ret);
8757 tr->name = kstrdup(name, GFP_KERNEL);
8761 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8764 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8766 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8768 raw_spin_lock_init(&tr->start_lock);
8770 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8772 tr->current_trace = &nop_trace;
8774 INIT_LIST_HEAD(&tr->systems);
8775 INIT_LIST_HEAD(&tr->events);
8776 INIT_LIST_HEAD(&tr->hist_vars);
8777 INIT_LIST_HEAD(&tr->err_log);
8779 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8782 if (ftrace_allocate_ftrace_ops(tr) < 0)
8785 ftrace_init_trace_array(tr);
8787 init_trace_flags_index(tr);
8789 if (trace_instance_dir) {
8790 ret = trace_array_create_dir(tr);
8794 __trace_early_add_events(tr);
8796 list_add(&tr->list, &ftrace_trace_arrays);
8803 ftrace_free_ftrace_ops(tr);
8804 free_trace_buffers(tr);
8805 free_cpumask_var(tr->tracing_cpumask);
8809 return ERR_PTR(ret);
8812 static int instance_mkdir(const char *name)
8814 struct trace_array *tr;
8817 mutex_lock(&event_mutex);
8818 mutex_lock(&trace_types_lock);
8821 if (trace_array_find(name))
8824 tr = trace_array_create(name);
8826 ret = PTR_ERR_OR_ZERO(tr);
8829 mutex_unlock(&trace_types_lock);
8830 mutex_unlock(&event_mutex);
8835 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8836 * @name: The name of the trace array to be looked up/created.
8838  * Returns a pointer to the trace array with the given name, or
8839  * NULL if it cannot be created.
8841 * NOTE: This function increments the reference counter associated with the
8842 * trace array returned. This makes sure it cannot be freed while in use.
8843 * Use trace_array_put() once the trace array is no longer needed.
8844 * If the trace_array is to be freed, trace_array_destroy() needs to
8845 * be called after the trace_array_put(), or simply let user space delete
8846 * it from the tracefs instances directory. But until the
8847  * trace_array_put() is called, user space cannot delete it.
8850 struct trace_array *trace_array_get_by_name(const char *name)
8852 struct trace_array *tr;
8854 mutex_lock(&event_mutex);
8855 mutex_lock(&trace_types_lock);
8857 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8858 if (tr->name && strcmp(tr->name, name) == 0)
8862 tr = trace_array_create(name);
8870 mutex_unlock(&trace_types_lock);
8871 mutex_unlock(&event_mutex);
8874 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
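/*
 * Illustrative sketch (hypothetical module code, not part of this file,
 * hence guarded out): how an in-kernel user is expected to combine
 * trace_array_get_by_name(), trace_array_put() and trace_array_destroy()
 * per the kerneldoc above.  The instance name is made up.
 */
#if 0
static int example_instance_user(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example-instance");
	if (!tr)
		return -ENOMEM;

	/* ... emit events into the instance, configure it, etc. ... */

	/* Drop our reference once we are done using it. */
	trace_array_put(tr);

	/*
	 * Optionally tear the instance down from the kernel side;
	 * otherwise user space can rmdir it from tracefs.
	 */
	return trace_array_destroy(tr);
}
#endif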
8876 static int __remove_instance(struct trace_array *tr)
8880 /* Reference counter for a newly created trace array = 1. */
8881 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8884 list_del(&tr->list);
8886 /* Disable all the flags that were enabled coming in */
8887 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8888 if ((1 << i) & ZEROED_TRACE_FLAGS)
8889 set_tracer_flag(tr, 1 << i, 0);
8892 tracing_set_nop(tr);
8893 clear_ftrace_function_probes(tr);
8894 event_trace_del_tracer(tr);
8895 ftrace_clear_pids(tr);
8896 ftrace_destroy_function_files(tr);
8897 tracefs_remove(tr->dir);
8898 free_trace_buffers(tr);
8900 for (i = 0; i < tr->nr_topts; i++) {
8901 kfree(tr->topts[i].topts);
8905 free_cpumask_var(tr->tracing_cpumask);
8912 int trace_array_destroy(struct trace_array *this_tr)
8914 struct trace_array *tr;
8920 mutex_lock(&event_mutex);
8921 mutex_lock(&trace_types_lock);
8925 /* Making sure trace array exists before destroying it. */
8926 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8927 if (tr == this_tr) {
8928 ret = __remove_instance(tr);
8933 mutex_unlock(&trace_types_lock);
8934 mutex_unlock(&event_mutex);
8938 EXPORT_SYMBOL_GPL(trace_array_destroy);
8940 static int instance_rmdir(const char *name)
8942 struct trace_array *tr;
8945 mutex_lock(&event_mutex);
8946 mutex_lock(&trace_types_lock);
8949 tr = trace_array_find(name);
8951 ret = __remove_instance(tr);
8953 mutex_unlock(&trace_types_lock);
8954 mutex_unlock(&event_mutex);
8959 static __init void create_trace_instances(struct dentry *d_tracer)
8961 struct trace_array *tr;
8963 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8966 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8969 mutex_lock(&event_mutex);
8970 mutex_lock(&trace_types_lock);
8972 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8975 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8976 "Failed to create instance directory\n"))
8980 mutex_unlock(&trace_types_lock);
8981 mutex_unlock(&event_mutex);
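/*
 * For illustration: with the "instances" directory registered above, user
 * space creates and removes ring-buffer instances with plain mkdir/rmdir,
 * e.g.
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   rmdir /sys/kernel/tracing/instances/foo
 *
 * which end up in instance_mkdir() and instance_rmdir() respectively.
 */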
8985 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8987 struct trace_event_file *file;
8990 trace_create_file("available_tracers", 0444, d_tracer,
8991 tr, &show_traces_fops);
8993 trace_create_file("current_tracer", 0644, d_tracer,
8994 tr, &set_tracer_fops);
8996 trace_create_file("tracing_cpumask", 0644, d_tracer,
8997 tr, &tracing_cpumask_fops);
8999 trace_create_file("trace_options", 0644, d_tracer,
9000 tr, &tracing_iter_fops);
9002 trace_create_file("trace", 0644, d_tracer,
9005 trace_create_file("trace_pipe", 0444, d_tracer,
9006 tr, &tracing_pipe_fops);
9008 trace_create_file("buffer_size_kb", 0644, d_tracer,
9009 tr, &tracing_entries_fops);
9011 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9012 tr, &tracing_total_entries_fops);
9014 trace_create_file("free_buffer", 0200, d_tracer,
9015 tr, &tracing_free_buffer_fops);
9017 trace_create_file("trace_marker", 0220, d_tracer,
9018 tr, &tracing_mark_fops);
9020 file = __find_event_file(tr, "ftrace", "print");
9021 if (file && file->dir)
9022 trace_create_file("trigger", 0644, file->dir, file,
9023 &event_trigger_fops);
9024 tr->trace_marker_file = file;
9026 trace_create_file("trace_marker_raw", 0220, d_tracer,
9027 tr, &tracing_mark_raw_fops);
9029 trace_create_file("trace_clock", 0644, d_tracer, tr,
9032 trace_create_file("tracing_on", 0644, d_tracer,
9033 tr, &rb_simple_fops);
9035 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9036 &trace_time_stamp_mode_fops);
9038 tr->buffer_percent = 50;
9040 trace_create_file("buffer_percent", 0444, d_tracer,
9041 tr, &buffer_percent_fops);
9043 create_trace_options_dir(tr);
9045 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9046 trace_create_maxlat_file(tr, d_tracer);
9049 if (ftrace_create_function_files(tr, d_tracer))
9050 MEM_FAIL(1, "Could not allocate function filter files");
9052 #ifdef CONFIG_TRACER_SNAPSHOT
9053 trace_create_file("snapshot", 0644, d_tracer,
9054 tr, &snapshot_fops);
9057 trace_create_file("error_log", 0644, d_tracer,
9058 tr, &tracing_err_log_fops);
9060 for_each_tracing_cpu(cpu)
9061 tracing_init_tracefs_percpu(tr, cpu);
9063 ftrace_init_tracefs(tr, d_tracer);
9066 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9068 struct vfsmount *mnt;
9069 struct file_system_type *type;
9072 * To maintain backward compatibility for tools that mount
9073 * debugfs to get to the tracing facility, tracefs is automatically
9074 * mounted to the debugfs/tracing directory.
9076 type = get_fs_type("tracefs");
9079 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9080 put_filesystem(type);
9089 * tracing_init_dentry - initialize top level trace array
9091 * This is called when creating files or directories in the tracing
9092 * directory. It is called via fs_initcall() by any of the boot up code
9093 * and expects to return the dentry of the top level tracing directory.
9095 int tracing_init_dentry(void)
9097 struct trace_array *tr = &global_trace;
9099 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9100 pr_warn("Tracing disabled due to lockdown\n");
9104 /* The top level trace array uses NULL as parent */
9108 if (WARN_ON(!tracefs_initialized()))
9112 * As there may still be users that expect the tracing
9113 * files to exist in debugfs/tracing, we must automount
9114 * the tracefs file system there, so older tools still
9115  * work with the newer kernel.
9117 tr->dir = debugfs_create_automount("tracing", NULL,
9118 trace_automount, NULL);
9123 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9124 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9126 static struct workqueue_struct *eval_map_wq __initdata;
9127 static struct work_struct eval_map_work __initdata;
9129 static void __init eval_map_work_func(struct work_struct *work)
9133 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9134 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9137 static int __init trace_eval_init(void)
9139 INIT_WORK(&eval_map_work, eval_map_work_func);
9141 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9143 pr_err("Unable to allocate eval_map_wq\n");
9145 eval_map_work_func(&eval_map_work);
9149 queue_work(eval_map_wq, &eval_map_work);
9153 static int __init trace_eval_sync(void)
9155 /* Make sure the eval map updates are finished */
9157 destroy_workqueue(eval_map_wq);
9161 late_initcall_sync(trace_eval_sync);
9164 #ifdef CONFIG_MODULES
9165 static void trace_module_add_evals(struct module *mod)
9167 if (!mod->num_trace_evals)
9171  * Modules with bad taint do not have events created; do
9172 * not bother with enums either.
9174 if (trace_module_has_bad_taint(mod))
9177 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9180 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9181 static void trace_module_remove_evals(struct module *mod)
9183 union trace_eval_map_item *map;
9184 union trace_eval_map_item **last = &trace_eval_maps;
9186 if (!mod->num_trace_evals)
9189 mutex_lock(&trace_eval_mutex);
9191 map = trace_eval_maps;
9194 if (map->head.mod == mod)
9196 map = trace_eval_jmp_to_tail(map);
9197 last = &map->tail.next;
9198 map = map->tail.next;
9203 *last = trace_eval_jmp_to_tail(map)->tail.next;
9206 mutex_unlock(&trace_eval_mutex);
9209 static inline void trace_module_remove_evals(struct module *mod) { }
9210 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9212 static int trace_module_notify(struct notifier_block *self,
9213 unsigned long val, void *data)
9215 struct module *mod = data;
9218 case MODULE_STATE_COMING:
9219 trace_module_add_evals(mod);
9221 case MODULE_STATE_GOING:
9222 trace_module_remove_evals(mod);
9229 static struct notifier_block trace_module_nb = {
9230 .notifier_call = trace_module_notify,
9233 #endif /* CONFIG_MODULES */
9235 static __init int tracer_init_tracefs(void)
9239 trace_access_lock_init();
9241 ret = tracing_init_dentry();
9247 init_tracer_tracefs(&global_trace, NULL);
9248 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9250 trace_create_file("tracing_thresh", 0644, NULL,
9251 &global_trace, &tracing_thresh_fops);
9253 trace_create_file("README", 0444, NULL,
9254 NULL, &tracing_readme_fops);
9256 trace_create_file("saved_cmdlines", 0444, NULL,
9257 NULL, &tracing_saved_cmdlines_fops);
9259 trace_create_file("saved_cmdlines_size", 0644, NULL,
9260 NULL, &tracing_saved_cmdlines_size_fops);
9262 trace_create_file("saved_tgids", 0444, NULL,
9263 NULL, &tracing_saved_tgids_fops);
9267 trace_create_eval_file(NULL);
9269 #ifdef CONFIG_MODULES
9270 register_module_notifier(&trace_module_nb);
9273 #ifdef CONFIG_DYNAMIC_FTRACE
9274 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9275 NULL, &tracing_dyn_info_fops);
9278 create_trace_instances(NULL);
9280 update_tracer_options(&global_trace);
9285 static int trace_panic_handler(struct notifier_block *this,
9286 unsigned long event, void *unused)
9288 if (ftrace_dump_on_oops)
9289 ftrace_dump(ftrace_dump_on_oops);
9293 static struct notifier_block trace_panic_notifier = {
9294 .notifier_call = trace_panic_handler,
9296 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9299 static int trace_die_handler(struct notifier_block *self,
9305 if (ftrace_dump_on_oops)
9306 ftrace_dump(ftrace_dump_on_oops);
9314 static struct notifier_block trace_die_notifier = {
9315 .notifier_call = trace_die_handler,
9320 * printk is set to max of 1024, we really don't need it that big.
9321 * Nothing should be printing 1000 characters anyway.
9323 #define TRACE_MAX_PRINT 1000
9326 * Define here KERN_TRACE so that we have one place to modify
9327 * it if we decide to change what log level the ftrace dump
9330 #define KERN_TRACE KERN_EMERG
9333 trace_printk_seq(struct trace_seq *s)
9335 /* Probably should print a warning here. */
9336 if (s->seq.len >= TRACE_MAX_PRINT)
9337 s->seq.len = TRACE_MAX_PRINT;
9340 * More paranoid code. Although the buffer size is set to
9341 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9342 * an extra layer of protection.
9344 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9345 s->seq.len = s->seq.size - 1;
9347 /* should be NUL terminated, but we are paranoid. */
9348 s->buffer[s->seq.len] = 0;
9350 printk(KERN_TRACE "%s", s->buffer);
9355 void trace_init_global_iter(struct trace_iterator *iter)
9357 iter->tr = &global_trace;
9358 iter->trace = iter->tr->current_trace;
9359 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9360 iter->array_buffer = &global_trace.array_buffer;
9362 if (iter->trace && iter->trace->open)
9363 iter->trace->open(iter);
9365 /* Annotate start of buffers if we had overruns */
9366 if (ring_buffer_overruns(iter->array_buffer->buffer))
9367 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9369 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9370 if (trace_clocks[iter->tr->clock_id].in_ns)
9371 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9374 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9376 /* use static because iter can be a bit big for the stack */
9377 static struct trace_iterator iter;
9378 static atomic_t dump_running;
9379 struct trace_array *tr = &global_trace;
9380 unsigned int old_userobj;
9381 unsigned long flags;
9384 /* Only allow one dump user at a time. */
9385 if (atomic_inc_return(&dump_running) != 1) {
9386 atomic_dec(&dump_running);
9391 * Always turn off tracing when we dump.
9392 * We don't need to show trace output of what happens
9393 * between multiple crashes.
9395 * If the user does a sysrq-z, then they can re-enable
9396 * tracing with echo 1 > tracing_on.
9400 local_irq_save(flags);
9401 printk_nmi_direct_enter();
9403 /* Simulate the iterator */
9404 trace_init_global_iter(&iter);
9405 /* Can not use kmalloc for iter.temp and iter.fmt */
9406 iter.temp = static_temp_buf;
9407 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9408 iter.fmt = static_fmt_buf;
9409 iter.fmt_size = STATIC_FMT_BUF_SIZE;
9411 for_each_tracing_cpu(cpu) {
9412 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9415 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9417 /* don't look at user memory in panic mode */
9418 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9420 switch (oops_dump_mode) {
9422 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9425 iter.cpu_file = raw_smp_processor_id();
9430 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9431 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9434 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9436 /* Did function tracer already get disabled? */
9437 if (ftrace_is_dead()) {
9438 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9439 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9443 * We need to stop all tracing on all CPUS to read
9444 * the next buffer. This is a bit expensive, but is
9445  * not done often. We read all that we can,
9446 * and then release the locks again.
9449 while (!trace_empty(&iter)) {
9452 printk(KERN_TRACE "---------------------------------\n");
9456 trace_iterator_reset(&iter);
9457 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9459 if (trace_find_next_entry_inc(&iter) != NULL) {
9462 ret = print_trace_line(&iter);
9463 if (ret != TRACE_TYPE_NO_CONSUME)
9464 trace_consume(&iter);
9466 touch_nmi_watchdog();
9468 trace_printk_seq(&iter.seq);
9472 printk(KERN_TRACE " (ftrace buffer empty)\n");
9474 printk(KERN_TRACE "---------------------------------\n");
9477 tr->trace_flags |= old_userobj;
9479 for_each_tracing_cpu(cpu) {
9480 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9482 atomic_dec(&dump_running);
9483 printk_nmi_direct_exit();
9484 local_irq_restore(flags);
9486 EXPORT_SYMBOL_GPL(ftrace_dump);
9488 #define WRITE_BUFSIZE 4096
9490 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9491 size_t count, loff_t *ppos,
9492 int (*createfn)(const char *))
9494 char *kbuf, *buf, *tmp;
9499 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9503 while (done < count) {
9504 size = count - done;
9506 if (size >= WRITE_BUFSIZE)
9507 size = WRITE_BUFSIZE - 1;
9509 if (copy_from_user(kbuf, buffer + done, size)) {
9516 tmp = strchr(buf, '\n');
9519 size = tmp - buf + 1;
9522 if (done + size < count) {
9525 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9526 pr_warn("Line length is too long: Should be less than %d\n",
9534 /* Remove comments */
9535 tmp = strchr(buf, '#');
9540 ret = createfn(buf);
9545 } while (done < count);
9555 __init static int tracer_alloc_buffers(void)
9561 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9562 pr_warn("Tracing disabled due to lockdown\n");
9567 * Make sure we don't accidentally add more trace options
9568 * than we have bits for.
9570 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9572 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9575 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9576 goto out_free_buffer_mask;
9578 /* Only allocate trace_printk buffers if a trace_printk exists */
9579 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9580 /* Must be called before global_trace.buffer is allocated */
9581 trace_printk_init_buffers();
9583 /* To save memory, keep the ring buffer size to its minimum */
9584 if (ring_buffer_expanded)
9585 ring_buf_size = trace_buf_size;
9589 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9590 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9592 raw_spin_lock_init(&global_trace.start_lock);
9595  * The prepare callback allocates some memory for the ring buffer. We
9596 * don't free the buffer if the CPU goes down. If we were to free
9597 * the buffer, then the user would lose any trace that was in the
9598 * buffer. The memory will be removed once the "instance" is removed.
9600 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9601 "trace/RB:preapre", trace_rb_cpu_prepare,
9604 goto out_free_cpumask;
9605 /* Used for event triggers */
9607 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9609 goto out_rm_hp_state;
9611 if (trace_create_savedcmd() < 0)
9612 goto out_free_temp_buffer;
9614 /* TODO: make the number of buffers hot pluggable with CPUS */
9615 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9616 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9617 goto out_free_savedcmd;
9620 if (global_trace.buffer_disabled)
9623 if (trace_boot_clock) {
9624 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9626 pr_warn("Trace clock %s not defined, going back to default\n",
9631 * register_tracer() might reference current_trace, so it
9632 * needs to be set before we register anything. This is
9633 * just a bootstrap of current_trace anyway.
9635 global_trace.current_trace = &nop_trace;
9637 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9639 ftrace_init_global_array_ops(&global_trace);
9641 init_trace_flags_index(&global_trace);
9643 register_tracer(&nop_trace);
9645 /* Function tracing may start here (via kernel command line) */
9646 init_function_trace();
9648 /* All seems OK, enable tracing */
9649 tracing_disabled = 0;
9651 atomic_notifier_chain_register(&panic_notifier_list,
9652 &trace_panic_notifier);
9654 register_die_notifier(&trace_die_notifier);
9656 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9658 INIT_LIST_HEAD(&global_trace.systems);
9659 INIT_LIST_HEAD(&global_trace.events);
9660 INIT_LIST_HEAD(&global_trace.hist_vars);
9661 INIT_LIST_HEAD(&global_trace.err_log);
9662 list_add(&global_trace.list, &ftrace_trace_arrays);
9664 apply_trace_boot_options();
9666 register_snapshot_cmd();
9671 free_saved_cmdlines_buffer(savedcmd);
9672 out_free_temp_buffer:
9673 ring_buffer_free(temp_buffer);
9675 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9677 free_cpumask_var(global_trace.tracing_cpumask);
9678 out_free_buffer_mask:
9679 free_cpumask_var(tracing_buffer_mask);
9684 void __init early_trace_init(void)
9686 if (tracepoint_printk) {
9687 tracepoint_print_iter =
9688 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9689 if (MEM_FAIL(!tracepoint_print_iter,
9690 "Failed to allocate trace iterator\n"))
9691 tracepoint_printk = 0;
9693 static_key_enable(&tracepoint_printk_key.key);
9695 tracer_alloc_buffers();
9698 void __init trace_init(void)
9703 __init static int clear_boot_tracer(void)
9706  * The default bootup tracer name is stored in an init section buffer.
9707  * This function is called as a late initcall. If we did not
9708 * find the boot tracer, then clear it out, to prevent
9709 * later registration from accessing the buffer that is
9710 * about to be freed.
9712 if (!default_bootup_tracer)
9715 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9716 default_bootup_tracer);
9717 default_bootup_tracer = NULL;
9722 fs_initcall(tracer_init_tracefs);
9723 late_initcall_sync(clear_boot_tracer);
9725 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9726 __init static int tracing_set_default_clock(void)
9728 /* sched_clock_stable() is determined in late_initcall */
9729 if (!trace_boot_clock && !sched_clock_stable()) {
9730 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9731 pr_warn("Can not set tracing clock due to lockdown\n");
9736 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9737 "If you want to keep using the local clock, then add:\n"
9738 " \"trace_clock=local\"\n"
9739 "on the kernel command line\n");
9740 tracing_set_clock(&global_trace, "global");
9745 late_initcall_sync(tracing_set_default_clock);