1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
53 #include "trace_output.h"
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
59 bool ring_buffer_expanded;
62 * We need to change this state when a selftest is running.
63 * A selftest will look into the ring-buffer to count the
64 * entries inserted during the selftest, although concurrent
65 * insertions into the ring-buffer, such as trace_printk(), could occur
66 * at the same time, giving false positive or negative results.
68 static bool __read_mostly tracing_selftest_running;
71 * If a tracer is running, we do not want to run SELFTEST.
73 bool __read_mostly tracing_selftest_disabled;
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
92 * To prevent the comm cache from being overwritten when no
93 * tracing is active, only save the comm when a trace event occurred.
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
99 * Kill all tracing for good (never come back).
100 * It is initialized to 1 but will turn to zero if the initialization
101 * of the tracer is successful. That is the only place that sets it back to zero.
104 static int tracing_disabled = 1;
106 cpumask_var_t __read_mostly tracing_buffer_mask;
109 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
111 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112 * is set, then ftrace_dump is called. This will output the contents
113 * of the ftrace buffers to the console. This is very useful for
114 * capturing traces that lead to crashes and outputting them to a serial console.
117 * It is off by default, but you can enable it either by specifying
118 * "ftrace_dump_on_oops" on the kernel command line, or by setting
119 * /proc/sys/kernel/ftrace_dump_on_oops.
120 * Set it to 1 if you want to dump the buffers of all CPUs.
121 * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops.
124 enum ftrace_dump_mode ftrace_dump_on_oops;
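/*
 * Example (illustrative, mirroring set_ftrace_dump_on_oops() below):
 *
 *	ftrace_dump_on_oops		(boot arg) selects DUMP_ALL
 *	ftrace_dump_on_oops=orig_cpu	(boot arg) selects DUMP_ORIG
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	(run time, all CPUs)
 */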
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
133 unsigned long length;
136 union trace_eval_map_item;
138 struct trace_eval_map_tail {
140 * "end" is first and points to NULL as it must be different
141 * than "mod" or "eval_string"
143 union trace_eval_map_item *next;
144 const char *end; /* points to NULL */
147 static DEFINE_MUTEX(trace_eval_mutex);
150 * The trace_eval_maps are saved in an array with two extra elements,
151 * one at the beginning, and one at the end. The beginning item contains
152 * the count of the saved maps (head.length), and the module they
153 * belong to if not built in (head.mod). The ending item contains a
154 * pointer to the next array of saved eval_map items.
156 union trace_eval_map_item {
157 struct trace_eval_map map;
158 struct trace_eval_map_head head;
159 struct trace_eval_map_tail tail;
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 unsigned long flags, int pc);
169 #define MAX_TRACER_SIZE 100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
173 static bool allocate_snapshot;
175 static int __init set_cmdline_ftrace(char *str)
177 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 default_bootup_tracer = bootup_tracer_buf;
179 /* We are using ftrace early, expand it */
180 ring_buffer_expanded = true;
183 __setup("ftrace=", set_cmdline_ftrace);
185 static int __init set_ftrace_dump_on_oops(char *str)
187 if (*str++ != '=' || !*str) {
188 ftrace_dump_on_oops = DUMP_ALL;
192 if (!strcmp("orig_cpu", str)) {
193 ftrace_dump_on_oops = DUMP_ORIG;
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
201 static int __init stop_trace_on_warning(char *str)
203 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 __disable_trace_on_warning = 1;
207 __setup("traceoff_on_warning", stop_trace_on_warning);
209 static int __init boot_alloc_snapshot(char *str)
211 allocate_snapshot = true;
212 /* We also need the main ring buffer expanded */
213 ring_buffer_expanded = true;
216 __setup("alloc_snapshot", boot_alloc_snapshot);
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
221 static int __init set_trace_boot_options(char *str)
223 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
226 __setup("trace_options=", set_trace_boot_options);
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
231 static int __init set_trace_boot_clock(char *str)
233 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 trace_boot_clock = trace_boot_clock_buf;
237 __setup("trace_clock=", set_trace_boot_clock);
239 static int __init set_tracepoint_printk(char *str)
241 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 tracepoint_printk = 1;
245 __setup("tp_printk", set_tracepoint_printk);
247 unsigned long long ns2usecs(u64 nsec)
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS \
256 (FUNCTION_DEFAULT_FLAGS | \
257 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
259 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
260 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
264 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
271 * The global_trace is the descriptor that holds the top-level tracing
272 * buffers for the live tracing.
274 static struct trace_array global_trace = {
275 .trace_flags = TRACE_DEFAULT_FLAGS,
278 LIST_HEAD(ftrace_trace_arrays);
280 int trace_array_get(struct trace_array *this_tr)
282 struct trace_array *tr;
285 mutex_lock(&trace_types_lock);
286 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
293 mutex_unlock(&trace_types_lock);
298 static void __trace_array_put(struct trace_array *this_tr)
300 WARN_ON(!this_tr->ref);
305 * trace_array_put - Decrement the reference counter for this trace array.
307 * NOTE: Use this when we no longer need the trace array returned by
308 * trace_array_get_by_name(). This ensures the trace array can be later destroyed.
312 void trace_array_put(struct trace_array *this_tr)
317 mutex_lock(&trace_types_lock);
318 __trace_array_put(this_tr);
319 mutex_unlock(&trace_types_lock);
321 EXPORT_SYMBOL_GPL(trace_array_put);
323 int tracing_check_open_get_tr(struct trace_array *tr)
327 ret = security_locked_down(LOCKDOWN_TRACEFS);
331 if (tracing_disabled)
334 if (tr && trace_array_get(tr) < 0)
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 struct trace_buffer *buffer,
342 struct ring_buffer_event *event)
344 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 !filter_match_preds(call->filter, rec)) {
346 __trace_event_discard_commit(buffer, event);
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
355 vfree(pid_list->pids);
360 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361 * @filtered_pids: The list of pids to check
362 * @search_pid: The PID to find in @filtered_pids
364 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
370 * If pid_max changed after filtered_pids was created, we
371 * by default ignore all pids greater than the previous pid_max.
373 if (search_pid >= filtered_pids->pid_max)
376 return test_bit(search_pid, filtered_pids->pids);
380 * trace_ignore_this_task - should a task be ignored for tracing
381 * @filtered_pids: The list of pids to check
382 * @task: The task that should be ignored if not filtered
384 * Checks if @task should be traced or not from @filtered_pids.
385 * Returns true if @task should *NOT* be traced.
386 * Returns false if @task should be traced.
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 struct trace_pid_list *filtered_no_pids,
391 struct task_struct *task)
394 * If filtered_no_pids is not empty, and the task's pid is listed
395 * in filtered_no_pids, then return true.
396 * Otherwise, if filtered_pids is empty, that means we can
397 * trace all tasks. If it has content, then only trace pids
398 * within filtered_pids.
401 return (filtered_pids &&
402 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
404 trace_find_filtered_pid(filtered_no_pids, task->pid));
408 * trace_filter_add_remove_task - Add or remove a task from a pid_list
409 * @pid_list: The list to modify
410 * @self: The current task for fork or NULL for exit
411 * @task: The task to add or remove
413 * If adding a task, and if @self is defined, the task is only added if @self
414 * is also included in @pid_list. This happens on fork and tasks should
415 * only be added when the parent is listed. If @self is NULL, then the
416 * @task pid will be removed from the list, which would happen on exit of a task.
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 struct task_struct *self,
421 struct task_struct *task)
426 /* For forks, we only add if the forking task is listed */
428 if (!trace_find_filtered_pid(pid_list, self->pid))
432 /* Sorry, but we don't support pid_max changing after setting */
433 if (task->pid >= pid_list->pid_max)
436 /* "self" is set for forks, and NULL for exits */
438 set_bit(task->pid, pid_list->pids);
440 clear_bit(task->pid, pid_list->pids);
444 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445 * @pid_list: The pid list to show
446 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447 * @pos: The position of the file
449 * This is used by the seq_file "next" operation to iterate the pids
450 * listed in a trace_pid_list structure.
452 * Returns the pid+1 as we want to display pid of zero, but NULL would
453 * stop the iteration.
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
457 unsigned long pid = (unsigned long)v;
461 /* pid already is +1 of the actual previous bit */
462 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
464 /* Return pid + 1 to allow zero to be represented */
465 if (pid < pid_list->pid_max)
466 return (void *)(pid + 1);
472 * trace_pid_start - Used for seq_file to start reading pid lists
473 * @pid_list: The pid list to show
474 * @pos: The position of the file
476 * This is used by the seq_file "start" operation to start the iteration of the pid list.
479 * Returns the pid+1 as we want to display pid of zero, but NULL would
480 * stop the iteration.
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
487 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 if (pid >= pid_list->pid_max)
491 /* Return pid + 1 so that zero can be the exit value */
492 for (pid++; pid && l < *pos;
493 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
499 * trace_pid_show - show the current pid in seq_file processing
500 * @m: The seq_file structure to write into
501 * @v: A void pointer of the pid (+1) value to display
503 * Can be directly used by seq_file operations to display the current pid value.
506 int trace_pid_show(struct seq_file *m, void *v)
508 unsigned long pid = (unsigned long)v - 1;
510 seq_printf(m, "%lu\n", pid);
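/*
 * Together, trace_pid_start(), trace_pid_next() and trace_pid_show() form a
 * complete seq_file iterator over a trace_pid_list. A minimal sketch of how
 * a caller can wire them up (my_pid_list and the p_* names are hypothetical;
 * the real users live in the event and function tracing code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */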
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE 127
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 struct trace_pid_list **new_pid_list,
519 const char __user *ubuf, size_t cnt)
521 struct trace_pid_list *pid_list;
522 struct trace_parser parser;
530 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
534 * Always recreate a new array. The write is an all-or-nothing
535 * operation: always create a new array when the user adds new pids.
536 * If the operation fails, then the current list is kept.
539 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
541 trace_parser_put(&parser);
545 pid_list->pid_max = READ_ONCE(pid_max);
547 /* Only truncating will shrink pid_max */
548 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 pid_list->pid_max = filtered_pids->pid_max;
551 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 if (!pid_list->pids) {
553 trace_parser_put(&parser);
559 /* copy the current bits to the new max */
560 for_each_set_bit(pid, filtered_pids->pids,
561 filtered_pids->pid_max) {
562 set_bit(pid, pid_list->pids);
571 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 if (ret < 0 || !trace_parser_loaded(&parser))
580 if (kstrtoul(parser.buffer, 0, &val))
582 if (val >= pid_list->pid_max)
587 set_bit(pid, pid_list->pids);
590 trace_parser_clear(&parser);
593 trace_parser_put(&parser);
596 trace_free_pid_list(pid_list);
601 /* Cleared the list of pids */
602 trace_free_pid_list(pid_list);
607 *new_pid_list = pid_list;
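/*
 * From user space, this is the helper behind writes like:
 *
 *	# echo 123 456 > set_event_pid
 *
 * Each whitespace-separated pid sets one bit in the freshly built list,
 * and an empty write clears the list. (set_event_pid is one of the
 * tracefs files that reuse this helper.)
 */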
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
616 /* Early boot up does not have a buffer yet */
618 return trace_clock_local();
620 ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
626 u64 ftrace_now(int cpu)
628 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
632 * tracing_is_enabled - Show if global_trace has been enabled
634 * Shows if the global trace has been enabled or not. It uses the
635 * mirror flag "buffer_disabled" to be used in fast paths such as for
636 * the irqsoff tracer. But it may be inaccurate due to races. If you
637 * need to know the accurate state, use tracing_is_on() which is a little
638 * slower, but accurate.
640 int tracing_is_enabled(void)
643 * For quick access (irqsoff uses this in fast path), just
644 * return the mirror variable of the state of the ring buffer.
645 * It's a little racy, but we don't really care.
648 return !global_trace.buffer_disabled;
652 * trace_buf_size is the size in bytes that is allocated
653 * for a buffer. Note, the number of bytes is always rounded
656 * This number is purposely set to a low number of 16384.
657 * If the dump on oops happens, it will be much appreciated
658 * to not have to wait for all that output. In any case, this is
659 * configurable at both boot time and run time.
661 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
663 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer *trace_types __read_mostly;
669 * trace_types_lock is used to protect the trace_types list.
671 DEFINE_MUTEX(trace_types_lock);
674 * serialize access to the ring buffer
676 * The ring buffer serializes readers, but that is only low-level protection.
677 * The validity of the events (returned by ring_buffer_peek() etc.)
678 * is not protected by the ring buffer.
680 * The content of events may become garbage if we allow another process to
681 * consume these events concurrently:
682 * A) the page of the consumed events may become a normal page
683 * (not a reader page) in the ring buffer, and this page will be rewritten
684 * by the events producer.
685 * B) The page of the consumed events may become a page for splice_read,
686 * and this page will be returned to the system.
688 * These primitives allow multi-process access to different per-cpu ring buffers concurrently.
691 * These primitives don't distinguish read-only and read-consume access.
692 * Multiple read-only accesses are also serialized.
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
699 static inline void trace_access_lock(int cpu)
701 if (cpu == RING_BUFFER_ALL_CPUS) {
702 /* gain it for accessing the whole ring buffer. */
703 down_write(&all_cpu_access_lock);
705 /* gain it for accessing a cpu ring buffer. */
707 /* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 down_read(&all_cpu_access_lock);
710 /* Second, block other access to this @cpu ring buffer. */
711 mutex_lock(&per_cpu(cpu_access_lock, cpu));
715 static inline void trace_access_unlock(int cpu)
717 if (cpu == RING_BUFFER_ALL_CPUS) {
718 up_write(&all_cpu_access_lock);
720 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 up_read(&all_cpu_access_lock);
725 static inline void trace_access_lock_init(void)
729 for_each_possible_cpu(cpu)
730 mutex_init(&per_cpu(cpu_access_lock, cpu));
735 static DEFINE_MUTEX(access_lock);
737 static inline void trace_access_lock(int cpu)
740 mutex_lock(&access_lock);
743 static inline void trace_access_unlock(int cpu)
746 mutex_unlock(&access_lock);
749 static inline void trace_access_lock_init(void)
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
758 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 struct trace_buffer *buffer,
762 int skip, int pc, struct pt_regs *regs);
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
767 int skip, int pc, struct pt_regs *regs)
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 struct trace_buffer *buffer,
773 int skip, int pc, struct pt_regs *regs)
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 int type, unsigned long flags, int pc)
783 struct trace_entry *ent = ring_buffer_event_data(event);
785 tracing_generic_entry_update(ent, type, flags, pc);
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
792 unsigned long flags, int pc)
794 struct ring_buffer_event *event;
796 event = ring_buffer_lock_reserve(buffer, len);
798 trace_event_setup(event, type, flags, pc);
803 void tracer_tracing_on(struct trace_array *tr)
805 if (tr->array_buffer.buffer)
806 ring_buffer_record_on(tr->array_buffer.buffer);
808 * This flag is looked at when buffers haven't been allocated
809 * yet, or by some tracers (like irqsoff), that just want to
810 * know if the ring buffer has been disabled, but it can handle
811 * races where it gets disabled while we still do a record.
812 * As the check is in the fast path of the tracers, it is more
813 * important to be fast than accurate.
815 tr->buffer_disabled = 0;
816 /* Make the flag seen by readers */
821 * tracing_on - enable tracing buffers
823 * This function enables tracing buffers that may have been
824 * disabled with tracing_off.
826 void tracing_on(void)
828 tracer_tracing_on(&global_trace);
830 EXPORT_SYMBOL_GPL(tracing_on);
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
836 __this_cpu_write(trace_taskinfo_save, true);
838 /* If this is the temp buffer, we need to commit fully */
839 if (this_cpu_read(trace_buffered_event) == event) {
840 /* Length is in event->array[0] */
841 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 /* Release the temp buffer */
843 this_cpu_dec(trace_buffered_event_cnt);
845 ring_buffer_unlock_commit(buffer, event);
849 * __trace_puts - write a constant string into the trace buffer.
850 * @ip: The address of the caller
851 * @str: The constant string to write
852 * @size: The size of the string.
854 int __trace_puts(unsigned long ip, const char *str, int size)
856 struct ring_buffer_event *event;
857 struct trace_buffer *buffer;
858 struct print_entry *entry;
859 unsigned long irq_flags;
863 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
866 pc = preempt_count();
868 if (unlikely(tracing_selftest_running || tracing_disabled))
871 alloc = sizeof(*entry) + size + 2; /* possible \n added */
873 local_save_flags(irq_flags);
874 buffer = global_trace.array_buffer.buffer;
875 ring_buffer_nest_start(buffer);
876 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
883 entry = ring_buffer_event_data(event);
886 memcpy(&entry->buf, str, size);
888 /* Add a newline if necessary */
889 if (entry->buf[size - 1] != '\n') {
890 entry->buf[size] = '\n';
891 entry->buf[size + 1] = '\0';
893 entry->buf[size] = '\0';
895 __buffer_unlock_commit(buffer, event);
896 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898 ring_buffer_nest_end(buffer);
901 EXPORT_SYMBOL_GPL(__trace_puts);
904 * __trace_bputs - write the pointer to a constant string into trace buffer
905 * @ip: The address of the caller
906 * @str: The constant string to write to the buffer
908 int __trace_bputs(unsigned long ip, const char *str)
910 struct ring_buffer_event *event;
911 struct trace_buffer *buffer;
912 struct bputs_entry *entry;
913 unsigned long irq_flags;
914 int size = sizeof(struct bputs_entry);
918 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
921 pc = preempt_count();
923 if (unlikely(tracing_selftest_running || tracing_disabled))
926 local_save_flags(irq_flags);
927 buffer = global_trace.array_buffer.buffer;
929 ring_buffer_nest_start(buffer);
930 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
935 entry = ring_buffer_event_data(event);
939 __buffer_unlock_commit(buffer, event);
940 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
944 ring_buffer_nest_end(buffer);
947 EXPORT_SYMBOL_GPL(__trace_bputs);
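/*
 * Both helpers above are normally reached via the trace_puts() macro: for
 * a build-time constant string it hands just the string's address to
 * __trace_bputs(), otherwise it falls back to __trace_puts() with the
 * string length. Typical use (a sketch):
 *
 *	trace_puts("reached the slow path\n");
 */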
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
953 struct tracer *tracer = tr->current_trace;
957 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 internal_trace_puts("*** snapshot is being ignored ***\n");
962 if (!tr->allocated_snapshot) {
963 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 internal_trace_puts("*** stopping trace here! ***\n");
969 /* Note, snapshot can not be used when the tracer uses it */
970 if (tracer->use_max_tr) {
971 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
976 local_irq_save(flags);
977 update_max_tr(tr, current, smp_processor_id(), cond_data);
978 local_irq_restore(flags);
981 void tracing_snapshot_instance(struct trace_array *tr)
983 tracing_snapshot_instance_cond(tr, NULL);
987 * tracing_snapshot - take a snapshot of the current buffer.
989 * This causes a swap between the snapshot buffer and the current live
990 * tracing buffer. You can use this to take snapshots of the live
991 * trace when some condition is triggered, but continue to trace.
993 * Note, make sure to allocate the snapshot with either
994 * a tracing_snapshot_alloc(), or by doing it manually
995 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
997 * If the snapshot buffer is not allocated, it will stop tracing.
998 * Basically making a permanent snapshot.
1000 void tracing_snapshot(void)
1002 struct trace_array *tr = &global_trace;
1004 tracing_snapshot_instance(tr);
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
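/*
 * Minimal in-kernel usage sketch (assumes CONFIG_TRACER_SNAPSHOT and a
 * sleepable context for the allocation; my_condition() is hypothetical):
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		if (my_condition())
 *			tracing_snapshot();	// swap live and snapshot buffers
 *	}
 */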
1009 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010 * @tr: The tracing instance to snapshot
1011 * @cond_data: The data to be tested conditionally, and possibly saved
1013 * This is the same as tracing_snapshot() except that the snapshot is
1014 * conditional - the snapshot will only happen if the
1015 * cond_snapshot.update() implementation receiving the cond_data
1016 * returns true, which means that the trace array's cond_snapshot
1017 * update() operation used the cond_data to determine whether the
1018 * snapshot should be taken, and if it was, presumably saved it along
1019 * with the snapshot.
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1023 tracing_snapshot_instance_cond(tr, cond_data);
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1028 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029 * @tr: The tracing instance
1031 * When the user enables a conditional snapshot using
1032 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033 * with the snapshot. This accessor is used to retrieve it.
1035 * Should not be called from cond_snapshot.update(), since it takes
1036 * the tr->max_lock lock, which the code calling
1037 * cond_snapshot.update() has already taken.
1039 * Returns the cond_data associated with the trace array's snapshot.
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1043 void *cond_data = NULL;
1045 arch_spin_lock(&tr->max_lock);
1047 if (tr->cond_snapshot)
1048 cond_data = tr->cond_snapshot->cond_data;
1050 arch_spin_unlock(&tr->max_lock);
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1064 if (!tr->allocated_snapshot) {
1066 /* allocate spare buffer */
1067 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1072 tr->allocated_snapshot = true;
1078 static void free_snapshot(struct trace_array *tr)
1081 * We don't free the ring buffer; instead, we resize it because the
1082 * max_tr ring buffer has some state (e.g. ring->clock) and
1083 * we want to preserve it.
1085 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 set_buffer_entries(&tr->max_buffer, 1);
1087 tracing_reset_online_cpus(&tr->max_buffer);
1088 tr->allocated_snapshot = false;
1092 * tracing_alloc_snapshot - allocate snapshot buffer.
1094 * This only allocates the snapshot buffer if it isn't already
1095 * allocated - it doesn't also take a snapshot.
1097 * This is meant to be used in cases where the snapshot buffer needs
1098 * to be set up for events that can't sleep but need to be able to
1099 * trigger a snapshot.
1101 int tracing_alloc_snapshot(void)
1103 struct trace_array *tr = &global_trace;
1106 ret = tracing_alloc_snapshot_instance(tr);
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1114 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1116 * This is similar to tracing_snapshot(), but it will allocate the
1117 * snapshot buffer if it isn't already allocated. Use this only
1118 * where it is safe to sleep, as the allocation may sleep.
1120 * This causes a swap between the snapshot buffer and the current live
1121 * tracing buffer. You can use this to take snapshots of the live
1122 * trace when some condition is triggered, but continue to trace.
1124 void tracing_snapshot_alloc(void)
1128 ret = tracing_alloc_snapshot();
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1137 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138 * @tr: The tracing instance
1139 * @cond_data: User data to associate with the snapshot
1140 * @update: Implementation of the cond_snapshot update function
1142 * Check whether the conditional snapshot for the given instance has
1143 * already been enabled, or if the current tracer is already using a
1144 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145 * save the cond_data and update function inside.
1147 * Returns 0 if successful, error otherwise.
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 cond_update_fn_t update)
1152 struct cond_snapshot *cond_snapshot;
1155 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1159 cond_snapshot->cond_data = cond_data;
1160 cond_snapshot->update = update;
1162 mutex_lock(&trace_types_lock);
1164 ret = tracing_alloc_snapshot_instance(tr);
1168 if (tr->current_trace->use_max_tr) {
1174 * The cond_snapshot can only change to NULL without the
1175 * trace_types_lock. We don't care if we race with it going
1176 * to NULL, but we want to make sure that it's not set to
1177 * something other than NULL when we get here, which we can
1178 * do safely with only holding the trace_types_lock and not
1179 * having to take the max_lock.
1181 if (tr->cond_snapshot) {
1186 arch_spin_lock(&tr->max_lock);
1187 tr->cond_snapshot = cond_snapshot;
1188 arch_spin_unlock(&tr->max_lock);
1190 mutex_unlock(&trace_types_lock);
1195 mutex_unlock(&trace_types_lock);
1196 kfree(cond_snapshot);
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
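/*
 * Sketch of the conditional snapshot flow (my_* names are hypothetical).
 * The update() callback decides, per call, whether the snapshot is taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->value > s->threshold;	// snapshot only when true
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);	// may or may not snapshot
 *	tracing_snapshot_cond_disable(tr);
 */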
1202 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203 * @tr: The tracing instance
1205 * Check whether the conditional snapshot for the given instance is
1206 * enabled; if so, free the cond_snapshot associated with it,
1207 * otherwise return -EINVAL.
1209 * Returns 0 if successful, error otherwise.
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1215 arch_spin_lock(&tr->max_lock);
1217 if (!tr->cond_snapshot)
1220 kfree(tr->cond_snapshot);
1221 tr->cond_snapshot = NULL;
1224 arch_spin_unlock(&tr->max_lock);
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1230 void tracing_snapshot(void)
1232 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1237 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1242 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1269 void tracer_tracing_off(struct trace_array *tr)
1271 if (tr->array_buffer.buffer)
1272 ring_buffer_record_off(tr->array_buffer.buffer);
1274 * This flag is looked at when buffers haven't been allocated
1275 * yet, or by some tracers (like irqsoff), that just want to
1276 * know if the ring buffer has been disabled, but it can handle
1277 * races where it gets disabled while we still do a record.
1278 * As the check is in the fast path of the tracers, it is more
1279 * important to be fast than accurate.
1281 tr->buffer_disabled = 1;
1282 /* Make the flag seen by readers */
1287 * tracing_off - turn off tracing buffers
1289 * This function stops the tracing buffers from recording data.
1290 * It does not disable any overhead the tracers themselves may
1291 * be causing. This function simply causes all recording to
1292 * the ring buffers to fail.
1294 void tracing_off(void)
1296 tracer_tracing_off(&global_trace);
1298 EXPORT_SYMBOL_GPL(tracing_off);
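/*
 * The classic use of the tracing_on()/tracing_off() pair is flight-recorder
 * style debugging: trace continuously and freeze the buffer the moment a
 * bug is detected, e.g. (illustrative):
 *
 *	if (data_is_corrupted(obj)) {		// hypothetical check
 *		trace_printk("corruption in %p\n", obj);
 *		tracing_off();			// keep the events leading here
 *	}
 */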
1300 void disable_trace_on_warning(void)
1302 if (__disable_trace_on_warning) {
1303 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 "Disabling tracing due to warning\n");
1310 * tracer_tracing_is_on - show real state of ring buffer enabled
1311 * @tr: the trace array to know if ring buffer is enabled
1313 * Shows real state of the ring buffer if it is enabled or not.
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1317 if (tr->array_buffer.buffer)
1318 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 return !tr->buffer_disabled;
1323 * tracing_is_on - show state of ring buffers enabled
1325 int tracing_is_on(void)
1327 return tracer_tracing_is_on(&global_trace);
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1331 static int __init set_buf_size(char *str)
1333 unsigned long buf_size;
1337 buf_size = memparse(str, &str);
1338 /* nr_entries can not be zero */
1341 trace_buf_size = buf_size;
1344 __setup("trace_buf_size=", set_buf_size);
1346 static int __init set_tracing_thresh(char *str)
1348 unsigned long threshold;
1353 ret = kstrtoul(str, 0, &threshold);
1356 tracing_thresh = threshold * 1000;
1359 __setup("tracing_thresh=", set_tracing_thresh);
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1363 return nsecs / 1000;
1367 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370 * of strings in the order that the evals (enum) were defined.
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1384 int in_ns; /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 { trace_clock_local, "local", 1 },
1387 { trace_clock_global, "global", 1 },
1388 { trace_clock_counter, "counter", 0 },
1389 { trace_clock_jiffies, "uptime", 0 },
1390 { trace_clock, "perf", 1 },
1391 { ktime_get_mono_fast_ns, "mono", 1 },
1392 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1393 { ktime_get_boot_fast_ns, "boot", 1 },
1397 bool trace_clock_in_ns(struct trace_array *tr)
1399 if (trace_clocks[tr->clock_id].in_ns)
1406 * trace_parser_get_init - gets the buffer for trace parser
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1410 memset(parser, 0, sizeof(*parser));
1412 parser->buffer = kmalloc(size, GFP_KERNEL);
1413 if (!parser->buffer)
1416 parser->size = size;
1421 * trace_parser_put - frees the buffer for trace parser
1423 void trace_parser_put(struct trace_parser *parser)
1425 kfree(parser->buffer);
1426 parser->buffer = NULL;
1430 * trace_get_user - reads the user input string separated by space
1431 * (matched by isspace(ch))
1433 * For each string found the 'struct trace_parser' is updated,
1434 * and the function returns.
1436 * Returns number of bytes read.
1438 * See kernel/trace/trace.h for 'struct trace_parser' details.
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 size_t cnt, loff_t *ppos)
1448 trace_parser_clear(parser);
1450 ret = get_user(ch, ubuf++);
1458 * The parser is not finished with the last write,
1459 * continue reading the user input without skipping spaces.
1461 if (!parser->cont) {
1462 /* skip white space */
1463 while (cnt && isspace(ch)) {
1464 ret = get_user(ch, ubuf++);
1473 /* only spaces were written */
1474 if (isspace(ch) || !ch) {
1481 /* read the non-space input */
1482 while (cnt && !isspace(ch) && ch) {
1483 if (parser->idx < parser->size - 1)
1484 parser->buffer[parser->idx++] = ch;
1489 ret = get_user(ch, ubuf++);
1496 /* We either got finished input or we have to wait for another call. */
1497 if (isspace(ch) || !ch) {
1498 parser->buffer[parser->idx] = 0;
1499 parser->cont = false;
1500 } else if (parser->idx < parser->size - 1) {
1501 parser->cont = true;
1502 parser->buffer[parser->idx++] = ch;
1503 /* Make sure the parsed string always terminates with '\0'. */
1504 parser->buffer[parser->idx] = 0;
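/*
 * Canonical calling pattern for trace_get_user() in a tracefs write
 * handler (trace_pid_write() above is a real user; MY_BUF_SIZE is a
 * caller-chosen limit):
 *
 *	struct trace_parser parser;
 *	loff_t pos = 0;
 *	int ret;
 *
 *	if (trace_parser_get_init(&parser, MY_BUF_SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		// parser.buffer now holds one NUL-terminated word
 *		ubuf += ret;
 *		cnt -= ret;
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */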
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1522 if (trace_seq_used(s) <= s->seq.readpos)
1525 len = trace_seq_used(s) - s->seq.readpos;
1528 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1530 s->seq.readpos += cnt;
1534 unsigned long __read_mostly tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 defined(CONFIG_FSNOTIFY)
1540 static struct workqueue_struct *fsnotify_wq;
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1544 struct trace_array *tr = container_of(work, struct trace_array,
1546 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1549 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 struct trace_array *tr = container_of(iwork, struct trace_array,
1553 queue_work(fsnotify_wq, &tr->fsnotify_work);
1556 static void trace_create_maxlat_file(struct trace_array *tr,
1557 struct dentry *d_tracer)
1559 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1560 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1561 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1562 d_tracer, &tr->max_latency,
1563 &tracing_max_lat_fops);
1566 __init static int latency_fsnotify_init(void)
1568 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1569 WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 pr_err("Unable to allocate tr_max_lat_wq\n");
1577 late_initcall_sync(latency_fsnotify_init);
1579 void latency_fsnotify(struct trace_array *tr)
1584 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1585 * possible that we are called from __schedule() or do_idle(), which
1586 * could cause a deadlock.
1588 irq_work_queue(&tr->fsnotify_irqwork);
1592 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1593 * defined(CONFIG_FSNOTIFY)
1597 #define trace_create_maxlat_file(tr, d_tracer) \
1598 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1599 &tr->max_latency, &tracing_max_lat_fops)
1603 #ifdef CONFIG_TRACER_MAX_TRACE
1605 * Copy the new maximum trace into the separate maximum-trace
1606 * structure. (this way the maximum trace is permanently saved,
1607 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1610 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 struct array_buffer *trace_buf = &tr->array_buffer;
1613 struct array_buffer *max_buf = &tr->max_buffer;
1614 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1615 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1618 max_buf->time_start = data->preempt_timestamp;
1620 max_data->saved_latency = tr->max_latency;
1621 max_data->critical_start = data->critical_start;
1622 max_data->critical_end = data->critical_end;
1624 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1625 max_data->pid = tsk->pid;
1627 * If tsk == current, then use current_uid(), as that does not use
1628 * RCU. The irq tracer can be called out of RCU scope.
1631 max_data->uid = current_uid();
1633 max_data->uid = task_uid(tsk);
1635 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1636 max_data->policy = tsk->policy;
1637 max_data->rt_priority = tsk->rt_priority;
1639 /* record this task's comm */
1640 tracing_record_cmdline(tsk);
1641 latency_fsnotify(tr);
1645 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647 * @tsk: the task with the latency
1648 * @cpu: The cpu that initiated the trace.
1649 * @cond_data: User data associated with a conditional snapshot
1651 * Flip the buffers between the @tr and the max_tr and record information
1652 * about which task was the cause of this latency.
1655 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1661 WARN_ON_ONCE(!irqs_disabled());
1663 if (!tr->allocated_snapshot) {
1664 /* Only the nop tracer should hit this when disabling */
1665 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1669 arch_spin_lock(&tr->max_lock);
1671 /* Inherit the recordable setting from array_buffer */
1672 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1673 ring_buffer_record_on(tr->max_buffer.buffer);
1675 ring_buffer_record_off(tr->max_buffer.buffer);
1677 #ifdef CONFIG_TRACER_SNAPSHOT
1678 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1681 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 __update_max_tr(tr, tsk, cpu);
1686 arch_spin_unlock(&tr->max_lock);
1690 * update_max_tr_single - only copy one trace over, and reset the rest
1692 * @tsk: task with the latency
1693 * @cpu: the cpu of the buffer to copy.
1695 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1698 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1705 WARN_ON_ONCE(!irqs_disabled());
1706 if (!tr->allocated_snapshot) {
1707 /* Only the nop tracer should hit this when disabling */
1708 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1712 arch_spin_lock(&tr->max_lock);
1714 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 if (ret == -EBUSY) {
1718 * We failed to swap the buffer due to a commit taking
1719 * place on this CPU. We fail to record, but we reset
1720 * the max trace buffer (no one writes directly to it)
1721 * and flag that it failed.
1723 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1724 "Failed to swap buffers due to commit in progress\n");
1727 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 __update_max_tr(tr, tsk, cpu);
1730 arch_spin_unlock(&tr->max_lock);
1732 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 /* Iterators are static, they should be filled or empty */
1737 if (trace_buffer_iter(iter, iter->cpu_file))
1740 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1744 #ifdef CONFIG_FTRACE_STARTUP_TEST
1745 static bool selftests_can_run;
1747 struct trace_selftests {
1748 struct list_head list;
1749 struct tracer *type;
1752 static LIST_HEAD(postponed_selftests);
1754 static int save_selftest(struct tracer *type)
1756 struct trace_selftests *selftest;
1758 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1762 selftest->type = type;
1763 list_add(&selftest->list, &postponed_selftests);
1767 static int run_tracer_selftest(struct tracer *type)
1769 struct trace_array *tr = &global_trace;
1770 struct tracer *saved_tracer = tr->current_trace;
1773 if (!type->selftest || tracing_selftest_disabled)
1777 * If a tracer registers early in boot up (before scheduling is
1778 * initialized and such), then do not run its selftests yet.
1779 * Instead, run them a little later in the boot process.
1781 if (!selftests_can_run)
1782 return save_selftest(type);
1785 * Run a selftest on this tracer.
1786 * Here we reset the trace buffer, and set the current
1787 * tracer to be this tracer. The tracer can then run some
1788 * internal tracing to verify that everything is in order.
1789 * If we fail, we do not register this tracer.
1791 tracing_reset_online_cpus(&tr->array_buffer);
1793 tr->current_trace = type;
1795 #ifdef CONFIG_TRACER_MAX_TRACE
1796 if (type->use_max_tr) {
1797 /* If we expanded the buffers, make sure the max is expanded too */
1798 if (ring_buffer_expanded)
1799 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1800 RING_BUFFER_ALL_CPUS);
1801 tr->allocated_snapshot = true;
1805 /* the test is responsible for initializing and enabling */
1806 pr_info("Testing tracer %s: ", type->name);
1807 ret = type->selftest(type, tr);
1808 /* the test is responsible for resetting too */
1809 tr->current_trace = saved_tracer;
1811 printk(KERN_CONT "FAILED!\n");
1812 /* Add the warning after printing 'FAILED' */
1816 /* Only reset on passing, to avoid touching corrupted buffers */
1817 tracing_reset_online_cpus(&tr->array_buffer);
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820 if (type->use_max_tr) {
1821 tr->allocated_snapshot = false;
1823 /* Shrink the max buffer again */
1824 if (ring_buffer_expanded)
1825 ring_buffer_resize(tr->max_buffer.buffer, 1,
1826 RING_BUFFER_ALL_CPUS);
1830 printk(KERN_CONT "PASSED\n");
1834 static __init int init_trace_selftests(void)
1836 struct trace_selftests *p, *n;
1837 struct tracer *t, **last;
1840 selftests_can_run = true;
1842 mutex_lock(&trace_types_lock);
1844 if (list_empty(&postponed_selftests))
1847 pr_info("Running postponed tracer tests:\n");
1849 tracing_selftest_running = true;
1850 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1851 /* This loop can take minutes when sanitizers are enabled, so
1852 * let's make sure we allow RCU processing.
1855 ret = run_tracer_selftest(p->type);
1856 /* If the test fails, then warn and remove from available_tracers */
1858 WARN(1, "tracer: %s failed selftest, disabling\n",
1860 last = &trace_types;
1861 for (t = trace_types; t; t = t->next) {
1872 tracing_selftest_running = false;
1875 mutex_unlock(&trace_types_lock);
1879 core_initcall(init_trace_selftests);
1881 static inline int run_tracer_selftest(struct tracer *type)
1885 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 static void __init apply_trace_boot_options(void);
1892 * register_tracer - register a tracer with the ftrace system.
1893 * @type: the plugin for the tracer
1895 * Register a new plugin tracer.
1897 int __init register_tracer(struct tracer *type)
1903 pr_info("Tracer must have a name\n");
1907 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1908 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1912 if (security_locked_down(LOCKDOWN_TRACEFS)) {
1913 pr_warn("Can not register tracer %s due to lockdown\n",
1918 mutex_lock(&trace_types_lock);
1920 tracing_selftest_running = true;
1922 for (t = trace_types; t; t = t->next) {
1923 if (strcmp(type->name, t->name) == 0) {
1925 pr_info("Tracer %s already registered\n",
1932 if (!type->set_flag)
1933 type->set_flag = &dummy_set_flag;
1935 /* allocate a dummy tracer_flags */
1936 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1941 type->flags->val = 0;
1942 type->flags->opts = dummy_tracer_opt;
1944 if (!type->flags->opts)
1945 type->flags->opts = dummy_tracer_opt;
1947 /* store the tracer for __set_tracer_option */
1948 type->flags->trace = type;
1950 ret = run_tracer_selftest(type);
1954 type->next = trace_types;
1956 add_tracer_options(&global_trace, type);
1959 tracing_selftest_running = false;
1960 mutex_unlock(&trace_types_lock);
1962 if (ret || !default_bootup_tracer)
1965 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1968 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1969 /* Do we want this tracer to start on bootup? */
1970 tracing_set_tracer(&global_trace, type->name);
1971 default_bootup_tracer = NULL;
1973 apply_trace_boot_options();
1975 /* disable other selftests, since this will break them. */
1976 tracing_selftest_disabled = true;
1977 #ifdef CONFIG_FTRACE_STARTUP_TEST
1978 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
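/*
 * A skeletal registration, showing what register_tracer() minimally
 * expects from a plugin (my_* names are hypothetical; real tracers fill
 * in many more callbacks):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */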
1986 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 struct trace_buffer *buffer = buf->buffer;
1993 ring_buffer_record_disable(buffer);
1995 /* Make sure all commits have finished */
1997 ring_buffer_reset_cpu(buffer, cpu);
1999 ring_buffer_record_enable(buffer);
2002 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 struct trace_buffer *buffer = buf->buffer;
2009 ring_buffer_record_disable(buffer);
2011 /* Make sure all commits have finished */
2014 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2016 ring_buffer_reset_online_cpus(buffer);
2018 ring_buffer_record_enable(buffer);
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2024 struct trace_array *tr;
2026 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027 if (!tr->clear_trace)
2029 tr->clear_trace = false;
2030 tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032 tracing_reset_online_cpus(&tr->max_buffer);
2037 static int *tgid_map;
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044 unsigned *map_cmdline_to_pid;
2045 unsigned cmdline_num;
2047 char *saved_cmdlines;
2049 static struct saved_cmdlines_buffer *savedcmd;
2051 /* temporarily disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2054 static inline char *get_saved_cmdlines(int idx)
2056 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2059 static inline void set_cmdline(int idx, const char *cmdline)
2061 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065 struct saved_cmdlines_buffer *s)
2067 s->map_cmdline_to_pid = kmalloc_array(val,
2068 sizeof(*s->map_cmdline_to_pid),
2070 if (!s->map_cmdline_to_pid)
2073 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074 if (!s->saved_cmdlines) {
2075 kfree(s->map_cmdline_to_pid);
2080 s->cmdline_num = val;
2081 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082 sizeof(s->map_pid_to_cmdline));
2083 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084 val * sizeof(*s->map_cmdline_to_pid));
2089 static int trace_create_savedcmd(void)
2093 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2107 int is_tracing_stopped(void)
2109 return global_trace.stop_count;
2113 * tracing_start - quick start of the tracer
2115 * If tracing is enabled but was stopped by tracing_stop,
2116 * this will start the tracer back up.
2118 void tracing_start(void)
2120 struct trace_buffer *buffer;
2121 unsigned long flags;
2123 if (tracing_disabled)
2126 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127 if (--global_trace.stop_count) {
2128 if (global_trace.stop_count < 0) {
2129 /* Someone screwed up their debugging */
2131 global_trace.stop_count = 0;
2136 /* Prevent the buffers from switching */
2137 arch_spin_lock(&global_trace.max_lock);
2139 buffer = global_trace.array_buffer.buffer;
2141 ring_buffer_record_enable(buffer);
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144 buffer = global_trace.max_buffer.buffer;
2146 ring_buffer_record_enable(buffer);
2149 arch_spin_unlock(&global_trace.max_lock);
2152 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2155 static void tracing_start_tr(struct trace_array *tr)
2157 struct trace_buffer *buffer;
2158 unsigned long flags;
2160 if (tracing_disabled)
2163 /* If global, we need to also start the max tracer */
2164 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165 return tracing_start();
2167 raw_spin_lock_irqsave(&tr->start_lock, flags);
2169 if (--tr->stop_count) {
2170 if (tr->stop_count < 0) {
2171 /* Someone screwed up their debugging */
2178 buffer = tr->array_buffer.buffer;
2180 ring_buffer_record_enable(buffer);
2183 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 * tracing_stop - quick stop of the tracer
2189 * Lightweight way to stop tracing. Use in conjunction with tracing_start().
2192 void tracing_stop(void)
2194 struct trace_buffer *buffer;
2195 unsigned long flags;
2197 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198 if (global_trace.stop_count++)
2201 /* Prevent the buffers from switching */
2202 arch_spin_lock(&global_trace.max_lock);
2204 buffer = global_trace.array_buffer.buffer;
2206 ring_buffer_record_disable(buffer);
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209 buffer = global_trace.max_buffer.buffer;
2211 ring_buffer_record_disable(buffer);
2214 arch_spin_unlock(&global_trace.max_lock);
2217 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2220 static void tracing_stop_tr(struct trace_array *tr)
2222 struct trace_buffer *buffer;
2223 unsigned long flags;
2225 /* If global, we need to also stop the max tracer */
2226 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227 return tracing_stop();
2229 raw_spin_lock_irqsave(&tr->start_lock, flags);
2230 if (tr->stop_count++)
2233 buffer = tr->array_buffer.buffer;
2235 ring_buffer_record_disable(buffer);
2238 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2241 static int trace_save_cmdline(struct task_struct *tsk)
2245 /* treat recording of idle task as a success */
2249 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253 * It's not the end of the world if we don't get
2254 * the lock, but we also don't want to spin
2255 * nor do we want to disable interrupts,
2256 * so if we miss here, then better luck next time.
2258 if (!arch_spin_trylock(&trace_cmdline_lock))
2261 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262 if (idx == NO_CMDLINE_MAP) {
2263 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2266 * Check whether the cmdline buffer at idx has a pid
2267 * mapped. We are going to overwrite that entry so we
2268 * need to clear the map_pid_to_cmdline. Otherwise we
2269 * would read the new comm for the old pid.
2271 pid = savedcmd->map_cmdline_to_pid[idx];
2272 if (pid != NO_CMDLINE_MAP)
2273 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2275 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2278 savedcmd->cmdline_idx = idx;
2281 set_cmdline(idx, tsk->comm);
2283 arch_spin_unlock(&trace_cmdline_lock);
2288 static void __trace_find_cmdline(int pid, char comm[])
2293 strcpy(comm, "<idle>");
2297 if (WARN_ON_ONCE(pid < 0)) {
2298 strcpy(comm, "<XXX>");
2302 if (pid > PID_MAX_DEFAULT) {
2303 strcpy(comm, "<...>");
2307 map = savedcmd->map_pid_to_cmdline[pid];
2308 if (map != NO_CMDLINE_MAP)
2309 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2311 strcpy(comm, "<...>");
2314 void trace_find_cmdline(int pid, char comm[])
2317 arch_spin_lock(&trace_cmdline_lock);
2319 __trace_find_cmdline(pid, comm);
2321 arch_spin_unlock(&trace_cmdline_lock);
2325 int trace_find_tgid(int pid)
2327 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2330 return tgid_map[pid];
2333 static int trace_save_tgid(struct task_struct *tsk)
2335 /* treat recording of idle task as a success */
2339 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2342 tgid_map[tsk->pid] = tsk->tgid;
2346 static bool tracing_record_taskinfo_skip(int flags)
2348 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2350 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2352 if (!__this_cpu_read(trace_taskinfo_save))
2358 * tracing_record_taskinfo - record the task info of a task
2360 * @task: task to record
2361 * @flags: TRACE_RECORD_CMDLINE for recording comm
2362 * TRACE_RECORD_TGID for recording tgid
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 if (tracing_record_taskinfo_skip(flags))
2372 * Record as much task information as possible. If some fail, continue
2373 * to try to record the others.
2375 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2378 /* If recording any information failed, retry soon. */
2382 __this_cpu_write(trace_taskinfo_save, false);
2386 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2388 * @prev: previous task during sched_switch
2389 * @next: next task during sched_switch
2390 * @flags: TRACE_RECORD_CMDLINE for recording comm
2391 * TRACE_RECORD_TGID for recording tgid
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394 struct task_struct *next, int flags)
2398 if (tracing_record_taskinfo_skip(flags))
2402 * Record as much task information as possible. If some fail, continue
2403 * to try to record the others.
2405 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2410 /* If recording any information failed, retry again soon. */
2414 __this_cpu_write(trace_taskinfo_save, false);
2417 /* Helpers to record specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2420 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2423 void tracing_record_tgid(struct task_struct *task)
2425 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431 * simplifies those functions and keeps them in sync.
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2435 return trace_seq_has_overflowed(s) ?
2436 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
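/*
 * Editorial sketch: typical use of trace_handle_return() at the end of
 * an event's trace() callback, mirroring how print_trace_fmt() below
 * consumes the result. my_event_trace() and its output format are
 * illustrative only.
 */
static enum print_line_t my_event_trace(struct trace_iterator *iter,
					int flags, struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event on CPU %d\n", iter->cpu);

	/* Collapses the overflow check into a single tail call. */
	return trace_handle_return(&iter->seq);
}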
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442 unsigned long flags, int pc)
2444 struct task_struct *tsk = current;
2446 entry->preempt_count = pc & 0xff;
2447 entry->pid = (tsk) ? tsk->pid : 0;
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2453 TRACE_FLAG_IRQS_NOSUPPORT |
2455 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2456 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2467 unsigned long flags, int pc)
2469 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2477 * trace_buffered_event_enable - enable buffering events
2479 * When events are being filtered, it is quicker to use a temporary
2480 * buffer to write the event data into if there's a likely chance
2481 * that it will not be committed. The discard of the ring buffer
2482 * is not as fast as committing, and is much slower than copying
2483 * a page.
2485 * When an event is to be filtered, allocate per cpu buffers to
2486 * write the event data into, and if the event is filtered and discarded
2487 * it is simply dropped; otherwise, the entire data is to be committed
2488 * in one shot.
2490 void trace_buffered_event_enable(void)
2492 struct ring_buffer_event *event;
2496 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2498 if (trace_buffered_event_ref++)
2501 for_each_tracing_cpu(cpu) {
2502 page = alloc_pages_node(cpu_to_node(cpu),
2503 GFP_KERNEL | __GFP_NORETRY, 0);
2507 event = page_address(page);
2508 memset(event, 0, sizeof(*event));
2510 per_cpu(trace_buffered_event, cpu) = event;
2513 if (cpu == smp_processor_id() &&
2514 this_cpu_read(trace_buffered_event) !=
2515 per_cpu(trace_buffered_event, cpu))
2522 trace_buffered_event_disable();
2525 static void enable_trace_buffered_event(void *data)
2527 /* Probably not needed, but do it anyway */
2529 this_cpu_dec(trace_buffered_event_cnt);
2532 static void disable_trace_buffered_event(void *data)
2534 this_cpu_inc(trace_buffered_event_cnt);
2538 * trace_buffered_event_disable - disable buffering events
2540 * When a filter is removed, it is faster to not use the buffered
2541 * events, and to commit directly into the ring buffer. Free up
2542 * the temp buffers when there are no more users. This requires
2543 * special synchronization with current events.
2545 void trace_buffered_event_disable(void)
2549 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2551 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2554 if (--trace_buffered_event_ref)
2558 /* For each CPU, set the buffer as used. */
2559 smp_call_function_many(tracing_buffer_mask,
2560 disable_trace_buffered_event, NULL, 1);
2563 /* Wait for all current users to finish */
2566 for_each_tracing_cpu(cpu) {
2567 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568 per_cpu(trace_buffered_event, cpu) = NULL;
2571 * Make sure trace_buffered_event is NULL before clearing
2572 * trace_buffered_event_cnt.
2577 /* Do the work on each cpu */
2578 smp_call_function_many(tracing_buffer_mask,
2579 enable_trace_buffered_event, NULL, 1);
2583 static struct trace_buffer *temp_buffer;
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587 struct trace_event_file *trace_file,
2588 int type, unsigned long len,
2589 unsigned long flags, int pc)
2591 struct ring_buffer_event *entry;
2594 *current_rb = trace_file->tr->array_buffer.buffer;
2596 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598 (entry = this_cpu_read(trace_buffered_event))) {
2599 /* Try to use the per cpu buffer first */
2600 val = this_cpu_inc_return(trace_buffered_event_cnt);
2602 trace_event_setup(entry, type, flags, pc);
2603 entry->array[0] = len;
2606 this_cpu_dec(trace_buffered_event_cnt);
2609 entry = __trace_buffer_lock_reserve(*current_rb,
2610 type, len, flags, pc);
2612 * If tracing is off, but we have triggers enabled
2613 * we still need to look at the event data. Use the temp_buffer
2614 * to store the trace event for the trigger to use. It's recursion
2615 * safe and will not be recorded anywhere.
2617 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618 *current_rb = temp_buffer;
2619 entry = __trace_buffer_lock_reserve(*current_rb,
2620 type, len, flags, pc);
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2631 struct trace_event_call *event_call;
2632 struct trace_event_file *file;
2633 struct trace_event *event;
2634 unsigned long flags;
2635 struct trace_iterator *iter = tracepoint_print_iter;
2637 /* We should never get here if iter is NULL */
2638 if (WARN_ON_ONCE(!iter))
2641 event_call = fbuffer->trace_file->event_call;
2642 if (!event_call || !event_call->event.funcs ||
2643 !event_call->event.funcs->trace)
2646 file = fbuffer->trace_file;
2647 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649 !filter_match_preds(file->filter, fbuffer->entry)))
2652 event = &fbuffer->trace_file->event_call->event;
2654 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655 trace_seq_init(&iter->seq);
2656 iter->ent = fbuffer->entry;
2657 event_call->event.funcs->trace(iter, 0, event);
2658 trace_seq_putc(&iter->seq, 0);
2659 printk("%s", iter->seq.buffer);
2661 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665 void *buffer, size_t *lenp,
2668 int save_tracepoint_printk;
2671 mutex_lock(&tracepoint_printk_mutex);
2672 save_tracepoint_printk = tracepoint_printk;
2674 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2677 * This will force exiting early, as tracepoint_printk
2678 * is always zero when tracepoint_print_iter is not allocated.
2680 if (!tracepoint_print_iter)
2681 tracepoint_printk = 0;
2683 if (save_tracepoint_printk == tracepoint_printk)
2686 if (tracepoint_printk)
2687 static_key_enable(&tracepoint_printk_key.key);
2689 static_key_disable(&tracepoint_printk_key.key);
2692 mutex_unlock(&tracepoint_printk_mutex);
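/*
 * Editorial note: this handler is normally driven from userspace.
 * Writing the sysctl flips tracepoint_printk_key, which makes
 * trace_event_buffer_commit() below route events through
 * output_printk() above:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 */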
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2699 if (static_key_false(&tracepoint_printk_key.key))
2700 output_printk(fbuffer);
2702 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703 fbuffer->event, fbuffer->entry,
2704 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
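/*
 * Editorial sketch: the reserve/fill/commit pairing that generated
 * event probes follow. Real probes go through a helper that fills the
 * fbuffer for them; example_probe() and its use of a bare
 * struct trace_entry payload are illustrative only.
 */
static void example_probe(struct trace_event_file *trace_file, int type,
			  unsigned long irq_flags, int pc)
{
	struct trace_event_buffer fbuffer = {
		.trace_file	= trace_file,
		.flags		= irq_flags,
		.pc		= pc,
	};

	fbuffer.event = trace_event_buffer_lock_reserve(&fbuffer.buffer,
					trace_file, type,
					sizeof(struct trace_entry),
					irq_flags, pc);
	if (!fbuffer.event)
		return;

	fbuffer.entry = ring_buffer_event_data(fbuffer.event);
	/* ... fill event-specific fields of the entry here ... */
	trace_event_buffer_commit(&fbuffer);
}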
2711 * trace_buffer_unlock_commit_regs()
2712 * trace_event_buffer_commit()
2713 * trace_event_raw_event_xxx()
2715 # define STACK_SKIP 3
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718 struct trace_buffer *buffer,
2719 struct ring_buffer_event *event,
2720 unsigned long flags, int pc,
2721 struct pt_regs *regs)
2723 __buffer_unlock_commit(buffer, event);
2726 * If regs is not set, then skip the necessary functions.
2727 * Note, we can still get here via blktrace, wakeup tracer
2728 * and mmiotrace, but that's ok if they lose a function or
2729 * two. They are not that meaningful.
2731 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732 ftrace_trace_userstack(buffer, flags, pc);
2736 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740 struct ring_buffer_event *event)
2742 __buffer_unlock_commit(buffer, event);
2746 trace_process_export(struct trace_export *export,
2747 struct ring_buffer_event *event)
2749 struct trace_entry *entry;
2750 unsigned int size = 0;
2752 entry = ring_buffer_event_data(event);
2753 size = ring_buffer_event_length(event);
2754 export->write(export, entry, size);
2757 static DEFINE_MUTEX(ftrace_export_lock);
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2763 static inline void ftrace_exports_enable(void)
2765 static_branch_enable(&ftrace_exports_enabled);
2768 static inline void ftrace_exports_disable(void)
2770 static_branch_disable(&ftrace_exports_enabled);
2773 static void ftrace_exports(struct ring_buffer_event *event)
2775 struct trace_export *export;
2777 preempt_disable_notrace();
2779 export = rcu_dereference_raw_check(ftrace_exports_list);
2781 trace_process_export(export, event);
2782 export = rcu_dereference_raw_check(export->next);
2785 preempt_enable_notrace();
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2791 rcu_assign_pointer(export->next, *list);
2793 * We are inserting the export into the list, but another
2794 * CPU might be walking that list. We need to make sure
2795 * the export->next pointer is valid before another CPU
2796 * can see the export inserted into the list.
2798 rcu_assign_pointer(*list, export);
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2804 struct trace_export **p;
2806 for (p = list; *p != NULL; p = &(*p)->next)
2813 rcu_assign_pointer(*p, (*p)->next);
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2822 ftrace_exports_enable();
2824 add_trace_export(list, export);
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 ret = rm_trace_export(list, export);
2834 ftrace_exports_disable();
2839 int register_ftrace_export(struct trace_export *export)
2841 if (WARN_ON_ONCE(!export->write))
2844 mutex_lock(&ftrace_export_lock);
2846 add_ftrace_export(&ftrace_exports_list, export);
2848 mutex_unlock(&ftrace_export_lock);
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
2854 int unregister_ftrace_export(struct trace_export *export)
2858 mutex_lock(&ftrace_export_lock);
2860 ret = rm_ftrace_export(&ftrace_exports_list, export);
2862 mutex_unlock(&ftrace_export_lock);
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
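/*
 * Editorial sketch: a minimal trace_export consumer for the API above.
 * The write() callback receives the raw entry and its size, exactly as
 * trace_process_export() above passes them; example_export_write() and
 * the export name are illustrative only.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward the raw trace entry, e.g. over an out-of-band channel. */
}

static struct trace_export example_export = {
	.write = example_export_write,
};

/* Hook it in with register_ftrace_export(&example_export). */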
2869 trace_function(struct trace_array *tr,
2870 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2873 struct trace_event_call *call = &event_function;
2874 struct trace_buffer *buffer = tr->array_buffer.buffer;
2875 struct ring_buffer_event *event;
2876 struct ftrace_entry *entry;
2878 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882 entry = ring_buffer_event_data(event);
2884 entry->parent_ip = parent_ip;
2886 if (!call_filter_check_discard(call, entry, buffer, event)) {
2887 if (static_branch_unlikely(&ftrace_exports_enabled))
2888 ftrace_exports(event);
2889 __buffer_unlock_commit(buffer, event);
2893 #ifdef CONFIG_STACKTRACE
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING 4
2898 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2900 struct ftrace_stack {
2901 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2905 struct ftrace_stacks {
2906 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913 unsigned long flags,
2914 int skip, int pc, struct pt_regs *regs)
2916 struct trace_event_call *call = &event_kernel_stack;
2917 struct ring_buffer_event *event;
2918 unsigned int size, nr_entries;
2919 struct ftrace_stack *fstack;
2920 struct stack_entry *entry;
2924 * Add one, for this function and the call to save_stack_trace().
2925 * If regs is set, then these functions will not be in the way.
2927 #ifndef CONFIG_UNWINDER_ORC
2932 preempt_disable_notrace();
2934 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2936 /* This should never happen. If it does, yell once and skip */
2937 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2941 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2942 * interrupt will either see the value pre increment or post
2943 * increment. If the interrupt happens pre increment it will have
2944 * restored the counter when it returns. We just need a barrier to
2945 * keep gcc from moving things around.
2949 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2950 size = ARRAY_SIZE(fstack->calls);
2953 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2956 nr_entries = stack_trace_save(fstack->calls, size, skip);
2959 size = nr_entries * sizeof(unsigned long);
2960 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2961 sizeof(*entry) + size, flags, pc);
2964 entry = ring_buffer_event_data(event);
2966 memcpy(&entry->caller, fstack->calls, size);
2967 entry->size = nr_entries;
2969 if (!call_filter_check_discard(call, entry, buffer, event))
2970 __buffer_unlock_commit(buffer, event);
2973 /* Again, don't let gcc optimize things here */
2975 __this_cpu_dec(ftrace_stack_reserve);
2976 preempt_enable_notrace();
2980 static inline void ftrace_trace_stack(struct trace_array *tr,
2981 struct trace_buffer *buffer,
2982 unsigned long flags,
2983 int skip, int pc, struct pt_regs *regs)
2985 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2988 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2991 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2994 struct trace_buffer *buffer = tr->array_buffer.buffer;
2996 if (rcu_is_watching()) {
2997 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3002 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3003 * but if the above rcu_is_watching() failed, then the NMI
3004 * triggered someplace critical, and rcu_irq_enter() should
3005 * not be called from NMI.
3007 if (unlikely(in_nmi()))
3010 rcu_irq_enter_irqson();
3011 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3012 rcu_irq_exit_irqson();
3016 * trace_dump_stack - record a stack back trace in the trace buffer
3017 * @skip: Number of functions to skip (helper handlers)
3019 void trace_dump_stack(int skip)
3021 unsigned long flags;
3023 if (tracing_disabled || tracing_selftest_running)
3026 local_save_flags(flags);
3028 #ifndef CONFIG_UNWINDER_ORC
3029 /* Skip 1 to skip this function. */
3032 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3033 flags, skip, preempt_count(), NULL);
3035 EXPORT_SYMBOL_GPL(trace_dump_stack);
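/*
 * Editorial example: record the caller's stack into the global trace
 * buffer; skip==0 keeps every frame above the internal helpers.
 */
static inline void example_dump_here(void)
{
	trace_dump_stack(0);
}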
3037 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3038 static DEFINE_PER_CPU(int, user_stack_count);
3041 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3043 struct trace_event_call *call = &event_user_stack;
3044 struct ring_buffer_event *event;
3045 struct userstack_entry *entry;
3047 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3051 * NMIs cannot handle page faults, even with fixups.
3052 * Saving the user stack can (and often does) fault.
3054 if (unlikely(in_nmi()))
3058 * prevent recursion, since the user stack tracing may
3059 * trigger other kernel events.
3062 if (__this_cpu_read(user_stack_count))
3065 __this_cpu_inc(user_stack_count);
3067 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3068 sizeof(*entry), flags, pc);
3070 goto out_drop_count;
3071 entry = ring_buffer_event_data(event);
3073 entry->tgid = current->tgid;
3074 memset(&entry->caller, 0, sizeof(entry->caller));
3076 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3077 if (!call_filter_check_discard(call, entry, buffer, event))
3078 __buffer_unlock_commit(buffer, event);
3081 __this_cpu_dec(user_stack_count);
3085 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3086 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3087 unsigned long flags, int pc)
3090 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3092 #endif /* CONFIG_STACKTRACE */
3094 /* created for use with alloc_percpu */
3095 struct trace_buffer_struct {
3096 int nesting;
3097 char buffer[4][TRACE_BUF_SIZE];
3100 static struct trace_buffer_struct *trace_percpu_buffer;
3103 * This allows for lockless recording. If we're nested too deeply, then
3104 * this returns NULL.
3106 static char *get_trace_buf(void)
3108 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3110 if (!buffer || buffer->nesting >= 4)
3115 /* Interrupts must see nesting incremented before we use the buffer */
3117 return &buffer->buffer[buffer->nesting][0];
3120 static void put_trace_buf(void)
3122 /* Don't let the decrement of nesting leak before this */
3124 this_cpu_dec(trace_percpu_buffer->nesting);
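/*
 * Editorial sketch: the intended pairing of the two helpers above, as
 * trace_vbprintk()/__trace_array_vprintk() below use them. Preemption
 * must already be disabled so the per-cpu pointer stays stable:
 *
 *	char *tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *		...
 *		put_trace_buf();
 *	}
 */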
3127 static int alloc_percpu_trace_buffer(void)
3129 struct trace_buffer_struct *buffers;
3131 if (trace_percpu_buffer)
3134 buffers = alloc_percpu(struct trace_buffer_struct);
3135 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3138 trace_percpu_buffer = buffers;
3142 static int buffers_allocated;
3144 void trace_printk_init_buffers(void)
3146 if (buffers_allocated)
3149 if (alloc_percpu_trace_buffer())
3152 /* trace_printk() is for debug use only. Don't use it in production. */
3155 pr_warn("**********************************************************\n");
3156 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3158 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3160 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3161 pr_warn("** unsafe for production use. **\n");
3163 pr_warn("** If you see this message and you are not debugging **\n");
3164 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3166 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3167 pr_warn("**********************************************************\n");
3169 /* Expand the buffers to set size */
3170 tracing_update_buffers();
3172 buffers_allocated = 1;
3175 * trace_printk_init_buffers() can be called by modules.
3176 * If that happens, then we need to start cmdline recording
3177 * directly here. If the global_trace.buffer is already
3178 * allocated here, then this was called by module code.
3180 if (global_trace.array_buffer.buffer)
3181 tracing_start_cmdline_record();
3183 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3185 void trace_printk_start_comm(void)
3187 /* Start tracing comms if trace printk is set */
3188 if (!buffers_allocated)
3190 tracing_start_cmdline_record();
3193 static void trace_printk_start_stop_comm(int enabled)
3195 if (!buffers_allocated)
3199 tracing_start_cmdline_record();
3201 tracing_stop_cmdline_record();
3205 * trace_vbprintk - write binary msg to tracing buffer
3206 * @ip: The address of the caller
3207 * @fmt: The string format to write to the buffer
3208 * @args: Arguments for @fmt
3210 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3212 struct trace_event_call *call = &event_bprint;
3213 struct ring_buffer_event *event;
3214 struct trace_buffer *buffer;
3215 struct trace_array *tr = &global_trace;
3216 struct bprint_entry *entry;
3217 unsigned long flags;
3219 int len = 0, size, pc;
3221 if (unlikely(tracing_selftest_running || tracing_disabled))
3224 /* Don't pollute graph traces with trace_vprintk internals */
3225 pause_graph_tracing();
3227 pc = preempt_count();
3228 preempt_disable_notrace();
3230 tbuffer = get_trace_buf();
3236 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3238 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3241 local_save_flags(flags);
3242 size = sizeof(*entry) + sizeof(u32) * len;
3243 buffer = tr->array_buffer.buffer;
3244 ring_buffer_nest_start(buffer);
3245 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3249 entry = ring_buffer_event_data(event);
3253 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3254 if (!call_filter_check_discard(call, entry, buffer, event)) {
3255 __buffer_unlock_commit(buffer, event);
3256 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3260 ring_buffer_nest_end(buffer);
3265 preempt_enable_notrace();
3266 unpause_graph_tracing();
3270 EXPORT_SYMBOL_GPL(trace_vbprintk);
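/*
 * Editorial note: trace_printk() with a compile-time-constant format
 * expands to __trace_bprintk() and ends up in trace_vbprintk() above;
 * otherwise it falls back to trace_vprintk() further below. Typical use:
 *
 *	trace_printk("reached %s, x=%d\n", __func__, x);
 */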
3274 __trace_array_vprintk(struct trace_buffer *buffer,
3275 unsigned long ip, const char *fmt, va_list args)
3277 struct trace_event_call *call = &event_print;
3278 struct ring_buffer_event *event;
3279 int len = 0, size, pc;
3280 struct print_entry *entry;
3281 unsigned long flags;
3284 if (tracing_disabled || tracing_selftest_running)
3287 /* Don't pollute graph traces with trace_vprintk internals */
3288 pause_graph_tracing();
3290 pc = preempt_count();
3291 preempt_disable_notrace();
3294 tbuffer = get_trace_buf();
3300 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3302 local_save_flags(flags);
3303 size = sizeof(*entry) + len + 1;
3304 ring_buffer_nest_start(buffer);
3305 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3309 entry = ring_buffer_event_data(event);
3312 memcpy(&entry->buf, tbuffer, len + 1);
3313 if (!call_filter_check_discard(call, entry, buffer, event)) {
3314 __buffer_unlock_commit(buffer, event);
3315 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3319 ring_buffer_nest_end(buffer);
3323 preempt_enable_notrace();
3324 unpause_graph_tracing();
3330 int trace_array_vprintk(struct trace_array *tr,
3331 unsigned long ip, const char *fmt, va_list args)
3333 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3337 * trace_array_printk - Print a message to a specific instance
3338 * @tr: The instance trace_array descriptor
3339 * @ip: The instruction pointer that this is called from.
3340 * @fmt: The format to print (printf format)
3342 * If a subsystem sets up its own instance, they have the right to
3343 * printk strings into their tracing instance buffer using this
3344 * function. Note, this function will not write into the top level
3345 * buffer (use trace_printk() for that), as the top level buffer
3346 * should only contain events that can be individually disabled.
3347 * trace_printk() is only used for debugging a kernel, and should
3348 * never be incorporated into normal use.
3350 * trace_array_printk() can be used, as it will not add noise to the
3351 * top level tracing buffer.
3353 * Note, trace_array_init_printk() must be called on @tr before this
3357 int trace_array_printk(struct trace_array *tr,
3358 unsigned long ip, const char *fmt, ...)
3366 /* This is only allowed for created instances */
3367 if (tr == &global_trace)
3370 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3374 ret = trace_array_vprintk(tr, ip, fmt, ap);
3378 EXPORT_SYMBOL_GPL(trace_array_printk);
3381 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3382 * @tr: The trace array to initialize the buffers for
3384 * As trace_array_printk() only writes into instances, they are OK to
3385 * have in the kernel (unlike trace_printk()). This needs to be called
3386 * before trace_array_printk() can be used on a trace_array.
3388 int trace_array_init_printk(struct trace_array *tr)
3393 /* This is only allowed for created instances */
3394 if (tr == &global_trace)
3397 return alloc_percpu_trace_buffer();
3399 EXPORT_SYMBOL_GPL(trace_array_init_printk);
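/*
 * Editorial sketch: end-to-end use of the two calls above from a
 * subsystem that owns an instance. "example" is an illustrative
 * instance name; trace_array_get_by_name() and trace_array_put() are
 * the <linux/trace.h> helpers for looking up and releasing an instance.
 */
static int example_instance_printk(void)
{
	struct trace_array *tr = trace_array_get_by_name("example");
	int ret;

	if (!tr)
		return -ENODEV;

	ret = trace_array_init_printk(tr);
	if (!ret)
		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);

	trace_array_put(tr);
	return ret;
}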
3402 int trace_array_printk_buf(struct trace_buffer *buffer,
3403 unsigned long ip, const char *fmt, ...)
3408 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3412 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3418 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3420 return trace_array_vprintk(&global_trace, ip, fmt, args);
3422 EXPORT_SYMBOL_GPL(trace_vprintk);
3424 static void trace_iterator_increment(struct trace_iterator *iter)
3426 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3430 ring_buffer_iter_advance(buf_iter);
3433 static struct trace_entry *
3434 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3435 unsigned long *lost_events)
3437 struct ring_buffer_event *event;
3438 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3441 event = ring_buffer_iter_peek(buf_iter, ts);
3443 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3444 (unsigned long)-1 : 0;
3446 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3451 iter->ent_size = ring_buffer_event_length(event);
3452 return ring_buffer_event_data(event);
3458 static struct trace_entry *
3459 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3460 unsigned long *missing_events, u64 *ent_ts)
3462 struct trace_buffer *buffer = iter->array_buffer->buffer;
3463 struct trace_entry *ent, *next = NULL;
3464 unsigned long lost_events = 0, next_lost = 0;
3465 int cpu_file = iter->cpu_file;
3466 u64 next_ts = 0, ts;
3472 * If we are in a per_cpu trace file, don't bother iterating over
3473 * all the cpus; just peek at that cpu directly.
3475 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3476 if (ring_buffer_empty_cpu(buffer, cpu_file))
3478 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3480 *ent_cpu = cpu_file;
3485 for_each_tracing_cpu(cpu) {
3487 if (ring_buffer_empty_cpu(buffer, cpu))
3490 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3493 * Pick the entry with the smallest timestamp:
3495 if (ent && (!next || ts < next_ts)) {
3499 next_lost = lost_events;
3500 next_size = iter->ent_size;
3504 iter->ent_size = next_size;
3507 *ent_cpu = next_cpu;
3513 *missing_events = next_lost;
3518 #define STATIC_TEMP_BUF_SIZE 128
3519 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3521 /* Find the next real entry, without updating the iterator itself */
3522 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3523 int *ent_cpu, u64 *ent_ts)
3525 /* __find_next_entry will reset ent_size */
3526 int ent_size = iter->ent_size;
3527 struct trace_entry *entry;
3530 * If called from ftrace_dump(), then the iter->temp buffer
3531 * will be the static_temp_buf and not created from kmalloc.
3532 * If the entry size is greater than the buffer, we cannot
3533 * save it. Just return NULL in that case. This is only
3534 * used to add markers when two consecutive events' time
3535 * stamps have a large delta. See trace_print_lat_context()
3537 if (iter->temp == static_temp_buf &&
3538 STATIC_TEMP_BUF_SIZE < ent_size)
3542 * The __find_next_entry() may call peek_next_entry(), which may
3543 * call ring_buffer_peek(), which can make the contents of iter->ent
3544 * undefined. Need to copy iter->ent now.
3546 if (iter->ent && iter->ent != iter->temp) {
3547 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3548 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3550 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3555 iter->temp_size = iter->ent_size;
3557 memcpy(iter->temp, iter->ent, iter->ent_size);
3558 iter->ent = iter->temp;
3560 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3561 /* Put back the original ent_size */
3562 iter->ent_size = ent_size;
3567 /* Find the next real entry, and increment the iterator to the next entry */
3568 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3570 iter->ent = __find_next_entry(iter, &iter->cpu,
3571 &iter->lost_events, &iter->ts);
3574 trace_iterator_increment(iter);
3576 return iter->ent ? iter : NULL;
3579 static void trace_consume(struct trace_iterator *iter)
3581 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3582 &iter->lost_events);
3585 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3587 struct trace_iterator *iter = m->private;
3591 WARN_ON_ONCE(iter->leftover);
3595 /* can't go backwards */
3600 ent = trace_find_next_entry_inc(iter);
3604 while (ent && iter->idx < i)
3605 ent = trace_find_next_entry_inc(iter);
3612 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3614 struct ring_buffer_iter *buf_iter;
3615 unsigned long entries = 0;
3618 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3620 buf_iter = trace_buffer_iter(iter, cpu);
3624 ring_buffer_iter_reset(buf_iter);
3627 * With the max latency tracers, we could have the case
3628 * that a reset never took place on a cpu. This is evident
3629 * from the timestamp being before the start of the buffer.
3631 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3632 if (ts >= iter->array_buffer->time_start)
3635 ring_buffer_iter_advance(buf_iter);
3638 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3642 * The current tracer is copied to avoid a global locking
3643 * all around.
3645 static void *s_start(struct seq_file *m, loff_t *pos)
3647 struct trace_iterator *iter = m->private;
3648 struct trace_array *tr = iter->tr;
3649 int cpu_file = iter->cpu_file;
3655 * copy the tracer to avoid using a global lock all around.
3656 * iter->trace is a copy of current_trace; the pointer to the
3657 * name may be used instead of a strcmp(), as iter->trace->name
3658 * will point to the same string as current_trace->name.
3660 mutex_lock(&trace_types_lock);
3661 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3662 *iter->trace = *tr->current_trace;
3663 mutex_unlock(&trace_types_lock);
3665 #ifdef CONFIG_TRACER_MAX_TRACE
3666 if (iter->snapshot && iter->trace->use_max_tr)
3667 return ERR_PTR(-EBUSY);
3670 if (!iter->snapshot)
3671 atomic_inc(&trace_record_taskinfo_disabled);
3673 if (*pos != iter->pos) {
3678 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3679 for_each_tracing_cpu(cpu)
3680 tracing_iter_reset(iter, cpu);
3682 tracing_iter_reset(iter, cpu_file);
3685 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3690 * If we overflowed the seq_file before, then we want
3691 * to just reuse the trace_seq buffer again.
3697 p = s_next(m, p, &l);
3701 trace_event_read_lock();
3702 trace_access_lock(cpu_file);
3706 static void s_stop(struct seq_file *m, void *p)
3708 struct trace_iterator *iter = m->private;
3710 #ifdef CONFIG_TRACER_MAX_TRACE
3711 if (iter->snapshot && iter->trace->use_max_tr)
3715 if (!iter->snapshot)
3716 atomic_dec(&trace_record_taskinfo_disabled);
3718 trace_access_unlock(iter->cpu_file);
3719 trace_event_read_unlock();
3723 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3724 unsigned long *entries, int cpu)
3726 unsigned long count;
3728 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3730 * If this buffer has skipped entries, then we hold all
3731 * entries for the trace and we need to ignore the
3732 * ones before the time stamp.
3734 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3735 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3736 /* total is the same as the entries */
3740 ring_buffer_overrun_cpu(buf->buffer, cpu);
3745 get_total_entries(struct array_buffer *buf,
3746 unsigned long *total, unsigned long *entries)
3754 for_each_tracing_cpu(cpu) {
3755 get_total_entries_cpu(buf, &t, &e, cpu);
3761 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3763 unsigned long total, entries;
3768 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3773 unsigned long trace_total_entries(struct trace_array *tr)
3775 unsigned long total, entries;
3780 get_total_entries(&tr->array_buffer, &total, &entries);
3785 static void print_lat_help_header(struct seq_file *m)
3787 seq_puts(m, "# _------=> CPU# \n"
3788 "# / _-----=> irqs-off \n"
3789 "# | / _----=> need-resched \n"
3790 "# || / _---=> hardirq/softirq \n"
3791 "# ||| / _--=> preempt-depth \n"
3793 "# cmd pid ||||| time | caller \n"
3794 "# \\ / ||||| \\ | / \n");
3797 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3799 unsigned long total;
3800 unsigned long entries;
3802 get_total_entries(buf, &total, &entries);
3803 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3804 entries, total, num_online_cpus());
3808 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3811 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3813 print_event_info(buf, m);
3815 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3816 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3819 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3822 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3823 const char *space = " ";
3824 int prec = tgid ? 12 : 2;
3826 print_event_info(buf, m);
3828 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3829 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3830 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3831 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3832 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3833 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3834 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3838 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3840 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3841 struct array_buffer *buf = iter->array_buffer;
3842 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3843 struct tracer *type = iter->trace;
3844 unsigned long entries;
3845 unsigned long total;
3846 const char *name = "preemption";
3850 get_total_entries(buf, &total, &entries);
3852 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3854 seq_puts(m, "# -----------------------------------"
3855 "---------------------------------\n");
3856 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3857 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3858 nsecs_to_usecs(data->saved_latency),
3862 #if defined(CONFIG_PREEMPT_NONE)
3864 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3866 #elif defined(CONFIG_PREEMPT)
3868 #elif defined(CONFIG_PREEMPT_RT)
3873 /* These are reserved for later use */
3876 seq_printf(m, " #P:%d)\n", num_online_cpus());
3880 seq_puts(m, "# -----------------\n");
3881 seq_printf(m, "# | task: %.16s-%d "
3882 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3883 data->comm, data->pid,
3884 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3885 data->policy, data->rt_priority);
3886 seq_puts(m, "# -----------------\n");
3888 if (data->critical_start) {
3889 seq_puts(m, "# => started at: ");
3890 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3891 trace_print_seq(m, &iter->seq);
3892 seq_puts(m, "\n# => ended at: ");
3893 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3894 trace_print_seq(m, &iter->seq);
3895 seq_puts(m, "\n#\n");
3901 static void test_cpu_buff_start(struct trace_iterator *iter)
3903 struct trace_seq *s = &iter->seq;
3904 struct trace_array *tr = iter->tr;
3906 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3909 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3912 if (cpumask_available(iter->started) &&
3913 cpumask_test_cpu(iter->cpu, iter->started))
3916 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3919 if (cpumask_available(iter->started))
3920 cpumask_set_cpu(iter->cpu, iter->started);
3922 /* Don't print started cpu buffer for the first entry of the trace */
3924 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3928 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3930 struct trace_array *tr = iter->tr;
3931 struct trace_seq *s = &iter->seq;
3932 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3933 struct trace_entry *entry;
3934 struct trace_event *event;
3938 test_cpu_buff_start(iter);
3940 event = ftrace_find_event(entry->type);
3942 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3943 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3944 trace_print_lat_context(iter);
3946 trace_print_context(iter);
3949 if (trace_seq_has_overflowed(s))
3950 return TRACE_TYPE_PARTIAL_LINE;
3953 return event->funcs->trace(iter, sym_flags, event);
3955 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3957 return trace_handle_return(s);
3960 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3962 struct trace_array *tr = iter->tr;
3963 struct trace_seq *s = &iter->seq;
3964 struct trace_entry *entry;
3965 struct trace_event *event;
3969 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3970 trace_seq_printf(s, "%d %d %llu ",
3971 entry->pid, iter->cpu, iter->ts);
3973 if (trace_seq_has_overflowed(s))
3974 return TRACE_TYPE_PARTIAL_LINE;
3976 event = ftrace_find_event(entry->type);
3978 return event->funcs->raw(iter, 0, event);
3980 trace_seq_printf(s, "%d ?\n", entry->type);
3982 return trace_handle_return(s);
3985 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3987 struct trace_array *tr = iter->tr;
3988 struct trace_seq *s = &iter->seq;
3989 unsigned char newline = '\n';
3990 struct trace_entry *entry;
3991 struct trace_event *event;
3995 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3996 SEQ_PUT_HEX_FIELD(s, entry->pid);
3997 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3998 SEQ_PUT_HEX_FIELD(s, iter->ts);
3999 if (trace_seq_has_overflowed(s))
4000 return TRACE_TYPE_PARTIAL_LINE;
4003 event = ftrace_find_event(entry->type);
4005 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4006 if (ret != TRACE_TYPE_HANDLED)
4010 SEQ_PUT_FIELD(s, newline);
4012 return trace_handle_return(s);
4015 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4017 struct trace_array *tr = iter->tr;
4018 struct trace_seq *s = &iter->seq;
4019 struct trace_entry *entry;
4020 struct trace_event *event;
4024 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4025 SEQ_PUT_FIELD(s, entry->pid);
4026 SEQ_PUT_FIELD(s, iter->cpu);
4027 SEQ_PUT_FIELD(s, iter->ts);
4028 if (trace_seq_has_overflowed(s))
4029 return TRACE_TYPE_PARTIAL_LINE;
4032 event = ftrace_find_event(entry->type);
4033 return event ? event->funcs->binary(iter, 0, event) :
4037 int trace_empty(struct trace_iterator *iter)
4039 struct ring_buffer_iter *buf_iter;
4042 /* If we are looking at one CPU buffer, only check that one */
4043 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4044 cpu = iter->cpu_file;
4045 buf_iter = trace_buffer_iter(iter, cpu);
4047 if (!ring_buffer_iter_empty(buf_iter))
4050 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4056 for_each_tracing_cpu(cpu) {
4057 buf_iter = trace_buffer_iter(iter, cpu);
4059 if (!ring_buffer_iter_empty(buf_iter))
4062 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4070 /* Called with trace_event_read_lock() held. */
4071 enum print_line_t print_trace_line(struct trace_iterator *iter)
4073 struct trace_array *tr = iter->tr;
4074 unsigned long trace_flags = tr->trace_flags;
4075 enum print_line_t ret;
4077 if (iter->lost_events) {
4078 if (iter->lost_events == (unsigned long)-1)
4079 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4082 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4083 iter->cpu, iter->lost_events);
4084 if (trace_seq_has_overflowed(&iter->seq))
4085 return TRACE_TYPE_PARTIAL_LINE;
4088 if (iter->trace && iter->trace->print_line) {
4089 ret = iter->trace->print_line(iter);
4090 if (ret != TRACE_TYPE_UNHANDLED)
4094 if (iter->ent->type == TRACE_BPUTS &&
4095 trace_flags & TRACE_ITER_PRINTK &&
4096 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4097 return trace_print_bputs_msg_only(iter);
4099 if (iter->ent->type == TRACE_BPRINT &&
4100 trace_flags & TRACE_ITER_PRINTK &&
4101 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4102 return trace_print_bprintk_msg_only(iter);
4104 if (iter->ent->type == TRACE_PRINT &&
4105 trace_flags & TRACE_ITER_PRINTK &&
4106 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4107 return trace_print_printk_msg_only(iter);
4109 if (trace_flags & TRACE_ITER_BIN)
4110 return print_bin_fmt(iter);
4112 if (trace_flags & TRACE_ITER_HEX)
4113 return print_hex_fmt(iter);
4115 if (trace_flags & TRACE_ITER_RAW)
4116 return print_raw_fmt(iter);
4118 return print_trace_fmt(iter);
4121 void trace_latency_header(struct seq_file *m)
4123 struct trace_iterator *iter = m->private;
4124 struct trace_array *tr = iter->tr;
4126 /* print nothing if the buffers are empty */
4127 if (trace_empty(iter))
4130 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4131 print_trace_header(m, iter);
4133 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4134 print_lat_help_header(m);
4137 void trace_default_header(struct seq_file *m)
4139 struct trace_iterator *iter = m->private;
4140 struct trace_array *tr = iter->tr;
4141 unsigned long trace_flags = tr->trace_flags;
4143 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4146 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4147 /* print nothing if the buffers are empty */
4148 if (trace_empty(iter))
4150 print_trace_header(m, iter);
4151 if (!(trace_flags & TRACE_ITER_VERBOSE))
4152 print_lat_help_header(m);
4154 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4155 if (trace_flags & TRACE_ITER_IRQ_INFO)
4156 print_func_help_header_irq(iter->array_buffer,
4159 print_func_help_header(iter->array_buffer, m,
4165 static void test_ftrace_alive(struct seq_file *m)
4167 if (!ftrace_is_dead())
4169 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4170 "# MAY BE MISSING FUNCTION EVENTS\n");
4173 #ifdef CONFIG_TRACER_MAX_TRACE
4174 static void show_snapshot_main_help(struct seq_file *m)
4176 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4177 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4178 "# Takes a snapshot of the main buffer.\n"
4179 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4180 "# (Doesn't have to be '2' works with any number that\n"
4181 "# is not a '0' or '1')\n");
4184 static void show_snapshot_percpu_help(struct seq_file *m)
4186 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4187 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4188 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4189 "# Takes a snapshot of the main buffer for this cpu.\n");
4191 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4192 "# Must use main snapshot file to allocate.\n");
4194 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4195 "# (Doesn't have to be '2' works with any number that\n"
4196 "# is not a '0' or '1')\n");
4199 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4201 if (iter->tr->allocated_snapshot)
4202 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4204 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4206 seq_puts(m, "# Snapshot commands:\n");
4207 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4208 show_snapshot_main_help(m);
4210 show_snapshot_percpu_help(m);
4213 /* Should never be called */
4214 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4217 static int s_show(struct seq_file *m, void *v)
4219 struct trace_iterator *iter = v;
4222 if (iter->ent == NULL) {
4224 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4226 test_ftrace_alive(m);
4228 if (iter->snapshot && trace_empty(iter))
4229 print_snapshot_help(m, iter);
4230 else if (iter->trace && iter->trace->print_header)
4231 iter->trace->print_header(m);
4233 trace_default_header(m);
4235 } else if (iter->leftover) {
4237 * If we filled the seq_file buffer earlier, we
4238 * want to just show it now.
4240 ret = trace_print_seq(m, &iter->seq);
4242 /* ret should this time be zero, but you never know */
4243 iter->leftover = ret;
4246 print_trace_line(iter);
4247 ret = trace_print_seq(m, &iter->seq);
4249 * If we overflow the seq_file buffer, then it will
4250 * ask us for this data again at start up.
4252 * ret is 0 if seq_file write succeeded.
4255 iter->leftover = ret;
4262 * Should be used after trace_array_get(); trace_types_lock
4263 * ensures that i_cdev was already initialized.
4265 static inline int tracing_get_cpu(struct inode *inode)
4267 if (inode->i_cdev) /* See trace_create_cpu_file() */
4268 return (long)inode->i_cdev - 1;
4269 return RING_BUFFER_ALL_CPUS;
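/*
 * Editorial note: the matching encode side lives in
 * trace_create_cpu_file() (not shown here), which stores the cpu number
 * biased by one so that a NULL i_cdev can stand for "all CPUs";
 * roughly (paraphrased from the counterpart helper):
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 */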
4272 static const struct seq_operations tracer_seq_ops = {
4279 static struct trace_iterator *
4280 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4282 struct trace_array *tr = inode->i_private;
4283 struct trace_iterator *iter;
4286 if (tracing_disabled)
4287 return ERR_PTR(-ENODEV);
4289 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4291 return ERR_PTR(-ENOMEM);
4293 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4295 if (!iter->buffer_iter)
4299 * trace_find_next_entry() may need to save off iter->ent.
4300 * It will place it into the iter->temp buffer. As most
4301 * events are less than 128 bytes, allocate a buffer of that size.
4302 * If one is greater, then trace_find_next_entry() will
4303 * allocate a new buffer to adjust for the bigger iter->ent.
4304 * It's not critical if it fails to get allocated here.
4306 iter->temp = kmalloc(128, GFP_KERNEL);
4308 iter->temp_size = 128;
4311 * We make a copy of the current tracer to avoid concurrent
4312 * changes on it while we are reading.
4314 mutex_lock(&trace_types_lock);
4315 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4319 *iter->trace = *tr->current_trace;
4321 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4326 #ifdef CONFIG_TRACER_MAX_TRACE
4327 /* Currently only the top directory has a snapshot */
4328 if (tr->current_trace->print_max || snapshot)
4329 iter->array_buffer = &tr->max_buffer;
4332 iter->array_buffer = &tr->array_buffer;
4333 iter->snapshot = snapshot;
4335 iter->cpu_file = tracing_get_cpu(inode);
4336 mutex_init(&iter->mutex);
4338 /* Notify the tracer early; before we stop tracing. */
4339 if (iter->trace->open)
4340 iter->trace->open(iter);
4342 /* Annotate start of buffers if we had overruns */
4343 if (ring_buffer_overruns(iter->array_buffer->buffer))
4344 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4346 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4347 if (trace_clocks[tr->clock_id].in_ns)
4348 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4351 * If pause-on-trace is enabled, then stop the trace while
4352 * dumping, unless this is the "snapshot" file.
4354 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4355 tracing_stop_tr(tr);
4357 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4358 for_each_tracing_cpu(cpu) {
4359 iter->buffer_iter[cpu] =
4360 ring_buffer_read_prepare(iter->array_buffer->buffer,
4363 ring_buffer_read_prepare_sync();
4364 for_each_tracing_cpu(cpu) {
4365 ring_buffer_read_start(iter->buffer_iter[cpu]);
4366 tracing_iter_reset(iter, cpu);
4369 cpu = iter->cpu_file;
4370 iter->buffer_iter[cpu] =
4371 ring_buffer_read_prepare(iter->array_buffer->buffer,
4373 ring_buffer_read_prepare_sync();
4374 ring_buffer_read_start(iter->buffer_iter[cpu]);
4375 tracing_iter_reset(iter, cpu);
4378 mutex_unlock(&trace_types_lock);
4383 mutex_unlock(&trace_types_lock);
4386 kfree(iter->buffer_iter);
4388 seq_release_private(inode, file);
4389 return ERR_PTR(-ENOMEM);
4392 int tracing_open_generic(struct inode *inode, struct file *filp)
4396 ret = tracing_check_open_get_tr(NULL);
4400 filp->private_data = inode->i_private;
4404 bool tracing_is_disabled(void)
4406 return (tracing_disabled) ? true : false;
4410 * Open and update trace_array ref count.
4411 * Must have the current trace_array passed to it.
4413 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4415 struct trace_array *tr = inode->i_private;
4418 ret = tracing_check_open_get_tr(tr);
4422 filp->private_data = inode->i_private;
4427 static int tracing_release(struct inode *inode, struct file *file)
4429 struct trace_array *tr = inode->i_private;
4430 struct seq_file *m = file->private_data;
4431 struct trace_iterator *iter;
4434 if (!(file->f_mode & FMODE_READ)) {
4435 trace_array_put(tr);
4439 /* Writes do not use seq_file */
4441 mutex_lock(&trace_types_lock);
4443 for_each_tracing_cpu(cpu) {
4444 if (iter->buffer_iter[cpu])
4445 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4448 if (iter->trace && iter->trace->close)
4449 iter->trace->close(iter);
4451 if (!iter->snapshot && tr->stop_count)
4452 /* reenable tracing if it was previously enabled */
4453 tracing_start_tr(tr);
4455 __trace_array_put(tr);
4457 mutex_unlock(&trace_types_lock);
4459 mutex_destroy(&iter->mutex);
4460 free_cpumask_var(iter->started);
4463 kfree(iter->buffer_iter);
4464 seq_release_private(inode, file);
4469 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4471 struct trace_array *tr = inode->i_private;
4473 trace_array_put(tr);
4477 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4479 struct trace_array *tr = inode->i_private;
4481 trace_array_put(tr);
4483 return single_release(inode, file);
4486 static int tracing_open(struct inode *inode, struct file *file)
4488 struct trace_array *tr = inode->i_private;
4489 struct trace_iterator *iter;
4492 ret = tracing_check_open_get_tr(tr);
4496 /* If this file was open for write, then erase contents */
4497 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4498 int cpu = tracing_get_cpu(inode);
4499 struct array_buffer *trace_buf = &tr->array_buffer;
4501 #ifdef CONFIG_TRACER_MAX_TRACE
4502 if (tr->current_trace->print_max)
4503 trace_buf = &tr->max_buffer;
4506 if (cpu == RING_BUFFER_ALL_CPUS)
4507 tracing_reset_online_cpus(trace_buf);
4509 tracing_reset_cpu(trace_buf, cpu);
4512 if (file->f_mode & FMODE_READ) {
4513 iter = __tracing_open(inode, file, false);
4515 ret = PTR_ERR(iter);
4516 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4517 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4521 trace_array_put(tr);
4527 * Some tracers are not suitable for instance buffers.
4528 * A tracer is always available for the global array (toplevel)
4529 * or if it explicitly states that it is.
4532 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4534 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4537 /* Find the next tracer that this trace array may use */
4538 static struct tracer *
4539 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4541 while (t && !trace_ok_for_array(t, tr))
4548 t_next(struct seq_file *m, void *v, loff_t *pos)
4550 struct trace_array *tr = m->private;
4551 struct tracer *t = v;
4556 t = get_tracer_for_array(tr, t->next);
4561 static void *t_start(struct seq_file *m, loff_t *pos)
4563 struct trace_array *tr = m->private;
4567 mutex_lock(&trace_types_lock);
4569 t = get_tracer_for_array(tr, trace_types);
4570 for (; t && l < *pos; t = t_next(m, t, &l))
4576 static void t_stop(struct seq_file *m, void *p)
4578 mutex_unlock(&trace_types_lock);
4581 static int t_show(struct seq_file *m, void *v)
4583 struct tracer *t = v;
4588 seq_puts(m, t->name);
4597 static const struct seq_operations show_traces_seq_ops = {
4604 static int show_traces_open(struct inode *inode, struct file *file)
4606 struct trace_array *tr = inode->i_private;
4610 ret = tracing_check_open_get_tr(tr);
4614 ret = seq_open(file, &show_traces_seq_ops);
4616 trace_array_put(tr);
4620 m = file->private_data;
4626 static int show_traces_release(struct inode *inode, struct file *file)
4628 struct trace_array *tr = inode->i_private;
4630 trace_array_put(tr);
4631 return seq_release(inode, file);
4635 tracing_write_stub(struct file *filp, const char __user *ubuf,
4636 size_t count, loff_t *ppos)
4641 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4645 if (file->f_mode & FMODE_READ)
4646 ret = seq_lseek(file, offset, whence);
4648 file->f_pos = ret = 0;
4653 static const struct file_operations tracing_fops = {
4654 .open = tracing_open,
4656 .write = tracing_write_stub,
4657 .llseek = tracing_lseek,
4658 .release = tracing_release,
4661 static const struct file_operations show_traces_fops = {
4662 .open = show_traces_open,
4664 .llseek = seq_lseek,
4665 .release = show_traces_release,
4669 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4670 size_t count, loff_t *ppos)
4672 struct trace_array *tr = file_inode(filp)->i_private;
4676 len = snprintf(NULL, 0, "%*pb\n",
4677 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4678 mask_str = kmalloc(len, GFP_KERNEL);
4682 len = snprintf(mask_str, len, "%*pb\n",
4683 cpumask_pr_args(tr->tracing_cpumask));
4688 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4696 int tracing_set_cpumask(struct trace_array *tr,
4697 cpumask_var_t tracing_cpumask_new)
4704 local_irq_disable();
4705 arch_spin_lock(&tr->max_lock);
4706 for_each_tracing_cpu(cpu) {
4708 * Increase/decrease the disabled counter if we are
4709 * about to flip a bit in the cpumask:
4711 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4712 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4713 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4714 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4716 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4717 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4718 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4719 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4722 arch_spin_unlock(&tr->max_lock);
4725 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4731 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4732 size_t count, loff_t *ppos)
4734 struct trace_array *tr = file_inode(filp)->i_private;
4735 cpumask_var_t tracing_cpumask_new;
4738 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4741 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4745 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4749 free_cpumask_var(tracing_cpumask_new);
4754 free_cpumask_var(tracing_cpumask_new);
4759 static const struct file_operations tracing_cpumask_fops = {
4760 .open = tracing_open_generic_tr,
4761 .read = tracing_cpumask_read,
4762 .write = tracing_cpumask_write,
4763 .release = tracing_release_generic_tr,
4764 .llseek = generic_file_llseek,
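/*
 * Editorial example: the fops above back the "tracing_cpumask" tracefs
 * file, so restricting tracing to CPUs 0-1 is a hex-mask write that
 * lands in tracing_set_cpumask() above:
 *
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 */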
4767 static int tracing_trace_options_show(struct seq_file *m, void *v)
4769 struct tracer_opt *trace_opts;
4770 struct trace_array *tr = m->private;
4774 mutex_lock(&trace_types_lock);
4775 tracer_flags = tr->current_trace->flags->val;
4776 trace_opts = tr->current_trace->flags->opts;
4778 for (i = 0; trace_options[i]; i++) {
4779 if (tr->trace_flags & (1 << i))
4780 seq_printf(m, "%s\n", trace_options[i]);
4782 seq_printf(m, "no%s\n", trace_options[i]);
4785 for (i = 0; trace_opts[i].name; i++) {
4786 if (tracer_flags & trace_opts[i].bit)
4787 seq_printf(m, "%s\n", trace_opts[i].name);
4789 seq_printf(m, "no%s\n", trace_opts[i].name);
4791 mutex_unlock(&trace_types_lock);
4796 static int __set_tracer_option(struct trace_array *tr,
4797 struct tracer_flags *tracer_flags,
4798 struct tracer_opt *opts, int neg)
4800 struct tracer *trace = tracer_flags->trace;
4803 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4808 tracer_flags->val &= ~opts->bit;
4810 tracer_flags->val |= opts->bit;
4814 /* Try to assign a tracer specific option */
4815 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4817 struct tracer *trace = tr->current_trace;
4818 struct tracer_flags *tracer_flags = trace->flags;
4819 struct tracer_opt *opts = NULL;
4822 for (i = 0; tracer_flags->opts[i].name; i++) {
4823 opts = &tracer_flags->opts[i];
4825 if (strcmp(cmp, opts->name) == 0)
4826 return __set_tracer_option(tr, trace->flags, opts, neg);
4832 /* Some tracers require overwrite to stay enabled */
4833 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4835 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4841 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4843 if ((mask == TRACE_ITER_RECORD_TGID) ||
4844 (mask == TRACE_ITER_RECORD_CMD))
4845 lockdep_assert_held(&event_mutex);
4847 /* do nothing if flag is already set */
4848 if (!!(tr->trace_flags & mask) == !!enabled)
4851 /* Give the tracer a chance to approve the change */
4852 if (tr->current_trace->flag_changed)
4853 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4857 tr->trace_flags |= mask;
4859 tr->trace_flags &= ~mask;
4861 if (mask == TRACE_ITER_RECORD_CMD)
4862 trace_event_enable_cmd_record(enabled);
4864 if (mask == TRACE_ITER_RECORD_TGID) {
4866 tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4870 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4874 trace_event_enable_tgid_record(enabled);
4877 if (mask == TRACE_ITER_EVENT_FORK)
4878 trace_event_follow_fork(tr, enabled);
4880 if (mask == TRACE_ITER_FUNC_FORK)
4881 ftrace_pid_follow_fork(tr, enabled);
4883 if (mask == TRACE_ITER_OVERWRITE) {
4884 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4885 #ifdef CONFIG_TRACER_MAX_TRACE
4886 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4890 if (mask == TRACE_ITER_PRINTK) {
4891 trace_printk_start_stop_comm(enabled);
4892 trace_printk_control(enabled);
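/*
 * Sketch of a typical call site (mirroring trace_set_options() below):
 * the flag is flipped with both locks held, e.g. to clear overwrite:
 *
 *	mutex_lock(&event_mutex);
 *	mutex_lock(&trace_types_lock);
 *	ret = set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0);
 *	mutex_unlock(&trace_types_lock);
 *	mutex_unlock(&event_mutex);
 */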
4898 int trace_set_options(struct trace_array *tr, char *option)
4903 size_t orig_len = strlen(option);
4906 cmp = strstrip(option);
4908 len = str_has_prefix(cmp, "no");
4914 mutex_lock(&event_mutex);
4915 mutex_lock(&trace_types_lock);
4917 ret = match_string(trace_options, -1, cmp);
4918 /* If no option could be set, test the specific tracer options */
4920 ret = set_tracer_option(tr, cmp, neg);
4922 ret = set_tracer_flag(tr, 1 << ret, !neg);
4924 mutex_unlock(&trace_types_lock);
4925 mutex_unlock(&event_mutex);
4928 * If the first trailing whitespace is replaced with '\0' by strstrip,
4929 * turn it back into a space.
4931 if (orig_len > strlen(option))
4932 option[strlen(option)] = ' ';
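/*
 * For example (illustrative), trace_set_options(tr, "stacktrace") sets
 * the core TRACE_ITER_STACKTRACE flag, "nostacktrace" clears it, and a
 * name with no core match falls through to set_tracer_option().
 */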
4937 static void __init apply_trace_boot_options(void)
4939 char *buf = trace_boot_options_buf;
4943 option = strsep(&buf, ",");
4949 trace_set_options(&global_trace, option);
4951 /* Put back the comma to allow this to be called again */
4958 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4959 size_t cnt, loff_t *ppos)
4961 struct seq_file *m = filp->private_data;
4962 struct trace_array *tr = m->private;
4966 if (cnt >= sizeof(buf))
4969 if (copy_from_user(buf, ubuf, cnt))
4974 ret = trace_set_options(tr, buf);
4983 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4985 struct trace_array *tr = inode->i_private;
4988 ret = tracing_check_open_get_tr(tr);
4992 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4994 trace_array_put(tr);
4999 static const struct file_operations tracing_iter_fops = {
5000 .open = tracing_trace_options_open,
5002 .llseek = seq_lseek,
5003 .release = tracing_single_release_tr,
5004 .write = tracing_trace_options_write,
5007 static const char readme_msg[] =
5008 "tracing mini-HOWTO:\n\n"
5009 "# echo 0 > tracing_on : quick way to disable tracing\n"
5010 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5011 " Important files:\n"
5012 " trace\t\t\t- The static contents of the buffer\n"
5013 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5014 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5015 " current_tracer\t- function and latency tracers\n"
5016 " available_tracers\t- list of configured tracers for current_tracer\n"
5017 " error_log\t- error log for failed commands (that support it)\n"
5018 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5019 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5020 " trace_clock\t\t-change the clock used to order events\n"
5021 " local: Per cpu clock but may not be synced across CPUs\n"
5022 " global: Synced across CPUs but slows tracing down.\n"
5023 " counter: Not a clock, but just an increment\n"
5024 " uptime: Jiffy counter from time of boot\n"
5025 " perf: Same clock that perf events use\n"
5026 #ifdef CONFIG_X86_64
5027 " x86-tsc: TSC cycle counter\n"
5029 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5030 " delta: Delta difference against a buffer-wide timestamp\n"
5031 " absolute: Absolute (standalone) timestamp\n"
5032 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5033 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5034 " tracing_cpumask\t- Limit which CPUs to trace\n"
5035 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5036 "\t\t\t Remove sub-buffer with rmdir\n"
5037 " trace_options\t\t- Set format or modify how tracing happens\n"
5038 "\t\t\t Disable an option by prefixing 'no' to the\n"
5039 "\t\t\t option name\n"
5040 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5041 #ifdef CONFIG_DYNAMIC_FTRACE
5042 "\n available_filter_functions - list of functions that can be filtered on\n"
5043 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5044 "\t\t\t functions\n"
5045 "\t accepts: func_full_name or glob-matching-pattern\n"
5046 "\t modules: Can select a group via module\n"
5047 "\t Format: :mod:<module-name>\n"
5048 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5049 "\t triggers: a command to perform when function is hit\n"
5050 "\t Format: <function>:<trigger>[:count]\n"
5051 "\t trigger: traceon, traceoff\n"
5052 "\t\t enable_event:<system>:<event>\n"
5053 "\t\t disable_event:<system>:<event>\n"
5054 #ifdef CONFIG_STACKTRACE
5057 #ifdef CONFIG_TRACER_SNAPSHOT
5062 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5063 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5064 "\t The first one will disable tracing every time do_fault is hit\n"
5065 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5066 "\t The first time do trap is hit and it disables tracing, the\n"
5067 "\t counter will decrement to 2. If tracing is already disabled,\n"
5068 "\t the counter will not decrement. It only decrements when the\n"
5069 "\t trigger did work\n"
5070 "\t To remove trigger without count:\n"
5071 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5072 "\t To remove trigger with a count:\n"
5073 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5074 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5075 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5076 "\t modules: Can select a group via module command :mod:\n"
5077 "\t Does not accept triggers\n"
5078 #endif /* CONFIG_DYNAMIC_FTRACE */
5079 #ifdef CONFIG_FUNCTION_TRACER
5080 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5082 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5085 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5086 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5087 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5088 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5090 #ifdef CONFIG_TRACER_SNAPSHOT
5091 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5092 "\t\t\t snapshot buffer. Read the contents for more\n"
5093 "\t\t\t information\n"
5095 #ifdef CONFIG_STACK_TRACER
5096 " stack_trace\t\t- Shows the max stack trace when active\n"
5097 " stack_max_size\t- Shows current max stack size that was traced\n"
5098 "\t\t\t Write into this file to reset the max size (trigger a\n"
5099 "\t\t\t new trace)\n"
5100 #ifdef CONFIG_DYNAMIC_FTRACE
5101 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5104 #endif /* CONFIG_STACK_TRACER */
5105 #ifdef CONFIG_DYNAMIC_EVENTS
5106 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5107 "\t\t\t Write into this file to define/undefine new trace events.\n"
5109 #ifdef CONFIG_KPROBE_EVENTS
5110 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5111 "\t\t\t Write into this file to define/undefine new trace events.\n"
5113 #ifdef CONFIG_UPROBE_EVENTS
5114 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5115 "\t\t\t Write into this file to define/undefine new trace events.\n"
5117 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5118 "\t accepts: event-definitions (one definition per line)\n"
5119 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5120 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5121 #ifdef CONFIG_HIST_TRIGGERS
5122 "\t s:[synthetic/]<event> <field> [<field>]\n"
5124 "\t -:[<group>/]<event>\n"
5125 #ifdef CONFIG_KPROBE_EVENTS
5126 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5127 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5129 #ifdef CONFIG_UPROBE_EVENTS
5130 " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5132 "\t args: <name>=fetcharg[:type]\n"
5133 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5134 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5135 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5137 "\t $stack<index>, $stack, $retval, $comm,\n"
5139 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5140 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5141 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5142 "\t <type>\\[<array-size>\\]\n"
5143 #ifdef CONFIG_HIST_TRIGGERS
5144 "\t field: <stype> <name>;\n"
5145 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5146 "\t [unsigned] char/int/long\n"
5149 " events/\t\t- Directory containing all trace event subsystems:\n"
5150 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5151 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5152 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5154 " filter\t\t- If set, only events passing filter are traced\n"
5155 " events/<system>/<event>/\t- Directory containing control files for\n"
5157 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5158 " filter\t\t- If set, only events passing filter are traced\n"
5159 " trigger\t\t- If set, a command to perform when event is hit\n"
5160 "\t Format: <trigger>[:count][if <filter>]\n"
5161 "\t trigger: traceon, traceoff\n"
5162 "\t enable_event:<system>:<event>\n"
5163 "\t disable_event:<system>:<event>\n"
5164 #ifdef CONFIG_HIST_TRIGGERS
5165 "\t enable_hist:<system>:<event>\n"
5166 "\t disable_hist:<system>:<event>\n"
5168 #ifdef CONFIG_STACKTRACE
5171 #ifdef CONFIG_TRACER_SNAPSHOT
5174 #ifdef CONFIG_HIST_TRIGGERS
5175 "\t\t hist (see below)\n"
5177 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5178 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5179 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5180 "\t events/block/block_unplug/trigger\n"
5181 "\t The first disables tracing every time block_unplug is hit.\n"
5182 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5183 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5184 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5185 "\t Like function triggers, the counter is only decremented if it\n"
5186 "\t enabled or disabled tracing.\n"
5187 "\t To remove a trigger without a count:\n"
5188 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5189 "\t To remove a trigger with a count:\n"
5190 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5191 "\t Filters can be ignored when removing a trigger.\n"
5192 #ifdef CONFIG_HIST_TRIGGERS
5193 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5194 "\t Format: hist:keys=<field1[,field2,...]>\n"
5195 "\t [:values=<field1[,field2,...]>]\n"
5196 "\t [:sort=<field1[,field2,...]>]\n"
5197 "\t [:size=#entries]\n"
5198 "\t [:pause][:continue][:clear]\n"
5199 "\t [:name=histname1]\n"
5200 "\t [:<handler>.<action>]\n"
5201 "\t [if <filter>]\n\n"
5202 "\t When a matching event is hit, an entry is added to a hash\n"
5203 "\t table using the key(s) and value(s) named, and the value of a\n"
5204 "\t sum called 'hitcount' is incremented. Keys and values\n"
5205 "\t correspond to fields in the event's format description. Keys\n"
5206 "\t can be any field, or the special string 'stacktrace'.\n"
5207 "\t Compound keys consisting of up to two fields can be specified\n"
5208 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5209 "\t fields. Sort keys consisting of up to two fields can be\n"
5210 "\t specified using the 'sort' keyword. The sort direction can\n"
5211 "\t be modified by appending '.descending' or '.ascending' to a\n"
5212 "\t sort field. The 'size' parameter can be used to specify more\n"
5213 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5214 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5215 "\t its histogram data will be shared with other triggers of the\n"
5216 "\t same name, and trigger hits will update this common data.\n\n"
5217 "\t Reading the 'hist' file for the event will dump the hash\n"
5218 "\t table in its entirety to stdout. If there are multiple hist\n"
5219 "\t triggers attached to an event, there will be a table for each\n"
5220 "\t trigger in the output. The table displayed for a named\n"
5221 "\t trigger will be the same as any other instance having the\n"
5222 "\t same name. The default format used to display a given field\n"
5223 "\t can be modified by appending any of the following modifiers\n"
5224 "\t to the field name, as applicable:\n\n"
5225 "\t .hex display a number as a hex value\n"
5226 "\t .sym display an address as a symbol\n"
5227 "\t .sym-offset display an address as a symbol and offset\n"
5228 "\t .execname display a common_pid as a program name\n"
5229 "\t .syscall display a syscall id as a syscall name\n"
5230 "\t .log2 display log2 value rather than raw number\n"
5231 "\t .usecs display a common_timestamp in microseconds\n\n"
5232 "\t The 'pause' parameter can be used to pause an existing hist\n"
5233 "\t trigger or to start a hist trigger but not log any events\n"
5234 "\t until told to do so. 'continue' can be used to start or\n"
5235 "\t restart a paused hist trigger.\n\n"
5236 "\t The 'clear' parameter will clear the contents of a running\n"
5237 "\t hist trigger and leave its current paused/active state\n"
5239 "\t The enable_hist and disable_hist triggers can be used to\n"
5240 "\t have one event conditionally start and stop another event's\n"
5241 "\t already-attached hist trigger. The syntax is analogous to\n"
5242 "\t the enable_event and disable_event triggers.\n\n"
5243 "\t Hist trigger handlers and actions are executed whenever a\n"
5244 "\t a histogram entry is added or updated. They take the form:\n\n"
5245 "\t <handler>.<action>\n\n"
5246 "\t The available handlers are:\n\n"
5247 "\t onmatch(matching.event) - invoke on addition or update\n"
5248 "\t onmax(var) - invoke if var exceeds current max\n"
5249 "\t onchange(var) - invoke action if var changes\n\n"
5250 "\t The available actions are:\n\n"
5251 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5252 "\t save(field,...) - save current event fields\n"
5253 #ifdef CONFIG_TRACER_SNAPSHOT
5254 "\t snapshot() - snapshot the trace buffer\n"
5260 tracing_readme_read(struct file *filp, char __user *ubuf,
5261 size_t cnt, loff_t *ppos)
5263 return simple_read_from_buffer(ubuf, cnt, ppos,
5264 readme_msg, strlen(readme_msg));
5267 static const struct file_operations tracing_readme_fops = {
5268 .open = tracing_open_generic,
5269 .read = tracing_readme_read,
5270 .llseek = generic_file_llseek,
5273 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5277 if (*pos || m->count)
5282 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5283 if (trace_find_tgid(*ptr))
5290 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5300 v = saved_tgids_next(m, v, &l);
5308 static void saved_tgids_stop(struct seq_file *m, void *v)
5312 static int saved_tgids_show(struct seq_file *m, void *v)
5314 int pid = (int *)v - tgid_map;
5316 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5320 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5321 .start = saved_tgids_start,
5322 .stop = saved_tgids_stop,
5323 .next = saved_tgids_next,
5324 .show = saved_tgids_show,
5327 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5331 ret = tracing_check_open_get_tr(NULL);
5335 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5339 static const struct file_operations tracing_saved_tgids_fops = {
5340 .open = tracing_saved_tgids_open,
5342 .llseek = seq_lseek,
5343 .release = seq_release,
5346 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5348 unsigned int *ptr = v;
5350 if (*pos || m->count)
5355 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5357 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5366 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5372 arch_spin_lock(&trace_cmdline_lock);
5374 v = &savedcmd->map_cmdline_to_pid[0];
5376 v = saved_cmdlines_next(m, v, &l);
5384 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5386 arch_spin_unlock(&trace_cmdline_lock);
5390 static int saved_cmdlines_show(struct seq_file *m, void *v)
5392 char buf[TASK_COMM_LEN];
5393 unsigned int *pid = v;
5395 __trace_find_cmdline(*pid, buf);
5396 seq_printf(m, "%d %s\n", *pid, buf);
5400 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5401 .start = saved_cmdlines_start,
5402 .next = saved_cmdlines_next,
5403 .stop = saved_cmdlines_stop,
5404 .show = saved_cmdlines_show,
5407 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5411 ret = tracing_check_open_get_tr(NULL);
5415 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5418 static const struct file_operations tracing_saved_cmdlines_fops = {
5419 .open = tracing_saved_cmdlines_open,
5421 .llseek = seq_lseek,
5422 .release = seq_release,
5426 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5427 size_t cnt, loff_t *ppos)
5432 arch_spin_lock(&trace_cmdline_lock);
5433 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5434 arch_spin_unlock(&trace_cmdline_lock);
5436 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5439 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5441 kfree(s->saved_cmdlines);
5442 kfree(s->map_cmdline_to_pid);
5446 static int tracing_resize_saved_cmdlines(unsigned int val)
5448 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5450 s = kmalloc(sizeof(*s), GFP_KERNEL);
5454 if (allocate_cmdlines_buffer(val, s) < 0) {
5459 arch_spin_lock(&trace_cmdline_lock);
5460 savedcmd_temp = savedcmd;
5462 arch_spin_unlock(&trace_cmdline_lock);
5463 free_saved_cmdlines_buffer(savedcmd_temp);
5469 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5470 size_t cnt, loff_t *ppos)
5475 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5479 /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5480 if (!val || val > PID_MAX_DEFAULT)
5483 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5492 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5493 .open = tracing_open_generic,
5494 .read = tracing_saved_cmdlines_size_read,
5495 .write = tracing_saved_cmdlines_size_write,
5498 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5499 static union trace_eval_map_item *
5500 update_eval_map(union trace_eval_map_item *ptr)
5502 if (!ptr->map.eval_string) {
5503 if (ptr->tail.next) {
5504 ptr = ptr->tail.next;
5505 /* Set ptr to the next real item (skip head) */
5513 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5515 union trace_eval_map_item *ptr = v;
5518 * Paranoid! If ptr points to end, we don't want to increment past it.
5519 * This really should never happen.
5522 ptr = update_eval_map(ptr);
5523 if (WARN_ON_ONCE(!ptr))
5527 ptr = update_eval_map(ptr);
5532 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5534 union trace_eval_map_item *v;
5537 mutex_lock(&trace_eval_mutex);
5539 v = trace_eval_maps;
5543 while (v && l < *pos) {
5544 v = eval_map_next(m, v, &l);
5550 static void eval_map_stop(struct seq_file *m, void *v)
5552 mutex_unlock(&trace_eval_mutex);
5555 static int eval_map_show(struct seq_file *m, void *v)
5557 union trace_eval_map_item *ptr = v;
5559 seq_printf(m, "%s %ld (%s)\n",
5560 ptr->map.eval_string, ptr->map.eval_value,
5566 static const struct seq_operations tracing_eval_map_seq_ops = {
5567 .start = eval_map_start,
5568 .next = eval_map_next,
5569 .stop = eval_map_stop,
5570 .show = eval_map_show,
5573 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5577 ret = tracing_check_open_get_tr(NULL);
5581 return seq_open(filp, &tracing_eval_map_seq_ops);
5584 static const struct file_operations tracing_eval_map_fops = {
5585 .open = tracing_eval_map_open,
5587 .llseek = seq_lseek,
5588 .release = seq_release,
5591 static inline union trace_eval_map_item *
5592 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5594 /* Return tail of array given the head */
5595 return ptr + ptr->head.length + 1;
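/*
 * Layout sketch of one map_array allocation (here with length == 3):
 *
 *	[0] head  (head.mod, head.length = 3)
 *	[1] map   <- first real map item
 *	[2] map
 *	[3] map
 *	[4] tail  (tail.next -> next array, or NULL)
 *
 * which is why the tail sits at head + length + 1.
 */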
5599 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5602 struct trace_eval_map **stop;
5603 struct trace_eval_map **map;
5604 union trace_eval_map_item *map_array;
5605 union trace_eval_map_item *ptr;
5610 * The trace_eval_maps contains the map plus a head and tail item,
5611 * where the head holds the module and length of array, and the
5612 * tail holds a pointer to the next list.
5614 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5616 pr_warn("Unable to allocate trace eval mapping\n");
5620 mutex_lock(&trace_eval_mutex);
5622 if (!trace_eval_maps)
5623 trace_eval_maps = map_array;
5625 ptr = trace_eval_maps;
5627 ptr = trace_eval_jmp_to_tail(ptr);
5628 if (!ptr->tail.next)
5630 ptr = ptr->tail.next;
5633 ptr->tail.next = map_array;
5635 map_array->head.mod = mod;
5636 map_array->head.length = len;
5639 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5640 map_array->map = **map;
5643 memset(map_array, 0, sizeof(*map_array));
5645 mutex_unlock(&trace_eval_mutex);
5648 static void trace_create_eval_file(struct dentry *d_tracer)
5650 trace_create_file("eval_map", 0444, d_tracer,
5651 NULL, &tracing_eval_map_fops);
5654 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5655 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5656 static inline void trace_insert_eval_map_file(struct module *mod,
5657 struct trace_eval_map **start, int len) { }
5658 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5660 static void trace_insert_eval_map(struct module *mod,
5661 struct trace_eval_map **start, int len)
5663 struct trace_eval_map **map;
5670 trace_event_eval_update(map, len);
5672 trace_insert_eval_map_file(mod, start, len);
5676 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5677 size_t cnt, loff_t *ppos)
5679 struct trace_array *tr = filp->private_data;
5680 char buf[MAX_TRACER_SIZE+2];
5683 mutex_lock(&trace_types_lock);
5684 r = sprintf(buf, "%s\n", tr->current_trace->name);
5685 mutex_unlock(&trace_types_lock);
5687 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5690 int tracer_init(struct tracer *t, struct trace_array *tr)
5692 tracing_reset_online_cpus(&tr->array_buffer);
5696 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5700 for_each_tracing_cpu(cpu)
5701 per_cpu_ptr(buf->data, cpu)->entries = val;
5704 #ifdef CONFIG_TRACER_MAX_TRACE
5705 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5706 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5707 struct array_buffer *size_buf, int cpu_id)
5711 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5712 for_each_tracing_cpu(cpu) {
5713 ret = ring_buffer_resize(trace_buf->buffer,
5714 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5717 per_cpu_ptr(trace_buf->data, cpu)->entries =
5718 per_cpu_ptr(size_buf->data, cpu)->entries;
5721 ret = ring_buffer_resize(trace_buf->buffer,
5722 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5724 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5725 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5730 #endif /* CONFIG_TRACER_MAX_TRACE */
5732 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5733 unsigned long size, int cpu)
5738 * If kernel or user changes the size of the ring buffer
5739 * we use the size that was given, and we can forget about
5740 * expanding it later.
5742 ring_buffer_expanded = true;
5744 /* May be called before buffers are initialized */
5745 if (!tr->array_buffer.buffer)
5748 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5752 #ifdef CONFIG_TRACER_MAX_TRACE
5753 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5754 !tr->current_trace->use_max_tr)
5757 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5759 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5760 &tr->array_buffer, cpu);
5763 * AARGH! We are left with a max buffer of a
5764 * different size!
5765 * The max buffer is our "snapshot" buffer.
5766 * When a tracer needs a snapshot (one of the
5767 * latency tracers), it swaps the max buffer
5768 * with the saved snapshot. We succeeded in
5769 * updating the size of the main buffer, but failed to
5770 * update the size of the max buffer. And when we tried
5771 * to reset the main buffer to its original size, we
5772 * failed there too. This is very unlikely to
5773 * happen, but if it does, warn and kill all
5777 tracing_disabled = 1;
5782 if (cpu == RING_BUFFER_ALL_CPUS)
5783 set_buffer_entries(&tr->max_buffer, size);
5785 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5788 #endif /* CONFIG_TRACER_MAX_TRACE */
5790 if (cpu == RING_BUFFER_ALL_CPUS)
5791 set_buffer_entries(&tr->array_buffer, size);
5793 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5798 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5799 unsigned long size, int cpu_id)
5803 mutex_lock(&trace_types_lock);
5805 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5806 /* make sure this CPU is enabled in the mask */
5807 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5813 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5818 mutex_unlock(&trace_types_lock);
5825 * tracing_update_buffers - used by tracing facility to expand ring buffers
5827 * To save memory when tracing is never used on a system that has it
5828 * configured in, the ring buffers start at a minimum size. Once a
5829 * user starts to use the tracing facility, they need to grow to
5830 * their default size.
5832 * This function is to be called when a tracer is about to be used.
5834 int tracing_update_buffers(void)
5838 mutex_lock(&trace_types_lock);
5839 if (!ring_buffer_expanded)
5840 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5841 RING_BUFFER_ALL_CPUS);
5842 mutex_unlock(&trace_types_lock);
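/*
 * Sketch of the expected call pattern (illustrative):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * after which the caller may enable tracing against full-sized buffers.
 */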
5847 struct trace_option_dentry;
5850 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5853 * Used to clear out the tracer before deletion of an instance.
5854 * Must have trace_types_lock held.
5856 static void tracing_set_nop(struct trace_array *tr)
5858 if (tr->current_trace == &nop_trace)
5861 tr->current_trace->enabled--;
5863 if (tr->current_trace->reset)
5864 tr->current_trace->reset(tr);
5866 tr->current_trace = &nop_trace;
5869 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5871 /* Only enable if the directory has been created already. */
5875 create_trace_option_files(tr, t);
5878 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5881 #ifdef CONFIG_TRACER_MAX_TRACE
5886 mutex_lock(&trace_types_lock);
5888 if (!ring_buffer_expanded) {
5889 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5890 RING_BUFFER_ALL_CPUS);
5896 for (t = trace_types; t; t = t->next) {
5897 if (strcmp(t->name, buf) == 0)
5904 if (t == tr->current_trace)
5907 #ifdef CONFIG_TRACER_SNAPSHOT
5908 if (t->use_max_tr) {
5909 arch_spin_lock(&tr->max_lock);
5910 if (tr->cond_snapshot)
5912 arch_spin_unlock(&tr->max_lock);
5917 /* Some tracers won't work on kernel command line */
5918 if (system_state < SYSTEM_RUNNING && t->noboot) {
5919 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5924 /* Some tracers are only allowed for the top level buffer */
5925 if (!trace_ok_for_array(t, tr)) {
5930 /* If trace pipe files are being read, we can't change the tracer */
5931 if (tr->trace_ref) {
5936 trace_branch_disable();
5938 tr->current_trace->enabled--;
5940 if (tr->current_trace->reset)
5941 tr->current_trace->reset(tr);
5943 /* Current trace needs to be nop_trace before synchronize_rcu */
5944 tr->current_trace = &nop_trace;
5946 #ifdef CONFIG_TRACER_MAX_TRACE
5947 had_max_tr = tr->allocated_snapshot;
5949 if (had_max_tr && !t->use_max_tr) {
5951 * We need to make sure that the update_max_tr sees that
5952 * current_trace changed to nop_trace to keep it from
5953 * swapping the buffers after we resize it.
5954 * update_max_tr is called with interrupts disabled,
5955 * so a synchronize_rcu() is sufficient.
5962 #ifdef CONFIG_TRACER_MAX_TRACE
5963 if (t->use_max_tr && !had_max_tr) {
5964 ret = tracing_alloc_snapshot_instance(tr);
5971 ret = tracer_init(t, tr);
5976 tr->current_trace = t;
5977 tr->current_trace->enabled++;
5978 trace_branch_enable(tr);
5980 mutex_unlock(&trace_types_lock);
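/*
 * E.g. (illustrative) switching the top-level instance to the function
 * tracer from kernel code:
 *
 *	ret = tracing_set_tracer(&global_trace, "function");
 */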
5986 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5987 size_t cnt, loff_t *ppos)
5989 struct trace_array *tr = filp->private_data;
5990 char buf[MAX_TRACER_SIZE+1];
5997 if (cnt > MAX_TRACER_SIZE)
5998 cnt = MAX_TRACER_SIZE;
6000 if (copy_from_user(buf, ubuf, cnt))
6005 /* strip trailing whitespace */
6006 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6009 err = tracing_set_tracer(tr, buf);
6019 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6020 size_t cnt, loff_t *ppos)
6025 r = snprintf(buf, sizeof(buf), "%ld\n",
6026 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6027 if (r > sizeof(buf))
6029 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6033 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6034 size_t cnt, loff_t *ppos)
6039 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6049 tracing_thresh_read(struct file *filp, char __user *ubuf,
6050 size_t cnt, loff_t *ppos)
6052 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6056 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6057 size_t cnt, loff_t *ppos)
6059 struct trace_array *tr = filp->private_data;
6062 mutex_lock(&trace_types_lock);
6063 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6067 if (tr->current_trace->update_thresh) {
6068 ret = tr->current_trace->update_thresh(tr);
6075 mutex_unlock(&trace_types_lock);
6080 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6083 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6084 size_t cnt, loff_t *ppos)
6086 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6090 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6091 size_t cnt, loff_t *ppos)
6093 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6098 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6100 struct trace_array *tr = inode->i_private;
6101 struct trace_iterator *iter;
6104 ret = tracing_check_open_get_tr(tr);
6108 mutex_lock(&trace_types_lock);
6110 /* create a buffer to store the information to pass to userspace */
6111 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6114 __trace_array_put(tr);
6118 trace_seq_init(&iter->seq);
6119 iter->trace = tr->current_trace;
6121 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6126 /* trace pipe does not show start of buffer */
6127 cpumask_setall(iter->started);
6129 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6130 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6132 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6133 if (trace_clocks[tr->clock_id].in_ns)
6134 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6137 iter->array_buffer = &tr->array_buffer;
6138 iter->cpu_file = tracing_get_cpu(inode);
6139 mutex_init(&iter->mutex);
6140 filp->private_data = iter;
6142 if (iter->trace->pipe_open)
6143 iter->trace->pipe_open(iter);
6145 nonseekable_open(inode, filp);
6149 mutex_unlock(&trace_types_lock);
6154 __trace_array_put(tr);
6155 mutex_unlock(&trace_types_lock);
6159 static int tracing_release_pipe(struct inode *inode, struct file *file)
6161 struct trace_iterator *iter = file->private_data;
6162 struct trace_array *tr = inode->i_private;
6164 mutex_lock(&trace_types_lock);
6168 if (iter->trace->pipe_close)
6169 iter->trace->pipe_close(iter);
6171 mutex_unlock(&trace_types_lock);
6173 free_cpumask_var(iter->started);
6174 mutex_destroy(&iter->mutex);
6177 trace_array_put(tr);
6183 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6185 struct trace_array *tr = iter->tr;
6187 /* Iterators are static; they should be filled or empty */
6188 if (trace_buffer_iter(iter, iter->cpu_file))
6189 return EPOLLIN | EPOLLRDNORM;
6191 if (tr->trace_flags & TRACE_ITER_BLOCK)
6193 * Always select as readable when in blocking mode
6195 return EPOLLIN | EPOLLRDNORM;
6197 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6202 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6204 struct trace_iterator *iter = filp->private_data;
6206 return trace_poll(iter, filp, poll_table);
6209 /* Must be called with iter->mutex held. */
6210 static int tracing_wait_pipe(struct file *filp)
6212 struct trace_iterator *iter = filp->private_data;
6215 while (trace_empty(iter)) {
6217 if ((filp->f_flags & O_NONBLOCK)) {
6222 * We stop blocking once we have read something and tracing
6223 * has been disabled. We still block if tracing is disabled but
6224 * we have never read anything. This allows a user to cat this
6225 * file, and then enable tracing. But after we have read
6226 * something, we give an EOF when tracing is again disabled.
6228 * iter->pos will be 0 if we haven't read anything.
6230 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6233 mutex_unlock(&iter->mutex);
6235 ret = wait_on_pipe(iter, 0);
6237 mutex_lock(&iter->mutex);
6250 tracing_read_pipe(struct file *filp, char __user *ubuf,
6251 size_t cnt, loff_t *ppos)
6253 struct trace_iterator *iter = filp->private_data;
6257 * Avoid more than one consumer on a single file descriptor.
6258 * This is just a matter of trace coherency: the ring buffer itself
6261 mutex_lock(&iter->mutex);
6263 /* return any leftover data */
6264 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6268 trace_seq_init(&iter->seq);
6270 if (iter->trace->read) {
6271 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6277 sret = tracing_wait_pipe(filp);
6281 /* stop when tracing is finished */
6282 if (trace_empty(iter)) {
6287 if (cnt >= PAGE_SIZE)
6288 cnt = PAGE_SIZE - 1;
6290 /* reset all but tr, trace, and overruns */
6291 memset(&iter->seq, 0,
6292 sizeof(struct trace_iterator) -
6293 offsetof(struct trace_iterator, seq));
6294 cpumask_clear(iter->started);
6295 trace_seq_init(&iter->seq);
6298 trace_event_read_lock();
6299 trace_access_lock(iter->cpu_file);
6300 while (trace_find_next_entry_inc(iter) != NULL) {
6301 enum print_line_t ret;
6302 int save_len = iter->seq.seq.len;
6304 ret = print_trace_line(iter);
6305 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6306 /* don't print partial lines */
6307 iter->seq.seq.len = save_len;
6310 if (ret != TRACE_TYPE_NO_CONSUME)
6311 trace_consume(iter);
6313 if (trace_seq_used(&iter->seq) >= cnt)
6317 * Setting the full flag means we reached the trace_seq buffer
6318 * size and we should have left via the partial output condition
6319 * above. One of the trace_seq_* functions is not used properly.
6321 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6324 trace_access_unlock(iter->cpu_file);
6325 trace_event_read_unlock();
6327 /* Now copy what we have to the user */
6328 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6329 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6330 trace_seq_init(&iter->seq);
6333 * If there was nothing to send to user, in spite of consuming trace
6334 * entries, go back to wait for more entries.
6340 mutex_unlock(&iter->mutex);
6345 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6348 __free_page(spd->pages[idx]);
6352 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6358 /* Seq buffer is page-sized, exactly what we need. */
6360 save_len = iter->seq.seq.len;
6361 ret = print_trace_line(iter);
6363 if (trace_seq_has_overflowed(&iter->seq)) {
6364 iter->seq.seq.len = save_len;
6369 * This should not be hit, because it should only
6370 * be set if the iter->seq overflowed. But check it
6371 * anyway to be safe.
6373 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6374 iter->seq.seq.len = save_len;
6378 count = trace_seq_used(&iter->seq) - save_len;
6381 iter->seq.seq.len = save_len;
6385 if (ret != TRACE_TYPE_NO_CONSUME)
6386 trace_consume(iter);
6388 if (!trace_find_next_entry_inc(iter)) {
6398 static ssize_t tracing_splice_read_pipe(struct file *filp,
6400 struct pipe_inode_info *pipe,
6404 struct page *pages_def[PIPE_DEF_BUFFERS];
6405 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6406 struct trace_iterator *iter = filp->private_data;
6407 struct splice_pipe_desc spd = {
6409 .partial = partial_def,
6410 .nr_pages = 0, /* This gets updated below. */
6411 .nr_pages_max = PIPE_DEF_BUFFERS,
6412 .ops = &default_pipe_buf_ops,
6413 .spd_release = tracing_spd_release_pipe,
6419 if (splice_grow_spd(pipe, &spd))
6422 mutex_lock(&iter->mutex);
6424 if (iter->trace->splice_read) {
6425 ret = iter->trace->splice_read(iter, filp,
6426 ppos, pipe, len, flags);
6431 ret = tracing_wait_pipe(filp);
6435 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6440 trace_event_read_lock();
6441 trace_access_lock(iter->cpu_file);
6443 /* Fill as many pages as possible. */
6444 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6445 spd.pages[i] = alloc_page(GFP_KERNEL);
6449 rem = tracing_fill_pipe_page(rem, iter);
6451 /* Copy the data into the page, so we can start over. */
6452 ret = trace_seq_to_buffer(&iter->seq,
6453 page_address(spd.pages[i]),
6454 trace_seq_used(&iter->seq));
6456 __free_page(spd.pages[i]);
6459 spd.partial[i].offset = 0;
6460 spd.partial[i].len = trace_seq_used(&iter->seq);
6462 trace_seq_init(&iter->seq);
6465 trace_access_unlock(iter->cpu_file);
6466 trace_event_read_unlock();
6467 mutex_unlock(&iter->mutex);
6472 ret = splice_to_pipe(pipe, &spd);
6476 splice_shrink_spd(&spd);
6480 mutex_unlock(&iter->mutex);
6485 tracing_entries_read(struct file *filp, char __user *ubuf,
6486 size_t cnt, loff_t *ppos)
6488 struct inode *inode = file_inode(filp);
6489 struct trace_array *tr = inode->i_private;
6490 int cpu = tracing_get_cpu(inode);
6495 mutex_lock(&trace_types_lock);
6497 if (cpu == RING_BUFFER_ALL_CPUS) {
6498 int cpu, buf_size_same;
6503 /* check if all cpu sizes are same */
6504 for_each_tracing_cpu(cpu) {
6505 /* fill in the size from first enabled cpu */
6507 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6508 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6514 if (buf_size_same) {
6515 if (!ring_buffer_expanded)
6516 r = sprintf(buf, "%lu (expanded: %lu)\n",
6518 trace_buf_size >> 10);
6520 r = sprintf(buf, "%lu\n", size >> 10);
6522 r = sprintf(buf, "X\n");
6524 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6526 mutex_unlock(&trace_types_lock);
6528 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6533 tracing_entries_write(struct file *filp, const char __user *ubuf,
6534 size_t cnt, loff_t *ppos)
6536 struct inode *inode = file_inode(filp);
6537 struct trace_array *tr = inode->i_private;
6541 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6545 /* must have at least 1 entry */
6549 /* value is in KB */
6551 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6561 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6562 size_t cnt, loff_t *ppos)
6564 struct trace_array *tr = filp->private_data;
6567 unsigned long size = 0, expanded_size = 0;
6569 mutex_lock(&trace_types_lock);
6570 for_each_tracing_cpu(cpu) {
6571 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6572 if (!ring_buffer_expanded)
6573 expanded_size += trace_buf_size >> 10;
6575 if (ring_buffer_expanded)
6576 r = sprintf(buf, "%lu\n", size);
6578 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6579 mutex_unlock(&trace_types_lock);
6581 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6585 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6586 size_t cnt, loff_t *ppos)
6589 * There is no need to read what the user has written; this function
6590 * is just to make sure that there is no error when "echo" is used
6599 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6601 struct trace_array *tr = inode->i_private;
6603 /* disable tracing? */
6604 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6605 tracer_tracing_off(tr);
6606 /* resize the ring buffer to 0 */
6607 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6609 trace_array_put(tr);
6615 tracing_mark_write(struct file *filp, const char __user *ubuf,
6616 size_t cnt, loff_t *fpos)
6618 struct trace_array *tr = filp->private_data;
6619 struct ring_buffer_event *event;
6620 enum event_trigger_type tt = ETT_NONE;
6621 struct trace_buffer *buffer;
6622 struct print_entry *entry;
6623 unsigned long irq_flags;
6628 /* Used in tracing_mark_raw_write() as well */
6629 #define FAULTED_STR "<faulted>"
6630 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6632 if (tracing_disabled)
6635 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6638 if (cnt > TRACE_BUF_SIZE)
6639 cnt = TRACE_BUF_SIZE;
6641 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6643 local_save_flags(irq_flags);
6644 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6646 /* If less than "<faulted>", then make sure we can still add that */
6647 if (cnt < FAULTED_SIZE)
6648 size += FAULTED_SIZE - cnt;
6650 buffer = tr->array_buffer.buffer;
6651 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6652 irq_flags, preempt_count());
6653 if (unlikely(!event))
6654 /* Ring buffer disabled, return as if not open for write */
6657 entry = ring_buffer_event_data(event);
6658 entry->ip = _THIS_IP_;
6660 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6662 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6669 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6670 /* do not add \n before testing triggers, but add \0 */
6671 entry->buf[cnt] = '\0';
6672 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6675 if (entry->buf[cnt - 1] != '\n') {
6676 entry->buf[cnt] = '\n';
6677 entry->buf[cnt + 1] = '\0';
6679 entry->buf[cnt] = '\0';
6681 __buffer_unlock_commit(buffer, event);
6684 event_triggers_post_call(tr->trace_marker_file, tt);
6692 /* Limit it for now to 3K (including tag) */
6693 #define RAW_DATA_MAX_SIZE (1024*3)
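/*
 * Sketch of a matching user-space write (illustrative; the struct and
 * sizes are assumptions of the example, only the leading id is
 * required by the format below):
 *
 *	struct {
 *		unsigned int id;
 *		char data[8];
 *	} payload = { .id = 1, .data = "raw" };
 *
 *	write(fd, &payload, sizeof(payload));
 */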
6696 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6697 size_t cnt, loff_t *fpos)
6699 struct trace_array *tr = filp->private_data;
6700 struct ring_buffer_event *event;
6701 struct trace_buffer *buffer;
6702 struct raw_data_entry *entry;
6703 unsigned long irq_flags;
6708 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6710 if (tracing_disabled)
6713 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6716 /* The marker must at least have a tag id */
6717 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6720 if (cnt > TRACE_BUF_SIZE)
6721 cnt = TRACE_BUF_SIZE;
6723 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6725 local_save_flags(irq_flags);
6726 size = sizeof(*entry) + cnt;
6727 if (cnt < FAULT_SIZE_ID)
6728 size += FAULT_SIZE_ID - cnt;
6730 buffer = tr->array_buffer.buffer;
6731 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6732 irq_flags, preempt_count());
6734 /* Ring buffer disabled, return as if not open for write */
6737 entry = ring_buffer_event_data(event);
6739 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6742 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6747 __buffer_unlock_commit(buffer, event);
6755 static int tracing_clock_show(struct seq_file *m, void *v)
6757 struct trace_array *tr = m->private;
6760 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6762 "%s%s%s%s", i ? " " : "",
6763 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6764 i == tr->clock_id ? "]" : "");
6770 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6774 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6775 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6778 if (i == ARRAY_SIZE(trace_clocks))
6781 mutex_lock(&trace_types_lock);
6785 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6788 * New clock may not be consistent with the previous clock.
6789 * Reset the buffer so that it doesn't have incomparable timestamps.
6791 tracing_reset_online_cpus(&tr->array_buffer);
6793 #ifdef CONFIG_TRACER_MAX_TRACE
6794 if (tr->max_buffer.buffer)
6795 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6796 tracing_reset_online_cpus(&tr->max_buffer);
6799 mutex_unlock(&trace_types_lock);
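/*
 * E.g. (illustrative) tracing_set_clock(tr, "global") switches both
 * buffers to the global clock; reading the trace_clock file then
 * shows the selection bracketed, roughly:
 *
 *	local [global] counter uptime perf mono mono_raw boot
 */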
6804 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6805 size_t cnt, loff_t *fpos)
6807 struct seq_file *m = filp->private_data;
6808 struct trace_array *tr = m->private;
6810 const char *clockstr;
6813 if (cnt >= sizeof(buf))
6816 if (copy_from_user(buf, ubuf, cnt))
6821 clockstr = strstrip(buf);
6823 ret = tracing_set_clock(tr, clockstr);
6832 static int tracing_clock_open(struct inode *inode, struct file *file)
6834 struct trace_array *tr = inode->i_private;
6837 ret = tracing_check_open_get_tr(tr);
6841 ret = single_open(file, tracing_clock_show, inode->i_private);
6843 trace_array_put(tr);
6848 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6850 struct trace_array *tr = m->private;
6852 mutex_lock(&trace_types_lock);
6854 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6855 seq_puts(m, "delta [absolute]\n");
6857 seq_puts(m, "[delta] absolute\n");
6859 mutex_unlock(&trace_types_lock);
6864 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6866 struct trace_array *tr = inode->i_private;
6869 ret = tracing_check_open_get_tr(tr);
6873 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6875 trace_array_put(tr);
6880 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6884 mutex_lock(&trace_types_lock);
6886 if (abs && tr->time_stamp_abs_ref++)
6890 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6895 if (--tr->time_stamp_abs_ref)
6899 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6901 #ifdef CONFIG_TRACER_MAX_TRACE
6902 if (tr->max_buffer.buffer)
6903 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6906 mutex_unlock(&trace_types_lock);
6911 struct ftrace_buffer_info {
6912 struct trace_iterator iter;
6914 unsigned int spare_cpu;
6918 #ifdef CONFIG_TRACER_SNAPSHOT
6919 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6921 struct trace_array *tr = inode->i_private;
6922 struct trace_iterator *iter;
6926 ret = tracing_check_open_get_tr(tr);
6930 if (file->f_mode & FMODE_READ) {
6931 iter = __tracing_open(inode, file, true);
6933 ret = PTR_ERR(iter);
6935 /* Writes still need the seq_file to hold the private data */
6937 m = kzalloc(sizeof(*m), GFP_KERNEL);
6940 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6948 iter->array_buffer = &tr->max_buffer;
6949 iter->cpu_file = tracing_get_cpu(inode);
6951 file->private_data = m;
6955 trace_array_put(tr);
6961 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6964 struct seq_file *m = filp->private_data;
6965 struct trace_iterator *iter = m->private;
6966 struct trace_array *tr = iter->tr;
6970 ret = tracing_update_buffers();
6974 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6978 mutex_lock(&trace_types_lock);
6980 if (tr->current_trace->use_max_tr) {
6985 arch_spin_lock(&tr->max_lock);
6986 if (tr->cond_snapshot)
6988 arch_spin_unlock(&tr->max_lock);
6994 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6998 if (tr->allocated_snapshot)
7002 /* Only allow per-cpu swap if the ring buffer supports it */
7003 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7004 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7009 if (tr->allocated_snapshot)
7010 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7011 &tr->array_buffer, iter->cpu_file);
7013 ret = tracing_alloc_snapshot_instance(tr);
7016 local_irq_disable();
7017 /* Now, we're going to swap */
7018 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7019 update_max_tr(tr, current, smp_processor_id(), NULL);
7021 update_max_tr_single(tr, current, iter->cpu_file);
7025 if (tr->allocated_snapshot) {
7026 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7027 tracing_reset_online_cpus(&tr->max_buffer);
7029 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7039 mutex_unlock(&trace_types_lock);
7043 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7045 struct seq_file *m = file->private_data;
7048 ret = tracing_release(inode, file);
7050 if (file->f_mode & FMODE_READ)
7053 /* If write only, the seq_file is just a stub */
7061 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7062 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7063 size_t count, loff_t *ppos);
7064 static int tracing_buffers_release(struct inode *inode, struct file *file);
7065 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7066 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7068 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7070 struct ftrace_buffer_info *info;
7073 /* The following checks for tracefs lockdown */
7074 ret = tracing_buffers_open(inode, filp);
7078 info = filp->private_data;
7080 if (info->iter.trace->use_max_tr) {
7081 tracing_buffers_release(inode, filp);
7085 info->iter.snapshot = true;
7086 info->iter.array_buffer = &info->iter.tr->max_buffer;
7091 #endif /* CONFIG_TRACER_SNAPSHOT */
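/*
 * Note: besides the snapshot file above, kernel code can trigger the
 * same mechanism directly (sketch, assuming CONFIG_TRACER_SNAPSHOT):
 *
 *	tracing_snapshot_alloc();	(allocate the spare buffer)
 *	tracing_snapshot();		(swap it in, preserving the trace)
 */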
7094 static const struct file_operations tracing_thresh_fops = {
7095 .open = tracing_open_generic,
7096 .read = tracing_thresh_read,
7097 .write = tracing_thresh_write,
7098 .llseek = generic_file_llseek,
7101 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7102 static const struct file_operations tracing_max_lat_fops = {
7103 .open = tracing_open_generic,
7104 .read = tracing_max_lat_read,
7105 .write = tracing_max_lat_write,
7106 .llseek = generic_file_llseek,
7110 static const struct file_operations set_tracer_fops = {
7111 .open = tracing_open_generic,
7112 .read = tracing_set_trace_read,
7113 .write = tracing_set_trace_write,
7114 .llseek = generic_file_llseek,
7117 static const struct file_operations tracing_pipe_fops = {
7118 .open = tracing_open_pipe,
7119 .poll = tracing_poll_pipe,
7120 .read = tracing_read_pipe,
7121 .splice_read = tracing_splice_read_pipe,
7122 .release = tracing_release_pipe,
7123 .llseek = no_llseek,
7126 static const struct file_operations tracing_entries_fops = {
7127 .open = tracing_open_generic_tr,
7128 .read = tracing_entries_read,
7129 .write = tracing_entries_write,
7130 .llseek = generic_file_llseek,
7131 .release = tracing_release_generic_tr,
7134 static const struct file_operations tracing_total_entries_fops = {
7135 .open = tracing_open_generic_tr,
7136 .read = tracing_total_entries_read,
7137 .llseek = generic_file_llseek,
7138 .release = tracing_release_generic_tr,
7141 static const struct file_operations tracing_free_buffer_fops = {
7142 .open = tracing_open_generic_tr,
7143 .write = tracing_free_buffer_write,
7144 .release = tracing_free_buffer_release,
7147 static const struct file_operations tracing_mark_fops = {
7148 .open = tracing_open_generic_tr,
7149 .write = tracing_mark_write,
7150 .llseek = generic_file_llseek,
7151 .release = tracing_release_generic_tr,
7154 static const struct file_operations tracing_mark_raw_fops = {
7155 .open = tracing_open_generic_tr,
7156 .write = tracing_mark_raw_write,
7157 .llseek = generic_file_llseek,
7158 .release = tracing_release_generic_tr,
7161 static const struct file_operations trace_clock_fops = {
7162 .open = tracing_clock_open,
7164 .llseek = seq_lseek,
7165 .release = tracing_single_release_tr,
7166 .write = tracing_clock_write,
7169 static const struct file_operations trace_time_stamp_mode_fops = {
7170 .open = tracing_time_stamp_mode_open,
7172 .llseek = seq_lseek,
7173 .release = tracing_single_release_tr,
7176 #ifdef CONFIG_TRACER_SNAPSHOT
7177 static const struct file_operations snapshot_fops = {
7178 .open = tracing_snapshot_open,
7180 .write = tracing_snapshot_write,
7181 .llseek = tracing_lseek,
7182 .release = tracing_snapshot_release,
7185 static const struct file_operations snapshot_raw_fops = {
7186 .open = snapshot_raw_open,
7187 .read = tracing_buffers_read,
7188 .release = tracing_buffers_release,
7189 .splice_read = tracing_buffers_splice_read,
7190 .llseek = no_llseek,
7193 #endif /* CONFIG_TRACER_SNAPSHOT */
7195 #define TRACING_LOG_ERRS_MAX 8
7196 #define TRACING_LOG_LOC_MAX 128
7198 #define CMD_PREFIX " Command: "
7201 const char **errs; /* ptr to loc-specific array of err strings */
7202 u8 type; /* index into errs -> specific err string */
7203 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7207 struct tracing_log_err {
7208 struct list_head list;
7209 struct err_info info;
7210 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7211 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7214 static DEFINE_MUTEX(tracing_err_log_lock);
7216 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7218 struct tracing_log_err *err;
7220 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7221 err = kzalloc(sizeof(*err), GFP_KERNEL);
7223 err = ERR_PTR(-ENOMEM);
7224 tr->n_err_log_entries++;
7229 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7230 list_del(&err->list);
7236 * err_pos - find the position of a string within a command for error caret placement
7237 * @cmd: The tracing command that caused the error
7238 * @str: The string to position the caret at within @cmd
7240 * Finds the position of the first occurrence of @str within @cmd. The
7241 * return value can be passed to tracing_log_err() for caret placement
7244 * Returns the index within @cmd of the first occurrence of @str or 0
7245 * if @str was not found.
7247 unsigned int err_pos(char *cmd, const char *str)
7251 if (WARN_ON(!strlen(cmd)))
7254 found = strstr(cmd, str);
7262 * tracing_log_err - write an error to the tracing error log
7263 * @tr: The associated trace array for the error (NULL for top level array)
7264 * @loc: A string describing where the error occurred
7265 * @cmd: The tracing command that caused the error
7266 * @errs: The array of loc-specific static error strings
7267 * @type: The index into errs[], which produces the specific static err string
7268 * @pos: The position the caret should be placed in the cmd
7270 * Writes an error into tracing/error_log of the form:
7272 * <loc>: error: <text>
7276 * tracing/error_log is a small log file containing the last
7277 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7278 * unless there has been a tracing error, and the error log can be
7279 * cleared and have its memory freed by writing the empty string in
7280 * truncation mode to it, i.e. echo > tracing/error_log.
7282 * NOTE: the @errs array along with the @type param are used to
7283 * produce a static error string - this string is not copied and saved
7284 * when the error is logged - only a pointer to it is saved. See
7285 * existing callers for examples of how static strings are typically
7286 * defined for use with tracing_log_err().
7288 void tracing_log_err(struct trace_array *tr,
7289 const char *loc, const char *cmd,
7290 const char **errs, u8 type, u8 pos)
7292 struct tracing_log_err *err;
7297 mutex_lock(&tracing_err_log_lock);
7298 err = get_tracing_log_err(tr);
7299 if (PTR_ERR(err) == -ENOMEM) {
7300 mutex_unlock(&tracing_err_log_lock);
7304 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7305 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7307 err->info.errs = errs;
7308 err->info.type = type;
7309 err->info.pos = pos;
7310 err->info.ts = local_clock();
7312 list_add_tail(&err->list, &tr->err_log);
7313 mutex_unlock(&tracing_err_log_lock);
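/*
 * Illustrative sketch of a caller (the names below are hypothetical,
 * not from this file):
 *
 *	static const char *my_errs[] = { "Invalid key", "Duplicate key" };
 *
 *	tracing_log_err(tr, "hist:sched:sched_switch", cmd, my_errs,
 *			0, err_pos(cmd, "badkey"));
 */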
7316 static void clear_tracing_err_log(struct trace_array *tr)
7318 struct tracing_log_err *err, *next;
7320 mutex_lock(&tracing_err_log_lock);
7321 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7322 list_del(&err->list);
7326 tr->n_err_log_entries = 0;
7327 mutex_unlock(&tracing_err_log_lock);
7330 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7332 struct trace_array *tr = m->private;
7334 mutex_lock(&tracing_err_log_lock);
7336 return seq_list_start(&tr->err_log, *pos);
7339 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7341 struct trace_array *tr = m->private;
7343 return seq_list_next(v, &tr->err_log, pos);
7346 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7348 mutex_unlock(&tracing_err_log_lock);
7351 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7355 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7357 for (i = 0; i < pos; i++)
7362 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7364 struct tracing_log_err *err = v;
7367 const char *err_text = err->info.errs[err->info.type];
7368 u64 sec = err->info.ts;
7371 nsec = do_div(sec, NSEC_PER_SEC);
7372 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7373 err->loc, err_text);
7374 seq_printf(m, "%s", err->cmd);
7375 tracing_err_log_show_pos(m, err->info.pos);
7381 static const struct seq_operations tracing_err_log_seq_ops = {
7382 .start = tracing_err_log_seq_start,
7383 .next = tracing_err_log_seq_next,
7384 .stop = tracing_err_log_seq_stop,
7385 .show = tracing_err_log_seq_show
7388 static int tracing_err_log_open(struct inode *inode, struct file *file)
7390 struct trace_array *tr = inode->i_private;
7393 ret = tracing_check_open_get_tr(tr);
7397 /* If this file was opened for write, then erase contents */
7398 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7399 clear_tracing_err_log(tr);
7401 if (file->f_mode & FMODE_READ) {
7402 ret = seq_open(file, &tracing_err_log_seq_ops);
7404 struct seq_file *m = file->private_data;
7407 trace_array_put(tr);
7413 static ssize_t tracing_err_log_write(struct file *file,
7414 const char __user *buffer,
7415 size_t count, loff_t *ppos)
7420 static int tracing_err_log_release(struct inode *inode, struct file *file)
7422 struct trace_array *tr = inode->i_private;
7424 trace_array_put(tr);
7426 if (file->f_mode & FMODE_READ)
7427 seq_release(inode, file);
7432 static const struct file_operations tracing_err_log_fops = {
7433 .open = tracing_err_log_open,
7434 .write = tracing_err_log_write,
7436 .llseek = seq_lseek,
7437 .release = tracing_err_log_release,
7440 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7442 struct trace_array *tr = inode->i_private;
7443 struct ftrace_buffer_info *info;
7446 ret = tracing_check_open_get_tr(tr);
7450 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7452 trace_array_put(tr);
7456 mutex_lock(&trace_types_lock);
7459 info->iter.cpu_file = tracing_get_cpu(inode);
7460 info->iter.trace = tr->current_trace;
7461 info->iter.array_buffer = &tr->array_buffer;
7463 /* Force reading ring buffer for first read */
7464 info->read = (unsigned int)-1;
7466 filp->private_data = info;
7470 mutex_unlock(&trace_types_lock);
7472 ret = nonseekable_open(inode, filp);
7474 trace_array_put(tr);
7480 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7482 struct ftrace_buffer_info *info = filp->private_data;
7483 struct trace_iterator *iter = &info->iter;
7485 return trace_poll(iter, filp, poll_table);
7489 tracing_buffers_read(struct file *filp, char __user *ubuf,
7490 size_t count, loff_t *ppos)
7492 struct ftrace_buffer_info *info = filp->private_data;
7493 struct trace_iterator *iter = &info->iter;
7500 #ifdef CONFIG_TRACER_MAX_TRACE
7501 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7506 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7508 if (IS_ERR(info->spare)) {
7509 ret = PTR_ERR(info->spare);
7512 info->spare_cpu = iter->cpu_file;
7518 /* Do we have previous read data to read? */
7519 if (info->read < PAGE_SIZE)
7523 trace_access_lock(iter->cpu_file);
7524 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7528 trace_access_unlock(iter->cpu_file);
7531 if (trace_empty(iter)) {
7532 if ((filp->f_flags & O_NONBLOCK))
7535 ret = wait_on_pipe(iter, 0);
7546 size = PAGE_SIZE - info->read;
7550 ret = copy_to_user(ubuf, info->spare + info->read, size);
7562 static int tracing_buffers_release(struct inode *inode, struct file *file)
7564 struct ftrace_buffer_info *info = file->private_data;
7565 struct trace_iterator *iter = &info->iter;
7567 mutex_lock(&trace_types_lock);
7569 iter->tr->trace_ref--;
7571 __trace_array_put(iter->tr);
7574 ring_buffer_free_read_page(iter->array_buffer->buffer,
7575 info->spare_cpu, info->spare);
7578 mutex_unlock(&trace_types_lock);
7584 struct trace_buffer *buffer;
7587 refcount_t refcount;
7590 static void buffer_ref_release(struct buffer_ref *ref)
7592 if (!refcount_dec_and_test(&ref->refcount))
7594 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7598 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7599 struct pipe_buffer *buf)
7601 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7603 buffer_ref_release(ref);
7607 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7608 struct pipe_buffer *buf)
7610 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7612 if (refcount_read(&ref->refcount) > INT_MAX/2)
7615 refcount_inc(&ref->refcount);
7619 /* Pipe buffer operations for a buffer. */
7620 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7621 .release = buffer_pipe_buf_release,
7622 .get = buffer_pipe_buf_get,
7626 * Callback from splice_to_pipe(), if we need to release some pages
7627 * at the end of the spd in case we errored out while filling the pipe.
7629 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7631 struct buffer_ref *ref =
7632 (struct buffer_ref *)spd->partial[i].private;
7634 buffer_ref_release(ref);
7635 spd->partial[i].private = 0;
7639 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7640 struct pipe_inode_info *pipe, size_t len,
7643 struct ftrace_buffer_info *info = file->private_data;
7644 struct trace_iterator *iter = &info->iter;
7645 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7646 struct page *pages_def[PIPE_DEF_BUFFERS];
7647 struct splice_pipe_desc spd = {
7649 .partial = partial_def,
7650 .nr_pages_max = PIPE_DEF_BUFFERS,
7651 .ops = &buffer_pipe_buf_ops,
7652 .spd_release = buffer_spd_release,
7654 struct buffer_ref *ref;
7658 #ifdef CONFIG_TRACER_MAX_TRACE
7659 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7663 if (*ppos & (PAGE_SIZE - 1))
7666 if (len & (PAGE_SIZE - 1)) {
7667 if (len < PAGE_SIZE)
7672 if (splice_grow_spd(pipe, &spd))
7676 trace_access_lock(iter->cpu_file);
7677 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7679 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7683 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7689 refcount_set(&ref->refcount, 1);
7690 ref->buffer = iter->array_buffer->buffer;
7691 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7692 if (IS_ERR(ref->page)) {
7693 ret = PTR_ERR(ref->page);
7698 ref->cpu = iter->cpu_file;
7700 r = ring_buffer_read_page(ref->buffer, &ref->page,
7701 len, iter->cpu_file, 1);
7703 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7709 page = virt_to_page(ref->page);
7711 spd.pages[i] = page;
7712 spd.partial[i].len = PAGE_SIZE;
7713 spd.partial[i].offset = 0;
7714 spd.partial[i].private = (unsigned long)ref;
7718 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7721 trace_access_unlock(iter->cpu_file);
7724 /* did we read anything? */
7725 if (!spd.nr_pages) {
7730 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7733 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7740 ret = splice_to_pipe(pipe, &spd);
7742 splice_shrink_spd(&spd);
7747 static const struct file_operations tracing_buffers_fops = {
7748 .open = tracing_buffers_open,
7749 .read = tracing_buffers_read,
7750 .poll = tracing_buffers_poll,
7751 .release = tracing_buffers_release,
7752 .splice_read = tracing_buffers_splice_read,
7753 .llseek = no_llseek,
7757 tracing_stats_read(struct file *filp, char __user *ubuf,
7758 size_t count, loff_t *ppos)
7760 struct inode *inode = file_inode(filp);
7761 struct trace_array *tr = inode->i_private;
7762 struct array_buffer *trace_buf = &tr->array_buffer;
7763 int cpu = tracing_get_cpu(inode);
7764 struct trace_seq *s;
7766 unsigned long long t;
7767 unsigned long usec_rem;
7769 s = kmalloc(sizeof(*s), GFP_KERNEL);
7775 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7776 trace_seq_printf(s, "entries: %ld\n", cnt);
7778 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7779 trace_seq_printf(s, "overrun: %ld\n", cnt);
7781 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7782 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7784 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7785 trace_seq_printf(s, "bytes: %ld\n", cnt);
7787 if (trace_clocks[tr->clock_id].in_ns) {
7788 /* local or global for trace_clock */
7789 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7790 usec_rem = do_div(t, USEC_PER_SEC);
7791 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7794 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7795 usec_rem = do_div(t, USEC_PER_SEC);
7796 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7798 /* counter or tsc mode for trace_clock */
7799 trace_seq_printf(s, "oldest event ts: %llu\n",
7800 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7802 trace_seq_printf(s, "now ts: %llu\n",
7803 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7806 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7807 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7809 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7810 trace_seq_printf(s, "read events: %ld\n", cnt);
7812 count = simple_read_from_buffer(ubuf, count, ppos,
7813 s->buffer, trace_seq_used(s));
7820 static const struct file_operations tracing_stats_fops = {
7821 .open = tracing_open_generic_tr,
7822 .read = tracing_stats_read,
7823 .llseek = generic_file_llseek,
7824 .release = tracing_release_generic_tr,
7827 #ifdef CONFIG_DYNAMIC_FTRACE
7830 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7831 size_t cnt, loff_t *ppos)
7837 /* 256 should be plenty to hold the amount needed */
7838 buf = kmalloc(256, GFP_KERNEL);
7842 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7843 ftrace_update_tot_cnt,
7844 ftrace_number_of_pages,
7845 ftrace_number_of_groups);
7847 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7852 static const struct file_operations tracing_dyn_info_fops = {
7853 .open = tracing_open_generic,
7854 .read = tracing_read_dyn_info,
7855 .llseek = generic_file_llseek,
7857 #endif /* CONFIG_DYNAMIC_FTRACE */
7859 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7861 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7862 struct trace_array *tr, struct ftrace_probe_ops *ops,
7865 tracing_snapshot_instance(tr);
7869 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7870 struct trace_array *tr, struct ftrace_probe_ops *ops,
7873 struct ftrace_func_mapper *mapper = data;
7877 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7887 tracing_snapshot_instance(tr);
7891 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7892 struct ftrace_probe_ops *ops, void *data)
7894 struct ftrace_func_mapper *mapper = data;
7897 seq_printf(m, "%ps:", (void *)ip);
7899 seq_puts(m, "snapshot");
7902 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7905 seq_printf(m, ":count=%ld\n", *count);
7907 seq_puts(m, ":unlimited\n");
7913 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7914 unsigned long ip, void *init_data, void **data)
7916 struct ftrace_func_mapper *mapper = *data;
7919 mapper = allocate_ftrace_func_mapper();
7925 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7929 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7930 unsigned long ip, void *data)
7932 struct ftrace_func_mapper *mapper = data;
7937 free_ftrace_func_mapper(mapper, NULL);
7941 ftrace_func_mapper_remove_ip(mapper, ip);
7944 static struct ftrace_probe_ops snapshot_probe_ops = {
7945 .func = ftrace_snapshot,
7946 .print = ftrace_snapshot_print,
7949 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7950 .func = ftrace_count_snapshot,
7951 .print = ftrace_snapshot_print,
7952 .init = ftrace_snapshot_init,
7953 .free = ftrace_snapshot_free,
7957 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7958 char *glob, char *cmd, char *param, int enable)
7960 struct ftrace_probe_ops *ops;
7961 void *count = (void *)-1;
7968 /* hash funcs only work with set_ftrace_filter */
7972 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7975 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7980 number = strsep(¶m, ":");
7982 if (!strlen(number))
7986 * We use the callback data field (which is a pointer)
7989 ret = kstrtoul(number, 0, (unsigned long *)&count);
7994 ret = tracing_alloc_snapshot_instance(tr);
7998 ret = register_ftrace_function_probe(glob, tr, ops, count);
8001 return ret < 0 ? ret : 0;
8004 static struct ftrace_func_command ftrace_snapshot_cmd = {
8006 .func = ftrace_trace_snapshot_callback,
8009 static __init int register_snapshot_cmd(void)
8011 return register_ftrace_command(&ftrace_snapshot_cmd);
8014 static inline __init int register_snapshot_cmd(void) { return 0; }
8015 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
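/*
 * Usage sketch for the "snapshot" function command registered above
 * (syntax as documented in Documentation/trace/ftrace.rst):
 *
 *	echo 'some_function:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'some_function:snapshot:3' > set_ftrace_filter	# only the first 3 hits
 *	echo '!some_function:snapshot' > set_ftrace_filter	# remove the probe
 */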
8017 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8019 if (WARN_ON(!tr->dir))
8020 return ERR_PTR(-ENODEV);
8022 /* Top directory uses NULL as the parent */
8023 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8026 /* All sub buffers have a descriptor */
8030 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8032 struct dentry *d_tracer;
8035 return tr->percpu_dir;
8037 d_tracer = tracing_get_dentry(tr);
8038 if (IS_ERR(d_tracer))
8041 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8043 MEM_FAIL(!tr->percpu_dir,
8044 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8046 return tr->percpu_dir;
8049 static struct dentry *
8050 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8051 void *data, long cpu, const struct file_operations *fops)
8053 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8055 if (ret) /* See tracing_get_cpu() */
8056 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8061 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8063 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8064 struct dentry *d_cpu;
8065 char cpu_dir[30]; /* 30 characters should be more than enough */
8070 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8071 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8073 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8077 /* per cpu trace_pipe */
8078 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8079 tr, cpu, &tracing_pipe_fops);
8082 trace_create_cpu_file("trace", 0644, d_cpu,
8083 tr, cpu, &tracing_fops);
8085 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8086 tr, cpu, &tracing_buffers_fops);
8088 trace_create_cpu_file("stats", 0444, d_cpu,
8089 tr, cpu, &tracing_stats_fops);
8091 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8092 tr, cpu, &tracing_entries_fops);
8094 #ifdef CONFIG_TRACER_SNAPSHOT
8095 trace_create_cpu_file("snapshot", 0644, d_cpu,
8096 tr, cpu, &snapshot_fops);
8098 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8099 tr, cpu, &snapshot_raw_fops);
8103 #ifdef CONFIG_FTRACE_SELFTEST
8104 /* Let selftest have access to static functions in this file */
8105 #include "trace_selftest.c"
8109 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8112 struct trace_option_dentry *topt = filp->private_data;
8115 if (topt->flags->val & topt->opt->bit)
8120 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8124 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8127 struct trace_option_dentry *topt = filp->private_data;
8131 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8135 if (val != 0 && val != 1)
8138 if (!!(topt->flags->val & topt->opt->bit) != val) {
8139 mutex_lock(&trace_types_lock);
8140 ret = __set_tracer_option(topt->tr, topt->flags,
8142 mutex_unlock(&trace_types_lock);
8153 static const struct file_operations trace_options_fops = {
8154 .open = tracing_open_generic,
8155 .read = trace_options_read,
8156 .write = trace_options_write,
8157 .llseek = generic_file_llseek,
8161 * In order to pass in both the trace_array descriptor and the index
8162 * to the flag that the trace option file represents, the trace_array
8163 * has a character array of trace_flags_index[], which holds the index
8164 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8165 * The address of this character array is passed to the flag option file
8166 * read/write callbacks.
8168 * In order to extract both the index and the trace_array descriptor,
8169 * get_tr_index() uses the following algorithm.
8173 * As the pointer itself contains the address of the index (remember
8176 * Then to get the trace_array descriptor, by subtracting that index
8177 * from the ptr, we get to the start of the index itself.
8179 * ptr - idx == &index[0]
8181 * Then a simple container_of() from that pointer gets us to the
8182 * trace_array descriptor.
8184 static void get_tr_index(void *data, struct trace_array **ptr,
8185 unsigned int *pindex)
8187 *pindex = *(unsigned char *)data;
8189 *ptr = container_of(data - *pindex, struct trace_array,
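/*
 * Worked example: if data == &tr->trace_flags_index[5], then
 * *(unsigned char *)data == 5 (the array was filled with index[i] == i),
 * so data - 5 == &tr->trace_flags_index[0], and container_of() on that
 * address via the trace_flags_index member recovers tr itself.
 */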
8194 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8197 void *tr_index = filp->private_data;
8198 struct trace_array *tr;
8202 get_tr_index(tr_index, &tr, &index);
8204 if (tr->trace_flags & (1 << index))
8209 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8213 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8216 void *tr_index = filp->private_data;
8217 struct trace_array *tr;
8222 get_tr_index(tr_index, &tr, &index);
8224 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8228 if (val != 0 && val != 1)
8231 mutex_lock(&event_mutex);
8232 mutex_lock(&trace_types_lock);
8233 ret = set_tracer_flag(tr, 1 << index, val);
8234 mutex_unlock(&trace_types_lock);
8235 mutex_unlock(&event_mutex);
8245 static const struct file_operations trace_options_core_fops = {
8246 .open = tracing_open_generic,
8247 .read = trace_options_core_read,
8248 .write = trace_options_core_write,
8249 .llseek = generic_file_llseek,
8252 struct dentry *trace_create_file(const char *name,
8254 struct dentry *parent,
8256 const struct file_operations *fops)
8260 ret = tracefs_create_file(name, mode, parent, data, fops);
8262 pr_warn("Could not create tracefs '%s' entry\n", name);
8268 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8270 struct dentry *d_tracer;
8275 d_tracer = tracing_get_dentry(tr);
8276 if (IS_ERR(d_tracer))
8279 tr->options = tracefs_create_dir("options", d_tracer);
8281 pr_warn("Could not create tracefs directory 'options'\n");
8289 create_trace_option_file(struct trace_array *tr,
8290 struct trace_option_dentry *topt,
8291 struct tracer_flags *flags,
8292 struct tracer_opt *opt)
8294 struct dentry *t_options;
8296 t_options = trace_options_init_dentry(tr);
8300 topt->flags = flags;
8304 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8305 &trace_options_fops);
8310 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8312 struct trace_option_dentry *topts;
8313 struct trace_options *tr_topts;
8314 struct tracer_flags *flags;
8315 struct tracer_opt *opts;
8322 flags = tracer->flags;
8324 if (!flags || !flags->opts)
8328 * If this is an instance, only create flags for tracers
8329 * the instance may have.
8331 if (!trace_ok_for_array(tracer, tr))
8334 for (i = 0; i < tr->nr_topts; i++) {
8335 /* Make sure there are no duplicate flags. */
8336 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8342 for (cnt = 0; opts[cnt].name; cnt++)
8345 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8349 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8356 tr->topts = tr_topts;
8357 tr->topts[tr->nr_topts].tracer = tracer;
8358 tr->topts[tr->nr_topts].topts = topts;
8361 for (cnt = 0; opts[cnt].name; cnt++) {
8362 create_trace_option_file(tr, &topts[cnt], flags,
8364 MEM_FAIL(topts[cnt].entry == NULL,
8365 "Failed to create trace option: %s",
8370 static struct dentry *
8371 create_trace_option_core_file(struct trace_array *tr,
8372 const char *option, long index)
8374 struct dentry *t_options;
8376 t_options = trace_options_init_dentry(tr);
8380 return trace_create_file(option, 0644, t_options,
8381 (void *)&tr->trace_flags_index[index],
8382 &trace_options_core_fops);
8385 static void create_trace_options_dir(struct trace_array *tr)
8387 struct dentry *t_options;
8388 bool top_level = tr == &global_trace;
8391 t_options = trace_options_init_dentry(tr);
8395 for (i = 0; trace_options[i]; i++) {
8397 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8398 create_trace_option_core_file(tr, trace_options[i], i);
8403 rb_simple_read(struct file *filp, char __user *ubuf,
8404 size_t cnt, loff_t *ppos)
8406 struct trace_array *tr = filp->private_data;
8410 r = tracer_tracing_is_on(tr);
8411 r = sprintf(buf, "%d\n", r);
8413 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8417 rb_simple_write(struct file *filp, const char __user *ubuf,
8418 size_t cnt, loff_t *ppos)
8420 struct trace_array *tr = filp->private_data;
8421 struct trace_buffer *buffer = tr->array_buffer.buffer;
8425 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8430 mutex_lock(&trace_types_lock);
8431 if (!!val == tracer_tracing_is_on(tr)) {
8432 val = 0; /* do nothing */
8434 tracer_tracing_on(tr);
8435 if (tr->current_trace->start)
8436 tr->current_trace->start(tr);
8438 tracer_tracing_off(tr);
8439 if (tr->current_trace->stop)
8440 tr->current_trace->stop(tr);
8442 mutex_unlock(&trace_types_lock);
8450 static const struct file_operations rb_simple_fops = {
8451 .open = tracing_open_generic_tr,
8452 .read = rb_simple_read,
8453 .write = rb_simple_write,
8454 .release = tracing_release_generic_tr,
8455 .llseek = default_llseek,
8459 buffer_percent_read(struct file *filp, char __user *ubuf,
8460 size_t cnt, loff_t *ppos)
8462 struct trace_array *tr = filp->private_data;
8466 r = tr->buffer_percent;
8467 r = sprintf(buf, "%d\n", r);
8469 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8473 buffer_percent_write(struct file *filp, const char __user *ubuf,
8474 size_t cnt, loff_t *ppos)
8476 struct trace_array *tr = filp->private_data;
8480 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8490 tr->buffer_percent = val;
8497 static const struct file_operations buffer_percent_fops = {
8498 .open = tracing_open_generic_tr,
8499 .read = buffer_percent_read,
8500 .write = buffer_percent_write,
8501 .release = tracing_release_generic_tr,
8502 .llseek = default_llseek,
8505 static struct dentry *trace_instance_dir;
8508 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8511 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8513 enum ring_buffer_flags rb_flags;
8515 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8519 buf->buffer = ring_buffer_alloc(size, rb_flags);
8523 buf->data = alloc_percpu(struct trace_array_cpu);
8525 ring_buffer_free(buf->buffer);
8530 /* Allocate the first page for all buffers */
8531 set_buffer_entries(&tr->array_buffer,
8532 ring_buffer_size(tr->array_buffer.buffer, 0));
8537 static int allocate_trace_buffers(struct trace_array *tr, int size)
8541 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8545 #ifdef CONFIG_TRACER_MAX_TRACE
8546 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8547 allocate_snapshot ? size : 1);
8548 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8549 ring_buffer_free(tr->array_buffer.buffer);
8550 tr->array_buffer.buffer = NULL;
8551 free_percpu(tr->array_buffer.data);
8552 tr->array_buffer.data = NULL;
8555 tr->allocated_snapshot = allocate_snapshot;
8558 * Only the top level trace array gets its snapshot allocated
8559 * from the kernel command line.
8561 allocate_snapshot = false;
8567 static void free_trace_buffer(struct array_buffer *buf)
8570 ring_buffer_free(buf->buffer);
8572 free_percpu(buf->data);
8577 static void free_trace_buffers(struct trace_array *tr)
8582 free_trace_buffer(&tr->array_buffer);
8584 #ifdef CONFIG_TRACER_MAX_TRACE
8585 free_trace_buffer(&tr->max_buffer);
8589 static void init_trace_flags_index(struct trace_array *tr)
8593 /* Used by the trace options files */
8594 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8595 tr->trace_flags_index[i] = i;
8598 static void __update_tracer_options(struct trace_array *tr)
8602 for (t = trace_types; t; t = t->next)
8603 add_tracer_options(tr, t);
8606 static void update_tracer_options(struct trace_array *tr)
8608 mutex_lock(&trace_types_lock);
8609 __update_tracer_options(tr);
8610 mutex_unlock(&trace_types_lock);
8613 /* Must have trace_types_lock held */
8614 struct trace_array *trace_array_find(const char *instance)
8616 struct trace_array *tr, *found = NULL;
8618 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8619 if (tr->name && strcmp(tr->name, instance) == 0) {
8628 struct trace_array *trace_array_find_get(const char *instance)
8630 struct trace_array *tr;
8632 mutex_lock(&trace_types_lock);
8633 tr = trace_array_find(instance);
8636 mutex_unlock(&trace_types_lock);
8641 static struct trace_array *trace_array_create(const char *name)
8643 struct trace_array *tr;
8647 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8649 return ERR_PTR(ret);
8651 tr->name = kstrdup(name, GFP_KERNEL);
8655 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8658 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8660 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8662 raw_spin_lock_init(&tr->start_lock);
8664 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8666 tr->current_trace = &nop_trace;
8668 INIT_LIST_HEAD(&tr->systems);
8669 INIT_LIST_HEAD(&tr->events);
8670 INIT_LIST_HEAD(&tr->hist_vars);
8671 INIT_LIST_HEAD(&tr->err_log);
8673 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8676 tr->dir = tracefs_create_dir(name, trace_instance_dir);
8680 ret = event_trace_add_tracer(tr->dir, tr);
8682 tracefs_remove(tr->dir);
8686 ftrace_init_trace_array(tr);
8688 init_tracer_tracefs(tr, tr->dir);
8689 init_trace_flags_index(tr);
8690 __update_tracer_options(tr);
8692 list_add(&tr->list, &ftrace_trace_arrays);
8700 free_trace_buffers(tr);
8701 free_cpumask_var(tr->tracing_cpumask);
8705 return ERR_PTR(ret);
8708 static int instance_mkdir(const char *name)
8710 struct trace_array *tr;
8713 mutex_lock(&event_mutex);
8714 mutex_lock(&trace_types_lock);
8717 if (trace_array_find(name))
8720 tr = trace_array_create(name);
8722 ret = PTR_ERR_OR_ZERO(tr);
8725 mutex_unlock(&trace_types_lock);
8726 mutex_unlock(&event_mutex);
8731 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8732 * @name: The name of the trace array to be looked up/created.
8734 * Returns a pointer to the trace array with the given name,
8735 * or NULL if it cannot be created.
8737 * NOTE: This function increments the reference counter associated with the
8738 * trace array returned. This makes sure it cannot be freed while in use.
8739 * Use trace_array_put() once the trace array is no longer needed.
8740 * If the trace_array is to be freed, trace_array_destroy() needs to
8741 * be called after the trace_array_put(), or simply let user space delete
8742 * it from the tracefs instances directory. But until the
8743 * trace_array_put() is called, user space cannot delete it.
8746 struct trace_array *trace_array_get_by_name(const char *name)
8748 struct trace_array *tr;
8750 mutex_lock(&event_mutex);
8751 mutex_lock(&trace_types_lock);
8753 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8754 if (tr->name && strcmp(tr->name, name) == 0)
8758 tr = trace_array_create(name);
8766 mutex_unlock(&trace_types_lock);
8767 mutex_unlock(&event_mutex);
8770 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
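/*
 * Minimal module-side usage sketch ("my_instance" is a made-up name):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	trace_array_printk(tr, _THIS_IP_, "hello\n");
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should go away)
 */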
8772 static int __remove_instance(struct trace_array *tr)
8776 /* Reference counter for a newly created trace array = 1. */
8777 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8780 list_del(&tr->list);
8782 /* Disable all the flags that were enabled coming in */
8783 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8784 if ((1 << i) & ZEROED_TRACE_FLAGS)
8785 set_tracer_flag(tr, 1 << i, 0);
8788 tracing_set_nop(tr);
8789 clear_ftrace_function_probes(tr);
8790 event_trace_del_tracer(tr);
8791 ftrace_clear_pids(tr);
8792 ftrace_destroy_function_files(tr);
8793 tracefs_remove(tr->dir);
8794 free_trace_buffers(tr);
8796 for (i = 0; i < tr->nr_topts; i++) {
8797 kfree(tr->topts[i].topts);
8801 free_cpumask_var(tr->tracing_cpumask);
8809 int trace_array_destroy(struct trace_array *this_tr)
8811 struct trace_array *tr;
8817 mutex_lock(&event_mutex);
8818 mutex_lock(&trace_types_lock);
8822 /* Making sure trace array exists before destroying it. */
8823 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8824 if (tr == this_tr) {
8825 ret = __remove_instance(tr);
8830 mutex_unlock(&trace_types_lock);
8831 mutex_unlock(&event_mutex);
8835 EXPORT_SYMBOL_GPL(trace_array_destroy);
8837 static int instance_rmdir(const char *name)
8839 struct trace_array *tr;
8842 mutex_lock(&event_mutex);
8843 mutex_lock(&trace_types_lock);
8846 tr = trace_array_find(name);
8848 ret = __remove_instance(tr);
8850 mutex_unlock(&trace_types_lock);
8851 mutex_unlock(&event_mutex);
8856 static __init void create_trace_instances(struct dentry *d_tracer)
8858 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8861 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8866 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8868 struct trace_event_file *file;
8871 trace_create_file("available_tracers", 0444, d_tracer,
8872 tr, &show_traces_fops);
8874 trace_create_file("current_tracer", 0644, d_tracer,
8875 tr, &set_tracer_fops);
8877 trace_create_file("tracing_cpumask", 0644, d_tracer,
8878 tr, &tracing_cpumask_fops);
8880 trace_create_file("trace_options", 0644, d_tracer,
8881 tr, &tracing_iter_fops);
8883 trace_create_file("trace", 0644, d_tracer,
8886 trace_create_file("trace_pipe", 0444, d_tracer,
8887 tr, &tracing_pipe_fops);
8889 trace_create_file("buffer_size_kb", 0644, d_tracer,
8890 tr, &tracing_entries_fops);
8892 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8893 tr, &tracing_total_entries_fops);
8895 trace_create_file("free_buffer", 0200, d_tracer,
8896 tr, &tracing_free_buffer_fops);
8898 trace_create_file("trace_marker", 0220, d_tracer,
8899 tr, &tracing_mark_fops);
8901 file = __find_event_file(tr, "ftrace", "print");
8902 if (file && file->dir)
8903 trace_create_file("trigger", 0644, file->dir, file,
8904 &event_trigger_fops);
8905 tr->trace_marker_file = file;
8907 trace_create_file("trace_marker_raw", 0220, d_tracer,
8908 tr, &tracing_mark_raw_fops);
8910 trace_create_file("trace_clock", 0644, d_tracer, tr,
8913 trace_create_file("tracing_on", 0644, d_tracer,
8914 tr, &rb_simple_fops);
8916 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8917 &trace_time_stamp_mode_fops);
8919 tr->buffer_percent = 50;
8921 trace_create_file("buffer_percent", 0444, d_tracer,
8922 tr, &buffer_percent_fops);
8924 create_trace_options_dir(tr);
8926 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8927 trace_create_maxlat_file(tr, d_tracer);
8930 if (ftrace_create_function_files(tr, d_tracer))
8931 MEM_FAIL(1, "Could not allocate function filter files");
8933 #ifdef CONFIG_TRACER_SNAPSHOT
8934 trace_create_file("snapshot", 0644, d_tracer,
8935 tr, &snapshot_fops);
8938 trace_create_file("error_log", 0644, d_tracer,
8939 tr, &tracing_err_log_fops);
8941 for_each_tracing_cpu(cpu)
8942 tracing_init_tracefs_percpu(tr, cpu);
8944 ftrace_init_tracefs(tr, d_tracer);
8947 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8949 struct vfsmount *mnt;
8950 struct file_system_type *type;
8953 * To maintain backward compatibility for tools that mount
8954 * debugfs to get to the tracing facility, tracefs is automatically
8955 * mounted to the debugfs/tracing directory.
8957 type = get_fs_type("tracefs");
8960 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8961 put_filesystem(type);
8970 * tracing_init_dentry - initialize top level trace array
8972 * This is called when creating files or directories in the tracing
8973 * directory. It is called via fs_initcall() by any of the boot up code
8974 * and expects to return the dentry of the top level tracing directory.
8976 struct dentry *tracing_init_dentry(void)
8978 struct trace_array *tr = &global_trace;
8980 if (security_locked_down(LOCKDOWN_TRACEFS)) {
8981 pr_warn("Tracing disabled due to lockdown\n");
8982 return ERR_PTR(-EPERM);
8985 /* The top level trace array uses NULL as parent */
8989 if (WARN_ON(!tracefs_initialized()))
8990 return ERR_PTR(-ENODEV);
8993 * As there may still be users that expect the tracing
8994 * files to exist in debugfs/tracing, we must automount
8995 * the tracefs file system there, so older tools still
8996 * work with the newer kernel.
8998 tr->dir = debugfs_create_automount("tracing", NULL,
8999 trace_automount, NULL);
9004 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9005 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9007 static void __init trace_eval_init(void)
9011 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9012 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9015 #ifdef CONFIG_MODULES
9016 static void trace_module_add_evals(struct module *mod)
9018 if (!mod->num_trace_evals)
9022 * Modules with bad taint do not have events created; do
9023 * not bother with enums either.
9025 if (trace_module_has_bad_taint(mod))
9028 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9031 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9032 static void trace_module_remove_evals(struct module *mod)
9034 union trace_eval_map_item *map;
9035 union trace_eval_map_item **last = &trace_eval_maps;
9037 if (!mod->num_trace_evals)
9040 mutex_lock(&trace_eval_mutex);
9042 map = trace_eval_maps;
9045 if (map->head.mod == mod)
9047 map = trace_eval_jmp_to_tail(map);
9048 last = &map->tail.next;
9049 map = map->tail.next;
9054 *last = trace_eval_jmp_to_tail(map)->tail.next;
9057 mutex_unlock(&trace_eval_mutex);
9060 static inline void trace_module_remove_evals(struct module *mod) { }
9061 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9063 static int trace_module_notify(struct notifier_block *self,
9064 unsigned long val, void *data)
9066 struct module *mod = data;
9069 case MODULE_STATE_COMING:
9070 trace_module_add_evals(mod);
9072 case MODULE_STATE_GOING:
9073 trace_module_remove_evals(mod);
9080 static struct notifier_block trace_module_nb = {
9081 .notifier_call = trace_module_notify,
9084 #endif /* CONFIG_MODULES */
9086 static __init int tracer_init_tracefs(void)
9088 struct dentry *d_tracer;
9090 trace_access_lock_init();
9092 d_tracer = tracing_init_dentry();
9093 if (IS_ERR(d_tracer))
9098 init_tracer_tracefs(&global_trace, d_tracer);
9099 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9101 trace_create_file("tracing_thresh", 0644, d_tracer,
9102 &global_trace, &tracing_thresh_fops);
9104 trace_create_file("README", 0444, d_tracer,
9105 NULL, &tracing_readme_fops);
9107 trace_create_file("saved_cmdlines", 0444, d_tracer,
9108 NULL, &tracing_saved_cmdlines_fops);
9110 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9111 NULL, &tracing_saved_cmdlines_size_fops);
9113 trace_create_file("saved_tgids", 0444, d_tracer,
9114 NULL, &tracing_saved_tgids_fops);
9118 trace_create_eval_file(d_tracer);
9120 #ifdef CONFIG_MODULES
9121 register_module_notifier(&trace_module_nb);
9124 #ifdef CONFIG_DYNAMIC_FTRACE
9125 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9126 NULL, &tracing_dyn_info_fops);
9129 create_trace_instances(d_tracer);
9131 update_tracer_options(&global_trace);
9136 static int trace_panic_handler(struct notifier_block *this,
9137 unsigned long event, void *unused)
9139 if (ftrace_dump_on_oops)
9140 ftrace_dump(ftrace_dump_on_oops);
9144 static struct notifier_block trace_panic_notifier = {
9145 .notifier_call = trace_panic_handler,
9147 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9150 static int trace_die_handler(struct notifier_block *self,
9156 if (ftrace_dump_on_oops)
9157 ftrace_dump(ftrace_dump_on_oops);
9165 static struct notifier_block trace_die_notifier = {
9166 .notifier_call = trace_die_handler,
9171 * printk is set to a max of 1024; we really don't need it that big.
9172 * Nothing should be printing 1000 characters anyway.
9174 #define TRACE_MAX_PRINT 1000
9177 * Define here KERN_TRACE so that we have one place to modify
9178 * it if we decide to change what log level the ftrace dump
9181 #define KERN_TRACE KERN_EMERG
9184 trace_printk_seq(struct trace_seq *s)
9186 /* Probably should print a warning here. */
9187 if (s->seq.len >= TRACE_MAX_PRINT)
9188 s->seq.len = TRACE_MAX_PRINT;
9191 * More paranoid code. Although the buffer size is set to
9192 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9193 * an extra layer of protection.
9195 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9196 s->seq.len = s->seq.size - 1;
9198 /* should be zero-terminated, but we are paranoid. */
9199 s->buffer[s->seq.len] = 0;
9201 printk(KERN_TRACE "%s", s->buffer);
9206 void trace_init_global_iter(struct trace_iterator *iter)
9208 iter->tr = &global_trace;
9209 iter->trace = iter->tr->current_trace;
9210 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9211 iter->array_buffer = &global_trace.array_buffer;
9213 if (iter->trace && iter->trace->open)
9214 iter->trace->open(iter);
9216 /* Annotate start of buffers if we had overruns */
9217 if (ring_buffer_overruns(iter->array_buffer->buffer))
9218 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9220 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9221 if (trace_clocks[iter->tr->clock_id].in_ns)
9222 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9225 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9227 /* use static because iter can be a bit big for the stack */
9228 static struct trace_iterator iter;
9229 static atomic_t dump_running;
9230 struct trace_array *tr = &global_trace;
9231 unsigned int old_userobj;
9232 unsigned long flags;
9235 /* Only allow one dump user at a time. */
9236 if (atomic_inc_return(&dump_running) != 1) {
9237 atomic_dec(&dump_running);
9242 * Always turn off tracing when we dump.
9243 * We don't need to show trace output of what happens
9244 * between multiple crashes.
9246 * If the user does a sysrq-z, then they can re-enable
9247 * tracing with echo 1 > tracing_on.
9251 local_irq_save(flags);
9252 printk_nmi_direct_enter();
9254 /* Simulate the iterator */
9255 trace_init_global_iter(&iter);
9256 /* Cannot use kmalloc for iter.temp */
9257 iter.temp = static_temp_buf;
9258 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9260 for_each_tracing_cpu(cpu) {
9261 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9264 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9266 /* don't look at user memory in panic mode */
9267 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9269 switch (oops_dump_mode) {
9271 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9274 iter.cpu_file = raw_smp_processor_id();
9279 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9280 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9283 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9285 /* Did function tracer already get disabled? */
9286 if (ftrace_is_dead()) {
9287 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9288 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9292 * We need to stop all tracing on all CPUs to read
9293 * the next buffer. This is a bit expensive, but is
9294 * not done often. We fill all that we can read,
9295 * and then release the locks again.
9298 while (!trace_empty(&iter)) {
9301 printk(KERN_TRACE "---------------------------------\n");
9305 trace_iterator_reset(&iter);
9306 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9308 if (trace_find_next_entry_inc(&iter) != NULL) {
9311 ret = print_trace_line(&iter);
9312 if (ret != TRACE_TYPE_NO_CONSUME)
9313 trace_consume(&iter);
9315 touch_nmi_watchdog();
9317 trace_printk_seq(&iter.seq);
9321 printk(KERN_TRACE " (ftrace buffer empty)\n");
9323 printk(KERN_TRACE "---------------------------------\n");
9326 tr->trace_flags |= old_userobj;
9328 for_each_tracing_cpu(cpu) {
9329 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9331 atomic_dec(&dump_running);
9332 printk_nmi_direct_exit();
9333 local_irq_restore(flags);
9335 EXPORT_SYMBOL_GPL(ftrace_dump);
9337 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9344 argv = argv_split(GFP_KERNEL, buf, &argc);
9349 ret = createfn(argc, argv);
9356 #define WRITE_BUFSIZE 4096
9358 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9359 size_t count, loff_t *ppos,
9360 int (*createfn)(int, char **))
9362 char *kbuf, *buf, *tmp;
9367 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9371 while (done < count) {
9372 size = count - done;
9374 if (size >= WRITE_BUFSIZE)
9375 size = WRITE_BUFSIZE - 1;
9377 if (copy_from_user(kbuf, buffer + done, size)) {
9384 tmp = strchr(buf, '\n');
9387 size = tmp - buf + 1;
9390 if (done + size < count) {
9393 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9394 pr_warn("Line length is too long: Should be less than %d\n",
9402 /* Remove comments */
9403 tmp = strchr(buf, '#');
9408 ret = trace_run_command(buf, createfn);
9413 } while (done < count);
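/*
 * Usage sketch, modeled on the kprobe_events write handler (the handler
 * and callback names below are illustrative): each '\n'-terminated,
 * '#'-stripped line of the user buffer is split into argv and passed to
 * the callback.
 *
 *	static ssize_t my_probes_write(struct file *file,
 *				       const char __user *ubuf,
 *				       size_t cnt, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, cnt, ppos,
 *					       my_create_probe_fn);
 *	}
 */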
9423 __init static int tracer_alloc_buffers(void)
9429 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9430 pr_warn("Tracing disabled due to lockdown\n");
9435 * Make sure we don't accidentally add more trace options
9436 * than we have bits for.
9438 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9440 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9443 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9444 goto out_free_buffer_mask;
9446 /* Only allocate trace_printk buffers if a trace_printk exists */
9447 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9448 /* Must be called before global_trace.buffer is allocated */
9449 trace_printk_init_buffers();
9451 /* To save memory, keep the ring buffer size to its minimum */
9452 if (ring_buffer_expanded)
9453 ring_buf_size = trace_buf_size;
9457 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9458 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9460 raw_spin_lock_init(&global_trace.start_lock);
9463 * The prepare callbacks allocate some memory for the ring buffer. We
9464 * don't free the buffer if the CPU goes down. If we were to free
9465 * the buffer, then the user would lose any trace that was in the
9466 * buffer. The memory will be removed once the "instance" is removed.
9468 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9469 "trace/RB:preapre", trace_rb_cpu_prepare,
9472 goto out_free_cpumask;
9473 /* Used for event triggers */
9475 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9477 goto out_rm_hp_state;
9479 if (trace_create_savedcmd() < 0)
9480 goto out_free_temp_buffer;
9482 /* TODO: make the number of buffers hot pluggable with CPUs */
9483 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9484 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9485 goto out_free_savedcmd;
9488 if (global_trace.buffer_disabled)
9491 if (trace_boot_clock) {
9492 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9494 pr_warn("Trace clock %s not defined, going back to default\n",
9499 * register_tracer() might reference current_trace, so it
9500 * needs to be set before we register anything. This is
9501 * just a bootstrap of current_trace anyway.
9503 global_trace.current_trace = &nop_trace;
9505 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9507 ftrace_init_global_array_ops(&global_trace);
9509 init_trace_flags_index(&global_trace);
9511 register_tracer(&nop_trace);
9513 /* Function tracing may start here (via kernel command line) */
9514 init_function_trace();
9516 /* All seems OK, enable tracing */
9517 tracing_disabled = 0;
9519 atomic_notifier_chain_register(&panic_notifier_list,
9520 &trace_panic_notifier);
9522 register_die_notifier(&trace_die_notifier);
9524 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9526 INIT_LIST_HEAD(&global_trace.systems);
9527 INIT_LIST_HEAD(&global_trace.events);
9528 INIT_LIST_HEAD(&global_trace.hist_vars);
9529 INIT_LIST_HEAD(&global_trace.err_log);
9530 list_add(&global_trace.list, &ftrace_trace_arrays);
9532 apply_trace_boot_options();
9534 register_snapshot_cmd();
9539 free_saved_cmdlines_buffer(savedcmd);
9540 out_free_temp_buffer:
9541 ring_buffer_free(temp_buffer);
9543 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9545 free_cpumask_var(global_trace.tracing_cpumask);
9546 out_free_buffer_mask:
9547 free_cpumask_var(tracing_buffer_mask);
9552 void __init early_trace_init(void)
9554 if (tracepoint_printk) {
9555 tracepoint_print_iter =
9556 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9557 if (MEM_FAIL(!tracepoint_print_iter,
9558 "Failed to allocate trace iterator\n"))
9559 tracepoint_printk = 0;
9561 static_key_enable(&tracepoint_printk_key.key);
9563 tracer_alloc_buffers();
9566 void __init trace_init(void)
9571 __init static int clear_boot_tracer(void)
9574 * The default bootup tracer points into an init-section buffer.
9575 * This function is called at late_initcall time. If we did not
9576 * find the boot tracer, then clear it out, to prevent
9577 * later registration from accessing the buffer that is
9578 * about to be freed.
9580 if (!default_bootup_tracer)
9583 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9584 default_bootup_tracer);
9585 default_bootup_tracer = NULL;
9590 fs_initcall(tracer_init_tracefs);
9591 late_initcall_sync(clear_boot_tracer);
9593 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9594 __init static int tracing_set_default_clock(void)
9596 /* sched_clock_stable() is determined in late_initcall */
9597 if (!trace_boot_clock && !sched_clock_stable()) {
9598 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9599 pr_warn("Can not set tracing clock due to lockdown\n");
9604 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9605 "If you want to keep using the local clock, then add:\n"
9606 " \"trace_clock=local\"\n"
9607 "on the kernel command line\n");
9608 tracing_set_clock(&global_trace, "global");
9613 late_initcall_sync(tracing_set_default_clock);