Merge branch 'trace/ftrace/urgent' into HEAD
[linux-2.6-microblaze.git] / kernel / trace / trace.c
index eccb4e1..3834de9 100644 (file)
@@ -514,7 +514,7 @@ void trace_free_pid_list(struct trace_pid_list *pid_list)
  * @filtered_pids: The list of pids to check
  * @search_pid: The PID to find in @filtered_pids
  *
- * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
+ * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
  */
 bool
 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
@@ -545,7 +545,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids,
                       struct task_struct *task)
 {
        /*
-        * If filterd_no_pids is not empty, and the task's pid is listed
+        * If filtered_no_pids is not empty, and the task's pid is listed
         * in filtered_no_pids, then return true.
         * Otherwise, if filtered_pids is empty, that means we can
         * trace all tasks. If it has content, then only trace pids
@@ -612,7 +612,7 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
 
        (*pos)++;
 
-       /* pid already is +1 of the actual prevous bit */
+       /* pid already is +1 of the actual previous bit */
        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 
        /* Return pid + 1 to allow zero to be represented */
@@ -834,7 +834,7 @@ DEFINE_MUTEX(trace_types_lock);
  * The content of events may become garbage if we allow other process consumes
  * these events concurrently:
  *   A) the page of the consumed events may become a normal page
- *      (not reader page) in ring buffer, and this page will be rewrited
+ *      (not reader page) in ring buffer, and this page will be rewritten
  *      by events producer.
  *   B) The page of the consumed events may become a page for splice_read,
  *      and this page will be returned to system.
@@ -1520,7 +1520,7 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
 #undef C
 #define C(a, b) b
 
-/* These must match the bit postions in trace_iterator_flags */
+/* These must match the bit positions in trace_iterator_flags */
 static const char *trace_options[] = {
        TRACE_FLAGS
        NULL
@@ -2737,12 +2737,13 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
                          unsigned int trace_ctx)
 {
        struct ring_buffer_event *entry;
+       struct trace_array *tr = trace_file->tr;
        int val;
 
-       *current_rb = trace_file->tr->array_buffer.buffer;
+       *current_rb = tr->array_buffer.buffer;
 
-       if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
-            (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
+       if (!tr->no_filter_buffering_ref &&
+           (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
            (entry = this_cpu_read(trace_buffered_event))) {
                /* Try to use the per cpu buffer first */
                val = this_cpu_inc_return(trace_buffered_event_cnt);
@@ -2984,7 +2985,8 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
 
        size = nr_entries * sizeof(unsigned long);
        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
-                                           sizeof(*entry) + size, trace_ctx);
+                                   (sizeof(*entry) - sizeof(entry->caller)) + size,
+                                   trace_ctx);
        if (!event)
                goto out;
        entry = ring_buffer_event_data(event);
@@ -3367,7 +3369,7 @@ int trace_array_vprintk(struct trace_array *tr,
  * buffer (use trace_printk() for that), as writing into the top level
  * buffer should only have events that can be individually disabled.
  * trace_printk() is only used for debugging a kernel, and should not
- * be ever encorporated in normal use.
+ * be ever incorporated in normal use.
  *
  * trace_array_printk() can be used, as it will not add noise to the
  * top level tracing buffer.
@@ -3557,6 +3559,204 @@ static char *trace_iter_expand_format(struct trace_iterator *iter)
        return tmp;
 }
 
+/*
+ * Decide whether @str may be dereferenced while printing the event that
+ * @iter currently refers to. Returns true if the address falls in one of
+ * the regions accepted below, false otherwise.
+ */
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
+{
+       unsigned long addr = (unsigned long)str;
+       unsigned long ent_start = (unsigned long)iter->ent;
+       unsigned long tmp_start = (unsigned long)iter->tmp_seq.buffer;
+       struct trace_event_call *call;
+       struct trace_event *tevent;
+
+       /* Inside the event record itself */
+       if (addr >= ent_start && addr < ent_start + iter->ent_size)
+               return true;
+
+       /* Inside the iterator's temporary seq buffer */
+       if (addr >= tmp_start && addr < tmp_start + PAGE_SIZE)
+               return true;
+
+       /*
+        * Core rodata can not be freed; strings accepted by
+        * trace_is_tracepoint_string() are allowed as well.
+        */
+       if (is_kernel_rodata(addr) || trace_is_tracepoint_string(str))
+               return true;
+
+       /*
+        * What is left is a module event referencing that module's own
+        * core data, which is OK. That needs an event to look up.
+        */
+       if (!iter->ent)
+               return false;
+
+       tevent = ftrace_find_event(iter->ent->type);
+       if (!tevent)
+               return false;
+
+       call = container_of(tevent, struct trace_event_call, event);
+
+       /* Would rather have rodata, but this will suffice */
+       return call->mod && within_module_core(addr, call->mod);
+}
+
+/*
+ * Run seq_buf_terminate() on the seq_buf embedded in @s and return a
+ * pointer to its buffer, so the content can be handed to a "%s".
+ */
+static const char *show_buffer(struct trace_seq *s)
+{
+       seq_buf_terminate(&s->seq);
+
+       return s->seq.buffer;
+}
+
+/*
+ * Incremented by test_can_verify() when the string verifier can not be
+ * used; trace_check_vprintf() skips its checks while this key is set.
+ */
+static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
+
+/*
+ * Probe whether vsnprintf() advances the va_list handed to it.
+ *
+ * @fmt only anchors va_start(); the format given to vsnprintf() is the
+ * literal "%d", so exactly one int is formatted. The next int is then
+ * read from the same va_list and returned. With the call made by
+ * test_can_verify() ("%d %d", 0, 1) the result is 1 when vsnprintf()
+ * moved the list past the first argument, and 0 when it did not.
+ */
+static int test_can_verify_check(const char *fmt, ...)
+{
+       char buf[16];
+       va_list ap;
+       int ret;
+
+       /*
+        * The verifier depends on vsnprintf() modifying the va_list
+        * passed to it, which requires the va_list to be passed by
+        * reference. Some architectures (like x86_32) pass it by value,
+        * which means that vsnprintf() does not modify the caller's
+        * va_list, and the verifier would then need to understand every
+        * conversion that vsnprintf() can consume. If it is passed by
+        * value, then the verifier is disabled.
+        */
+       va_start(ap, fmt);
+       vsnprintf(buf, 16, "%d", ap);
+       ret = va_arg(ap, int);
+       va_end(ap);
+
+       return ret;
+}
+
+/*
+ * Run the va_list probe once; if it reports that vsnprintf() does not
+ * advance the caller's va_list, log that and switch the verifier off.
+ */
+static void test_can_verify(void)
+{
+       if (test_can_verify_check("%d %d", 0, 1))
+               return;
+
+       pr_info("trace event string verifier disabled\n");
+       static_branch_inc(&trace_no_verify);
+}
+
+/**
+ * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * @iter: The iterator that holds the seq buffer and the event being printed
+ * @fmt: The format used to print the event
+ * @ap: The va_list holding the data to print from @fmt.
+ *
+ * This writes the data into the @iter->seq buffer using the data from
+ * @fmt and @ap. If the format has a %s (including variants such as
+ * %08.*s), then the source of the string is examined with
+ * trace_safe_str() to make sure it is safe to print. If it is not, a
+ * warning is issued once, the pointer value (plus the string it points
+ * at, when that can be read without faulting) is printed in parentheses,
+ * and "[UNSAFE-MEMORY]" is printed in place of the dereferenced string
+ * pointer.
+ *
+ * A '*' width or precision takes an int argument that sits before the
+ * string pointer in @ap; those ints are pulled out first so that the
+ * pointer which gets verified and printed really is the string. A "%%"
+ * is a literal percent sign and never starts a conversion.
+ *
+ * The checks are skipped, and the (remaining) format is handed straight
+ * to trace_seq_vprintf(), when the trace_no_verify static key is set,
+ * when @iter->fmt is static_fmt_buf, or when the copy buffer can not be
+ * expanded.
+ */
+void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
+                        va_list ap)
+{
+       const char *p = fmt;
+       const char *str;
+       int star_arg[2];
+       unsigned int nr_star;
+       unsigned int k;
+       int i, j;
+
+       if (WARN_ON_ONCE(!fmt))
+               return;
+
+       if (static_branch_unlikely(&trace_no_verify))
+               goto print;
+
+       /* Don't bother checking when doing a ftrace_dump() */
+       if (iter->fmt == static_fmt_buf)
+               goto print;
+
+       while (*p) {
+               j = 0;
+               nr_star = 0;
+
+               /* We only care about %s and variants */
+               for (i = 0; p[i]; i++) {
+                       if (i + 1 >= iter->fmt_size) {
+                               /*
+                                * If we can't expand the copy buffer,
+                                * just print it.
+                                */
+                               if (!trace_iter_expand_format(iter))
+                                       goto print;
+                       }
+
+                       if (p[i] == '\\' && p[i+1]) {
+                               i++;
+                               continue;
+                       }
+                       /* "%%" is a literal '%', not the start of a spec */
+                       if (p[i] == '%' && p[i+1] == '%') {
+                               i++;
+                               continue;
+                       }
+                       if (p[i] == '%') {
+                               /* Need to test cases like %08.*s */
+                               for (j = 1; p[i+j]; j++) {
+                                       if (isdigit(p[i+j]) ||
+                                           p[i+j] == '.')
+                                               continue;
+                                       if (p[i+j] == '*') {
+                                               nr_star++;
+                                               continue;
+                                       }
+                                       break;
+                               }
+                               if (p[i+j] == 's')
+                                       break;
+                               /* Not a string conversion, forget its '*'s */
+                               nr_star = 0;
+                       }
+                       j = 0;
+               }
+               /* If no %s found then just print normally */
+               if (!p[i])
+                       break;
+
+               /*
+                * A conversion has at most one '*' width and one '*'
+                * precision. Anything else is malformed; nothing of this
+                * segment has been taken from ap yet, so print the rest
+                * unchecked.
+                */
+               if (nr_star > ARRAY_SIZE(star_arg))
+                       goto print;
+
+               /* Copy up to the %s, and print that */
+               strncpy(iter->fmt, p, i);
+               iter->fmt[i] = '\0';
+               trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+
+               /* The ints for any '*' come before the string pointer */
+               for (k = 0; k < nr_star; k++)
+                       star_arg[k] = va_arg(ap, int);
+
+               /* The ap now points to the string data of the %s */
+               str = va_arg(ap, const char *);
+
+               /*
+                * If you hit this warning, it is likely that the
+                * trace event in question used %s on a string that
+                * was saved at the time of the event, but may not be
+                * around when the trace is read. Use __string(),
+                * __assign_str() and __get_str() helpers in the TRACE_EVENT()
+                * instead. See samples/trace_events/trace-events-sample.h
+                * for reference.
+                */
+               if (WARN_ONCE(!trace_safe_str(iter, str),
+                             "fmt: '%s' current_buffer: '%s'",
+                             fmt, show_buffer(&iter->seq))) {
+                       int ret;
+
+                       /* Try to safely read the string */
+                       ret = strncpy_from_kernel_nofault(iter->fmt, str,
+                                                         iter->fmt_size);
+                       if (ret < 0)
+                               trace_seq_printf(&iter->seq, "(0x%px)", str);
+                       else
+                               trace_seq_printf(&iter->seq, "(0x%px:%s)",
+                                                str, iter->fmt);
+                       str = "[UNSAFE-MEMORY]";
+                       strcpy(iter->fmt, "%s");
+                       /* The placeholder goes out through a plain %s */
+                       nr_star = 0;
+               } else {
+                       strncpy(iter->fmt, p + i, j + 1);
+                       iter->fmt[j+1] = '\0';
+               }
+
+               /* Hand back the '*' ints in front of the string */
+               switch (nr_star) {
+               case 2:
+                       trace_seq_printf(&iter->seq, iter->fmt,
+                                        star_arg[0], star_arg[1], str);
+                       break;
+               case 1:
+                       trace_seq_printf(&iter->seq, iter->fmt,
+                                        star_arg[0], str);
+                       break;
+               default:
+                       trace_seq_printf(&iter->seq, iter->fmt, str);
+                       break;
+               }
+
+               p += i + j + 1;
+       }
+ print:
+       if (*p)
+               trace_seq_vprintf(&iter->seq, p, ap);
+}
+
 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
 {
        const char *p, *new_fmt;
@@ -6763,7 +6963,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
                /* do not add \n before testing triggers, but add \0 */
                entry->buf[cnt] = '\0';
-               tt = event_triggers_call(tr->trace_marker_file, entry, event);
+               tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
        }
 
        if (entry->buf[cnt - 1] != '\n') {
@@ -6971,31 +7171,34 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
        return ret;
 }
 
-int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+/*
+ * Return a time stamp for @rbe. When @rbe is this CPU's
+ * trace_buffered_event, the value comes from ring_buffer_time_stamp()
+ * on @buffer for the current CPU; otherwise it comes from
+ * ring_buffer_event_time_stamp() for the event itself.
+ */
+u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
+{
+       if (rbe != this_cpu_read(trace_buffered_event))
+               return ring_buffer_event_time_stamp(buffer, rbe);
+
+       return ring_buffer_time_stamp(buffer, smp_processor_id());
+}
+
+/*
+ * Set or disable using the per CPU trace_buffered_event when possible.
+ */
+int tracing_set_filter_buffering(struct trace_array *tr, bool set)
 {
        int ret = 0;
 
        mutex_lock(&trace_types_lock);
 
-       if (abs && tr->time_stamp_abs_ref++)
+       if (set && tr->no_filter_buffering_ref++)
                goto out;
 
-       if (!abs) {
-               if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+       if (!set) {
+               if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
                        ret = -EINVAL;
                        goto out;
                }
 
-               if (--tr->time_stamp_abs_ref)
-                       goto out;
+               --tr->no_filter_buffering_ref;
        }
-
-       ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
-
-#ifdef CONFIG_TRACER_MAX_TRACE
-       if (tr->max_buffer.buffer)
-               ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
-#endif
  out:
        mutex_unlock(&trace_types_lock);
 
@@ -7331,11 +7534,11 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
  * @cmd: The tracing command that caused the error
  * @str: The string to position the caret at within @cmd
  *
- * Finds the position of the first occurence of @str within @cmd.  The
+ * Finds the position of the first occurrence of @str within @cmd.  The
  * return value can be passed to tracing_log_err() for caret placement
  * within @cmd.
  *
- * Returns the index within @cmd of the first occurence of @str or 0
+ * Returns the index within @cmd of the first occurrence of @str or 0
  * if @str was not found.
  */
 unsigned int err_pos(char *cmd, const char *str)
@@ -9118,7 +9321,7 @@ int tracing_init_dentry(void)
         * As there may still be users that expect the tracing
         * files to exist in debugfs/tracing, we must automount
         * the tracefs file system there, so older tools still
-        * work with the newer kerenl.
+        * work with the newer kernel.
         */
        tr->dir = debugfs_create_automount("tracing", NULL,
                                           trace_automount, NULL);
@@ -9671,6 +9874,8 @@ __init static int tracer_alloc_buffers(void)
 
        register_snapshot_cmd();
 
+       test_can_verify();
+
        return 0;
 
 out_free_savedcmd: