Merge tag 'trace-v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 5 Aug 2022 16:41:12 +0000 (09:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 5 Aug 2022 16:41:12 +0000 (09:41 -0700)
Pull tracing updates from Steven Rostedt:

 - Runtime verification infrastructure

   This is the biggest change here. It introduces the runtime
   verification support that is necessary for running Linux on
   safety-critical systems.

   It allows deterministic automata models to be inserted into the
   kernel and attached to tracepoints; the information carried by
   those tracepoints moves the model from state to state.

   If an event is encountered that does not belong to the model, a
   given reactor is activated. The reactor may simply inform the user
   or even panic the kernel (a condition that safety-critical systems
   are expected to detect and recover from). A sketch of the
   instrumentation follows below.
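
   A condensed, non-authoritative sketch of how a monitor hooks into
   tracepoints, modeled on the wip monitor added by this series (the
   macro and helper names come from the new rv/ headers, but this is
   illustrative, not a copy of the upstream file):

      #include <rv/instrumentation.h>
      #include <rv/da_monitor.h>

      /* generates da_handle_event_wip() and friends for a per-CPU model */
      DECLARE_DA_MON_PER_CPU(wip, unsigned char);

      /* tracepoint handlers translate kernel events into model events */
      static void handle_preempt_disable(void *data, unsigned long ip,
                                         unsigned long parent_ip)
      {
              da_handle_event_wip(preempt_disable_wip);
      }

      static void handle_sched_waking(void *data, struct task_struct *p)
      {
              /* a wakeup in the preemptive state is a model violation */
              da_handle_event_wip(sched_waking_wip);
      }

      static int enable_wip(void)
      {
              int retval = da_monitor_init_wip();

              if (retval)
                      return retval;

              /* attach the handlers to the existing tracepoints */
              rv_attach_trace_probe("wip", preempt_disable,
                                    handle_preempt_disable);
              rv_attach_trace_probe("wip", sched_waking,
                                    handle_sched_waking);
              return 0;
      }

   The event constants (preempt_disable_wip and so on) come from the
   model file generated by dot2c, sketched under the next bullet.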

 - Two monitor models are also added: Wakeup In Preemptive (WIP - not
   to be confused with "work in progress") and Wakeup While Not
   Running (WWNR). A sketch of the automaton generated for WIP
   follows below.
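
   For reference, a rough sketch of what dot2c emits for the WIP model
   (two states, three events); the actual generated header may differ
   in layout:

      enum states_wip {
              preemptive_wip = 0,             /* initial state */
              non_preemptive_wip,
              state_max_wip
      };

      #define INVALID_STATE state_max_wip

      enum events_wip {
              preempt_disable_wip = 0,
              preempt_enable_wip,
              sched_waking_wip,
              event_max_wip
      };

      /* next state = function[current state][event] */
      static const unsigned char function[state_max_wip][event_max_wip] = {
              /* preemptive: only preempt_disable is a valid event */
              { non_preemptive_wip, INVALID_STATE,  INVALID_STATE },
              /* non_preemptive: wakeups are allowed, enable goes back */
              { INVALID_STATE,      preemptive_wip, non_preemptive_wip },
      };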

 - Added a __vstring() helper to the TRACE_EVENT() macro to replace
   several vsnprintf() usages that were all doing it wrong; an example
   follows below.
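
   The old pattern reserved a fixed-size __dynamic_array() and then
   vsnprintf()'d into it, either wasting ring buffer space or
   truncating the message; __vstring() reserves exactly the space the
   formatted string needs. Shown on a hypothetical event (the real
   conversion of mtu3_log is in the combined diff below):

      TRACE_EVENT(foo_log,      /* "foo_log" is made up for illustration */
              TP_PROTO(struct va_format *vaf),
              TP_ARGS(vaf),
              TP_STRUCT__entry(
                      /* sized at reserve time from fmt/va */
                      __vstring(msg, vaf->fmt, vaf->va)
              ),
              TP_fast_assign(
                      /* formats straight into the reserved entry */
                      __assign_vstr(msg, vaf->fmt, vaf->va);
              ),
              TP_printk("%s", __get_str(msg))
      );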

 - eprobes can now have their event name autogenerated when it is
   left off; an example follows below.
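
   Per the updated readme text in the trace.c hunk below, the <event>
   part of the eprobe definition is now optional. A hypothetical
   tracefs session (the path and the generated name are assumptions,
   not taken from this merge):

      # explicit group/event name, as before
      echo 'e:myprobes/myevent sched.sched_switch' >> dynamic_events
      # event name left off: autogenerated from the attached event
      echo 'e sched.sched_switch' >> dynamic_events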

 - The rest is various cleanups and fixes.

* tag 'trace-v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (50 commits)
  rv: Unlock on error path in rv_unregister_reactor()
  tracing: Use __alignof__(struct {type b;}) instead of offsetof()
  tracing/eprobe: Show syntax error logs in error_log file
  scripts/tracing: Fix typo 'the the' in comment
  tracepoints: It is CONFIG_TRACEPOINTS not CONFIG_TRACEPOINT
  tracing: Use free_trace_buffer() in allocate_trace_buffers()
  tracing: Use a struct alignof to determine trace event field alignment
  rv/reactor: Add the panic reactor
  rv/reactor: Add the printk reactor
  rv/monitor: Add the wwnr monitor
  rv/monitor: Add the wip monitor
  rv/monitor: Add the wip monitor skeleton created by dot2k
  Documentation/rv: Add deterministic automata instrumentation documentation
  Documentation/rv: Add deterministic automata monitor synthesis documentation
  tools/rv: Add dot2k
  Documentation/rv: Add deterministic automaton documentation
  tools/rv: Add dot2c
  Documentation/rv: Add a basic documentation
  rv/include: Add instrumentation helper functions
  rv/include: Add deterministic automata monitor definition via C macros
  ...

arch/x86/kernel/ftrace.c
drivers/usb/mtu3/mtu3_trace.h
include/linux/sched.h
include/linux/tracepoint.h
kernel/fork.c
kernel/trace/trace.c
kernel/trace/trace_uprobe.c

diff --combined arch/x86/kernel/ftrace.c
@@@ -91,6 -91,7 +91,7 @@@ static int ftrace_verify_code(unsigned 
  
        /* Make sure it is what we expect it to be */
        if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
+               ftrace_expected = old_code;
                WARN_ON(1);
                return -EINVAL;
        }
@@@ -301,7 -302,7 +302,7 @@@ union ftrace_op_code_union 
        } __attribute__((packed));
  };
  
 -#define RET_SIZE              1 + IS_ENABLED(CONFIG_SLS)
 +#define RET_SIZE              (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
  
  static unsigned long
  create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
                goto fail;
  
        ip = trampoline + size;
 -      memcpy(ip, retq, RET_SIZE);
 +      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
 +              __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
 +      else
 +              memcpy(ip, retq, sizeof(retq));
  
        /* No need to test direct calls on created trampolines */
        if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
  
  #include "mtu3.h"
  
- #define MTU3_MSG_MAX  256
  TRACE_EVENT(mtu3_log,
        TP_PROTO(struct device *dev, struct va_format *vaf),
        TP_ARGS(dev, vaf),
        TP_STRUCT__entry(
                __string(name, dev_name(dev))
-               __dynamic_array(char, msg, MTU3_MSG_MAX)
+               __vstring(msg, vaf->fmt, vaf->va)
        ),
        TP_fast_assign(
                __assign_str(name, dev_name(dev));
-               vsnprintf(__get_str(msg), MTU3_MSG_MAX, vaf->fmt, *vaf->va);
+               __assign_vstr(msg, vaf->fmt, vaf->va);
        ),
        TP_printk("%s: %s", __get_str(name), __get_str(msg))
  );
@@@ -238,8 -236,8 +236,8 @@@ DECLARE_EVENT_CLASS(mtu3_log_ep
                __entry->direction = mep->is_in;
                __entry->gpd_ring = &mep->gpd_ring;
        ),
 -      TP_printk("%s: type %d maxp %d slot %d mult %d burst %d ring %p/%pad flags %c:%c%c%c:%c",
 -              __get_str(name), __entry->type,
 +      TP_printk("%s: type %s maxp %d slot %d mult %d burst %d ring %p/%pad flags %c:%c%c%c:%c",
 +              __get_str(name), usb_ep_type_string(__entry->type),
                __entry->maxp, __entry->slot,
                __entry->mult, __entry->maxburst,
                __entry->gpd_ring, &__entry->gpd_ring->dma,
diff --combined include/linux/sched.h
@@@ -34,6 -34,7 +34,7 @@@
  #include <linux/rseq.h>
  #include <linux/seqlock.h>
  #include <linux/kcsan.h>
+ #include <linux/rv.h>
  #include <asm/kmap_size.h>
  
  /* task_struct member predeclarations (sorted alphabetically): */
@@@ -843,9 -844,8 +844,9 @@@ struct task_struct 
        int                             trc_reader_nesting;
        int                             trc_ipi_to_cpu;
        union rcu_special               trc_reader_special;
 -      bool                            trc_reader_checked;
        struct list_head                trc_holdout_list;
 +      struct list_head                trc_blkd_node;
 +      int                             trc_blkd_cpu;
  #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
  
        struct sched_info               sched_info;
        struct callback_head            l1d_flush_kill;
  #endif
  
+ #ifdef CONFIG_RV
+       /*
+        * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS.
+        * If we find justification for more monitors, we can think
+        * about adding more or developing a dynamic method. So far,
+        * none of these are justified.
+        */
+       union rv_task_monitor           rv[RV_PER_TASK_MONITORS];
+ #endif
        /*
         * New fields for task_struct should be added above here, so that
         * they are included in the randomized portion of task_struct.
@@@ -2224,7 -2234,6 +2235,7 @@@ static inline void set_task_cpu(struct 
  
  extern bool sched_task_on_rq(struct task_struct *p);
  extern unsigned long get_wchan(struct task_struct *p);
 +extern struct task_struct *cpu_curr_snapshot(int cpu);
  
  /*
   * In order to reduce various lock holder preemption latencies provide an
@@@ -2259,7 -2268,7 +2270,7 @@@ static inline bool owner_on_cpu(struct 
  }
  
  /* Returns effective CPU energy utilization, as seen by the scheduler */
 -unsigned long sched_cpu_util(int cpu, unsigned long max);
 +unsigned long sched_cpu_util(int cpu);
  #endif /* CONFIG_SMP */
  
  #ifdef CONFIG_RSEQ
@@@ -151,7 -151,7 +151,7 @@@ static inline struct tracepoint *tracep
  /*
   * Individual subsystem my have a separate configuration to
   * enable their tracepoints. By default, this file will create
-  * the tracepoints if CONFIG_TRACEPOINT is defined. If a subsystem
+  * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
   * wants to be able to disable its tracepoints from being created
   * it can define NOTRACE before including the tracepoint headers.
   */
                 */                                                     \
                if (rcuidle) {                                          \
                        __idx = srcu_read_lock_notrace(&tracepoint_srcu);\
 -                      rcu_irq_enter_irqson();                         \
 +                      ct_irq_enter_irqson();                          \
                }                                                       \
                                                                        \
                __DO_TRACE_CALL(name, TP_ARGS(args));                   \
                                                                        \
                if (rcuidle) {                                          \
 -                      rcu_irq_exit_irqson();                          \
 +                      ct_irq_exit_irqson();                           \
                        srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
                }                                                       \
                                                                        \
diff --combined kernel/fork.c
@@@ -1814,7 -1814,6 +1814,7 @@@ static inline void rcu_copy_process(str
        p->trc_reader_nesting = 0;
        p->trc_reader_special.s = 0;
        INIT_LIST_HEAD(&p->trc_holdout_list);
 +      INIT_LIST_HEAD(&p->trc_blkd_node);
  #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
  }
  
@@@ -1965,6 -1964,18 +1965,18 @@@ static void copy_oom_score_adj(u64 clon
        mutex_unlock(&oom_adj_mutex);
  }
  
+ #ifdef CONFIG_RV
+ static void rv_task_fork(struct task_struct *p)
+ {
+       int i;
+       for (i = 0; i < RV_PER_TASK_MONITORS; i++)
+               p->rv[i].da_mon.monitoring = false;
+ }
+ #else
+ #define rv_task_fork(p) do {} while (0)
+ #endif
  /*
   * This creates a new process as a copy of the old one,
   * but does not actually start it yet.
@@@ -2034,11 -2045,8 +2046,11 @@@ static __latent_entropy struct task_str
        /*
         * If the new process will be in a different time namespace
         * do not allow it to share VM or a thread group with the forking task.
 +       *
 +       * On vfork, the child process enters the target time namespace only
 +       * after exec.
         */
 -      if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
 +      if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) {
                if (nsp->time_ns != nsp->time_ns_for_children)
                        return ERR_PTR(-EINVAL);
        }
         */
        copy_seccomp(p);
  
+       rv_task_fork(p);
        rseq_fork(p, clone_flags);
  
        /* Don't start children in a dying pid namespace */
diff --combined kernel/trace/trace.c
@@@ -3105,17 -3105,17 +3105,17 @@@ void __trace_stack(struct trace_array *
        }
  
        /*
 -       * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
 +       * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
         * but if the above rcu_is_watching() failed, then the NMI
 -       * triggered someplace critical, and rcu_irq_enter() should
 +       * triggered someplace critical, and ct_irq_enter() should
         * not be called from NMI.
         */
        if (unlikely(in_nmi()))
                return;
  
 -      rcu_irq_enter_irqson();
 +      ct_irq_enter_irqson();
        __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
 -      rcu_irq_exit_irqson();
 +      ct_irq_exit_irqson();
  }
  
  /**
@@@ -5569,13 -5569,13 +5569,13 @@@ static const char readme_msg[] 
  #endif
  #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
        "\t  accepts: event-definitions (one definition per line)\n"
-       "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
-       "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
+       "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
+       "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
  #ifdef CONFIG_HIST_TRIGGERS
        "\t           s:[synthetic/]<event> <field> [<field>]\n"
  #endif
-       "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
-       "\t           -:[<group>/]<event>\n"
+       "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
+       "\t           -:[<group>/][<event>]\n"
  #ifdef CONFIG_KPROBE_EVENTS
        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
    "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
@@@ -9101,6 -9101,16 +9101,16 @@@ allocate_trace_buffer(struct trace_arra
        return 0;
  }
  
+ static void free_trace_buffer(struct array_buffer *buf)
+ {
+       if (buf->buffer) {
+               ring_buffer_free(buf->buffer);
+               buf->buffer = NULL;
+               free_percpu(buf->data);
+               buf->data = NULL;
+       }
+ }
  static int allocate_trace_buffers(struct trace_array *tr, int size)
  {
        int ret;
        ret = allocate_trace_buffer(tr, &tr->max_buffer,
                                    allocate_snapshot ? size : 1);
        if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
-               ring_buffer_free(tr->array_buffer.buffer);
-               tr->array_buffer.buffer = NULL;
-               free_percpu(tr->array_buffer.data);
-               tr->array_buffer.data = NULL;
+               free_trace_buffer(&tr->array_buffer);
                return -ENOMEM;
        }
        tr->allocated_snapshot = allocate_snapshot;
        return 0;
  }
  
- static void free_trace_buffer(struct array_buffer *buf)
- {
-       if (buf->buffer) {
-               ring_buffer_free(buf->buffer);
-               buf->buffer = NULL;
-               free_percpu(buf->data);
-               buf->data = NULL;
-       }
- }
  static void free_trace_buffers(struct trace_array *tr)
  {
        if (!tr)
@@@ -9772,6 -9769,8 +9769,8 @@@ static __init int tracer_init_tracefs(v
                tracer_init_tracefs_work_func(NULL);
        }
  
+       rv_init_interface();
        return 0;
  }
  
@@@ -16,7 -16,6 +16,7 @@@
  #include <linux/namei.h>
  #include <linux/string.h>
  #include <linux/rculist.h>
 +#include <linux/filter.h>
  
  #include "trace_dynevent.h"
  #include "trace_probe.h"
@@@ -313,7 -312,8 +313,8 @@@ static bool trace_uprobe_match(const ch
  {
        struct trace_uprobe *tu = to_trace_uprobe(ev);
  
-       return strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+       return (event[0] == '\0' ||
+               strcmp(trace_probe_name(&tu->tp), event) == 0) &&
           (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) &&
           trace_uprobe_match_command_head(tu, argc, argv);
  }
@@@ -533,7 -533,7 +534,7 @@@ end
  
  /*
   * Argument syntax:
-  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS]
+  *  - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS]
   */
  static int __trace_uprobe_create(int argc, const char **argv)
  {
        const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
        char *arg, *filename, *rctr, *rctr_end, *tmp;
        char buf[MAX_EVENT_NAME_LEN];
+       char gbuf[MAX_EVENT_NAME_LEN];
        enum probe_print_type ptype;
        struct path path;
        unsigned long offset, ref_ctr_offset;
        /* setup a probe */
        trace_probe_log_set_index(0);
        if (event) {
-               ret = traceprobe_parse_event_name(&event, &group, buf,
+               ret = traceprobe_parse_event_name(&event, &group, gbuf,
                                                  event - argv[0]);
                if (ret)
                        goto fail_address_parse;
-       } else {
+       }
+       if (!event) {
                char *tail;
                char *ptr;
  
@@@ -1343,15 -1346,15 +1347,15 @@@ static void __uprobe_perf_func(struct t
        int size, esize;
        int rctx;
  
 +#ifdef CONFIG_BPF_EVENTS
        if (bpf_prog_array_valid(call)) {
                u32 ret;
  
 -              preempt_disable();
 -              ret = trace_call_bpf(call, regs);
 -              preempt_enable();
 +              ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run);
                if (!ret)
                        return;
        }
 +#endif /* CONFIG_BPF_EVENTS */
  
        esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));