Merge tag 'trace-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 5 Sep 2021 18:50:41 +0000 (11:50 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 5 Sep 2021 18:50:41 +0000 (11:50 -0700)
Pull tracing updates from Steven Rostedt:

 - simplify the Kconfig use of FTRACE and TRACE_IRQFLAGS_SUPPORT

 - bootconfig can now start histograms

 - bootconfig supports group/all enabling

 - histograms now can put values in linear size buckets

 - execnames can be passed to synthetic events

 - introduce "event probes" that attach to other events and can retrieve
   data from pointers of fields, or record fields as different types (a
   pointer to a string as a string instead of just a hex number)

 - various fixes and clean ups

* tag 'trace-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (35 commits)
  tracing/doc: Fix table format in histogram code
  selftests/ftrace: Add selftest for testing duplicate eprobes and kprobes
  selftests/ftrace: Add selftest for testing eprobe events on synthetic events
  selftests/ftrace: Add test case to test adding and removing of event probe
  selftests/ftrace: Fix requirement check of README file
  selftests/ftrace: Add clear_dynamic_events() to test cases
  tracing: Add a probe that attaches to trace events
  tracing/probes: Reject events which have the same name of existing one
  tracing/probes: Have process_fetch_insn() take a void * instead of pt_regs
  tracing/probe: Change traceprobe_set_print_fmt() to take a type
  tracing/probes: Use struct_size() instead of defining custom macros
  tracing/probes: Allow for dot delimiter as well as slash for system names
  tracing/probe: Have traceprobe_parse_probe_arg() take a const arg
  tracing: Have dynamic events have a ref counter
  tracing: Add DYNAMIC flag for dynamic events
  tracing: Replace deprecated CPU-hotplug functions.
  MAINTAINERS: Add an entry for os noise/latency
  tracepoint: Fix kerneldoc comments
  bootconfig/tracing/ktest: Update ktest example for boot-time tracing
  tools/bootconfig: Use per-group/all enable option in ftrace2bconf script
  ...

63 files changed:
Documentation/trace/boottime-trace.rst
Documentation/trace/histogram.rst
MAINTAINERS
arch/Kconfig
arch/arc/Kconfig
arch/arm/Kconfig
arch/arm64/Kconfig
arch/csky/Kconfig
arch/hexagon/Kconfig
arch/microblaze/Kconfig
arch/microblaze/Kconfig.debug
arch/mips/Kconfig
arch/mips/Kconfig.debug
arch/nds32/Kconfig
arch/nios2/Kconfig
arch/openrisc/Kconfig
arch/parisc/Kconfig
arch/parisc/Kconfig.debug
arch/powerpc/Kconfig
arch/riscv/Kconfig
arch/s390/Kconfig
arch/s390/Kconfig.debug
arch/sh/Kconfig
arch/sh/Kconfig.debug
arch/sparc/Kconfig
arch/sparc/Kconfig.debug
arch/um/Kconfig
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/xtensa/Kconfig
include/linux/trace_events.h
include/linux/tracepoint.h
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c
kernel/trace/trace_dynevent.c
kernel/trace/trace_dynevent.h
kernel/trace/trace_eprobe.c [new file with mode: 0644]
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_synth.c
kernel/trace/trace_events_trigger.c
kernel/trace/trace_hwlat.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_osnoise.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe.h
kernel/trace/trace_probe_tmpl.h
kernel/trace/trace_uprobe.c
kernel/tracepoint.c
tools/bootconfig/scripts/bconf2ftrace.sh
tools/bootconfig/scripts/ftrace2bconf.sh
tools/bootconfig/scripts/xbc.sh
tools/testing/ktest/examples/bootconfigs/boottrace.bconf
tools/testing/ktest/examples/bootconfigs/verify-boottrace.sh
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/functions
tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc [new file with mode: 0644]

index 8053898..6dcfbc6 100644 (file)
@@ -125,6 +125,71 @@ Note that kprobe and synthetic event definitions can be written under
 instance node, but those are also visible from other instances. So please
 take care for event name conflict.
 
+Ftrace Histogram Options
+------------------------
+
+Since a histogram action is too long to write as a single string in the
+per-event 'actions' option, there are tree-style options under the per-event
+'hist' subkey for the histogram actions. For the details of each parameter,
+please read the event histogram document [3]_.
+
+.. [3] See :ref:`Documentation/trace/histogram.rst <histogram>`
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]keys = KEY1[, KEY2[...]]
+  Set histogram key parameters. (Mandatory)
+  'N' is a digit string that distinguishes multiple histograms. You can omit
+  it if there is only one histogram on the event.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]values = VAL1[, VAL2[...]]
+  Set histogram value parameters.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]sort = SORT1[, SORT2[...]]
+  Set histogram sort parameter options.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]size = NR_ENTRIES
+  Set histogram size (number of entries).
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]name = NAME
+  Set histogram name.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]var.VARIABLE = EXPR
+  Define a new VARIABLE by EXPR expression.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<pause|continue|clear>
+  Set histogram control parameter. You can set one of them.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]event = GROUP.EVENT
+  Set histogram 'onmatch' handler matching event parameter.
+  'M' is a digit string that distinguishes multiple 'onmatch' handlers. You can
+  omit it if there is only one 'onmatch' handler on this histogram.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]trace = EVENT[, ARG1[...]]
+  Set histogram 'trace' action for 'onmatch'.
+  EVENT must be a synthetic event name, and ARG1... are parameters
+  for that event. Mandatory if 'onmatch.event' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmax.[M.]var = VAR
+  Set histogram 'onmax' handler variable parameter.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onchange.[M.]var = VAR
+  Set histogram 'onchange' handler variable parameter.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]save = ARG1[, ARG2[...]]
+  Set histogram 'save' action parameters for 'onmax' or 'onchange' handler.
+  Either this option or the 'snapshot' option below is mandatory if the
+  'onmax.var' or 'onchange.var' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]snapshot
+  Set histogram 'snapshot' action for 'onmax' or 'onchange' handler.
+  Either this option or the 'save' option above is mandatory if the
+  'onmax.var' or 'onchange.var' option is set.
+
+ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.filter = FILTER_EXPR
+  Set histogram filter expression. You don't need 'if' in the FILTER_EXPR.
+
+Note that this 'hist' option can conflict with the per-event 'actions'
+option if the 'actions' option has a histogram action.
+
 
 When to Start
 =============
@@ -159,13 +224,23 @@ below::
         }
         synthetic.initcall_latency {
                 fields = "unsigned long func", "u64 lat"
-                actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
+                hist {
+                        keys = func.sym, lat
+                        values = lat
+                        sort = lat
+                }
         }
-        initcall.initcall_start {
-                actions = "hist:keys=func:ts0=common_timestamp.usecs"
+        initcall.initcall_start.hist {
+                keys = func
+                var.ts0 = common_timestamp.usecs
         }
-        initcall.initcall_finish {
-                actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
+        initcall.initcall_finish.hist {
+                keys = func
+                var.lat = common_timestamp.usecs - $ts0
+                onmatch {
+                        event = initcall.initcall_start
+                        trace = initcall_latency, func, $lat
+                }
         }
   }
 
index f99be80..5334156 100644 (file)
@@ -70,15 +70,16 @@ Documentation written by Tom Zanussi
   modified by appending any of the following modifiers to the field
   name:
 
-       =========== ==========================================
-        .hex        display a number as a hex value
-       .sym        display an address as a symbol
-       .sym-offset display an address as a symbol and offset
-       .syscall    display a syscall id as a system call name
-       .execname   display a common_pid as a program name
-       .log2       display log2 value rather than raw number
-       .usecs      display a common_timestamp in microseconds
-       =========== ==========================================
+       =============  =================================================
+        .hex           display a number as a hex value
+       .sym           display an address as a symbol
+       .sym-offset    display an address as a symbol and offset
+       .syscall       display a syscall id as a system call name
+       .execname      display a common_pid as a program name
+       .log2          display log2 value rather than raw number
+       .buckets=size  display grouping of values rather than raw number
+       .usecs         display a common_timestamp in microseconds
+       =============  =================================================
 
   Note that in general the semantics of a given field aren't
   interpreted when applying a modifier to it, but there are some
@@ -228,7 +229,7 @@ Extended error information
   that lists the total number of bytes requested for each function in
   the kernel that made one or more calls to kmalloc::
 
-    # echo 'hist:key=call_site:val=bytes_req' > \
+    # echo 'hist:key=call_site:val=bytes_req.buckets=32' > \
             /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
 
   This tells the tracing system to create a 'hist' trigger using the
@@ -1823,20 +1824,99 @@ and variables defined on other events (see Section 2.2.3 below on
 how that is done using hist trigger 'onmatch' action). Once that is
 done, the 'wakeup_latency' synthetic event instance is created.
 
-A histogram can now be defined for the new synthetic event::
-
-  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
-        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
-
 The new event is created under the tracing/events/synthetic/ directory
 and looks and behaves just like any other event::
 
   # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
         enable  filter  format  hist  id  trigger
 
+A histogram can now be defined for the new synthetic event::
+
+  # echo 'hist:keys=pid,prio,lat.log2:sort=lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The above shows the latency "lat" in a power of 2 grouping.
+
 Like any other event, once a histogram is enabled for the event, the
 output can be displayed by reading the event's 'hist' file.
 
+  # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+  # event histogram
+  #
+  # trigger info: hist:keys=pid,prio,lat.log2:vals=hitcount:sort=lat.log2:size=2048 [active]
+  #
+
+  { pid:       2035, prio:          9, lat: ~ 2^2  } hitcount:         43
+  { pid:       2034, prio:          9, lat: ~ 2^2  } hitcount:         60
+  { pid:       2029, prio:          9, lat: ~ 2^2  } hitcount:        965
+  { pid:       2034, prio:        120, lat: ~ 2^2  } hitcount:          9
+  { pid:       2033, prio:        120, lat: ~ 2^2  } hitcount:          5
+  { pid:       2030, prio:          9, lat: ~ 2^2  } hitcount:        335
+  { pid:       2030, prio:        120, lat: ~ 2^2  } hitcount:         10
+  { pid:       2032, prio:        120, lat: ~ 2^2  } hitcount:          1
+  { pid:       2035, prio:        120, lat: ~ 2^2  } hitcount:          2
+  { pid:       2031, prio:          9, lat: ~ 2^2  } hitcount:        176
+  { pid:       2028, prio:        120, lat: ~ 2^2  } hitcount:         15
+  { pid:       2033, prio:          9, lat: ~ 2^2  } hitcount:         91
+  { pid:       2032, prio:          9, lat: ~ 2^2  } hitcount:        125
+  { pid:       2029, prio:        120, lat: ~ 2^2  } hitcount:          4
+  { pid:       2031, prio:        120, lat: ~ 2^2  } hitcount:          3
+  { pid:       2029, prio:        120, lat: ~ 2^3  } hitcount:          2
+  { pid:       2035, prio:          9, lat: ~ 2^3  } hitcount:         41
+  { pid:       2030, prio:        120, lat: ~ 2^3  } hitcount:          1
+  { pid:       2032, prio:          9, lat: ~ 2^3  } hitcount:         32
+  { pid:       2031, prio:          9, lat: ~ 2^3  } hitcount:         44
+  { pid:       2034, prio:          9, lat: ~ 2^3  } hitcount:         40
+  { pid:       2030, prio:          9, lat: ~ 2^3  } hitcount:         29
+  { pid:       2033, prio:          9, lat: ~ 2^3  } hitcount:         31
+  { pid:       2029, prio:          9, lat: ~ 2^3  } hitcount:         31
+  { pid:       2028, prio:        120, lat: ~ 2^3  } hitcount:         18
+  { pid:       2031, prio:        120, lat: ~ 2^3  } hitcount:          2
+  { pid:       2028, prio:        120, lat: ~ 2^4  } hitcount:          1
+  { pid:       2029, prio:          9, lat: ~ 2^4  } hitcount:          4
+  { pid:       2031, prio:        120, lat: ~ 2^7  } hitcount:          1
+  { pid:       2032, prio:        120, lat: ~ 2^7  } hitcount:          1
+
+  Totals:
+      Hits: 2122
+      Entries: 30
+      Dropped: 0
+
+
+The latency values can also be grouped linearly into buckets of a given size
+by using the ".buckets" modifier and specifying the size (in this case, 10).
+
+  # echo 'hist:keys=pid,prio,lat.buckets=10:sort=lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+  # event histogram
+  #
+  # trigger info: hist:keys=pid,prio,lat.buckets=10:vals=hitcount:sort=lat.buckets=10:size=2048 [active]
+  #
+
+  { pid:       2067, prio:          9, lat: ~ 0-9 } hitcount:        220
+  { pid:       2068, prio:          9, lat: ~ 0-9 } hitcount:        157
+  { pid:       2070, prio:          9, lat: ~ 0-9 } hitcount:        100
+  { pid:       2067, prio:        120, lat: ~ 0-9 } hitcount:          6
+  { pid:       2065, prio:        120, lat: ~ 0-9 } hitcount:          2
+  { pid:       2066, prio:        120, lat: ~ 0-9 } hitcount:          2
+  { pid:       2069, prio:          9, lat: ~ 0-9 } hitcount:        122
+  { pid:       2069, prio:        120, lat: ~ 0-9 } hitcount:          8
+  { pid:       2070, prio:        120, lat: ~ 0-9 } hitcount:          1
+  { pid:       2068, prio:        120, lat: ~ 0-9 } hitcount:          7
+  { pid:       2066, prio:          9, lat: ~ 0-9 } hitcount:        365
+  { pid:       2064, prio:        120, lat: ~ 0-9 } hitcount:         35
+  { pid:       2065, prio:          9, lat: ~ 0-9 } hitcount:        998
+  { pid:       2071, prio:          9, lat: ~ 0-9 } hitcount:         85
+  { pid:       2065, prio:          9, lat: ~ 10-19 } hitcount:          2
+  { pid:       2064, prio:        120, lat: ~ 10-19 } hitcount:          2
+
+  Totals:
+      Hits: 2112
+      Entries: 16
+      Dropped: 0
+
 2.2.3 Hist trigger 'handlers' and 'actions'
 -------------------------------------------
 
index cfec5dc..4278b38 100644 (file)
@@ -18973,6 +18973,20 @@ F:     arch/x86/mm/testmmiotrace.c
 F:     include/linux/mmiotrace.h
 F:     kernel/trace/trace_mmiotrace.c
 
+TRACING OS NOISE / LATENCY TRACERS
+M:     Steven Rostedt <rostedt@goodmis.org>
+M:     Daniel Bristot de Oliveira <bristot@kernel.org>
+S:     Maintained
+F:     kernel/trace/trace_osnoise.c
+F:     include/trace/events/osnoise.h
+F:     kernel/trace/trace_hwlat.c
+F:     kernel/trace/trace_irqsoff.c
+F:     kernel/trace/trace_sched_wakeup.c
+F:     Documentation/trace/osnoise-tracer.rst
+F:     Documentation/trace/timerlat-tracer.rst
+F:     Documentation/trace/hwlat_detector.rst
+F:     arch/*/kernel/trace.c
+
 TRADITIONAL CHINESE DOCUMENTATION
 M:     Hu Haowen <src.res@email.cn>
 L:     linux-doc-tw-discuss@lists.sourceforge.net
index 98db634..3743174 100644 (file)
@@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION
 config HAVE_NMI
        bool
 
+config TRACE_IRQFLAGS_SUPPORT
+       bool
+
 #
 # An arch should select this if it provides all these things:
 #
index f631a7a..3a5a80f 100644 (file)
@@ -49,9 +49,7 @@ config ARC
        select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
        select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
        select SET_FS
-
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
+       select TRACE_IRQFLAGS_SUPPORT
 
 config LOCKDEP_SUPPORT
        def_bool y
index f1d6531..9986302 100644 (file)
@@ -126,6 +126,7 @@ config ARM
        select RTC_LIB
        select SET_FS
        select SYS_SUPPORTS_APM_EMULATION
+       select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
        # Above selects are sorted alphabetically; please add new ones
        # according to that.  Thanks.
        help
@@ -189,10 +190,6 @@ config LOCKDEP_SUPPORT
        bool
        default y
 
-config TRACE_IRQFLAGS_SUPPORT
-       bool
-       default !CPU_V7M
-
 config ARCH_HAS_ILOG2_U32
        bool
 
index 9dc1720..077f2ec 100644 (file)
@@ -220,6 +220,7 @@ config ARM64
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
+       select TRACE_IRQFLAGS_SUPPORT
        help
          ARM 64-bit (AArch64) Linux support.
 
@@ -287,9 +288,6 @@ config ILLEGAL_POINTER_VALUE
 config LOCKDEP_SUPPORT
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config GENERIC_BUG
        def_bool y
        depends on BUG
index 2716f63..9d4d898 100644 (file)
@@ -82,6 +82,7 @@ config CSKY
        select PCI_SYSCALL if PCI
        select PCI_MSI if PCI
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
 
 config LOCKDEP_SUPPORT
        def_bool y
@@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT
 config TIME_LOW_RES
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config CPU_TLB_SIZE
        int
        default "128"   if (CPU_CK610 || CPU_CK807 || CPU_CK810)
index aab1a40..15dd8f3 100644 (file)
@@ -32,6 +32,7 @@ config HEXAGON
        select GENERIC_CPU_DEVICES
        select SET_FS
        select ARCH_WANT_LD_ORPHAN_WARN
+       select TRACE_IRQFLAGS_SUPPORT
        help
          Qualcomm Hexagon is a processor architecture designed for high
          performance and low power across a wide variety of applications.
@@ -53,9 +54,6 @@ config EARLY_PRINTK
 config MMU
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config GENERIC_CSUM
        def_bool y
 
index 14a67a4..59798e4 100644 (file)
@@ -44,6 +44,7 @@ config MICROBLAZE
        select SPARSE_IRQ
        select SET_FS
        select ZONE_DMA
+       select TRACE_IRQFLAGS_SUPPORT
 
 # Endianness selection
 choice
index 865527a..a4e40e5 100644 (file)
@@ -1,6 +1 @@
 # SPDX-License-Identifier: GPL-2.0-only
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
index 9b8ff6c..771ca53 100644 (file)
@@ -98,6 +98,7 @@ config MIPS
        select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
        select RTC_LIB
        select SYSCTL_EXCEPTION_TRACE
+       select TRACE_IRQFLAGS_SUPPORT
        select VIRT_TO_BUS
        select ARCH_HAS_ELFCORE_COMPAT
 
index 43dbf59..f4ae790 100644 (file)
@@ -1,9 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-       bool
-       default y
-
 config EARLY_PRINTK
        bool "Early printk" if EXPERT
        depends on SYS_HAS_EARLY_PRINTK
index 9c9f387..aea26e7 100644 (file)
@@ -46,6 +46,7 @@ config NDS32
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
        help
          Andes(nds32) Linux support.
 
@@ -62,9 +63,6 @@ config GENERIC_LOCKBREAK
        def_bool y
        depends on PREEMPTION
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config STACKTRACE_SUPPORT
        def_bool y
 
index 3efe553..33fd06f 100644 (file)
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
 config FPU
        def_bool n
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool n
-
 menu "Kernel features"
 
 source "kernel/Kconfig.hz"
index 50035a9..e804026 100644 (file)
@@ -37,6 +37,7 @@ config OPENRISC
        select GENERIC_IRQ_MULTI_HANDLER
        select MMU_GATHER_NO_RANGE if MMU
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
 
 config CPU_BIG_ENDIAN
        def_bool y
@@ -50,9 +51,6 @@ config GENERIC_HWEIGHT
 config NO_IOPORT_MAP
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 # For now, use generic checksum functions
 #These can be reimplemented in assembly later if so inclined
 config GENERIC_CSUM
index 95d4bbf..3001a7d 100644 (file)
@@ -66,6 +66,7 @@ config PARISC
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
 
        help
          The PA-RISC microprocessor is designed by Hewlett-Packard and used
index 1478ded..f66554c 100644 (file)
@@ -1,4 +1 @@
 # SPDX-License-Identifier: GPL-2.0
-
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
index 8a93b50..ba5b661 100644 (file)
@@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT
        bool
        default y
 
-config TRACE_IRQFLAGS_SUPPORT
-       bool
-       default y
-
 config LOCKDEP_SUPPORT
        bool
        default y
@@ -270,6 +266,7 @@ config PPC
        select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
+       select TRACE_IRQFLAGS_SUPPORT
        select VIRT_TO_BUS                      if !PPC64
        #
        # Please keep this list sorted alphabetically.
index baf60fc..aac669a 100644 (file)
@@ -113,6 +113,7 @@ config RISCV
        select SPARSE_IRQ
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
+       select TRACE_IRQFLAGS_SUPPORT
        select UACCESS_MEMCPY if !MMU
        select ZONE_DMA32 if 64BIT
 
@@ -182,9 +183,6 @@ config ARCH_SUPPORTS_UPROBES
 config STACKTRACE_SUPPORT
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config GENERIC_BUG
        def_bool y
        depends on BUG
index 92c0a1b..5af51ac 100644 (file)
@@ -209,6 +209,7 @@ config S390
        select SWIOTLB
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
+       select TRACE_IRQFLAGS_SUPPORT
        select TTY
        select VIRT_CPU_ACCOUNTING
        select ZONE_DMA
index 9ea6e61..e94a2a7 100644 (file)
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config EARLY_PRINTK
        def_bool y
 
index f37280e..6904f4b 100644 (file)
@@ -69,6 +69,7 @@ config SUPERH
        select RTC_LIB
        select SET_FS
        select SPARSE_IRQ
+       select TRACE_IRQFLAGS_SUPPORT
        help
          The SuperH is a RISC processor targeted for use in embedded systems
          and consumer electronics; it was also used in the Sega Dreamcast
index 28a43d6..958f790 100644 (file)
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config SH_STANDARD_BIOS
        bool "Use LinuxSH standard BIOS"
        help
index fa650e4..b120ed9 100644 (file)
@@ -47,6 +47,7 @@ config SPARC
        select NEED_DMA_MAP_STATE
        select NEED_SG_DMA_LENGTH
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
 
 config SPARC32
        def_bool !64BIT
index 50a918d..6b2bec1 100644 (file)
@@ -1,9 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-       bool
-       default y
-
 config DEBUG_DCFLUSH
        bool "D-cache flush debugging"
        depends on SPARC64 && DEBUG_KERNEL
index 77e66d3..8de2646 100644 (file)
@@ -22,6 +22,7 @@ config UML
        select GENERIC_CPU_DEVICES
        select HAVE_GCC_PLUGINS
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
        select TTY # Needed for line.c
 
 config MMU
@@ -52,10 +53,6 @@ config ISA
 config SBUS
        bool
 
-config TRACE_IRQFLAGS_SUPPORT
-       bool
-       default y
-
 config LOCKDEP_SUPPORT
        bool
        default y
index 1146b85..4e001bb 100644 (file)
@@ -259,6 +259,7 @@ config X86
        select STACK_VALIDATION                 if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
+       select TRACE_IRQFLAGS_SUPPORT
        select USER_STACKTRACE_SUPPORT
        select VIRT_TO_BUS
        select HAVE_ARCH_KCSAN                  if X86_64
index 80b57e7..d3a6f74 100644 (file)
@@ -1,8 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config TRACE_IRQFLAGS_NMI_SUPPORT
        def_bool y
 
index 9bf2a9b..0e56bad 100644 (file)
@@ -42,6 +42,7 @@ config XTENSA
        select MODULES_USE_ELF_RELA
        select PERF_USE_VMALLOC
        select SET_FS
+       select TRACE_IRQFLAGS_SUPPORT
        select VIRT_TO_BUS
        help
          Xtensa processors are 32-bit RISC machines designed by Tensilica
@@ -73,9 +74,6 @@ config LOCKDEP_SUPPORT
 config STACKTRACE_SUPPORT
        def_bool y
 
-config TRACE_IRQFLAGS_SUPPORT
-       def_bool y
-
 config MMU
        def_bool n
 
index 8e0631a..3e475ee 100644 (file)
@@ -310,8 +310,10 @@ enum {
        TRACE_EVENT_FL_NO_SET_FILTER_BIT,
        TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
        TRACE_EVENT_FL_TRACEPOINT_BIT,
+       TRACE_EVENT_FL_DYNAMIC_BIT,
        TRACE_EVENT_FL_KPROBE_BIT,
        TRACE_EVENT_FL_UPROBE_BIT,
+       TRACE_EVENT_FL_EPROBE_BIT,
 };
 
 /*
@@ -321,8 +323,10 @@ enum {
  *  NO_SET_FILTER - Set when filter has error and is to be ignored
  *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
  *  TRACEPOINT    - Event is a tracepoint
+ *  DYNAMIC       - Event is a dynamic event (created at run time)
  *  KPROBE        - Event is a kprobe
  *  UPROBE        - Event is a uprobe
+ *  EPROBE        - Event is an event probe
  */
 enum {
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -330,8 +334,10 @@ enum {
        TRACE_EVENT_FL_NO_SET_FILTER    = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
        TRACE_EVENT_FL_IGNORE_ENABLE    = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
        TRACE_EVENT_FL_TRACEPOINT       = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+       TRACE_EVENT_FL_DYNAMIC          = (1 << TRACE_EVENT_FL_DYNAMIC_BIT),
        TRACE_EVENT_FL_KPROBE           = (1 << TRACE_EVENT_FL_KPROBE_BIT),
        TRACE_EVENT_FL_UPROBE           = (1 << TRACE_EVENT_FL_UPROBE_BIT),
+       TRACE_EVENT_FL_EPROBE           = (1 << TRACE_EVENT_FL_EPROBE_BIT),
 };
 
 #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
@@ -347,7 +353,14 @@ struct trace_event_call {
        struct trace_event      event;
        char                    *print_fmt;
        struct event_filter     *filter;
-       void                    *mod;
+       /*
+        * Static events can disappear with modules,
+        * whereas dynamic ones need their own ref count.
+        */
+       union {
+               void                            *module;
+               atomic_t                        refcnt;
+       };
        void                    *data;
 
        /* See the TRACE_EVENT_FL_* flags above */
@@ -363,6 +376,42 @@ struct trace_event_call {
 #endif
 };
 
+#ifdef CONFIG_DYNAMIC_EVENTS
+bool trace_event_dyn_try_get_ref(struct trace_event_call *call);
+void trace_event_dyn_put_ref(struct trace_event_call *call);
+bool trace_event_dyn_busy(struct trace_event_call *call);
+#else
+static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call)
+{
+       /* Without DYNAMIC_EVENTS configured, nothing should be calling this */
+       return false;
+}
+static inline void trace_event_dyn_put_ref(struct trace_event_call *call)
+{
+}
+static inline bool trace_event_dyn_busy(struct trace_event_call *call)
+{
+       /* Nothing should call this without DYNAMIC_EVENTS configured. */
+       return true;
+}
+#endif
+
+static inline bool trace_event_try_get_ref(struct trace_event_call *call)
+{
+       if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+               return trace_event_dyn_try_get_ref(call);
+       else
+               return try_module_get(call->module);
+}
+
+static inline void trace_event_put_ref(struct trace_event_call *call)
+{
+       if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+               trace_event_dyn_put_ref(call);
+       else
+               module_put(call->module);
+}
+
 #ifdef CONFIG_PERF_EVENTS
 static inline bool bpf_prog_array_valid(struct trace_event_call *call)
 {
@@ -634,6 +683,7 @@ enum event_trigger_type {
        ETT_EVENT_ENABLE        = (1 << 3),
        ETT_EVENT_HIST          = (1 << 4),
        ETT_HIST_ENABLE         = (1 << 5),
+       ETT_EVENT_EPROBE        = (1 << 6),
 };
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
index ab58696..28031b1 100644 (file)
@@ -475,7 +475,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  *     *
  *     * The declared 'local variable' is called '__entry'
  *     *
- *     * __field(pid_t, prev_prid) is equivalent to a standard declaration:
+ *     * __field(pid_t, prev_pid) is equivalent to a standard declaration:
  *     *
  *     *       pid_t   prev_pid;
  *     *
index 3ee23f4..420ff4b 100644 (file)
@@ -135,10 +135,9 @@ config TRACING_SUPPORT
        depends on STACKTRACE_SUPPORT
        default y
 
-if TRACING_SUPPORT
-
 menuconfig FTRACE
        bool "Tracers"
+       depends on TRACING_SUPPORT
        default y if DEBUG_KERNEL
        help
          Enable the kernel tracing infrastructure.
@@ -1037,6 +1036,3 @@ config HIST_TRIGGERS_DEBUG
           If unsure, say N.
 
 endif # FTRACE
-
-endif # TRACING_SUPPORT
-
index b1c47cc..6de5d4d 100644 (file)
@@ -77,6 +77,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_PROBE_EVENTS) += trace_eprobe.o
 obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
 obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
index e592d1d..c5a3fbf 100644 (file)
@@ -2111,7 +2111,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                        }
                }
 
-               get_online_cpus();
+               cpus_read_lock();
                /*
                 * Fire off all the required work handlers
                 * We can't schedule on offline CPUs, but it's not necessary
@@ -2143,7 +2143,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                        cpu_buffer->nr_pages_to_update = 0;
                }
 
-               put_online_cpus();
+               cpus_read_unlock();
        } else {
                cpu_buffer = buffer->buffers[cpu_id];
 
@@ -2171,7 +2171,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                        goto out_err;
                }
 
-               get_online_cpus();
+               cpus_read_lock();
 
                /* Can't run something on an offline CPU. */
                if (!cpu_online(cpu_id))
@@ -2183,7 +2183,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                }
 
                cpu_buffer->nr_pages_to_update = 0;
-               put_online_cpus();
+               cpus_read_unlock();
        }
 
  out:
index 2755534..2dbf797 100644 (file)
@@ -3697,11 +3697,11 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str)
                return false;
 
        event = container_of(trace_event, struct trace_event_call, event);
-       if (!event->mod)
+       if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
                return false;
 
        /* Would rather have rodata, but this will suffice */
-       if (within_module_core(addr, event->mod))
+       if (within_module_core(addr, event->module))
                return true;
 
        return false;
@@ -5543,6 +5543,7 @@ static const char readme_msg[] =
 #ifdef CONFIG_HIST_TRIGGERS
        "\t           s:[synthetic/]<event> <field> [<field>]\n"
 #endif
+       "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
        "\t           -:[<group>/]<event>\n"
 #ifdef CONFIG_KPROBE_EVENTS
        "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
@@ -5552,7 +5553,7 @@ static const char readme_msg[] =
   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
 #endif
        "\t     args: <name>=fetcharg[:type]\n"
-       "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+       "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
        "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
 #else
@@ -5567,6 +5568,8 @@ static const char readme_msg[] =
        "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
        "\t           [unsigned] char/int/long\n"
 #endif
+       "\t    efield: For event probes ('e' types), the field is on of the fields\n"
+       "\t            of the <attached-group>/<attached-event>.\n"
 #endif
        "  events/\t\t- Directory containing all trace event subsystems:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
@@ -5654,6 +5657,7 @@ static const char readme_msg[] =
        "\t            .execname   display a common_pid as a program name\n"
        "\t            .syscall    display a syscall id as a syscall name\n"
        "\t            .log2       display log2 value rather than raw number\n"
+       "\t            .buckets=size  display values in groups of size rather than raw number\n"
        "\t            .usecs      display a common_timestamp in microseconds\n\n"
        "\t    The 'pause' parameter can be used to pause an existing hist\n"
        "\t    trigger or to start a hist trigger but not log any events\n"
index 4a0e693..b7c0f8e 100644 (file)
@@ -126,6 +126,11 @@ struct kprobe_trace_entry_head {
        unsigned long           ip;
 };
 
+/*
+ * Fixed header of an event probe (eprobe) record.
+ * @type is filled in by the eprobe handler with the event type of the
+ * attached event (0 when there is none) and is used when printing the
+ * record to look the attached event up again.
+ */
+struct eprobe_trace_entry_head {
+       struct trace_entry      ent;
+       unsigned int            type;
+};
+
 struct kretprobe_trace_entry_head {
        struct trace_entry      ent;
        unsigned long           func;
@@ -1508,9 +1513,14 @@ static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
 extern int register_trigger_cmds(void);
 extern void clear_event_triggers(struct trace_array *tr);
 
+/*
+ * Flag bits, presumably for event_trigger_data.flags (TODO confirm).
+ * NOTE(review): EVENT_TRIGGER_FL_PROBE looks like it marks triggers that
+ * were installed by an event probe - verify against the users of the flag.
+ */
+enum {
+       EVENT_TRIGGER_FL_PROBE          = BIT(0),
+};
+
 struct event_trigger_data {
        unsigned long                   count;
        int                             ref;
+       int                             flags;
        struct event_trigger_ops        *ops;
        struct event_command            *cmd_ops;
        struct event_filter __rcu       *filter;
@@ -1918,6 +1928,14 @@ static inline bool is_good_name(const char *name)
        return true;
 }
 
+/*
+ * Convert certain expected symbols (':' and '.') into '_' when generating
+ * event names. @name is modified in place.
+ *
+ * Every character is examined, including the first one: the pointer is
+ * only advanced after the current character has been checked.
+ */
+static inline void sanitize_event_name(char *name)
+{
+       for (; *name != '\0'; name++) {
+               if (*name == ':' || *name == '.')
+                       *name = '_';
+       }
+}
+
 /*
  * This is a generic way to read and write a u64 value from a file in tracefs.
  *
index 94ef2d0..1060b04 100644 (file)
@@ -171,6 +171,290 @@ trace_boot_add_synth_event(struct xbc_node *node, const char *event)
 }
 #endif
 
+#ifdef CONFIG_HIST_TRIGGERS
+/*
+ * Append a formatted string at *@bufp, never writing at or beyond @end.
+ *
+ * On success *@bufp is advanced past the written text and the vsnprintf()
+ * result is returned. If the buffer is already exhausted (*@bufp == @end)
+ * -ENOSPC is returned. If the formatted text does not fit, *@bufp is set
+ * to @end (marking the buffer as exhausted) and -ERANGE is returned.
+ */
+static int __init __printf(3, 4)
+append_printf(char **bufp, char *end, const char *fmt, ...)
+{
+       va_list args;
+       int ret;
+
+       if (*bufp == end)
+               return -ENOSPC;
+
+       va_start(args, fmt);
+       ret = vsnprintf(*bufp, end - *bufp, fmt, args);
+       if (ret < end - *bufp) {
+               *bufp += ret;
+       } else {
+               /* Truncated: remember that the buffer is full. */
+               *bufp = end;
+               ret = -ERANGE;
+       }
+       va_end(args);
+
+       return ret;
+}
+
+/*
+ * Append @str at *@bufp with all whitespace characters removed, keeping
+ * the result NUL-terminated and never writing at or beyond @end.
+ *
+ * Returns the number of characters appended and advances *@bufp to the
+ * terminating NUL. Returns -ENOSPC, with *@bufp set to @end, when the
+ * buffer was already exhausted or is filled up by this call.
+ */
+static int __init
+append_str_nospace(char **bufp, char *end, const char *str)
+{
+       char *p = *bufp;
+       int len;
+
+       /*
+        * An earlier append may have exhausted the buffer (*bufp == end).
+        * Bail out before the terminating write below, which would land
+        * one byte past the end of the buffer.
+        */
+       if (p >= end) {
+               *bufp = end;
+               return -ENOSPC;
+       }
+
+       while (p < end - 1 && *str != '\0') {
+               if (!isspace(*str))
+                       *(p++) = *str;
+               str++;
+       }
+       *p = '\0';
+       if (p == end - 1) {
+               *bufp = end;
+               return -ENOSPC;
+       }
+       len = p - *bufp;
+       *bufp = p;
+       return len;
+}
+
+/*
+ * Append ":<key>=<v1>,<v2>,..." built from the array values stored under
+ * the @key child of @hnode.
+ *
+ * Returns 0 on success, -ENOENT if @hnode has no @key child, or -EINVAL
+ * if the key exists but carries no value.
+ */
+static int __init
+trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp,
+                         char *end, const char *key)
+{
+       struct xbc_node *key_node, *val_node;
+       const char *val;
+       char delim = '=';
+
+       key_node = xbc_node_find_child(hnode, key);
+       if (!key_node)
+               return -ENOENT;
+
+       val_node = xbc_node_get_child(key_node);
+       if (!val_node) {
+               pr_err("hist.%s requires value(s).\n", key);
+               return -EINVAL;
+       }
+
+       append_printf(bufp, end, ":%s", key);
+       xbc_array_for_each_value(val_node, val) {
+               /* '=' introduces the first value, ',' all following ones. */
+               append_printf(bufp, end, "%c%s", delim, val);
+               delim = ',';
+       }
+
+       return 0;
+}
+
+/*
+ * Append one handler clause, ":<handler>(<param value>).<action>", built
+ * from the children of @hnode.
+ *
+ * @param names the mandatory option whose value becomes the handler
+ * argument. The action is taken from the "trace" or "save" child (with
+ * its values as a comma separated argument list) or, failing those, from
+ * a "snapshot" child.
+ *
+ * Returns 0 on success or -EINVAL if the option, the action, or the
+ * action's values are missing.
+ */
+static int __init
+trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp,
+                               char *end, const char *handler,
+                               const char *param)
+{
+       struct xbc_node *knode, *anode;
+       const char *p;
+       char sep;
+
+       /* Compose 'handler' parameter */
+       p = xbc_node_find_value(hnode, param, NULL);
+       if (!p) {
+               pr_err("hist.%s requires '%s' option.\n",
+                      xbc_node_get_data(hnode), param);
+               return -EINVAL;
+       }
+       append_printf(bufp, end, ":%s(%s)", handler, p);
+
+       /* Compose 'action' parameter */
+       knode = xbc_node_find_child(hnode, "trace");
+       if (!knode)
+               knode = xbc_node_find_child(hnode, "save");
+
+       if (knode) {
+               anode = xbc_node_get_child(knode);
+               if (!anode || !xbc_node_is_value(anode)) {
+                       pr_err("hist.%s.%s requires value(s).\n",
+                              xbc_node_get_data(hnode),
+                              xbc_node_get_data(knode));
+                       return -EINVAL;
+               }
+
+               append_printf(bufp, end, ".%s", xbc_node_get_data(knode));
+               /* '(' opens the argument list, ',' separates later values. */
+               sep = '(';
+               xbc_array_for_each_value(anode, p) {
+                       append_printf(bufp, end, "%c%s", sep, p);
+                       if (sep == '(')
+                               sep = ',';
+               }
+               append_printf(bufp, end, ")");
+       } else if (xbc_node_find_child(hnode, "snapshot")) {
+               append_printf(bufp, end, ".snapshot()");
+       } else {
+               pr_err("hist.%s requires an action.\n",
+                      xbc_node_get_data(hnode));
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Append all handler clauses found under @hnode. The data of @hnode is
+ * used as the handler name and @param is the option each handler needs.
+ *
+ * Subkeys that start with a digit are treated as numbered handler
+ * instances; in addition @hnode itself is composed as a handler when it
+ * directly has a @param child.
+ *
+ * Returns 0 when nothing had to be composed, otherwise the result of the
+ * last handler composed, or a negative error code as soon as one handler
+ * fails.
+ */
+static int __init
+trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
+                            char *end, const char *param)
+{
+       struct xbc_node *node;
+       const char *p, *handler;
+       /* Must start at 0: neither branch below is guaranteed to run. */
+       int ret = 0;
+
+       handler = xbc_node_get_data(hnode);
+
+       xbc_node_for_each_subkey(hnode, node) {
+               p = xbc_node_get_data(node);
+               if (!isdigit(p[0]))
+                       continue;
+               /* Every subkey starting with a digit is a handler instance. */
+               ret = trace_boot_hist_add_one_handler(node, bufp, end, handler, param);
+               /* Do not let a later success hide this failure. */
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (xbc_node_find_child(hnode, param))
+               ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param);
+
+       return ret;
+}
+
+/*
+ * Histogram boottime tracing syntax.
+ *
+ * ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist[.N] {
+ *     keys = <KEY>[,...]
+ *     values = <VAL>[,...]
+ *     sort = <SORT-KEY>[,...]
+ *     size = <ENTRIES>
+ *     name = <HISTNAME>
+ *     var { <VAR> = <EXPR> ... }
+ *     pause|continue|clear
+ *     onmax|onchange[.N] { var = <VAR>; <ACTION> [= <PARAM>] }
+ *     onmatch[.N] { event = <EVENT>; <ACTION> [= <PARAM>] }
+ *     filter = <FILTER>
+ * }
+ *
+ * Where <ACTION> are;
+ *
+ *     trace = <EVENT>, <ARG1>[, ...]
+ *     save = <ARG1>[, ...]
+ *     snapshot
+ */
+static int __init
+trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size)
+{
+       struct xbc_node *node, *knode;
+       char *end = buf + size;
+       const char *p;
+       int ret = 0;
+
+       /*
+        * The append helpers below mark an exhausted buffer by moving 'buf'
+        * to 'end'; their individual results are not checked here, the
+        * overflow is detected once at the bottom of this function.
+        */
+       append_printf(&buf, end, "hist");
+
+       /* "keys" is the only mandatory array. */
+       ret = trace_boot_hist_add_array(hnode, &buf, end, "keys");
+       if (ret < 0) {
+               if (ret == -ENOENT)
+                       pr_err("hist requires keys.\n");
+               return -EINVAL;
+       }
+
+       /* "values" and "sort" are optional: only -EINVAL is fatal. */
+       ret = trace_boot_hist_add_array(hnode, &buf, end, "values");
+       if (ret == -EINVAL)
+               return ret;
+       ret = trace_boot_hist_add_array(hnode, &buf, end, "sort");
+       if (ret == -EINVAL)
+               return ret;
+
+       p = xbc_node_find_value(hnode, "size", NULL);
+       if (p)
+               append_printf(&buf, end, ":size=%s", p);
+
+       p = xbc_node_find_value(hnode, "name", NULL);
+       if (p)
+               append_printf(&buf, end, ":name=%s", p);
+
+       node = xbc_node_find_child(hnode, "var");
+       if (node) {
+               xbc_node_for_each_key_value(node, knode, p) {
+                       /* Expression must not include spaces. */
+                       append_printf(&buf, end, ":%s=",
+                                     xbc_node_get_data(knode));
+                       append_str_nospace(&buf, end, p);
+               }
+       }
+
+       /* Histogram control attributes (mutually exclusive) */
+       if (xbc_node_find_child(hnode, "pause"))
+               append_printf(&buf, end, ":pause");
+       else if (xbc_node_find_child(hnode, "continue"))
+               append_printf(&buf, end, ":continue");
+       else if (xbc_node_find_child(hnode, "clear"))
+               append_printf(&buf, end, ":clear");
+
+       /* Histogram handler and actions */
+       node = xbc_node_find_child(hnode, "onmax");
+       if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
+               return -EINVAL;
+       node = xbc_node_find_child(hnode, "onchange");
+       if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
+               return -EINVAL;
+       node = xbc_node_find_child(hnode, "onmatch");
+       if (node && trace_boot_hist_add_handlers(node, &buf, end, "event") < 0)
+               return -EINVAL;
+
+       p = xbc_node_find_value(hnode, "filter", NULL);
+       if (p)
+               append_printf(&buf, end, " if %s", p);
+
+       /* buf == end means one of the appends above ran out of space. */
+       if (buf == end) {
+               pr_err("hist exceeds the max command length.\n");
+               return -E2BIG;
+       }
+
+       return 0;
+}
+
+/*
+ * Compose and apply every histogram trigger described under @hnode to
+ * @file, using @buf (of @size bytes) as scratch space for the command.
+ *
+ * Numbered instances (subkeys starting with a digit) are handled first;
+ * @hnode itself is treated as a histogram when it directly has a "keys"
+ * child. The composed command is duplicated before it is applied so that
+ * the failure message can still print it. NOTE(review): presumably
+ * trigger_process_regex() modifies @buf - confirm.
+ */
+static void __init
+trace_boot_init_histograms(struct trace_event_file *file,
+                          struct xbc_node *hnode, char *buf, size_t size)
+{
+       struct xbc_node *node;
+       const char *p;
+       char *tmp;
+
+       xbc_node_for_each_subkey(hnode, node) {
+               p = xbc_node_get_data(node);
+               if (!isdigit(p[0]))
+                       continue;
+               /* Every subkey starting with a digit is a histogram instance. */
+               if (trace_boot_compose_hist_cmd(node, buf, size) == 0) {
+                       tmp = kstrdup(buf, GFP_KERNEL);
+                       /* Out of memory: give up instead of using a NULL copy. */
+                       if (!tmp)
+                               return;
+                       if (trigger_process_regex(file, buf) < 0)
+                               pr_err("Failed to apply hist trigger: %s\n", tmp);
+                       kfree(tmp);
+               }
+       }
+
+       if (xbc_node_find_child(hnode, "keys")) {
+               if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
+                       tmp = kstrdup(buf, GFP_KERNEL);
+                       if (!tmp)
+                               return;
+                       if (trigger_process_regex(file, buf) < 0)
+                               pr_err("Failed to apply hist trigger: %s\n", tmp);
+                       kfree(tmp);
+               }
+       }
+}
+#else
+/* Stub used when CONFIG_HIST_TRIGGERS is not set: there is nothing to apply. */
+static void __init
+trace_boot_init_histograms(struct trace_event_file *file,
+                          struct xbc_node *hnode, char *buf, size_t size)
+{
+       /* do nothing */
+}
+#endif
+
 static void __init
 trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
                          struct xbc_node *enode)
@@ -205,12 +489,18 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
                        pr_err("Failed to apply filter: %s\n", buf);
        }
 
-       xbc_node_for_each_array_value(enode, "actions", anode, p) {
-               if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
-                       pr_err("action string is too long: %s\n", p);
-               else if (trigger_process_regex(file, buf) < 0)
-                       pr_err("Failed to apply an action: %s\n", buf);
-       }
+       if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) {
+               xbc_node_for_each_array_value(enode, "actions", anode, p) {
+                       if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+                               pr_err("action string is too long: %s\n", p);
+                       else if (trigger_process_regex(file, buf) < 0)
+                               pr_err("Failed to apply an action: %s\n", p);
+               }
+               anode = xbc_node_find_child(enode, "hist");
+               if (anode)
+                       trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf));
+       } else if (xbc_node_find_value(enode, "actions", NULL))
+               pr_err("Failed to apply event actions because CONFIG_HIST_TRIGGERS is not set.\n");
 
        if (xbc_node_find_value(enode, "enable", NULL)) {
                if (trace_event_enable_disable(file, 1, 0) < 0)
index e57cc08..1110112 100644 (file)
 #include <linux/tracefs.h>
 
 #include "trace.h"
+#include "trace_output.h"      /* for trace_event_sem */
 #include "trace_dynevent.h"
 
 static DEFINE_MUTEX(dyn_event_ops_mutex);
 static LIST_HEAD(dyn_event_ops_list);
 
+/*
+ * Try to take a reference on the dynamic event @dyn_call.
+ *
+ * The reference is only taken if @dyn_call is still found on the
+ * ftrace_events list, which is walked under trace_event_sem held for
+ * reading. Returns true if the reference count was incremented, false if
+ * the event was not found or is not flagged TRACE_EVENT_FL_DYNAMIC (the
+ * latter also triggers a one-time warning).
+ */
+bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
+{
+       struct trace_event_call *call;
+       bool ret = false;
+
+       if (WARN_ON_ONCE(!(dyn_call->flags & TRACE_EVENT_FL_DYNAMIC)))
+               return false;
+
+       down_read(&trace_event_sem);
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (call == dyn_call) {
+                       atomic_inc(&dyn_call->refcnt);
+                       ret = true;
+                       /* Found it: no need to scan the rest of the list. */
+                       break;
+               }
+       }
+       up_read(&trace_event_sem);
+       return ret;
+}
+
+/*
+ * Drop a reference on the dynamic event @call.
+ *
+ * Warns once and does nothing if @call is not flagged
+ * TRACE_EVENT_FL_DYNAMIC. Warns once and resets the count to 0 if the
+ * count is already zero or negative, so it is never decremented below 0.
+ */
+void trace_event_dyn_put_ref(struct trace_event_call *call)
+{
+       if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_DYNAMIC)))
+               return;
+
+       if (WARN_ON_ONCE(atomic_read(&call->refcnt) <= 0)) {
+               atomic_set(&call->refcnt, 0);
+               return;
+       }
+
+       atomic_dec(&call->refcnt);
+}
+
+/* Return true while @call has a non-zero reference count. */
+bool trace_event_dyn_busy(struct trace_event_call *call)
+{
+       return atomic_read(&call->refcnt) != 0;
+}
+
 int dyn_event_register(struct dyn_event_operations *ops)
 {
        if (!ops || !ops->create || !ops->show || !ops->is_busy ||
index 7754936..936477a 100644 (file)
@@ -76,13 +76,15 @@ int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
        return 0;
 }
 
-static inline int dyn_event_add(struct dyn_event *ev)
+static inline int dyn_event_add(struct dyn_event *ev,
+                               struct trace_event_call *call)
 {
        lockdep_assert_held(&event_mutex);
 
        if (!ev || !ev->ops)
                return -EINVAL;
 
+       call->flags |= TRACE_EVENT_FL_DYNAMIC;
        list_add_tail(&ev->list, &dyn_event_list);
        return 0;
 }
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
new file mode 100644 (file)
index 0000000..56a96e9
--- /dev/null
@@ -0,0 +1,903 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * event probes
+ *
+ * Part of this code was copied from kernel/trace/trace_kprobe.c written by
+ * Masami Hiramatsu <mhiramat@kernel.org>
+ *
+ * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
+ * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ftrace.h>
+
+#include "trace_dynevent.h"
+#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
+
+#define EPROBE_EVENT_SYSTEM "eprobes"
+
+/*
+ * One event probe: a dynamic event attached to another trace event.
+ * event_system and event_name are kmalloc'ed copies owned by the probe
+ * (they are kfree'd on cleanup), and a reference is held on @event until
+ * cleanup.
+ */
+struct trace_eprobe {
+       /* tracepoint system */
+       const char *event_system;
+
+       /* tracepoint event */
+       const char *event_name;
+
+       /* the event this probe is attached to */
+       struct trace_event_call *event;
+
+       /* dynamic event linkage, see to_trace_eprobe() */
+       struct dyn_event        devent;
+       /* generic probe state: name, group and arguments */
+       struct trace_probe      tp;
+};
+
+/*
+ * Private data of an eprobe trigger: the probe and the trace event file
+ * of the probe's own event, into which the handler writes its records.
+ */
+struct eprobe_data {
+       struct trace_event_file *file;
+       struct trace_eprobe     *ep;
+};
+
+static int __trace_eprobe_create(int argc, const char *argv[]);
+
+/*
+ * Free @ep and everything it owns: the probe state, the copied event and
+ * system names, and the reference on the attached event (if set).
+ * A NULL @ep is accepted and ignored.
+ */
+static void trace_event_probe_cleanup(struct trace_eprobe *ep)
+{
+       if (!ep)
+               return;
+       trace_probe_cleanup(&ep->tp);
+       kfree(ep->event_name);
+       kfree(ep->event_system);
+       if (ep->event)
+               trace_event_put_ref(ep->event);
+       kfree(ep);
+}
+
+/* Map an embedded dyn_event back to the trace_eprobe that contains it. */
+static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
+{
+       return container_of(ev, struct trace_eprobe, devent);
+}
+
+/*
+ * dyn_event_operations.create callback: hand @raw_command to
+ * trace_probe_create() with __trace_eprobe_create() as the creator.
+ */
+static int eprobe_dyn_event_create(const char *raw_command)
+{
+       return trace_probe_create(raw_command, __trace_eprobe_create);
+}
+
+/*
+ * dyn_event_operations.show callback: print the probe definition as
+ * "e:<group>/<event> <attached-system>.<attached-event> [<name>=<arg>...]"
+ * followed by a newline. Always returns 0.
+ */
+static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
+{
+       struct trace_eprobe *ep = to_trace_eprobe(ev);
+       int i;
+
+       seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
+                               trace_probe_name(&ep->tp));
+       seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
+
+       for (i = 0; i < ep->tp.nr_args; i++)
+               seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
+       seq_putc(m, '\n');
+
+       return 0;
+}
+
+/*
+ * Remove @ep from the dynamic event list and unlink its probe.
+ *
+ * When @ep is the only probe on its event, the event call itself has to
+ * be unregistered first; that fails with -EBUSY while the probe is
+ * enabled or the event is still in use. Returns 0 on success.
+ */
+static int unregister_trace_eprobe(struct trace_eprobe *ep)
+{
+       struct trace_probe *probe = &ep->tp;
+
+       /* With siblings on the event, only this eprobe goes away. */
+       if (!trace_probe_has_sibling(probe)) {
+               /* Enabled event can not be unregistered */
+               if (trace_probe_is_enabled(probe))
+                       return -EBUSY;
+
+               /* Will fail if probe is being used by ftrace or perf */
+               if (trace_probe_unregister_event_call(probe))
+                       return -EBUSY;
+       }
+
+       dyn_event_remove(&ep->devent);
+       trace_probe_unlink(probe);
+
+       return 0;
+}
+
+/*
+ * dyn_event_operations.free callback: unregister the probe behind @ev
+ * and, only if that succeeded, free it. Returns the unregister result.
+ */
+static int eprobe_dyn_event_release(struct dyn_event *ev)
+{
+       struct trace_eprobe *ep = to_trace_eprobe(ev);
+       int err;
+
+       err = unregister_trace_eprobe(ep);
+       if (err)
+               return err;
+
+       trace_event_probe_cleanup(ep);
+       return 0;
+}
+
+/* dyn_event_operations.is_busy callback: busy means the probe is enabled. */
+static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
+{
+       struct trace_eprobe *ep = to_trace_eprobe(ev);
+
+       return trace_probe_is_enabled(&ep->tp);
+}
+
+/*
+ * dyn_event_operations.match callback: @ev matches when its probe name
+ * equals @event, its group equals @system (a NULL @system matches any
+ * group) and the command arguments match as well.
+ */
+static bool eprobe_dyn_event_match(const char *system, const char *event,
+                       int argc, const char **argv, struct dyn_event *ev)
+{
+       struct trace_eprobe *ep = to_trace_eprobe(ev);
+
+       if (strcmp(trace_probe_name(&ep->tp), event) != 0)
+               return false;
+
+       if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
+               return false;
+
+       return trace_probe_match_command_args(&ep->tp, argc, argv);
+}
+
+/* Dynamic event callbacks for event probes; set on each probe in alloc_event_probe(). */
+static struct dyn_event_operations eprobe_dyn_event_ops = {
+       .create = eprobe_dyn_event_create,
+       .show = eprobe_dyn_event_show,
+       .is_busy = eprobe_dyn_event_is_busy,
+       .free = eprobe_dyn_event_release,
+       .match = eprobe_dyn_event_match,
+};
+
+/*
+ * Allocate and initialize an event probe @group/@this_event that is
+ * attached to @event, with room for @nargs probe arguments.
+ *
+ * The system and event names of @event are copied into the new probe.
+ * Returns the new probe, ERR_PTR(-ENODEV) if @event is NULL, or an
+ * ERR_PTR() with the failure code otherwise.
+ *
+ * On every failure after the NULL check a reference on @event is dropped:
+ * directly when the probe itself could not be allocated, and through
+ * trace_event_probe_cleanup() once ep->event has been set.
+ * NOTE(review): this assumes the caller holds a reference on @event -
+ * confirm against the caller.
+ */
+static struct trace_eprobe *alloc_event_probe(const char *group,
+                                             const char *this_event,
+                                             struct trace_event_call *event,
+                                             int nargs)
+{
+       struct trace_eprobe *ep;
+       const char *event_name;
+       const char *sys_name;
+       int ret = -ENOMEM;
+
+       if (!event)
+               return ERR_PTR(-ENODEV);
+
+       sys_name = event->class->system;
+       event_name = trace_event_name(event);
+
+       ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
+       if (!ep) {
+               /*
+                * ep is NULL here, so the reference must be dropped through
+                * @event itself; the cleanup below is a no-op for NULL.
+                */
+               trace_event_put_ref(event);
+               goto error;
+       }
+       ep->event = event;
+       ep->event_name = kstrdup(event_name, GFP_KERNEL);
+       if (!ep->event_name)
+               goto error;
+       ep->event_system = kstrdup(sys_name, GFP_KERNEL);
+       if (!ep->event_system)
+               goto error;
+
+       ret = trace_probe_init(&ep->tp, this_event, group, false);
+       if (ret < 0)
+               goto error;
+
+       dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
+       return ep;
+error:
+       trace_event_probe_cleanup(ep);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Resolve argument @i of @ep: its code->data initially holds a field
+ * name, which is looked up among the fields of the attached event.
+ *
+ * The name string is freed in every case. On a match code->data is
+ * replaced by the field descriptor and 0 is returned; otherwise it is
+ * set to NULL and -ENOENT is returned.
+ */
+static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
+{
+       struct probe_arg *parg = &ep->tp.args[i];
+       struct ftrace_event_field *cur, *found = NULL;
+       struct list_head *fields = trace_get_fields(ep->event);
+
+       list_for_each_entry(cur, fields, link) {
+               if (strcmp(parg->code->data, cur->name) == 0) {
+                       found = cur;
+                       break;
+               }
+       }
+
+       /* The name is no longer needed, whether or not it was found. */
+       kfree(parg->code->data);
+       parg->code->data = found;
+
+       return found ? 0 : -ENOENT;
+}
+
+/*
+ * define_fields callback for eprobe events: define the fixed "type"
+ * header field, then one field per probe argument.
+ * Returns -ENOENT if @event_call has no primary trace probe.
+ */
+static int eprobe_event_define_fields(struct trace_event_call *event_call)
+{
+       /*
+        * NOTE(review): 'ret' and 'field' are presumably referenced by the
+        * DEFINE_FIELD() macro below - confirm against its definition.
+        */
+       int ret;
+       struct eprobe_trace_entry_head field;
+       struct trace_probe *tp;
+
+       tp = trace_probe_primary_from_call(event_call);
+       if (WARN_ON_ONCE(!tp))
+               return -ENOENT;
+
+       DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
+
+       return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
+}
+
+/*
+ * Field description of eprobe events: the fields are produced by a
+ * function rather than a static table; the empty entry ends the array.
+ */
+static struct trace_event_fields eprobe_fields_array[] = {
+       { .type = TRACE_FUNCTION_TYPE,
+         .define_fields = eprobe_event_define_fields },
+       {}
+};
+
+/* Event entry printers */
+/*
+ * Print one eprobe record as
+ * "<probe>: (<system>.<event>) <args...>\n".
+ * If the recorded event type can no longer be resolved, the numeric type
+ * is printed in place of "<system>.<event>".
+ */
+static enum print_line_t
+print_eprobe_event(struct trace_iterator *iter, int flags,
+                  struct trace_event *event)
+{
+       struct eprobe_trace_entry_head *field;
+       struct trace_event_call *pevent;
+       struct trace_event *probed_event;
+       struct trace_seq *s = &iter->seq;
+       struct trace_probe *tp;
+
+       field = (struct eprobe_trace_entry_head *)iter->ent;
+       tp = trace_probe_primary_from_call(
+               container_of(event, struct trace_event_call, event));
+       if (WARN_ON_ONCE(!tp))
+               goto out;
+
+       trace_seq_printf(s, "%s: (", trace_probe_name(tp));
+
+       /* field->type holds the event type of the attached event. */
+       probed_event = ftrace_find_event(field->type);
+       if (probed_event) {
+               pevent = container_of(probed_event, struct trace_event_call, event);
+               trace_seq_printf(s, "%s.%s", pevent->class->system,
+                                trace_event_name(pevent));
+       } else {
+               trace_seq_printf(s, "%u", field->type);
+       }
+
+       trace_seq_putc(s, ')');
+
+       /* The argument data starts right behind the fixed header. */
+       if (print_probe_args(s, tp->args, tp->nr_args,
+                            (u8 *)&field[1], field) < 0)
+               goto out;
+
+       trace_seq_putc(s, '\n');
+ out:
+       return trace_handle_return(s);
+}
+
+/*
+ * Read the value of one field of the attached event out of its record.
+ *
+ * @code->data is the field descriptor; the value is read at
+ * @rec + field->offset with a width chosen by field->size (1, 2 or 4
+ * bytes, anything else is read as a long) and, for signed fields,
+ * through the matching signed type before being stored in the unsigned
+ * long result.
+ * NOTE(review): the signed 1-byte case reads through plain 'char' -
+ * confirm this behaves as intended where char is unsigned.
+ */
+static unsigned long get_event_field(struct fetch_insn *code, void *rec)
+{
+       struct ftrace_event_field *field = code->data;
+       unsigned long val;
+       void *addr;
+
+       addr = rec + field->offset;
+
+       switch (field->size) {
+       case 1:
+               if (field->is_signed)
+                       val = *(char *)addr;
+               else
+                       val = *(unsigned char *)addr;
+               break;
+       case 2:
+               if (field->is_signed)
+                       val = *(short *)addr;
+               else
+                       val = *(unsigned short *)addr;
+               break;
+       case 4:
+               if (field->is_signed)
+                       val = *(int *)addr;
+               else
+                       val = *(unsigned int *)addr;
+               break;
+       default:
+               if (field->is_signed)
+                       val = *(long *)addr;
+               else
+                       val = *(unsigned long *)addr;
+               break;
+       }
+       return val;
+}
+
+/*
+ * Sum up the space needed by the dynamic arguments of @tp for the
+ * record @rec. Arguments whose length cannot be determined (length
+ * <= 0) do not contribute.
+ */
+static int get_eprobe_size(struct trace_probe *tp, void *rec)
+{
+       int idx, total = 0;
+
+       for (idx = 0; idx < tp->nr_args; idx++) {
+               struct probe_arg *parg = &tp->args[idx];
+               unsigned long val;
+               int len;
+
+               /* Only dynamic arguments need extra space. */
+               if (likely(!parg->dynamic))
+                       continue;
+
+               val = get_event_field(parg->code, rec);
+               len = process_fetch_insn_bottom(parg->code + 1, val, NULL, NULL);
+               if (len > 0)
+                       total += len;
+       }
+
+       return total;
+}
+
+/* Eprobe specific fetch functions */
+
+/*
+ * Fetch one probe argument: the first instruction (@code) selects a field
+ * of the attached event's record @rec, the remaining instructions
+ * (@code + 1) are run on that value by process_fetch_insn_bottom().
+ *
+ * Note that we don't verify it, since the code does not come from user space
+ */
+static int
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+                  void *base)
+{
+       unsigned long val;
+
+       val = get_event_field(code, rec);
+       return process_fetch_insn_bottom(code + 1, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
+/*
+ * Return the length of the user space string at @addr -- including null
+ * terminal byte. At most MAX_STRING_SIZE bytes are examined.
+ */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+       const void __user *uaddr =  (__force const void __user *)addr;
+
+       return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
+}
+
+/*
+ * Return the length of string -- including null terminal byte.
+ *
+ * The string is read byte by byte, at most MAX_STRING_SIZE bytes; a
+ * failed read returns its negative error code instead of a length.
+ * With CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, addresses below
+ * TASK_SIZE are handled as user space strings.
+ */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
+{
+       int ret, len = 0;
+       u8 c;
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+       if (addr < TASK_SIZE)
+               return fetch_store_strlen_user(addr);
+#endif
+
+       /*
+        * NOTE(review): the loop condition tests 'c' before 'ret', so after
+        * a failed first read 'c' may be examined without having been
+        * written - confirm whether the test order should be swapped.
+        */
+       do {
+               ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
+               len++;
+       } while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+       return (ret < 0) ? ret : len;
+}
+
+/*
+ * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ *
+ * On success the data location at @dest is rewritten with the copied
+ * length and the offset of the string from @base, and that length is
+ * returned. Returns -ENOMEM if the max length is 0, or the negative
+ * result of the copy.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+       const void __user *uaddr =  (__force const void __user *)addr;
+       int maxlen = get_loc_len(*(u32 *)dest);
+       void *__dest;
+       long ret;
+
+       if (unlikely(!maxlen))
+               return -ENOMEM;
+
+       __dest = get_loc_data(dest, base);
+
+       ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
+       if (ret >= 0)
+               *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+       return ret;
+}
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
+ * length and relative data location.
+ *
+ * With CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, addresses below
+ * TASK_SIZE are fetched as user space strings. On success the data
+ * location at @dest is rewritten with the copied length and the offset
+ * of the string from @base, and that length is returned. Returns -ENOMEM
+ * if the max length is 0, or the negative result of the copy.
+ */
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+       int maxlen = get_loc_len(*(u32 *)dest);
+       void *__dest;
+       long ret;
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+       if ((unsigned long)addr < TASK_SIZE)
+               return fetch_store_string_user(addr, dest, base);
+#endif
+
+       if (unlikely(!maxlen))
+               return -ENOMEM;
+
+       __dest = get_loc_data(dest, base);
+
+       /*
+        * Try to get string again, since the string can be changed while
+        * probing.
+        */
+       ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
+       if (ret >= 0)
+               *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+       return ret;
+}
+
+/* Copy @size bytes from the user space address @src to @dest without faulting. */
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+       const void __user *uaddr =  (__force const void __user *)src;
+
+       return copy_from_user_nofault(dest, uaddr, size);
+}
+
+/*
+ * Copy @size bytes from @src to @dest without faulting. With
+ * CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, addresses below
+ * TASK_SIZE are read as user space memory, everything else as kernel
+ * memory.
+ */
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+       if ((unsigned long)src < TASK_SIZE)
+               return probe_mem_read_user(dest, src, size);
+#endif
+       return copy_from_kernel_nofault(dest, src, size);
+}
+
+/*
+ * eprobe handler
+ *
+ * Record one eprobe event for the attached event's record @rec: reserve
+ * a ring buffer entry large enough for the fixed header, the static
+ * argument area and the dynamic data, store the attached event's type
+ * and the fetched arguments, then commit the entry.
+ */
+static inline void
+__eprobe_trace_func(struct eprobe_data *edata, void *rec)
+{
+       struct eprobe_trace_entry_head *entry;
+       struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
+       struct trace_event_buffer fbuffer;
+       int dsize;
+
+       /* The file must belong to this probe's own event. */
+       if (WARN_ON_ONCE(call != edata->file->event_call))
+               return;
+
+       if (trace_trigger_soft_disabled(edata->file))
+               return;
+
+       fbuffer.trace_ctx = tracing_gen_ctx();
+       fbuffer.trace_file = edata->file;
+
+       /* Extra space needed by the dynamic arguments for this record. */
+       dsize = get_eprobe_size(&edata->ep->tp, rec);
+       fbuffer.regs = NULL;
+
+       fbuffer.event =
+               trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
+                                       call->event.type,
+                                       sizeof(*entry) + edata->ep->tp.size + dsize,
+                                       fbuffer.trace_ctx);
+       if (!fbuffer.event)
+               return;
+
+       entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+       /* Remember which event was probed; 0 when there is none. */
+       if (edata->ep->event)
+               entry->type = edata->ep->event->event.type;
+       else
+               entry->type = 0;
+       store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
+
+       trace_event_buffer_commit(&fbuffer);
+}
+
+/*
+ * The event probe implementation uses event triggers to get access to
+ * the event it is attached to, but is not an actual trigger. The below
+ * functions are just stubs to fulfill what is needed to use the trigger
+ * infrastructure.
+ */
+/* Stub: nothing to set up for an eprobe trigger; always returns 0. */
+static int eprobe_trigger_init(struct event_trigger_ops *ops,
+                              struct event_trigger_data *data)
+{
+       return 0;
+}
+
+/* Stub: intentionally empty, nothing is released here. */
+static void eprobe_trigger_free(struct event_trigger_ops *ops,
+                               struct event_trigger_data *data)
+{
+
+}
+
+/* Stub: prints nothing to @m and always returns 0. */
+static int eprobe_trigger_print(struct seq_file *m,
+                               struct event_trigger_ops *ops,
+                               struct event_trigger_data *data)
+{
+       /* Do not print eprobe event triggers */
+       return 0;
+}
+
+/*
+ * Trigger callback: forward the record @rec of the attached event to the
+ * eprobe handler, using the eprobe_data stored in @data->private_data.
+ * @buffer and @rbe are not used.
+ */
+static void eprobe_trigger_func(struct event_trigger_data *data,
+                               struct trace_buffer *buffer, void *rec,
+                               struct ring_buffer_event *rbe)
+{
+       struct eprobe_data *edata = data->private_data;
+
+       __eprobe_trace_func(edata, rec);
+}
+
+/*
+ * Trigger operations used by every eprobe trigger. Only .func does real
+ * work; .print, .init and .free are the stubs defined above.
+ */
+static struct event_trigger_ops eprobe_trigger_ops = {
+       .func                   = eprobe_trigger_func,
+       .print                  = eprobe_trigger_print,
+       .init                   = eprobe_trigger_init,
+       .free                   = eprobe_trigger_free,
+};
+
+/* Command ->func stub: always fails with -1. */
+static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
+                                  struct trace_event_file *file,
+                                  char *glob, char *cmd, char *param)
+{
+       return -1;
+}
+
+/* Command ->reg stub: always fails with -1. */
+static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
+                                struct event_trigger_data *data,
+                                struct trace_event_file *file)
+{
+       return -1;
+}
+
+/* Command ->unreg stub: intentionally does nothing. */
+static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
+                                   struct event_trigger_data *data,
+                                   struct trace_event_file *file)
+{
+
+}
+
+/* Command ->get_trigger_ops: returns eprobe_trigger_ops whatever @cmd and @param are. */
+static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
+                                                       char *param)
+{
+       return &eprobe_trigger_ops;
+}
+
+/*
+ * Event command descriptor attached to every eprobe trigger as its
+ * cmd_ops. The .func/.reg/.unreg callbacks are the stubs defined above;
+ * .unreg_all and .set_filter are left unset.
+ */
+static struct event_command event_trigger_cmd = {
+       .name                   = "eprobe",
+       .trigger_type           = ETT_EVENT_EPROBE,
+       .flags                  = EVENT_CMD_FL_NEEDS_REC,
+       .func                   = eprobe_trigger_cmd_func,
+       .reg                    = eprobe_trigger_reg_func,
+       .unreg                  = eprobe_trigger_unreg_func,
+       .unreg_all              = NULL,
+       .get_trigger_ops        = eprobe_trigger_get_ops,
+       .set_filter             = NULL,
+};
+
+/*
+ * Allocate and initialise the trigger that connects an event probe to
+ * the event it is attached to.
+ *
+ * @ep:   the event probe, stored in the new eprobe_data as ->ep
+ * @file: stored in the new eprobe_data as ->file
+ *
+ * Returns the new trigger, with private_data pointing at a freshly
+ * allocated eprobe_data, or ERR_PTR(-ENOMEM) if either allocation
+ * fails.
+ */
+static struct event_trigger_data *
+new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
+{
+       struct eprobe_data *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       struct event_trigger_data *trig = kzalloc(sizeof(*trig), GFP_KERNEL);
+
+       if (!priv || !trig)
+               goto nomem;
+
+       priv->ep = ep;
+       priv->file = file;
+
+       trig->private_data = priv;
+       trig->ops = &eprobe_trigger_ops;
+       trig->flags = EVENT_TRIGGER_FL_PROBE;
+       trig->count = -1;
+
+       /*
+        * EVENT PROBE triggers are not registered as commands with
+        * register_event_command(), as they are not controlled by the user
+        * from the trigger file
+        */
+       trig->cmd_ops = &event_trigger_cmd;
+
+       RCU_INIT_POINTER(trig->filter, NULL);
+       INIT_LIST_HEAD(&trig->list);
+
+       return trig;
+
+nomem:
+       /* kfree(NULL) is a no-op, so both can be released unconditionally */
+       kfree(priv);
+       kfree(trig);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Attach @ep to the event it probes, within the trace array that owns
+ * @eprobe_file.
+ *
+ * Looks up the target event's file by ep->event_system/ep->event_name,
+ * creates an eprobe trigger and appends it to that file's trigger list.
+ *
+ * Returns 0 on success, -ENOENT if the target event file is not found,
+ * or the error from new_eprobe_trigger().
+ */
+static int enable_eprobe(struct trace_eprobe *ep,
+                        struct trace_event_file *eprobe_file)
+{
+       struct trace_event_file *target;
+       struct event_trigger_data *trig;
+
+       target = find_event_file(eprobe_file->tr, ep->event_system,
+                                ep->event_name);
+       if (!target)
+               return -ENOENT;
+
+       trig = new_eprobe_trigger(ep, eprobe_file);
+       if (IS_ERR(trig))
+               return PTR_ERR(trig);
+
+       list_add_tail_rcu(&trig->list, &target->triggers);
+
+       trace_event_trigger_enable_disable(target, 1);
+       update_cond_flag(target);
+
+       return 0;
+}
+
+/* Event callbacks for eprobe events; only the .trace output handler is set. */
+static struct trace_event_functions eprobe_funcs = {
+       .trace          = print_eprobe_event
+};
+
+/*
+ * Detach @ep from the event it probes in trace array @tr.
+ *
+ * Finds the eprobe trigger belonging to @ep on the target event's
+ * trigger list, unlinks it, and releases the trigger together with its
+ * eprobe_data.
+ *
+ * Returns 0 on success, -ENOENT if the target event file is not found,
+ * or -ENODEV if @ep has no trigger on that file.
+ */
+static int disable_eprobe(struct trace_eprobe *ep,
+                         struct trace_array *tr)
+{
+       struct event_trigger_data *trigger;
+       struct trace_event_file *file;
+       struct eprobe_data *edata;
+
+       file = find_event_file(tr, ep->event_system, ep->event_name);
+       if (!file)
+               return -ENOENT;
+
+       list_for_each_entry(trigger, &file->triggers, list) {
+               /* Only eprobe triggers carry an eprobe_data in private_data */
+               if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
+                       continue;
+               edata = trigger->private_data;
+               if (edata->ep == ep)
+                       break;
+       }
+       /* The loop ran off the end: no trigger for this eprobe */
+       if (list_entry_is_head(trigger, &file->triggers, list))
+               return -ENODEV;
+
+       edata = trigger->private_data;
+
+       list_del_rcu(&trigger->list);
+
+       trace_event_trigger_enable_disable(file, 0);
+       update_cond_flag(file);
+
+       /*
+        * The trigger and its eprobe_data were allocated by
+        * new_eprobe_trigger() and the ->free trigger op is an empty stub,
+        * so they have to be released here or they leak. Wait until no
+        * tracepoint handler can still be walking the trigger list before
+        * freeing them.
+        */
+       tracepoint_synchronize_unregister();
+
+       kfree(edata);
+       kfree(trigger);
+
+       return 0;
+}
+
+/*
+ * Enable the eprobe event @call for @file.
+ *
+ * Records @file on the primary trace_probe (or sets TP_FLAG_PROFILE when
+ * @file is NULL). If the probe was not enabled before, every eprobe on
+ * the probe list is attached to its target event with enable_eprobe().
+ *
+ * Returns 0 on success, -ENODEV if @call has no primary probe, or the
+ * error from trace_probe_add_file()/enable_eprobe(). On an
+ * enable_eprobe() failure, every eprobe attached by this call is
+ * detached again and the file/flag bookkeeping is undone.
+ *
+ * NOTE(review): enable_eprobe() dereferences @file and the rollback
+ * uses file->tr, so the @file == NULL branch is only safe if it never
+ * reaches the enable loop - confirm against the callers.
+ */
+static int enable_trace_eprobe(struct trace_event_call *call,
+                              struct trace_event_file *file)
+{
+       struct trace_probe *pos, *tp;
+       struct trace_eprobe *ep;
+       bool enabled;
+       int ret = 0;
+       int cnt = 0;
+
+       tp = trace_probe_primary_from_call(call);
+       if (WARN_ON_ONCE(!tp))
+               return -ENODEV;
+       enabled = trace_probe_is_enabled(tp);
+
+       /* This also changes "enabled" state */
+       if (file) {
+               ret = trace_probe_add_file(tp, file);
+               if (ret)
+                       return ret;
+       } else
+               trace_probe_set_flag(tp, TP_FLAG_PROFILE);
+
+       /* Already enabled before this call: the eprobes are attached */
+       if (enabled)
+               return 0;
+
+       list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+               ep = container_of(pos, struct trace_eprobe, tp);
+               ret = enable_eprobe(ep, file);
+               if (ret)
+                       break;
+               cnt++;
+       }
+
+       if (ret) {
+               /*
+                * Failed to enable one of them. Roll back all: the first
+                * @cnt entries of the list are the ones that were attached
+                * above; the entry that failed has nothing to undo.
+                */
+               list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+                       if (!cnt--)
+                               break;
+                       ep = container_of(pos, struct trace_eprobe, tp);
+                       disable_eprobe(ep, file->tr);
+               }
+               if (file)
+                       trace_probe_remove_file(tp, file);
+               else
+                       trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+       }
+
+       return ret;
+}
+
+/*
+ * Disable the eprobe event @call for @file.
+ *
+ * Clears TP_FLAG_TRACE when @file is the only file linked to the probe
+ * (or TP_FLAG_PROFILE when @file is NULL); once the probe is no longer
+ * enabled, every eprobe on the probe list is detached with
+ * disable_eprobe(). Finally the file link is removed.
+ *
+ * Returns 0 on success, -ENODEV if @call has no primary probe, or
+ * -ENOENT if @file is not linked to the probe.
+ */
+static int disable_trace_eprobe(struct trace_event_call *call,
+                               struct trace_event_file *file)
+{
+       struct trace_probe *pos, *tp;
+       struct trace_eprobe *ep;
+
+       tp = trace_probe_primary_from_call(call);
+       if (WARN_ON_ONCE(!tp))
+               return -ENODEV;
+
+       if (file) {
+               if (!trace_probe_get_file_link(tp, file))
+                       return -ENOENT;
+               /* Other files still use the probe: only drop this link */
+               if (!trace_probe_has_single_file(tp))
+                       goto out;
+               trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+       } else
+               trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+
+       if (!trace_probe_is_enabled(tp)) {
+               list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+                       ep = container_of(pos, struct trace_eprobe, tp);
+                       /*
+                        * NOTE(review): file->tr is dereferenced here even on
+                        * the file == NULL path above - confirm that path
+                        * cannot reach this loop.
+                        */
+                       disable_eprobe(ep, file->tr);
+               }
+       }
+
+ out:
+       if (file)
+               /*
+                * Synchronization is done in the function below. For a perf
+                * event, file == NULL and perf_trace_event_unreg() calls
+                * tracepoint_synchronize_unregister() to synchronize the
+                * event, so nothing needs to be done for it here.
+                */
+               trace_probe_remove_file(tp, file);
+
+       return 0;
+}
+
+/*
+ * Class ->reg callback for eprobe events.
+ *
+ * TRACE_REG_REGISTER and TRACE_REG_UNREGISTER enable/disable the eprobe
+ * for the trace_event_file passed in @data. Every other request,
+ * including all the perf ones, is accepted without doing anything.
+ */
+static int eprobe_register(struct trace_event_call *event,
+                          enum trace_reg type, void *data)
+{
+       if (type == TRACE_REG_REGISTER)
+               return enable_trace_eprobe(event, data);
+
+       if (type == TRACE_REG_UNREGISTER)
+               return disable_trace_eprobe(event, data);
+
+       /* TRACE_REG_PERF_* and anything else: nothing to do */
+       return 0;
+}
+
+/*
+ * Fill in the trace_event_call of @ep: mark it as an eprobe event and
+ * install the eprobe output, field-definition and registration hooks.
+ */
+static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
+{
+       struct trace_event_call *ev_call = trace_probe_event_call(&ep->tp);
+
+       ev_call->class->reg = eprobe_register;
+       ev_call->class->fields_array = eprobe_fields_array;
+       ev_call->event.funcs = &eprobe_funcs;
+       ev_call->flags = TRACE_EVENT_FL_EPROBE;
+}
+
+/*
+ * Look up the event @system/@event_name on the ftrace_events list and
+ * take a reference on it.
+ *
+ * Events flagged IGNORE_ENABLE, KPROBE, UPROBE or EPROBE are never
+ * matched. Returns the event with a reference held, or NULL if no event
+ * matches or the reference cannot be taken on the first match.
+ */
+static struct trace_event_call *
+find_and_get_event(const char *system, const char *event_name)
+{
+       struct trace_event_call *cur;
+       const char *cur_name;
+
+       list_for_each_entry(cur, &ftrace_events, list) {
+               /* Skip other probes and ftrace events */
+               if (cur->flags & (TRACE_EVENT_FL_IGNORE_ENABLE |
+                                 TRACE_EVENT_FL_KPROBE |
+                                 TRACE_EVENT_FL_UPROBE |
+                                 TRACE_EVENT_FL_EPROBE))
+                       continue;
+
+               if (!cur->class->system ||
+                   strcmp(system, cur->class->system) != 0)
+                       continue;
+
+               cur_name = trace_event_name(cur);
+               if (!cur_name || strcmp(event_name, cur_name) != 0)
+                       continue;
+
+               /* The first match decides the result either way */
+               return trace_event_try_get_ref(cur) ? cur : NULL;
+       }
+
+       return NULL;
+}
+
+/*
+ * Parse fetch argument @i of @ep from argv[i] and, when it turns out to
+ * be a FETCH_OP_TP_ARG, run trace_eprobe_tp_arg_update() on it.
+ *
+ * Returns 0 on success or the error of whichever helper failed.
+ */
+static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
+{
+       int err;
+
+       err = traceprobe_parse_probe_arg(&ep->tp, i, argv[i],
+                                        TPARG_FL_KERNEL | TPARG_FL_TPOINT);
+       if (err)
+               return err;
+
+       if (ep->tp.args[i].code->op != FETCH_OP_TP_ARG)
+               return 0;
+
+       return trace_eprobe_tp_arg_update(ep, i);
+}
+
+/*
+ * Create and register an event probe from a parsed command line.
+ *
+ * @argc: number of entries in @argv
+ * @argv: argv[0] is "e[:[GRP/]ENAME]", argv[1] is "SYSTEM.EVENT", and
+ *        the remaining entries are fetch arguments
+ *
+ * Returns -ECANCELED when the command is not an eprobe definition
+ * (fewer than two arguments, or argv[0] does not start with 'e'),
+ * -EINVAL on a parse error, another negative error from the helpers,
+ * or the result of dyn_event_add() once the event is registered.
+ */
+static int __trace_eprobe_create(int argc, const char *argv[])
+{
+       /*
+        * Argument syntax:
+        *      e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
+        * Fetch args:
+        *  <name>=$<field>[:TYPE]
+        */
+       const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
+       const char *sys_event = NULL, *sys_name = NULL;
+       struct trace_event_call *event_call;
+       struct trace_eprobe *ep = NULL;
+       char buf1[MAX_EVENT_NAME_LEN];
+       char buf2[MAX_EVENT_NAME_LEN];
+       int ret = 0;
+       int i;
+
+       if (argc < 2 || argv[0][0] != 'e')
+               return -ECANCELED;
+
+       trace_probe_log_init("event_probe", argc, argv);
+
+       event = strchr(&argv[0][1], ':');
+       if (event) {
+               event++;
+               ret = traceprobe_parse_event_name(&event, &group, buf1,
+                                                 event - argv[0]);
+               if (ret)
+                       goto parse_error;
+       } else {
+               /* No explicit name: derive one from SYSTEM.EVENT */
+               strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
+               sanitize_event_name(buf1);
+               event = buf1;
+       }
+       if (!is_good_name(event) || !is_good_name(group))
+               goto parse_error;
+
+       sys_event = argv[1];
+       ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
+                                         sys_event - argv[1]);
+       if (ret || !sys_name)
+               goto parse_error;
+       if (!is_good_name(sys_event) || !is_good_name(sys_name))
+               goto parse_error;
+
+       mutex_lock(&event_mutex);
+       event_call = find_and_get_event(sys_name, sys_event);
+       ep = alloc_event_probe(group, event, event_call, argc - 2);
+       mutex_unlock(&event_mutex);
+
+       if (IS_ERR(ep)) {
+               ret = PTR_ERR(ep);
+               /* This must return -ENOMEM, else there is a bug */
+               WARN_ON_ONCE(ret != -ENOMEM);
+               /*
+                * ep holds an error pointer, not an allocation. Reset it so
+                * the cleanup at "error" sees NULL, exactly as it does on
+                * the parse_error path, instead of an ERR_PTR value.
+                */
+               ep = NULL;
+               goto error;
+       }
+
+       argc -= 2; argv += 2;
+       /* parse arguments */
+       for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
+               trace_probe_log_set_index(i + 2);
+               ret = trace_eprobe_tp_update_arg(ep, argv, i);
+               if (ret)
+                       goto error;
+       }
+       ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
+       if (ret < 0)
+               goto error;
+       init_trace_eprobe_call(ep);
+       mutex_lock(&event_mutex);
+       ret = trace_probe_register_event_call(&ep->tp);
+       if (ret) {
+               if (ret == -EEXIST) {
+                       trace_probe_log_set_index(0);
+                       trace_probe_log_err(0, EVENT_EXIST);
+               }
+               mutex_unlock(&event_mutex);
+               goto error;
+       }
+       ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
+       mutex_unlock(&event_mutex);
+       return ret;
+parse_error:
+       ret = -EINVAL;
+error:
+       trace_event_probe_cleanup(ep);
+       return ret;
+}
+
+/*
+ * Register the eprobe dynamic event operations at core_initcall time.
+ * This allows the kernel to set up eprobe events in postcore_initcall
+ * without tracefs.
+ */
+static __init int trace_events_eprobe_init_early(void)
+{
+       int err = dyn_event_register(&eprobe_dyn_event_ops);
+
+       if (err)
+               pr_warn("Could not register eprobe_dyn_event_ops\n");
+
+       return err;
+}
+core_initcall(trace_events_eprobe_init_early);
index 03be443..6aed10e 100644 (file)
@@ -177,7 +177,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
                }
        }
 out:
-       module_put(tp_event->mod);
+       trace_event_put_ref(tp_event);
 }
 
 static int perf_trace_event_open(struct perf_event *p_event)
@@ -224,10 +224,10 @@ int perf_trace_init(struct perf_event *p_event)
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
-                   try_module_get(tp_event->mod)) {
+                   trace_event_try_get_ref(tp_event)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        if (ret)
-                               module_put(tp_event->mod);
+                               trace_event_put_ref(tp_event);
                        break;
                }
        }
index 80e9698..1349b6d 100644 (file)
@@ -2525,7 +2525,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
                return ret;
 
        list_add(&call->list, &ftrace_events);
-       call->mod = mod;
+       if (call->flags & TRACE_EVENT_FL_DYNAMIC)
+               atomic_set(&call->refcnt, 0);
+       else
+               call->module = mod;
 
        return 0;
 }
@@ -2839,7 +2842,9 @@ static void trace_module_remove_events(struct module *mod)
 
        down_write(&trace_event_sem);
        list_for_each_entry_safe(call, p, &ftrace_events, list) {
-               if (call->mod == mod)
+               if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
+                       continue;
+               if (call->module == mod)
                        __trace_remove_event_call(call);
        }
        up_write(&trace_event_sem);
@@ -2982,7 +2987,7 @@ struct trace_event_file *trace_get_event_file(const char *instance,
        }
 
        /* Don't let event modules unload while in use */
-       ret = try_module_get(file->event_call->mod);
+       ret = trace_event_try_get_ref(file->event_call);
        if (!ret) {
                trace_array_put(tr);
                ret = -EBUSY;
@@ -3012,7 +3017,7 @@ EXPORT_SYMBOL_GPL(trace_get_event_file);
 void trace_put_event_file(struct trace_event_file *file)
 {
        mutex_lock(&event_mutex);
-       module_put(file->event_call->mod);
+       trace_event_put_ref(file->event_call);
        mutex_unlock(&event_mutex);
 
        trace_array_put(file->tr);
@@ -3147,7 +3152,7 @@ static int free_probe_data(void *data)
        if (!edata->ref) {
                /* Remove the SOFT_MODE flag */
                __ftrace_event_enable_disable(edata->file, 0, 1);
-               module_put(edata->file->event_call->mod);
+               trace_event_put_ref(edata->file->event_call);
                kfree(edata);
        }
        return 0;
@@ -3280,7 +3285,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
 
  out_reg:
        /* Don't let event modules unload while probe registered */
-       ret = try_module_get(file->event_call->mod);
+       ret = trace_event_try_get_ref(file->event_call);
        if (!ret) {
                ret = -EBUSY;
                goto out_free;
@@ -3310,7 +3315,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
  out_disable:
        __ftrace_event_enable_disable(file, 0, 1);
  out_put:
-       module_put(file->event_call->mod);
+       trace_event_put_ref(file->event_call);
  out_free:
        kfree(data);
        goto out;
@@ -3376,7 +3381,8 @@ void __trace_early_add_events(struct trace_array *tr)
 
        list_for_each_entry(call, &ftrace_events, list) {
                /* Early boot up should not have any modules loaded */
-               if (WARN_ON_ONCE(call->mod))
+               if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
+                   WARN_ON_ONCE(call->module))
                        continue;
 
                ret = __trace_early_add_new_event(call, tr);
index a48aa2a..9d91b1c 100644 (file)
@@ -121,6 +121,7 @@ struct hist_field {
        unsigned int                    size;
        unsigned int                    offset;
        unsigned int                    is_signed;
+       unsigned long                   buckets;
        const char                      *type;
        struct hist_field               *operands[HIST_FIELD_OPERANDS_MAX];
        struct hist_trigger_data        *hist_data;
@@ -219,6 +220,27 @@ static u64 hist_field_log2(struct hist_field *hist_field,
        return (u64) ilog2(roundup_pow_of_two(val));
 }
 
+/*
+ * hist_field ->fn for the "buckets" modifier: evaluate operand 0 and
+ * round the value down to a multiple of hist_field->buckets.
+ *
+ * If the bucket size is zero a one-time warning is raised and the raw
+ * operand value is returned.
+ */
+static u64 hist_field_bucket(struct hist_field *hist_field,
+                            struct tracing_map_elt *elt,
+                            struct trace_buffer *buffer,
+                            struct ring_buffer_event *rbe,
+                            void *event)
+{
+       struct hist_field *src = hist_field->operands[0];
+       unsigned long width = hist_field->buckets;
+       u64 val = src->fn(src, elt, buffer, rbe, event);
+       u64 idx;
+
+       if (WARN_ON_ONCE(!width))
+               return val;
+
+       /* Use the 64-bit divide helper only for values at or above LONG_MAX */
+       if (val >= LONG_MAX)
+               idx = div64_ul(val, width);
+       else
+               idx = (u64)((unsigned long)val / width);
+
+       return idx * width;
+}
+
 static u64 hist_field_plus(struct hist_field *hist_field,
                           struct tracing_map_elt *elt,
                           struct trace_buffer *buffer,
@@ -318,6 +340,7 @@ enum hist_field_flags {
        HIST_FIELD_FL_VAR_REF           = 1 << 14,
        HIST_FIELD_FL_CPU               = 1 << 15,
        HIST_FIELD_FL_ALIAS             = 1 << 16,
+       HIST_FIELD_FL_BUCKET            = 1 << 17,
 };
 
 struct var_defs {
@@ -1109,7 +1132,8 @@ static const char *hist_field_name(struct hist_field *field,
        if (field->field)
                field_name = field->field->name;
        else if (field->flags & HIST_FIELD_FL_LOG2 ||
-                field->flags & HIST_FIELD_FL_ALIAS)
+                field->flags & HIST_FIELD_FL_ALIAS ||
+                field->flags & HIST_FIELD_FL_BUCKET)
                field_name = hist_field_name(field->operands[0], ++level);
        else if (field->flags & HIST_FIELD_FL_CPU)
                field_name = "common_cpu";
@@ -1396,17 +1420,17 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
        struct hist_trigger_data *hist_data = elt->map->private_data;
        unsigned int size = TASK_COMM_LEN;
        struct hist_elt_data *elt_data;
-       struct hist_field *key_field;
+       struct hist_field *hist_field;
        unsigned int i, n_str;
 
        elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
        if (!elt_data)
                return -ENOMEM;
 
-       for_each_hist_key_field(i, hist_data) {
-               key_field = hist_data->fields[i];
+       for_each_hist_field(i, hist_data) {
+               hist_field = hist_data->fields[i];
 
-               if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
+               if (hist_field->flags & HIST_FIELD_FL_EXECNAME) {
                        elt_data->comm = kzalloc(size, GFP_KERNEL);
                        if (!elt_data->comm) {
                                kfree(elt_data);
@@ -1470,6 +1494,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
                flags_str = "syscall";
        else if (hist_field->flags & HIST_FIELD_FL_LOG2)
                flags_str = "log2";
+       else if (hist_field->flags & HIST_FIELD_FL_BUCKET)
+               flags_str = "buckets";
        else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
                flags_str = "usecs";
 
@@ -1590,7 +1616,9 @@ static void __destroy_hist_field(struct hist_field *hist_field)
 
        kfree(hist_field->var.name);
        kfree(hist_field->name);
-       kfree(hist_field->type);
+
+       /* Can likely be a const */
+       kfree_const(hist_field->type);
 
        kfree(hist_field->system);
        kfree(hist_field->event_name);
@@ -1647,9 +1675,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
        if (flags & HIST_FIELD_FL_HITCOUNT) {
                hist_field->fn = hist_field_counter;
                hist_field->size = sizeof(u64);
-               hist_field->type = kstrdup("u64", GFP_KERNEL);
-               if (!hist_field->type)
-                       goto free;
+               hist_field->type = "u64";
                goto out;
        }
 
@@ -1658,12 +1684,13 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
                goto out;
        }
 
-       if (flags & HIST_FIELD_FL_LOG2) {
-               unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
-               hist_field->fn = hist_field_log2;
+       if (flags & (HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET)) {
+               unsigned long fl = flags & ~(HIST_FIELD_FL_LOG2 | HIST_FIELD_FL_BUCKET);
+               hist_field->fn = flags & HIST_FIELD_FL_LOG2 ? hist_field_log2 :
+                       hist_field_bucket;
                hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
                hist_field->size = hist_field->operands[0]->size;
-               hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+               hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL);
                if (!hist_field->type)
                        goto free;
                goto out;
@@ -1672,18 +1699,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
        if (flags & HIST_FIELD_FL_TIMESTAMP) {
                hist_field->fn = hist_field_timestamp;
                hist_field->size = sizeof(u64);
-               hist_field->type = kstrdup("u64", GFP_KERNEL);
-               if (!hist_field->type)
-                       goto free;
+               hist_field->type = "u64";
                goto out;
        }
 
        if (flags & HIST_FIELD_FL_CPU) {
                hist_field->fn = hist_field_cpu;
                hist_field->size = sizeof(int);
-               hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
-               if (!hist_field->type)
-                       goto free;
+               hist_field->type = "unsigned int";
                goto out;
        }
 
@@ -1696,7 +1719,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
                flags |= HIST_FIELD_FL_STRING;
 
                hist_field->size = MAX_FILTER_STR_VAL;
-               hist_field->type = kstrdup(field->type, GFP_KERNEL);
+               hist_field->type = kstrdup_const(field->type, GFP_KERNEL);
                if (!hist_field->type)
                        goto free;
 
@@ -1709,7 +1732,7 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
        } else {
                hist_field->size = field->size;
                hist_field->is_signed = field->is_signed;
-               hist_field->type = kstrdup(field->type, GFP_KERNEL);
+               hist_field->type = kstrdup_const(field->type, GFP_KERNEL);
                if (!hist_field->type)
                        goto free;
 
@@ -1795,7 +1818,7 @@ static int init_var_ref(struct hist_field *ref_field,
                }
        }
 
-       ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+       ref_field->type = kstrdup_const(var_field->type, GFP_KERNEL);
        if (!ref_field->type) {
                err = -ENOMEM;
                goto free;
@@ -1953,7 +1976,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
 
 static struct ftrace_event_field *
 parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
-           char *field_str, unsigned long *flags)
+           char *field_str, unsigned long *flags, unsigned long *buckets)
 {
        struct ftrace_event_field *field = NULL;
        char *field_name, *modifier, *str;
@@ -1980,7 +2003,22 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
                        *flags |= HIST_FIELD_FL_LOG2;
                else if (strcmp(modifier, "usecs") == 0)
                        *flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
-               else {
+               else if (strncmp(modifier, "bucket", 6) == 0) {
+                       int ret;
+
+                       modifier += 6;
+
+                       if (*modifier == 's')
+                               modifier++;
+                       if (*modifier != '=')
+                               goto error;
+                       modifier++;
+                       ret = kstrtoul(modifier, 0, buckets);
+                       if (ret || !(*buckets))
+                               goto error;
+                       *flags |= HIST_FIELD_FL_BUCKET;
+               } else {
+ error:
                        hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
                        field = ERR_PTR(-EINVAL);
                        goto out;
@@ -2049,6 +2087,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
        char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
        struct ftrace_event_field *field = NULL;
        struct hist_field *hist_field = NULL;
+       unsigned long buckets = 0;
        int ret = 0;
 
        s = strchr(str, '.');
@@ -2086,7 +2125,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
        } else
                str = s;
 
-       field = parse_field(hist_data, file, str, flags);
+       field = parse_field(hist_data, file, str, flags, &buckets);
        if (IS_ERR(field)) {
                ret = PTR_ERR(field);
                goto out;
@@ -2097,6 +2136,7 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
                ret = -ENOMEM;
                goto out;
        }
+       hist_field->buckets = buckets;
 
        return hist_field;
  out:
@@ -2171,7 +2211,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
        expr->operands[0] = operand1;
        expr->operator = FIELD_OP_UNARY_MINUS;
        expr->name = expr_str(expr, 0);
-       expr->type = kstrdup(operand1->type, GFP_KERNEL);
+       expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
        if (!expr->type) {
                ret = -ENOMEM;
                goto free;
@@ -2311,7 +2351,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 
        expr->operator = field_op;
        expr->name = expr_str(expr, 0);
-       expr->type = kstrdup(operand1->type, GFP_KERNEL);
+       expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
        if (!expr->type) {
                ret = -ENOMEM;
                goto free;
@@ -2699,10 +2739,10 @@ static struct hist_field *create_var(struct hist_trigger_data *hist_data,
        var->var.hist_data = var->hist_data = hist_data;
        var->size = size;
        var->var.name = kstrdup(name, GFP_KERNEL);
-       var->type = kstrdup(type, GFP_KERNEL);
+       var->type = kstrdup_const(type, GFP_KERNEL);
        if (!var->var.name || !var->type) {
+               kfree_const(var->type);
                kfree(var->var.name);
-               kfree(var->type);
                kfree(var);
                var = ERR_PTR(-ENOMEM);
        }
@@ -3731,6 +3771,41 @@ static int create_val_field(struct hist_trigger_data *hist_data,
        return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
 }
 
+/* Placeholder returned when no comm string is available */
+static const char *no_comm = "(no comm)";
+
+/*
+ * hist_field ->fn returning, as a u64, the address of the comm string
+ * stored in the element's private hist_elt_data. Falls back to no_comm,
+ * with a one-time warning, when @elt or its comm buffer is missing.
+ */
+static u64 hist_field_execname(struct hist_field *hist_field,
+                              struct tracing_map_elt *elt,
+                              struct trace_buffer *buffer,
+                              struct ring_buffer_event *rbe,
+                              void *event)
+{
+       struct hist_elt_data *elt_data;
+       const char *comm = no_comm;
+
+       if (!WARN_ON_ONCE(!elt)) {
+               elt_data = elt->private_data;
+               if (!WARN_ON_ONCE(!elt_data->comm))
+                       comm = elt_data->comm;
+       }
+
+       return (u64)(unsigned long)comm;
+}
+
+/*
+ * Turn a variable that refers to common_pid.execname into a string
+ * variable: swap its type for "char[]", size it as a filter string and
+ * make it read its value through hist_field_execname().
+ */
+static void update_var_execname(struct hist_field *hist_field)
+{
+       /* Drop the previous type string before pointing at the literal */
+       kfree_const(hist_field->type);
+       hist_field->type = "char[]";
+
+       hist_field->fn = hist_field_execname;
+       hist_field->is_signed = 0;
+       hist_field->size = MAX_FILTER_STR_VAL;
+       hist_field->flags = HIST_FIELD_FL_STRING | HIST_FIELD_FL_VAR |
+               HIST_FIELD_FL_EXECNAME;
+}
+
 static int create_var_field(struct hist_trigger_data *hist_data,
                            unsigned int val_idx,
                            struct trace_event_file *file,
@@ -3755,6 +3830,9 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 
        ret = __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
 
+       if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_EXECNAME)
+               update_var_execname(hist_data->fields[val_idx]);
+
        if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
                hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
 
@@ -4698,6 +4776,11 @@ static void hist_trigger_print_key(struct seq_file *m,
                } else if (key_field->flags & HIST_FIELD_FL_LOG2) {
                        seq_printf(m, "%s: ~ 2^%-2llu", field_name,
                                   *(u64 *)(key + key_field->offset));
+               } else if (key_field->flags & HIST_FIELD_FL_BUCKET) {
+                       unsigned long buckets = key_field->buckets;
+                       uval = *(u64 *)(key + key_field->offset);
+                       seq_printf(m, "%s: ~ %llu-%llu", field_name,
+                                  uval, uval + buckets -1);
                } else if (key_field->flags & HIST_FIELD_FL_STRING) {
                        seq_printf(m, "%s: %-50s", field_name,
                                   (char *)(key + key_field->offset));
@@ -5137,6 +5220,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
                                seq_printf(m, ".%s", flags);
                }
        }
+       if (hist_field->buckets)
+               seq_printf(m, "=%ld", hist_field->buckets);
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
index 9315fc0..d54094b 100644 (file)
@@ -1298,7 +1298,7 @@ static int __create_synth_event(const char *name, const char *raw_fields)
        }
        ret = register_synth_event(event);
        if (!ret)
-               dyn_event_add(&event->devent);
+               dyn_event_add(&event->devent, &event->call);
        else
                free_synth_event(event);
  out:
@@ -1369,13 +1369,15 @@ static int destroy_synth_event(struct synth_event *se)
        int ret;
 
        if (se->ref)
-               ret = -EBUSY;
-       else {
-               ret = unregister_synth_event(se);
-               if (!ret) {
-                       dyn_event_remove(&se->devent);
-                       free_synth_event(se);
-               }
+               return -EBUSY;
+
+       if (trace_event_dyn_busy(&se->call))
+               return -EBUSY;
+
+       ret = unregister_synth_event(se);
+       if (!ret) {
+               dyn_event_remove(&se->devent);
+               free_synth_event(se);
        }
 
        return ret;
@@ -2102,6 +2104,9 @@ static int synth_event_release(struct dyn_event *ev)
        if (event->ref)
                return -EBUSY;
 
+       if (trace_event_dyn_busy(&event->call))
+               return -EBUSY;
+
        ret = unregister_synth_event(event);
        if (ret)
                return ret;
index cf84d0f..3d5c072 100644 (file)
@@ -124,6 +124,18 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
        return seq_list_next(t, &event_file->triggers, pos);
 }
 
+/*
+ * Report whether @file carries at least one trigger that is not flagged
+ * EVENT_TRIGGER_FL_PROBE.  Returns false when the trigger list is empty
+ * or holds only probe-flagged entries.
+ */
+static bool check_user_trigger(struct trace_event_file *file)
+{
+       struct event_trigger_data *trigger;
+
+       list_for_each_entry_rcu(trigger, &file->triggers, list) {
+               /* The first trigger without the PROBE flag settles it. */
+               if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
+                       return true;
+       }
+
+       return false;
+}
+
 static void *trigger_start(struct seq_file *m, loff_t *pos)
 {
        struct trace_event_file *event_file;
@@ -134,7 +146,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos)
        if (unlikely(!event_file))
                return ERR_PTR(-ENODEV);
 
-       if (list_empty(&event_file->triggers))
+       if (list_empty(&event_file->triggers) || !check_user_trigger(event_file))
                return *pos == 0 ? SHOW_AVAILABLE_TRIGGERS : NULL;
 
        return seq_list_start(&event_file->triggers, *pos);
@@ -1334,7 +1346,7 @@ void event_enable_trigger_free(struct event_trigger_ops *ops,
        if (!data->ref) {
                /* Remove the SOFT_MODE flag */
                trace_event_enable_disable(enable_data->file, 0, 1);
-               module_put(enable_data->file->event_call->mod);
+               trace_event_put_ref(enable_data->file->event_call);
                trigger_data_free(data);
                kfree(enable_data);
        }
@@ -1481,7 +1493,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
 
  out_reg:
        /* Don't let event modules unload while probe registered */
-       ret = try_module_get(event_enable_file->event_call->mod);
+       ret = trace_event_try_get_ref(event_enable_file->event_call);
        if (!ret) {
                ret = -EBUSY;
                goto out_free;
@@ -1510,7 +1522,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
  out_disable:
        trace_event_enable_disable(event_enable_file, 0, 1);
  out_put:
-       module_put(event_enable_file->event_call->mod);
+       trace_event_put_ref(event_enable_file->event_call);
  out_free:
        if (cmd_ops->set_filter)
                cmd_ops->set_filter(NULL, trigger_data, NULL);
index 14f46aa..1b83d75 100644 (file)
@@ -325,10 +325,10 @@ static void move_to_next_cpu(void)
        if (!cpumask_equal(current_mask, current->cpus_ptr))
                goto change_mode;
 
-       get_online_cpus();
+       cpus_read_lock();
        cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
        next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
-       put_online_cpus();
+       cpus_read_unlock();
 
        if (next_cpu >= nr_cpu_ids)
                next_cpu = cpumask_first(current_mask);
@@ -398,7 +398,7 @@ static void stop_single_kthread(void)
        struct hwlat_kthread_data *kdata = get_cpu_data();
        struct task_struct *kthread;
 
-       get_online_cpus();
+       cpus_read_lock();
        kthread = kdata->kthread;
 
        if (!kthread)
@@ -408,7 +408,7 @@ static void stop_single_kthread(void)
        kdata->kthread = NULL;
 
 out_put_cpus:
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 
@@ -425,14 +425,14 @@ static int start_single_kthread(struct trace_array *tr)
        struct task_struct *kthread;
        int next_cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        if (kdata->kthread)
                goto out_put_cpus;
 
        kthread = kthread_create(kthread_fn, NULL, "hwlatd");
        if (IS_ERR(kthread)) {
                pr_err(BANNER "could not start sampling thread\n");
-               put_online_cpus();
+               cpus_read_unlock();
                return -ENOMEM;
        }
 
@@ -452,7 +452,7 @@ static int start_single_kthread(struct trace_array *tr)
        wake_up_process(kthread);
 
 out_put_cpus:
-       put_online_cpus();
+       cpus_read_unlock();
        return 0;
 }
 
@@ -479,10 +479,10 @@ static void stop_per_cpu_kthreads(void)
 {
        unsigned int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        for_each_online_cpu(cpu)
                stop_cpu_kthread(cpu);
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /*
@@ -515,7 +515,7 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
 
        mutex_lock(&trace_types_lock);
        mutex_lock(&hwlat_data.lock);
-       get_online_cpus();
+       cpus_read_lock();
 
        if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
                goto out_unlock;
@@ -526,7 +526,7 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
        start_cpu_kthread(cpu);
 
 out_unlock:
-       put_online_cpus();
+       cpus_read_unlock();
        mutex_unlock(&hwlat_data.lock);
        mutex_unlock(&trace_types_lock);
 }
@@ -582,7 +582,7 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
        unsigned int cpu;
        int retval;
 
-       get_online_cpus();
+       cpus_read_lock();
        /*
         * Run only on CPUs in which hwlat is allowed to run.
         */
@@ -596,12 +596,12 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
                if (retval)
                        goto out_error;
        }
-       put_online_cpus();
+       cpus_read_unlock();
 
        return 0;
 
 out_error:
-       put_online_cpus();
+       cpus_read_unlock();
        stop_per_cpu_kthreads();
        return retval;
 }
index ea6178c..3a64ba4 100644 (file)
@@ -80,10 +80,6 @@ static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
        for_each_dyn_event(dpos)                \
                if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
 
-#define SIZEOF_TRACE_KPROBE(n)                         \
-       (offsetof(struct trace_kprobe, tp.args) +       \
-       (sizeof(struct probe_arg) * (n)))
-
 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
        return tk->rp.handler != NULL;
@@ -265,7 +261,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
        struct trace_kprobe *tk;
        int ret = -ENOMEM;
 
-       tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
+       tk = kzalloc(struct_size(tk, tp.args, nargs), GFP_KERNEL);
        if (!tk)
                return ERR_PTR(ret);
 
@@ -543,6 +539,10 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk)
        if (trace_probe_is_enabled(&tk->tp))
                return -EBUSY;
 
+       /* If there's a reference to the dynamic event */
+       if (trace_event_dyn_busy(trace_probe_event_call(&tk->tp)))
+               return -EBUSY;
+
        /* Will fail if probe is being used by ftrace or perf */
        if (unregister_kprobe_event(tk))
                return -EBUSY;
@@ -618,7 +618,7 @@ static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
        if (ret)
                trace_probe_unlink(&tk->tp);
        else
-               dyn_event_add(&tk->devent);
+               dyn_event_add(&tk->devent, trace_probe_event_call(&tk->tp));
 
        return ret;
 }
@@ -647,7 +647,11 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
        /* Register new event */
        ret = register_kprobe_event(tk);
        if (ret) {
-               pr_warn("Failed to register probe event(%d)\n", ret);
+               if (ret == -EEXIST) {
+                       trace_probe_log_set_index(0);
+                       trace_probe_log_err(0, EVENT_EXIST);
+               } else
+                       pr_warn("Failed to register probe event(%d)\n", ret);
                goto end;
        }
 
@@ -661,7 +665,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
        if (ret < 0)
                unregister_kprobe_event(tk);
        else
-               dyn_event_add(&tk->devent);
+               dyn_event_add(&tk->devent, trace_probe_event_call(&tk->tp));
 
 end:
        mutex_unlock(&event_mutex);
@@ -703,14 +707,6 @@ static struct notifier_block trace_kprobe_module_nb = {
        .priority = 1   /* Invoked after kprobe module callback */
 };
 
-/* Convert certain expected symbols into '_' when generating event names */
-static inline void sanitize_event_name(char *name)
-{
-       while (*name++ != '\0')
-               if (*name == ':' || *name == '.')
-                       *name = '_';
-}
-
 static int __trace_kprobe_create(int argc, const char *argv[])
 {
        /*
@@ -742,6 +738,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
        bool is_return = false;
        char *symbol = NULL, *tmp = NULL;
        const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+       enum probe_print_type ptype;
        int maxactive = 0;
        long offset = 0;
        void *addr = NULL;
@@ -869,20 +866,14 @@ static int __trace_kprobe_create(int argc, const char *argv[])
 
        /* parse arguments */
        for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
-               tmp = kstrdup(argv[i], GFP_KERNEL);
-               if (!tmp) {
-                       ret = -ENOMEM;
-                       goto error;
-               }
-
                trace_probe_log_set_index(i + 2);
-               ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
-               kfree(tmp);
+               ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], flags);
                if (ret)
                        goto error;     /* This can be -ENOMEM */
        }
 
-       ret = traceprobe_set_print_fmt(&tk->tp, is_return);
+       ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+       ret = traceprobe_set_print_fmt(&tk->tp, ptype);
        if (ret < 0)
                goto error;
 
@@ -1330,9 +1321,10 @@ probe_mem_read(void *dest, void *src, size_t size)
 
 /* Note that we don't verify it, since the code does not come from user space */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
                   void *base)
 {
+       struct pt_regs *regs = rec;
        unsigned long val;
 
 retry:
@@ -1806,6 +1798,7 @@ struct trace_event_call *
 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
                          bool is_return)
 {
+       enum probe_print_type ptype;
        struct trace_kprobe *tk;
        int ret;
        char *event;
@@ -1829,7 +1822,9 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 
        init_trace_event_call(tk);
 
-       if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+       ptype = trace_kprobe_is_return(tk) ?
+               PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+       if (traceprobe_set_print_fmt(&tk->tp, ptype) < 0) {
                ret = -ENOMEM;
                goto error;
        }
index b61eefe..65b08b8 100644 (file)
@@ -1498,12 +1498,12 @@ static void stop_per_cpu_kthreads(void)
 {
        int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
 
        for_each_online_cpu(cpu)
                stop_kthread(cpu);
 
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /*
@@ -1551,7 +1551,7 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
        int retval;
        int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        /*
         * Run only on CPUs in which trace and osnoise are allowed to run.
         */
@@ -1572,7 +1572,7 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
                }
        }
 
-       put_online_cpus();
+       cpus_read_unlock();
 
        return 0;
 }
@@ -1590,7 +1590,7 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
                goto out_unlock_trace;
 
        mutex_lock(&interface_lock);
-       get_online_cpus();
+       cpus_read_lock();
 
        if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
                goto out_unlock;
@@ -1601,7 +1601,7 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
        start_kthread(cpu);
 
 out_unlock:
-       put_online_cpus();
+       cpus_read_unlock();
        mutex_unlock(&interface_lock);
 out_unlock_trace:
        mutex_unlock(&trace_types_lock);
@@ -1743,11 +1743,11 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
        /*
         * osnoise_cpumask is read by CPU hotplug operations.
         */
-       get_online_cpus();
+       cpus_read_lock();
 
        cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
 
-       put_online_cpus();
+       cpus_read_unlock();
        mutex_unlock(&interface_lock);
 
        if (running)
index 15413ad..3ed2a3f 100644 (file)
@@ -233,6 +233,9 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
        int len;
 
        slash = strchr(event, '/');
+       if (!slash)
+               slash = strchr(event, '.');
+
        if (slash) {
                if (slash == event) {
                        trace_probe_log_err(offset, NO_GROUP_NAME);
@@ -316,6 +319,13 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
                code->op = FETCH_OP_ARG;
                code->param = (unsigned int)param - 1;
 #endif
+       } else if (flags & TPARG_FL_TPOINT) {
+               if (code->data)
+                       return -EFAULT;
+               code->data = kstrdup(arg, GFP_KERNEL);
+               if (!code->data)
+                       return -ENOMEM;
+               code->op = FETCH_OP_TP_ARG;
        } else
                goto inval_var;
 
@@ -540,26 +550,34 @@ static int __parse_bitfield_probe_arg(const char *bf,
 }
 
 /* String length checking wrapper */
-static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
+static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
                struct probe_arg *parg, unsigned int flags, int offset)
 {
        struct fetch_insn *code, *scode, *tmp = NULL;
        char *t, *t2, *t3;
+       char *arg;
        int ret, len;
 
+       arg = kstrdup(argv, GFP_KERNEL);
+       if (!arg)
+               return -ENOMEM;
+
+       ret = -EINVAL;
        len = strlen(arg);
        if (len > MAX_ARGSTR_LEN) {
                trace_probe_log_err(offset, ARG_TOO_LONG);
-               return -EINVAL;
+               goto out;
        } else if (len == 0) {
                trace_probe_log_err(offset, NO_ARG_BODY);
-               return -EINVAL;
+               goto out;
        }
 
+       ret = -ENOMEM;
        parg->comm = kstrdup(arg, GFP_KERNEL);
        if (!parg->comm)
-               return -ENOMEM;
+               goto out;
 
+       ret = -EINVAL;
        t = strchr(arg, ':');
        if (t) {
                *t = '\0';
@@ -571,22 +589,22 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                                offset += t2 + strlen(t2) - arg;
                                trace_probe_log_err(offset,
                                                    ARRAY_NO_CLOSE);
-                               return -EINVAL;
+                               goto out;
                        } else if (t3[1] != '\0') {
                                trace_probe_log_err(offset + t3 + 1 - arg,
                                                    BAD_ARRAY_SUFFIX);
-                               return -EINVAL;
+                               goto out;
                        }
                        *t3 = '\0';
                        if (kstrtouint(t2, 0, &parg->count) || !parg->count) {
                                trace_probe_log_err(offset + t2 - arg,
                                                    BAD_ARRAY_NUM);
-                               return -EINVAL;
+                               goto out;
                        }
                        if (parg->count > MAX_ARRAY_LEN) {
                                trace_probe_log_err(offset + t2 - arg,
                                                    ARRAY_TOO_BIG);
-                               return -EINVAL;
+                               goto out;
                        }
                }
        }
@@ -598,29 +616,30 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
        if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
                /* The type of $comm must be "string", and not an array. */
                if (parg->count || (t && strcmp(t, "string")))
-                       return -EINVAL;
+                       goto out;
                parg->type = find_fetch_type("string");
        } else
                parg->type = find_fetch_type(t);
        if (!parg->type) {
                trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE);
-               return -EINVAL;
+               goto out;
        }
        parg->offset = *size;
        *size += parg->type->size * (parg->count ?: 1);
 
+       ret = -ENOMEM;
        if (parg->count) {
                len = strlen(parg->type->fmttype) + 6;
                parg->fmt = kmalloc(len, GFP_KERNEL);
                if (!parg->fmt)
-                       return -ENOMEM;
+                       goto out;
                snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
                         parg->count);
        }
 
        code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
        if (!code)
-               return -ENOMEM;
+               goto out;
        code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
 
        ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
@@ -628,19 +647,20 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
        if (ret)
                goto fail;
 
+       ret = -EINVAL;
        /* Store operation */
        if (!strcmp(parg->type->name, "string") ||
            !strcmp(parg->type->name, "ustring")) {
                if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
                    code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
-                   code->op != FETCH_OP_DATA) {
+                   code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
                        trace_probe_log_err(offset + (t ? (t - arg) : 0),
                                            BAD_STRING);
-                       ret = -EINVAL;
                        goto fail;
                }
                if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
-                    code->op == FETCH_OP_DATA) || parg->count) {
+                    code->op == FETCH_OP_DATA) || code->op == FETCH_OP_TP_ARG ||
+                    parg->count) {
                        /*
                         * IMM, DATA and COMM is pointing actual address, those
                         * must be kept, and if parg->count != 0, this is an
@@ -650,7 +670,6 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                        code++;
                        if (code->op != FETCH_OP_NOP) {
                                trace_probe_log_err(offset, TOO_MANY_OPS);
-                               ret = -EINVAL;
                                goto fail;
                        }
                }
@@ -672,7 +691,6 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                code++;
                if (code->op != FETCH_OP_NOP) {
                        trace_probe_log_err(offset, TOO_MANY_OPS);
-                       ret = -EINVAL;
                        goto fail;
                }
                code->op = FETCH_OP_ST_RAW;
@@ -687,6 +705,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                        goto fail;
                }
        }
+       ret = -EINVAL;
        /* Loop(Array) operation */
        if (parg->count) {
                if (scode->op != FETCH_OP_ST_MEM &&
@@ -694,13 +713,11 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                    scode->op != FETCH_OP_ST_USTRING) {
                        trace_probe_log_err(offset + (t ? (t - arg) : 0),
                                            BAD_STRING);
-                       ret = -EINVAL;
                        goto fail;
                }
                code++;
                if (code->op != FETCH_OP_NOP) {
                        trace_probe_log_err(offset, TOO_MANY_OPS);
-                       ret = -EINVAL;
                        goto fail;
                }
                code->op = FETCH_OP_LP_ARRAY;
@@ -709,6 +726,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
        code++;
        code->op = FETCH_OP_END;
 
+       ret = 0;
        /* Shrink down the code buffer */
        parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL);
        if (!parg->code)
@@ -724,6 +742,8 @@ fail:
                                kfree(code->data);
        }
        kfree(tmp);
+out:
+       kfree(arg);
 
        return ret;
 }
@@ -745,11 +765,11 @@ static int traceprobe_conflict_field_name(const char *name,
        return 0;
 }
 
-int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg,
+int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
                                unsigned int flags)
 {
        struct probe_arg *parg = &tp->args[i];
-       char *body;
+       const char *body;
 
        /* Increment count for freeing args in error case */
        tp->nr_args++;
@@ -839,19 +859,29 @@ int traceprobe_update_arg(struct probe_arg *arg)
 /* When len=0, we just calculate the needed length */
 #define LEN_OR_ZERO (len ? len - pos : 0)
 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
-                          bool is_return)
+                          enum probe_print_type ptype)
 {
        struct probe_arg *parg;
        int i, j;
        int pos = 0;
        const char *fmt, *arg;
 
-       if (!is_return) {
+       switch (ptype) {
+       case PROBE_PRINT_NORMAL:
                fmt = "(%lx)";
                arg = "REC->" FIELD_STRING_IP;
-       } else {
+               break;
+       case PROBE_PRINT_RETURN:
                fmt = "(%lx <- %lx)";
                arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+               break;
+       case PROBE_PRINT_EVENT:
+               fmt = "(%u)";
+               arg = "REC->" FIELD_STRING_TYPE;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return 0;
        }
 
        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
@@ -900,20 +930,20 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
 }
 #undef LEN_OR_ZERO
 
-int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
+int traceprobe_set_print_fmt(struct trace_probe *tp, enum probe_print_type ptype)
 {
        struct trace_event_call *call = trace_probe_event_call(tp);
        int len;
        char *print_fmt;
 
        /* First: called with 0 length to calculate the needed length */
-       len = __set_print_fmt(tp, NULL, 0, is_return);
+       len = __set_print_fmt(tp, NULL, 0, ptype);
        print_fmt = kmalloc(len + 1, GFP_KERNEL);
        if (!print_fmt)
                return -ENOMEM;
 
        /* Second: actually write the @print_fmt */
-       __set_print_fmt(tp, print_fmt, len + 1, is_return);
+       __set_print_fmt(tp, print_fmt, len + 1, ptype);
        call->print_fmt = print_fmt;
 
        return 0;
@@ -1029,11 +1059,36 @@ error:
        return ret;
 }
 
+/*
+ * Search ftrace_events for the event call whose class system equals
+ * @system and whose event name equals @event_name.  Entries that have
+ * no system string, or no name, never match.
+ *
+ * Returns the matching trace_event_call, or NULL if there is none.
+ */
+static struct trace_event_call *
+find_trace_event_call(const char *system, const char *event_name)
+{
+       struct trace_event_call *call;
+
+       list_for_each_entry(call, &ftrace_events, list) {
+               const char *call_system = call->class->system;
+               const char *call_name;
+
+               /* Only look at the name once the system matches. */
+               if (!call_system || strcmp(system, call_system) != 0)
+                       continue;
+
+               call_name = trace_event_name(call);
+               if (call_name && strcmp(event_name, call_name) == 0)
+                       return call;
+       }
+
+       return NULL;
+}
+
 int trace_probe_register_event_call(struct trace_probe *tp)
 {
        struct trace_event_call *call = trace_probe_event_call(tp);
        int ret;
 
+       lockdep_assert_held(&event_mutex);
+
+       if (find_trace_event_call(trace_probe_group_name(tp),
+                                 trace_probe_name(tp)))
+               return -EEXIST;
+
        ret = register_trace_event(&call->event);
        if (!ret)
                return -ENODEV;
index 227d518..99e7a5d 100644 (file)
@@ -38,6 +38,7 @@
 #define FIELD_STRING_IP                "__probe_ip"
 #define FIELD_STRING_RETIP     "__probe_ret_ip"
 #define FIELD_STRING_FUNC      "__probe_func"
+#define FIELD_STRING_TYPE      "__probe_type"
 
 #undef DEFINE_FIELD
 #define DEFINE_FIELD(type, item, name, is_signed)                      \
@@ -102,6 +103,7 @@ enum fetch_op {
        FETCH_OP_MOD_BF,        /* Bitfield: .basesize, .lshift, .rshift */
        // Stage 5 (loop) op
        FETCH_OP_LP_ARRAY,      /* Array: .param = loop count */
+       FETCH_OP_TP_ARG,        /* Trace Point argument */
        FETCH_OP_END,
        FETCH_NOP_SYMBOL,       /* Unresolved Symbol holder */
 };
@@ -351,10 +353,11 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
 #define TPARG_FL_RETURN BIT(0)
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)
-#define TPARG_FL_MASK  GENMASK(2, 0)
+#define TPARG_FL_TPOINT BIT(3)
+#define TPARG_FL_MASK  GENMASK(3, 0)
 
 extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
-                               char *arg, unsigned int flags);
+                               const char *argv, unsigned int flags);
 
 extern int traceprobe_update_arg(struct probe_arg *arg);
 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
@@ -363,7 +366,13 @@ extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
 int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
                                char *buf, int offset);
 
-extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return);
+/*
+ * Selects the leading part of the print format that
+ * traceprobe_set_print_fmt() builds for a probe event.
+ */
+enum probe_print_type {
+       /* "(%lx)" printed from the FIELD_STRING_IP field */
+       PROBE_PRINT_NORMAL,
+       /* "(%lx <- %lx)" printed from FIELD_STRING_FUNC and FIELD_STRING_RETIP */
+       PROBE_PRINT_RETURN,
+       /* "(%u)" printed from the FIELD_STRING_TYPE field */
+       PROBE_PRINT_EVENT,
+};
+
+extern int traceprobe_set_print_fmt(struct trace_probe *tp, enum probe_print_type ptype);
 
 #ifdef CONFIG_PERF_EVENTS
 extern struct trace_event_call *
@@ -399,6 +408,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
        C(NO_EVENT_NAME,        "Event name is not specified"),         \
        C(EVENT_TOO_LONG,       "Event name is too long"),              \
        C(BAD_EVENT_NAME,       "Event name must follow the same rules as C identifiers"), \
+       C(EVENT_EXIST,          "Given group/event name is already used by another event"), \
        C(RETVAL_ON_PROBE,      "$retval is not available on probe"),   \
        C(BAD_STACK_NUM,        "Invalid stack number"),                \
        C(BAD_ARG_NUM,          "Invalid argument number"),             \
index f003c5d..b3bdb8d 100644 (file)
@@ -54,7 +54,7 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf)
  * If dest is NULL, don't store result and return required dynamic data size.
  */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
+process_fetch_insn(struct fetch_insn *code, void *rec,
                   void *dest, void *base);
 static nokprobe_inline int fetch_store_strlen(unsigned long addr);
 static nokprobe_inline int
@@ -188,7 +188,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
 
 /* Store the value of each argument */
 static nokprobe_inline void
-store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
+store_trace_args(void *data, struct trace_probe *tp, void *rec,
                 int header_size, int maxlen)
 {
        struct probe_arg *arg;
@@ -203,7 +203,7 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
                /* Point the dynamic data area if needed */
                if (unlikely(arg->dynamic))
                        *dl = make_data_loc(maxlen, dyndata - base);
-               ret = process_fetch_insn(arg->code, regs, dl, base);
+               ret = process_fetch_insn(arg->code, rec, dl, base);
                if (unlikely(ret < 0 && arg->dynamic)) {
                        *dl = make_data_loc(0, dyndata - base);
                } else {
index 9b50869..225ce56 100644 (file)
@@ -83,10 +83,6 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
        for_each_dyn_event(dpos)                \
                if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
 
-#define SIZEOF_TRACE_UPROBE(n)                         \
-       (offsetof(struct trace_uprobe, tp.args) +       \
-       (sizeof(struct probe_arg) * (n)))
-
 static int register_uprobe_event(struct trace_uprobe *tu);
 static int unregister_uprobe_event(struct trace_uprobe *tu);
 
@@ -217,9 +213,10 @@ static unsigned long translate_user_vaddr(unsigned long file_offset)
 
 /* Note that we don't verify it, since the code does not come from user space */
 static int
-process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
                   void *base)
 {
+       struct pt_regs *regs = rec;
        unsigned long val;
 
        /* 1st stage: get value from context */
@@ -340,7 +337,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
        struct trace_uprobe *tu;
        int ret;
 
-       tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
+       tu = kzalloc(struct_size(tu, tp.args, nargs), GFP_KERNEL);
        if (!tu)
                return ERR_PTR(-ENOMEM);
 
@@ -393,6 +390,10 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
        if (trace_probe_has_sibling(&tu->tp))
                goto unreg;
 
+       /* If there's a reference to the dynamic event */
+       if (trace_event_dyn_busy(trace_probe_event_call(&tu->tp)))
+               return -EBUSY;
+
        ret = unregister_uprobe_event(tu);
        if (ret)
                return ret;
@@ -455,7 +456,7 @@ static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to)
        /* Append to existing event */
        ret = trace_probe_append(&tu->tp, &to->tp);
        if (!ret)
-               dyn_event_add(&tu->devent);
+               dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));
 
        return ret;
 }
@@ -514,11 +515,15 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
 
        ret = register_uprobe_event(tu);
        if (ret) {
-               pr_warn("Failed to register probe event(%d)\n", ret);
+               if (ret == -EEXIST) {
+                       trace_probe_log_set_index(0);
+                       trace_probe_log_err(0, EVENT_EXIST);
+               } else
+                       pr_warn("Failed to register probe event(%d)\n", ret);
                goto end;
        }
 
-       dyn_event_add(&tu->devent);
+       dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));
 
 end:
        mutex_unlock(&event_mutex);
@@ -536,6 +541,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
        const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
        char *arg, *filename, *rctr, *rctr_end, *tmp;
        char buf[MAX_EVENT_NAME_LEN];
+       enum probe_print_type ptype;
        struct path path;
        unsigned long offset, ref_ctr_offset;
        bool is_return = false;
@@ -680,21 +686,15 @@ static int __trace_uprobe_create(int argc, const char **argv)
 
        /* parse arguments */
        for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
-               tmp = kstrdup(argv[i], GFP_KERNEL);
-               if (!tmp) {
-                       ret = -ENOMEM;
-                       goto error;
-               }
-
                trace_probe_log_set_index(i + 2);
-               ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp,
+               ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i],
                                        is_return ? TPARG_FL_RETURN : 0);
-               kfree(tmp);
                if (ret)
                        goto error;
        }
 
-       ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu));
+       ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+       ret = traceprobe_set_print_fmt(&tu->tp, ptype);
        if (ret < 0)
                goto error;
 
@@ -1585,6 +1585,7 @@ struct trace_event_call *
 create_local_trace_uprobe(char *name, unsigned long offs,
                          unsigned long ref_ctr_offset, bool is_return)
 {
+       enum probe_print_type ptype;
        struct trace_uprobe *tu;
        struct path path;
        int ret;
@@ -1619,7 +1620,8 @@ create_local_trace_uprobe(char *name, unsigned long offs,
        tu->filename = kstrdup(name, GFP_KERNEL);
        init_trace_event_call(tu);
 
-       if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
+       ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+       if (traceprobe_set_print_fmt(&tu->tp, ptype) < 0) {
                ret = -ENOMEM;
                goto error;
        }
index efd14c7..64ea283 100644 (file)
@@ -577,7 +577,7 @@ bool trace_module_has_bad_taint(struct module *mod)
 static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
 
 /**
- * register_tracepoint_notifier - register tracepoint coming/going notifier
+ * register_tracepoint_module_notifier - register tracepoint coming/going notifier
  * @nb: notifier block
  *
  * Notifiers registered with this function are called on module
@@ -603,7 +603,7 @@ end:
 EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
 
 /**
- * unregister_tracepoint_notifier - unregister tracepoint coming/going notifier
+ * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
  * @nb: notifier block
  *
  * The notifier block callback should expect a "struct tp_module" data
index feb30c2..850c207 100755 (executable)
@@ -94,6 +94,92 @@ compose_synth() { # event_name branch
        xbc_get_val $2 | while read field; do echo -n "$field; "; done
 }
 
+print_hist_array() { # prefix key; prints ":key=v1,v2,..." when xbc_has_key succeeds for prefix.key
+       __sep="=" # "=" goes before the first value, "," before every later one
+       if xbc_has_key ${1}.${2}; then
+               echo -n ":$2"
+               xbc_get_val ${1}.${2} | while read field; do
+                       echo -n "$__sep$field"; __sep=","
+               done
+       fi
+}
+
+print_hist_action_array() { # prefix key; prints ".key" then the values of prefix.key as "(v1,v2,...)"
+       __sep="(" # "(" goes before the first value, "," before every later one
+       echo -n ".$2"
+       xbc_get_val ${1}.${2} | while read field; do
+               echo -n "$__sep$field"; __sep=","
+       done
+       echo -n ")"
+}
+
+print_hist_one_action() { # prefix handler param; prints ":handler(<value of prefix.param>)" plus one action
+       echo -n ":${2}("`xbc_get_val ${1}.${3}`")"
+       if xbc_has_key "${1}.trace"; then # only the first of trace/save/snapshot that is found gets printed
+               print_hist_action_array ${1} "trace"
+       elif xbc_has_key "${1}.save"; then
+               print_hist_action_array ${1} "save"
+       elif xbc_has_key "${1}.snapshot"; then
+               echo -n ".snapshot()"
+       fi
+}
+
+print_hist_actions() { # prefix handler param; prints each action configured under prefix.handler
+       for __hdr in `xbc_subkeys ${1}.${2} 1 ".[0-9]"`; do # one action per subkey selected by the ".[0-9]" pattern
+               print_hist_one_action ${1}.${2}.$__hdr ${2} ${3}
+       done
+       if xbc_has_key ${1}.${2}.${3} ; then # param given directly under prefix.handler
+               print_hist_one_action ${1}.${2} ${2} ${3}
+       fi
+}
+
+print_hist_var() { # prefix varname; prints ":varname=<value of prefix.var.varname, whitespace removed>"
+       echo -n ":${2}="`xbc_get_val ${1}.var.${2} | tr -d '[:space:]'` # class quoted so the shell cannot glob-expand it
+}
+
+print_one_histogram() { # prefix; prints one "hist..." trigger string assembled from the keys under prefix
+       echo -n "hist"
+       print_hist_array $1 "keys"
+       print_hist_array $1 "values"
+       print_hist_array $1 "sort"
+       if xbc_has_key "${1}.size"; then
+               echo -n ":size="`xbc_get_val ${1}.size`
+       fi
+       if xbc_has_key "${1}.name"; then
+               echo -n ":name="`xbc_get_val ${1}.name`
+       fi
+       for __var in `xbc_subkeys "${1}.var" 1`; do # one ":name=value" per subkey of prefix.var
+               print_hist_var ${1} ${__var}
+       done
+       if xbc_has_key "${1}.pause"; then # at most one of pause/continue/clear is printed, tested in this order
+               echo -n ":pause"
+       elif xbc_has_key "${1}.continue"; then
+               echo -n ":continue"
+       elif xbc_has_key "${1}.clear"; then
+               echo -n ":clear"
+       fi
+       print_hist_actions ${1} "onmax" "var"
+       print_hist_actions ${1} "onchange" "var"
+       print_hist_actions ${1} "onmatch" "event"
+
+       if xbc_has_key "${1}.filter"; then # the filter comes last, introduced by " if "
+               echo -n " if "`xbc_get_val ${1}.filter`
+       fi
+}
+
+setup_one_histogram() { # prefix trigger-file; appends the string from print_one_histogram to trigger-file via run_cmd
+       run_cmd "echo '`print_one_histogram ${1}`' >> ${2}"
+}
+
+setup_histograms() { # prefix trigger-file; sets up every histogram described under prefix
+       for __hist in `xbc_subkeys ${1} 1 ".[0-9]"`; do # one histogram per subkey selected by the ".[0-9]" pattern
+               setup_one_histogram ${1}.$__hist ${2}
+       done
+       if xbc_has_key ${1}.keys; then # a histogram defined directly under prefix
+               setup_one_histogram ${1} ${2}
+       fi
+}
+
 setup_event() { # prefix group event [instance]
        branch=$1.$2.$3
        if [ "$4" ]; then
@@ -101,6 +187,12 @@ setup_event() { # prefix group event [instance]
        else
                eventdir="$TRACEFS/events/$2/$3"
        fi
+       # group enable
+       if [ "$3" = "enable" ]; then
+               run_cmd "echo 1 > ${eventdir}"
+               return
+       fi
+
        case $2 in
        kprobes)
                xbc_get_val ${branch}.probes | while read line; do
@@ -115,6 +207,8 @@ setup_event() { # prefix group event [instance]
        set_value_of ${branch}.filter ${eventdir}/filter
        set_array_of ${branch}.actions ${eventdir}/trigger
 
+       setup_histograms ${branch}.hist ${eventdir}/trigger
+
        if xbc_has_key ${branch}.enable; then
                run_cmd "echo 1 > ${eventdir}/enable"
        fi
@@ -127,6 +221,13 @@ setup_events() { # prefix("ftrace" or "ftrace.instance.INSTANCE") [instance]
                        setup_event $prefix ${grpev%.*} ${grpev#*.} $2
                done
        fi
+       if xbc_has_branch ${1}.event.enable; then
+               if [ "$2" ]; then
+                       run_cmd "echo 1 > $TRACEFS/instances/$2/events/enable"
+               else
+                       run_cmd "echo 1 > $TRACEFS/events/enable"
+               fi
+       fi
 }
 
 size2kb() { # size[KB|MB]
index a0c3bcc..fbaf07d 100755 (executable)
@@ -92,6 +92,10 @@ referred_vars() {
        grep "^hist" $1/trigger | grep -o '$[a-zA-Z0-9]*'
 }
 
+event_is_enabled() { # enable-file; succeeds only if the file exists and contains "1"
+       test -f "$1" && grep -q "1" "$1" # "&&" (not "&"): grep must run only after the existence test passes
+}
+
 per_event_options() { # event-dir
        evdir=$1
        # Check the special event which has no filter and no trigger
@@ -113,7 +117,9 @@ per_event_options() { # event-dir
                emit_kv $PREFIX.event.$group.$event.actions += \'$action\'
        done
 
-       # enable is not checked; this is done by set_event in the instance.
+       if [ $GROUP_ENABLED -eq 0 ] && event_is_enabled $evdir/enable; then
+               emit_kv $PREFIX.event.$group.$event.enable
+       fi
        val=`cat $evdir/filter`
        if [ "$val" != "none" ]; then
                emit_kv $PREFIX.event.$group.$event.filter = "$val"
@@ -137,8 +143,19 @@ event_options() {
                kprobe_event_options
                synth_event_options
        fi
+       ALL_ENABLED=0
+       if event_is_enabled $INSTANCE/events/enable; then
+               emit_kv $PREFIX.event.enable
+               ALL_ENABLED=1
+       fi
        for group in `ls $INSTANCE/events/` ; do
                [ ! -d $INSTANCE/events/$group ] && continue
+               GROUP_ENABLED=$ALL_ENABLED
+               if [ $ALL_ENABLED -eq 0 ] && \
+                  event_is_enabled $INSTANCE/events/$group/enable ;then
+                       emit_kv $PREFIX.event.$group.enable
+                       GROUP_ENABLED=1
+               fi
                for event in `ls $INSTANCE/events/$group/` ;do
                        [ ! -d $INSTANCE/events/$group/$event ] && continue
                        per_event_options $INSTANCE/events/$group/$event
@@ -226,11 +243,6 @@ instance_options() { # [instance-name]
                emit_kv $PREFIX.tracing_on = $val
        fi
 
-       val=
-       for i in `cat $INSTANCE/set_event`; do
-               val="$val, $i"
-       done
-       [ "$val" ] && emit_kv $PREFIX.events = "${val#,}"
        val=`cat $INSTANCE/current_tracer`
        [ $val != nop ] && emit_kv $PREFIX.tracer = $val
        if grep -qv "^#" $INSTANCE/set_ftrace_filter $INSTANCE/set_ftrace_notrace; then
index b8c84e6..1f0ebf5 100644 (file)
@@ -49,8 +49,8 @@ xbc_has_branch() { # prefix-key
        grep -q "^$1" $XBC_TMPFILE
 }
 
-xbc_subkeys() { # prefix-key depth
+xbc_subkeys() { # prefix-key depth [subkey-pattern]
        __keys=`echo $1 | sed "s/\./ /g"`
        __s=`nr_args $__keys`
-       grep "^$1" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
+       grep "^$1$3" $XBC_TMPFILE | cut -d= -f1| cut -d. -f$((__s + 1))-$((__s + $2)) | uniq
 }
index 9db64ec..7aa706c 100644 (file)
@@ -10,13 +10,23 @@ ftrace.event {
        }
        synthetic.initcall_latency {
                fields = "unsigned long func", "u64 lat"
-               actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
+               hist {
+                       keys = func.sym,lat
+                       values = lat
+                       sort = lat
+               }
        }
-       initcall.initcall_start {
-               actions = "hist:keys=func:ts0=common_timestamp.usecs"
+       initcall.initcall_start.hist {
+               keys = func;
+               var.ts0 = common_timestamp.usecs
        }
-       initcall.initcall_finish {
-               actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
+       initcall.initcall_finish.hist {
+               keys = func
+               var.lat = common_timestamp.usecs - $ts0
+               onmatch {
+                       event = initcall.initcall_start
+                       trace = initcall_latency, func, $lat
+               }
        }
 }
 
index f271940..233e95c 100755 (executable)
@@ -58,7 +58,7 @@ compare_file_partial "events/synthetic/initcall_latency/enable" "0"
 compare_file_partial "events/initcall/initcall_start/trigger" "hist:keys=func:vals=hitcount:ts0=common_timestamp.usecs"
 compare_file_partial "events/initcall/initcall_start/enable" "1"
 
-compare_file_partial "events/initcall/initcall_finish/trigger" 'hist:keys=func:vals=hitcount:lat=common_timestamp.usecs-\$ts0:sort=hitcount:size=2048:clock=global:onmatch(initcall.initcall_start).initcall_latency(func,\$lat)'
+compare_file_partial "events/initcall/initcall_finish/trigger" 'hist:keys=func:vals=hitcount:lat=common_timestamp.usecs-\$ts0:sort=hitcount:size=2048:clock=global:onmatch(initcall.initcall_start).trace(initcall_latency,func,\$lat)'
 compare_file_partial "events/initcall/initcall_finish/enable" "1"
 
 compare_file "instances/foo/current_tracer" "function"
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
new file mode 100644 (file)
index 0000000..25a3da4
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove eprobe events
+# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+EVENT="sys_enter_openat"
+FIELD="filename"
+EPROBE="eprobe_open"
+
+echo "e:$EPROBE $SYSTEM/$EVENT file=+0(\$filename):ustring" >> dynamic_events
+
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls # run while the eprobe is enabled; its trace lines are inspected below
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+       exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+       exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+
+if grep -q "$EPROBE" dynamic_events; then exit_fail; fi # explicit: a "!"-negated command is exempt from errexit
+if test -d events/eprobes/$EPROBE; then exit_fail; fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
new file mode 100644 (file)
index 0000000..db52257
--- /dev/null
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - check if duplicate events are caught
+# requires: dynamic_events "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+HAVE_KPROBES=0
+
+if [ -f kprobe_events ]; then
+       HAVE_KPROBES=1
+fi
+
+clear_dynamic_events
+
+# first create dynamic events for eprobes and kprobes.
+
+echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test eprobe for same eprobe, existing kprobe and existing event
+if echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events; then exit_fail; fi # explicit: "! cmd" is exempt from errexit
+if echo 'e:syscalls/sys_enter_open syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events; then exit_fail; fi
+
+if [ $HAVE_KPROBES -eq 1 ]; then
+    echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+    if echo 'e:kgroup/kevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events; then exit_fail; fi
+
+# Test kprobe for same kprobe, existing eprobe and existing event
+    if echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events; then exit_fail; fi
+    if echo 'p:egroup/eevent vfs_open file=+0($arg2)' >> dynamic_events; then exit_fail; fi
+    if echo 'p:syscalls/sys_enter_open vfs_open file=+0($arg2)' >> dynamic_events; then exit_fail; fi
+
+    echo '-:kgroup/kevent' >> dynamic_events
+fi
+
+echo '-:egroup/eevent' >> dynamic_events
+
+clear_trace
index a6fac92..000fd05 100644 (file)
@@ -83,6 +83,27 @@ clear_synthetic_events() { # reset all current synthetic events
     done
 }
 
+clear_dynamic_events() { # reset all current dynamic events
+    again=1
+    stop=1
+    # loop multiple times as some events require others to be removed first
+    while [ $again -eq 1 ]; do
+       stop=$((stop+1))
+       # Prevent infinite loops
+       if [ $stop -gt 10 ]; then
+           break;
+       fi
+       again=2
+       # the reader runs in a subshell: report failed removals via its exit status
+       grep -v '^#' dynamic_events | (failed=0
+       while read line; do
+           del=`echo $line | sed -e 's/^.\([^ ]*\).*/-\1/'`
+           echo "$del" >> dynamic_events || failed=1
+       done
+       exit $failed) || again=1
+    done
+}
+
 initialize_ftrace() { # Reset ftrace to initial-state
 # As the initial state, ftrace will be set to nop tracer,
 # no events, no triggers, no filters, no function filters,
@@ -93,6 +114,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
     reset_events_filter
     reset_ftrace_filter
     disable_events
+    clear_dynamic_events
     [ -f set_event_pid ] && echo > set_event_pid
     [ -f set_ftrace_pid ] && echo > set_ftrace_pid
     [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
@@ -115,7 +137,7 @@ check_requires() { # Check required files and tracers
                 echo "Required tracer $t is not configured."
                 exit_unsupported
             fi
-        elif [ $r != $i ]; then
+        elif [ "$r" != "$i" ]; then
             if ! grep -Fq "$r" README ; then
                 echo "Required feature pattern \"$r\" is not in README."
                 exit_unsupported
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
new file mode 100644 (file)
index 0000000..914fe2e
--- /dev/null
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger eprobe on synthetic event
+# requires: dynamic_events synthetic_events events/syscalls/sys_enter_openat/hist "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+START="sys_enter_openat"
+END="sys_exit_openat"
+FIELD="filename"
+SYNTH="synth_open"
+EPROBE="eprobe_open"
+
+echo "$SYNTH u64 filename; s64 ret;" > synthetic_events
+echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+
+echo "e:$EPROBE synthetic/$SYNTH file=+0(\$filename):ustring ret=\$ret:s64" >> dynamic_events
+
+grep -q "$SYNTH" dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/synthetic/$SYNTH
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls # run while the eprobe is enabled; its trace lines are inspected below
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+       exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+       exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+echo '!'"hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+echo '!'"hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo '!'"$SYNTH u64 filename; s64 ret;" >> synthetic_events
+
+if grep -q "$SYNTH" dynamic_events; then exit_fail; fi # explicit: a "!"-negated command is exempt from errexit
+if grep -q "$EPROBE" dynamic_events; then exit_fail; fi
+if test -d events/synthetic/$SYNTH; then exit_fail; fi
+if test -d events/eprobes/$EPROBE; then exit_fail; fi
+
+clear_trace