Merge tag 'perf-urgent-for-mingo-5.7-20200403' of git://git.kernel.org/pub/scm/linux...
authorIngo Molnar <mingo@kernel.org>
Sat, 4 Apr 2020 08:35:15 +0000 (10:35 +0200)
committerIngo Molnar <mingo@kernel.org>
Sat, 4 Apr 2020 08:35:15 +0000 (10:35 +0200)
Pull perf/urgent fixes and improvements from Arnaldo Carvalho de Melo:

perf python:

  Arnaldo Carvalho de Melo:

  - Fix clang detection to strip out options passed in $CC.

build:

  He Zhe:

  - Normalize gcc parameter when generating arch errno table, fixing
    the build by removing options from $(CC).

  Sam Lunt:

  - Support Python 3.8+ in Makefile.

perf report/top:

  Arnaldo Carvalho de Melo:

  - Fix title line formatting.

perf script:

  Andreas Gerstmayr:

  - Fix SEGFAULT when using DWARF mode.

  - Fix invalid read of directory entry after closedir(), found with valgrind.

  Hagen Paul Pfeifer:

  - Introduce --deltatime option.

  Stephane Eranian:

  - Allow --symbol to accept hexadecimal addresses.

  Ian Rogers:

  - Add -S/--symbols documentation

  Namhyung Kim:

  - Add --show-cgroup-events option.

perf python:

  Arnaldo Carvalho de Melo:

  - Include rwsem.c in the python binding, needed by the cgroups improvements.

build-test:

  Arnaldo Carvalho de Melo:

  - Honour JOBS to override detection of number of cores

perf top:

  Jin Yao:

  - Support --group-sort-idx to change the sort order

  - perf top: Support hotkey to change sort order

perf pmu-events x86:

  Jin Yao:

  - Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric

perf symbols arm64:

  Kemeng Shi:

  - Fix arm64 gap between kernel start and module end

kernel perf subsystem:

  Namhyung Kim:

  - Add PERF_RECORD_CGROUP event and Add PERF_SAMPLE_CGROUP feature,
    to allow cgroup tracking, saving a link between cgroup path and
    its id number.

perf cgroup:

  Namhyung Kim:

  - Maintain cgroup hierarchy.

perf report:

  Namhyung Kim:

  - Add 'cgroup' sort key.

perf record:

  Namhyung Kim:

  - Support synthesizing cgroup events for pre-existing cgroups.

  - Add --all-cgroups option

Documentation:

  Tony Jones:

  - Update docs regarding kernel/user space unwinding.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
104 files changed:
include/linux/perf_event.h
include/uapi/linux/perf_event.h
init/Kconfig
kernel/events/core.c
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-file-handle.c [new file with mode: 0644]
tools/include/uapi/linux/perf_event.h
tools/lib/perf/include/perf/event.h
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm64/util/Build
tools/perf/arch/arm64/util/machine.c [new file with mode: 0644]
tools/perf/arch/arm64/util/sym-handling.c [deleted file]
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/builtin-diff.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-top.c
tools/perf/pmu-events/arch/test/test_cpu/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/test/test_cpu/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/test/test_cpu/uncore.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdfam17h/branch.json [deleted file]
tools/perf/pmu-events/arch/x86/amdfam17h/cache.json [deleted file]
tools/perf/pmu-events/arch/x86/amdfam17h/core.json [deleted file]
tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json [deleted file]
tools/perf/pmu-events/arch/x86/amdfam17h/memory.json [deleted file]
tools/perf/pmu-events/arch/x86/amdfam17h/other.json [deleted file]
tools/perf/pmu-events/arch/x86/amdzen1/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen1/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen1/core.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen1/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen1/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/core.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/amdzen2/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
tools/perf/pmu-events/arch/x86/mapfile.csv
tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
tools/perf/pmu-events/jevents.c
tools/perf/tests/Build
tools/perf/tests/builtin-test.c
tools/perf/tests/make
tools/perf/tests/pmu-events.c [new file with mode: 0644]
tools/perf/tests/sample-parsing.c
tools/perf/tests/tests.h
tools/perf/ui/browsers/hists.c
tools/perf/ui/hist.c
tools/perf/ui/keysyms.h
tools/perf/util/annotate.h
tools/perf/util/cgroup.c
tools/perf/util/cgroup.h
tools/perf/util/cpumap.c
tools/perf/util/dsos.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/metricgroup.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.l
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/python-ext-sources
tools/perf/util/record.h
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-display.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol_conf.h
tools/perf/util/synthetic-events.c
tools/perf/util/synthetic-events.h
tools/perf/util/tool.h

index 8768a39..9c3e761 100644 (file)
@@ -1020,6 +1020,7 @@ struct perf_sample_data {
        u64                             stack_user_size;
 
        u64                             phys_addr;
+       u64                             cgroup;
 } ____cacheline_aligned;
 
 /* default value for data source */
index 397cfd6..7b2d6fc 100644 (file)
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
        PERF_SAMPLE_REGS_INTR                   = 1U << 18,
        PERF_SAMPLE_PHYS_ADDR                   = 1U << 19,
        PERF_SAMPLE_AUX                         = 1U << 20,
+       PERF_SAMPLE_CGROUP                      = 1U << 21,
 
-       PERF_SAMPLE_MAX = 1U << 21,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 22,             /* non-ABI */
 
        __PERF_SAMPLE_CALLCHAIN_EARLY           = 1ULL << 63, /* non-ABI; internal use */
 };
@@ -381,7 +382,8 @@ struct perf_event_attr {
                                ksymbol        :  1, /* include ksymbol events */
                                bpf_event      :  1, /* include bpf events */
                                aux_output     :  1, /* generate AUX records instead of events */
-                               __reserved_1   : 32;
+                               cgroup         :  1, /* include cgroup events */
+                               __reserved_1   : 31;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -1012,6 +1014,16 @@ enum perf_event_type {
         */
        PERF_RECORD_BPF_EVENT                   = 18,
 
+       /*
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             id;
+        *      char                            path[];
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_CGROUP                      = 19,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
index 697d109..93e1faf 100644 (file)
@@ -1030,7 +1030,8 @@ config CGROUP_PERF
        help
          This option extends the perf per-cpu mode to restrict monitoring
          to threads which belong to the cgroup specified and run on the
-         designated cpu.
+         designated cpu.  Or this can be used to have cgroup ID in samples
+         so that it can monitor performance events among cgroups.
 
          Say N if unsure.
 
index d22e4ba..1569979 100644 (file)
@@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_cgroup_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -1861,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                size += sizeof(data->phys_addr);
 
+       if (sample_type & PERF_SAMPLE_CGROUP)
+               size += sizeof(data->cgroup);
+
        event->header_size = size;
 }
 
@@ -4608,6 +4612,8 @@ static void unaccount_event(struct perf_event *event)
                atomic_dec(&nr_comm_events);
        if (event->attr.namespaces)
                atomic_dec(&nr_namespaces_events);
+       if (event->attr.cgroup)
+               atomic_dec(&nr_cgroup_events);
        if (event->attr.task)
                atomic_dec(&nr_task_events);
        if (event->attr.freq)
@@ -6864,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                perf_output_put(handle, data->phys_addr);
 
+       if (sample_type & PERF_SAMPLE_CGROUP)
+               perf_output_put(handle, data->cgroup);
+
        if (sample_type & PERF_SAMPLE_AUX) {
                perf_output_put(handle, data->aux_size);
 
@@ -7063,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                data->phys_addr = perf_virt_to_phys(data->addr);
 
+#ifdef CONFIG_CGROUP_PERF
+       if (sample_type & PERF_SAMPLE_CGROUP) {
+               struct cgroup *cgrp;
+
+               /* protected by RCU */
+               cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
+               data->cgroup = cgroup_id(cgrp);
+       }
+#endif
+
        if (sample_type & PERF_SAMPLE_AUX) {
                u64 size;
 
@@ -7735,6 +7754,105 @@ void perf_event_namespaces(struct task_struct *task)
                        NULL);
 }
 
+/*
+ * cgroup tracking
+ */
+#ifdef CONFIG_CGROUP_PERF
+
+struct perf_cgroup_event {
+       char                            *path;
+       int                             path_size;
+       struct {
+               struct perf_event_header        header;
+               u64                             id;
+               char                            path[];
+       } event_id;
+};
+
+static int perf_event_cgroup_match(struct perf_event *event)
+{
+       return event->attr.cgroup;
+}
+
+static void perf_event_cgroup_output(struct perf_event *event, void *data)
+{
+       struct perf_cgroup_event *cgroup_event = data;
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       u16 header_size = cgroup_event->event_id.header.size;
+       int ret;
+
+       if (!perf_event_cgroup_match(event))
+               return;
+
+       perf_event_header__init_id(&cgroup_event->event_id.header,
+                                  &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               cgroup_event->event_id.header.size);
+       if (ret)
+               goto out;
+
+       perf_output_put(&handle, cgroup_event->event_id);
+       __output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
+       perf_output_end(&handle);
+out:
+       cgroup_event->event_id.header.size = header_size;
+}
+
+static void perf_event_cgroup(struct cgroup *cgrp)
+{
+       struct perf_cgroup_event cgroup_event;
+       char path_enomem[16] = "//enomem";
+       char *pathname;
+       size_t size;
+
+       if (!atomic_read(&nr_cgroup_events))
+               return;
+
+       cgroup_event = (struct perf_cgroup_event){
+               .event_id  = {
+                       .header = {
+                               .type = PERF_RECORD_CGROUP,
+                               .misc = 0,
+                               .size = sizeof(cgroup_event.event_id),
+                       },
+                       .id = cgroup_id(cgrp),
+               },
+       };
+
+       pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+       if (pathname == NULL) {
+               cgroup_event.path = path_enomem;
+       } else {
+               /* just to be sure to have enough space for alignment */
+               cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
+               cgroup_event.path = pathname;
+       }
+
+       /*
+        * Since our buffer works in 8 byte units we need to align our string
+        * size to a multiple of 8. However, we must guarantee the tail end is
+        * zero'd out to avoid leaking random bits to userspace.
+        */
+       size = strlen(cgroup_event.path) + 1;
+       while (!IS_ALIGNED(size, sizeof(u64)))
+               cgroup_event.path[size++] = '\0';
+
+       cgroup_event.event_id.header.size += size;
+       cgroup_event.path_size = size;
+
+       perf_iterate_sb(perf_event_cgroup_output,
+                       &cgroup_event,
+                       NULL);
+
+       kfree(pathname);
+}
+
+#endif
+
 /*
  * mmap tracking
  */
@@ -10781,6 +10899,8 @@ static void account_event(struct perf_event *event)
                atomic_inc(&nr_comm_events);
        if (event->attr.namespaces)
                atomic_inc(&nr_namespaces_events);
+       if (event->attr.cgroup)
+               atomic_inc(&nr_cgroup_events);
        if (event->attr.task)
                atomic_inc(&nr_task_events);
        if (event->attr.freq)
@@ -11160,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
        if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
                ret = perf_reg_validate(attr->sample_regs_intr);
+
+#ifndef CONFIG_CGROUP_PERF
+       if (attr->sample_type & PERF_SAMPLE_CGROUP)
+               return -EINVAL;
+#endif
+
 out:
        return ret;
 
@@ -12757,6 +12883,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
        kfree(jc);
 }
 
+static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+       perf_event_cgroup(css->cgroup);
+       return 0;
+}
+
 static int __perf_cgroup_move(void *info)
 {
        struct task_struct *task = info;
@@ -12778,6 +12910,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
 struct cgroup_subsys perf_event_cgrp_subsys = {
        .css_alloc      = perf_cgroup_css_alloc,
        .css_free       = perf_cgroup_css_free,
+       .css_online     = perf_cgroup_css_online,
        .attach         = perf_cgroup_attach,
        /*
         * Implicitly enable on dfl hierarchy so that perf events can
index 574c2e0..3e0c019 100644 (file)
@@ -72,7 +72,8 @@ FEATURE_TESTS_BASIC :=                  \
         setns                          \
         libaio                         \
         libzstd                                \
-        disassembler-four-args
+        disassembler-four-args         \
+        file-handle
 
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
 # of all feature tests
index 7ac0d80..621f528 100644 (file)
@@ -67,7 +67,8 @@ FILES=                                          \
          test-llvm.bin                         \
          test-llvm-version.bin                 \
          test-libaio.bin                       \
-         test-libzstd.bin
+         test-libzstd.bin                      \
+         test-file-handle.bin
 
 FILES := $(addprefix $(OUTPUT),$(FILES))
 
@@ -321,6 +322,9 @@ $(OUTPUT)test-libaio.bin:
 $(OUTPUT)test-libzstd.bin:
        $(BUILD) -lzstd
 
+$(OUTPUT)test-file-handle.bin:
+       $(BUILD)
+
 ###############################
 
 clean:
diff --git a/tools/build/feature/test-file-handle.c b/tools/build/feature/test-file-handle.c
new file mode 100644 (file)
index 0000000..4d3b03b
--- /dev/null
@@ -0,0 +1,17 @@
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+
+int main(void)
+{
+       struct {
+               struct file_handle fh;
+               uint64_t cgroup_id;
+       } handle;
+       int mount_id;
+
+       name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0);
+       return 0;
+}
index 397cfd6..7b2d6fc 100644 (file)
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
        PERF_SAMPLE_REGS_INTR                   = 1U << 18,
        PERF_SAMPLE_PHYS_ADDR                   = 1U << 19,
        PERF_SAMPLE_AUX                         = 1U << 20,
+       PERF_SAMPLE_CGROUP                      = 1U << 21,
 
-       PERF_SAMPLE_MAX = 1U << 21,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 22,             /* non-ABI */
 
        __PERF_SAMPLE_CALLCHAIN_EARLY           = 1ULL << 63, /* non-ABI; internal use */
 };
@@ -381,7 +382,8 @@ struct perf_event_attr {
                                ksymbol        :  1, /* include ksymbol events */
                                bpf_event      :  1, /* include bpf events */
                                aux_output     :  1, /* generate AUX records instead of events */
-                               __reserved_1   : 32;
+                               cgroup         :  1, /* include cgroup events */
+                               __reserved_1   : 31;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -1012,6 +1014,16 @@ enum perf_event_type {
         */
        PERF_RECORD_BPF_EVENT                   = 18,
 
+       /*
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             id;
+        *      char                            path[];
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_CGROUP                      = 19,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
index 1810689..69b44d2 100644 (file)
@@ -105,6 +105,12 @@ struct perf_record_bpf_event {
        __u8                     tag[BPF_TAG_SIZE];  // prog tag
 };
 
+struct perf_record_cgroup {
+       struct perf_event_header header;
+       __u64                    id;
+       char                     path[PATH_MAX];
+};
+
 struct perf_record_sample {
        struct perf_event_header header;
        __u64                    array[];
@@ -352,6 +358,7 @@ union perf_event {
        struct perf_record_mmap2                mmap2;
        struct perf_record_comm                 comm;
        struct perf_record_namespaces           namespaces;
+       struct perf_record_cgroup               cgroup;
        struct perf_record_fork                 fork;
        struct perf_record_lost                 lost;
        struct perf_record_lost_samples         lost_samples;
index 8ead555..f16d8a7 100644 (file)
@@ -405,14 +405,16 @@ ui.*::
                This option is only applied to TUI.
 
 call-graph.*::
-       When sub-commands 'top' and 'report' work with -g/—-children
-       there're options in control of call-graph.
+       The following controls the handling of call-graphs (obtained via the
+       -g/--call-graph options).
 
        call-graph.record-mode::
-               The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
-               The value of 'dwarf' is effective only if perf detect needed library
-               (libunwind or a recent version of libdw).
-               'lbr' only work for cpus that support it.
+               The mode for user space can be 'fp' (frame pointer), 'dwarf'
+               and 'lbr'.  The value 'dwarf' is effective only if libunwind
+               (or a recent version of libdw) is present on the system;
+               the value 'lbr' only works for certain cpus. The method for
+               kernel space is controlled not by this option but by the
+               kernel config (CONFIG_UNWINDER_*).
 
        call-graph.dump-size::
                The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
index 7f4db75..b3f3b3f 100644 (file)
@@ -237,16 +237,22 @@ OPTIONS
        option and remains only for backward compatibility.  See --event.
 
 -g::
-       Enables call-graph (stack chain/backtrace) recording.
+       Enables call-graph (stack chain/backtrace) recording for both
+       kernel space and user space.
 
 --call-graph::
        Setup and enable call-graph (stack chain/backtrace) recording,
-       implies -g.  Default is "fp".
+       implies -g.  Default is "fp" (for user space).
 
-       Allows specifying "fp" (frame pointer) or "dwarf"
-       (DWARF's CFI - Call Frame Information) or "lbr"
-       (Hardware Last Branch Record facility) as the method to collect
-       the information used to show the call graphs.
+       The unwinding method used for kernel space is dependent on the
+       unwinder used by the active kernel configuration, i.e
+       CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc)
+
+       Any option specified here controls the method used for user space.
+
+       Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI -
+       Call Frame Information) or "lbr" (Hardware Last Branch Record
+       facility).
 
        In some systems, where binaries are build with gcc
        --fomit-frame-pointer, using the "fp" method will produce bogus
@@ -385,7 +391,10 @@ displayed with the weight and local_weight sort keys.  This currently works for
 abort events and some memory events in precise mode on modern Intel CPUs.
 
 --namespaces::
-Record events of type PERF_RECORD_NAMESPACES.
+Record events of type PERF_RECORD_NAMESPACES.  This enables 'cgroup_id' sort key.
+
+--all-cgroups::
+Record events of type PERF_RECORD_CGROUP.  This enables 'cgroup' sort key.
 
 --transaction::
 Record transaction flags for transaction related events.
index bd0a029..f569b9e 100644 (file)
@@ -95,6 +95,7 @@ OPTIONS
        abort cost. This is the global weight.
        - local_weight: Local weight version of the weight above.
        - cgroup_id: ID derived from cgroup namespace device and inode numbers.
+       - cgroup: cgroup pathname in the cgroupfs.
        - transaction: Transaction abort flags.
        - overhead: Overhead percentage of sample
        - overhead_sys: Overhead percentage of sample running in system mode
@@ -377,6 +378,11 @@ OPTIONS
        Show event group information together. It forces group output also
        if there are no groups defined in data file.
 
+--group-sort-idx::
+       Sort the output by the event at the index n in group. If n is invalid,
+       sort by the first event. It can support multiple groups with different
+       amount of events. WARNING: This should be used on grouped events.
+
 --demangle::
        Demangle symbol names to human readable form. It's enabled by default,
        disable with --no-demangle.
index db6a36a..963487e 100644 (file)
@@ -319,6 +319,9 @@ OPTIONS
 --show-bpf-events
        Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
 
+--show-cgroup-events
+       Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+
 --demangle::
        Demangle symbol names to human readable form. It's enabled by default,
        disable with --no-demangle.
@@ -390,6 +393,9 @@ include::itrace.txt[]
 --reltime::
        Print time stamps relative to trace start.
 
+--deltatime::
+       Print time stamps relative to previous event.
+
 --per-event-dump::
        Create per event files with a "perf.data.EVENT.dump" name instead of
         printing to stdout, useful, for instance, for generating flamegraphs.
@@ -406,6 +412,14 @@ include::itrace.txt[]
 --xed::
        Run xed disassembler on output. Requires installing the xed disassembler.
 
+-S::
+--symbols=symbol[,symbol...]::
+       Only consider the listed symbols. Symbols are typically a name
+       but they may also be hexadecimal address.
+
+       For example, to select the symbol noploop or the address 0x4007a0:
+       perf script --symbols=noploop,0x4007a0
+
 --call-trace::
        Show call stream for intel_pt traces. The CPUs are interleaved, but
        can be filtered with -C.
index 324b6b5..487737a 100644 (file)
@@ -53,6 +53,11 @@ Default is to monitor all CPUS.
 --group::
         Put the counters into a counter group.
 
+--group-sort-idx::
+       Sort the output by the event at the index n in group. If n is invalid,
+       sort by the first event. It can support multiple groups with different
+       amount of events. WARNING: This should be used on grouped events.
+
 -F <freq>::
 --freq=<freq>::
        Profile at this frequency. Use 'max' to use the currently maximum
@@ -272,6 +277,10 @@ Default is to monitor all CPUS.
        Record events of type PERF_RECORD_NAMESPACES and display it with the
        'cgroup_id' sort key.
 
+--all-cgroups::
+       Record events of type PERF_RECORD_CGROUP and display it with the
+       'cgroup' sort key.
+
 --switch-on EVENT_NAME::
        Only consider events after this event is found.
 
index 80e55e7..12a8204 100644 (file)
@@ -228,8 +228,17 @@ strip-libs  = $(filter-out -l%,$(1))
 
 PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
+# Python 3.8 changed the output of `python-config --ldflags` to not include the
+# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for
+# libpython fails if that flag is not included in LDFLAGS
+ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0)
+  PYTHON_CONFIG_LDFLAGS := --ldflags --embed
+else
+  PYTHON_CONFIG_LDFLAGS := --ldflags
+endif
+
 ifdef PYTHON_CONFIG
-  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -348,6 +357,10 @@ ifeq ($(feature-gettid), 1)
   CFLAGS += -DHAVE_GETTID
 endif
 
+ifeq ($(feature-file-handle), 1)
+  CFLAGS += -DHAVE_FILE_HANDLE
+endif
+
 ifdef NO_LIBELF
   NO_DWARF := 1
   NO_DEMANGLE := 1
index 3eda9d4..d15a311 100644 (file)
@@ -231,6 +231,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
 BPF_DIR         = $(srctree)/tools/lib/bpf/
 SUBCMD_DIR      = $(srctree)/tools/lib/subcmd/
 LIBPERF_DIR     = $(srctree)/tools/lib/perf/
+DOC_DIR         = $(srctree)/tools/perf/Documentation/
 
 # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
 # Without this setting the output feature dump file misses some features, for
@@ -573,7 +574,7 @@ arch_errno_hdr_dir := $(srctree)/tools
 arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
 
 $(arch_errno_name_array): $(arch_errno_tbl)
-       $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+       $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
 
 sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
 sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -792,7 +793,6 @@ $(LIBSUBCMD): FORCE
        $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
 
 $(LIBSUBCMD)-clean:
-       $(call QUIET_CLEAN, libsubcmd)
        $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
 
 help:
@@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
 
 # 'make doc' should call 'make -C Documentation all'
 $(DOC_TARGETS):
-       $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+       $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all)
 
 TAG_FOLDERS= . ../lib ../include
 TAG_FILES= ../../include/uapi/linux/perf_event.h
@@ -959,7 +959,7 @@ install-python_ext:
 
 # 'make install-doc' should call 'make -C Documentation install'
 $(INSTALL_DOC_TARGETS):
-       $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+       $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=)
 
 ### Cleaning rules
 
@@ -1008,7 +1008,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
                $(OUTPUT)$(rename_flags_array) \
                $(OUTPUT)$(arch_errno_name_array) \
                $(OUTPUT)$(sync_file_range_arrays)
-       $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+       $(call QUIET_CLEAN, Documentation) \
+       $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
 
 #
 # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
index 0a7782c..5c13438 100644 (file)
@@ -1,6 +1,6 @@
 perf-y += header.o
+perf-y += machine.o
 perf-y += perf_regs.o
-perf-y += sym-handling.o
 perf-$(CONFIG_DWARF)     += dwarf-regs.o
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
new file mode 100644 (file)
index 0000000..d41b27e
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include "debug.h"
+#include "symbol.h"
+
+/* On arm64, kernel text segment start at high memory address,
+ * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
+ * address, like 0xffff 0000 00ax xxxx. When only samll amount of
+ * memory is used by modules, gap between end of module's text segment
+ * and start of kernel text segment may be reach 2G.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+#define SYMBOL_LIMIT (1 << 12) /* 4K */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+       if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
+                       (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
+               /* Limit range of last symbol in module and kernel */
+               p->end += SYMBOL_LIMIT;
+       else
+               p->end = c->start;
+       pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c
deleted file mode 100644 (file)
index 8dfa3e5..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
- */
-
-#include "symbol.h" // for the elf__needs_adjust_symbols() prototype
-#include <stdbool.h>
-
-#ifdef HAVE_LIBELF_SUPPORT
-#include <gelf.h>
-
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-       return ehdr.e_type == ET_EXEC ||
-              ehdr.e_type == ET_REL ||
-              ehdr.e_type == ET_DYN;
-}
-#endif
index 7cf0b88..e5c9504 100644 (file)
@@ -1,5 +1,4 @@
 perf-y += header.o
-perf-y += sym-handling.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
index abb7a12..0856b32 100644 (file)
 #include "probe-event.h"
 #include "probe-file.h"
 
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-       return ehdr.e_type == ET_EXEC ||
-              ehdr.e_type == ET_REL ||
-              ehdr.e_type == ET_DYN;
-}
-
-#endif
-
 int arch__choose_best_symbol(struct symbol *syma,
                             struct symbol *symb __maybe_unused)
 {
index 5e697cd..c94a002 100644 (file)
@@ -455,6 +455,7 @@ static struct perf_diff pdiff = {
                .fork   = perf_event__process_fork,
                .lost   = perf_event__process_lost,
                .namespaces = perf_event__process_namespaces,
+               .cgroup = perf_event__process_cgroup,
                .ordered_events = true,
                .ordering_requires_timestamps = true,
        },
index 4c30146..1ab349a 100644 (file)
@@ -1397,6 +1397,11 @@ static int record__synthesize(struct record *rec, bool tail)
        if (err < 0)
                pr_warning("Couldn't synthesize bpf events.\n");
 
+       err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
+                                            machine);
+       if (err < 0)
+               pr_warning("Couldn't synthesize cgroup events.\n");
+
        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
                                            process_synthesized_event, opts->sample_address,
                                            1);
@@ -1428,6 +1433,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        if (rec->opts.record_namespaces)
                tool->namespace_events = true;
 
+       if (rec->opts.record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+               tool->cgroup_events = true;
+#else
+               pr_err("cgroup tracking is not supported\n");
+               return -1;
+#endif
+       }
+
        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
@@ -2358,6 +2372,8 @@ static struct option __record_options[] = {
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
                    "Record namespaces events"),
+       OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
+                   "Record cgroup events"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
index 5f4045d..26d8fc2 100644 (file)
@@ -635,7 +635,7 @@ static int report__browse_hists(struct report *rep)
                 * Usually "ret" is the last pressed key, and we only
                 * care if the key notifies us to switch data file.
                 */
-               if (ret != K_SWITCH_INPUT_DATA)
+               if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD)
                        ret = 0;
                break;
        case 2:
@@ -1105,6 +1105,7 @@ int cmd_report(int argc, const char **argv)
                        .mmap2           = perf_event__process_mmap2,
                        .comm            = perf_event__process_comm,
                        .namespaces      = perf_event__process_namespaces,
+                       .cgroup          = perf_event__process_cgroup,
                        .exit            = perf_event__process_exit,
                        .fork            = perf_event__process_fork,
                        .lost            = perf_event__process_lost,
@@ -1227,6 +1228,10 @@ int cmd_report(int argc, const char **argv)
                    "Show a column with the sum of periods"),
        OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set,
                    "Show event group information together"),
+       OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+                   "Sort the output by the event at the index n in group. "
+                   "If n is invalid, sort by the first event. "
+                   "WARNING: should be used on grouped events."),
        OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
                    "use branch records for per branch histogram filling",
                    parse_branch_mode),
@@ -1369,6 +1374,12 @@ repeat:
 
        setup_forced_leader(&report, session->evlist);
 
+       if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) {
+               parse_options_usage(NULL, options, "group-sort-idx", 0);
+               ret = -EINVAL;
+               goto error;
+       }
+
        if (itrace_synth_opts.last_branch)
                has_br_stack = true;
 
@@ -1470,7 +1481,7 @@ repeat:
                sort_order = sort_tmp;
        }
 
-       if ((last_key != K_SWITCH_INPUT_DATA) &&
+       if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) &&
            (setup_sorting(session->evlist) < 0)) {
                if (sort_order)
                        parse_options_usage(report_usage, options, "s", 1);
@@ -1549,7 +1560,7 @@ repeat:
        sort__setup_elide(stdout);
 
        ret = __cmd_report(&report);
-       if (ret == K_SWITCH_INPUT_DATA) {
+       if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) {
                perf_session__delete(session);
                last_key = K_SWITCH_INPUT_DATA;
                goto repeat;
index 656b347..1f57a7e 100644 (file)
@@ -63,7 +63,9 @@
 static char const              *script_name;
 static char const              *generate_script_lang;
 static bool                    reltime;
+static bool                    deltatime;
 static u64                     initial_time;
+static u64                     previous_time;
 static bool                    debug_mode;
 static u64                     last_timestamp;
 static u64                     nr_unordered;
@@ -704,6 +706,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
                        if (!initial_time)
                                initial_time = sample->time;
                        t = sample->time - initial_time;
+               } else if (deltatime) {
+                       if (previous_time)
+                               t = sample->time - previous_time;
+                       else {
+                               t = 0;
+                       }
+                       previous_time = sample->time;
                }
                nsecs = t;
                secs = nsecs / NSEC_PER_SEC;
@@ -1685,6 +1694,7 @@ struct perf_script {
        bool                    show_lost_events;
        bool                    show_round_events;
        bool                    show_bpf_events;
+       bool                    show_cgroup_events;
        bool                    allocated;
        bool                    per_event_dump;
        struct evswitch         evswitch;
@@ -2203,6 +2213,41 @@ out:
        return ret;
 }
 
+static int process_cgroup_event(struct perf_tool *tool,
+                               union perf_event *event,
+                               struct perf_sample *sample,
+                               struct machine *machine)
+{
+       struct thread *thread;
+       struct perf_script *script = container_of(tool, struct perf_script, tool);
+       struct perf_session *session = script->session;
+       struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+       int ret = -1;
+
+       thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+       if (thread == NULL) {
+               pr_debug("problem processing CGROUP event, skipping it.\n");
+               return -1;
+       }
+
+       if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
+               goto out;
+
+       if (!evsel->core.attr.sample_id_all) {
+               sample->cpu = 0;
+               sample->time = 0;
+       }
+       if (!filter_cpu(sample)) {
+               perf_sample__fprintf_start(sample, thread, evsel,
+                                          PERF_RECORD_CGROUP, stdout);
+               perf_event__fprintf(event, stdout);
+       }
+       ret = 0;
+out:
+       thread__put(thread);
+       return ret;
+}
+
 static int process_fork_event(struct perf_tool *tool,
                              union perf_event *event,
                              struct perf_sample *sample,
@@ -2542,6 +2587,8 @@ static int __cmd_script(struct perf_script *script)
                script->tool.context_switch = process_switch_event;
        if (script->show_namespace_events)
                script->tool.namespaces = process_namespaces_event;
+       if (script->show_cgroup_events)
+               script->tool.cgroup = process_cgroup_event;
        if (script->show_lost_events)
                script->tool.lost = process_lost_event;
        if (script->show_round_events) {
@@ -3218,10 +3265,10 @@ static char *get_script_path(const char *script_root, const char *suffix)
                        __script_root = get_script_root(script_dirent, suffix);
                        if (__script_root && !strcmp(script_root, __script_root)) {
                                free(__script_root);
-                               closedir(lang_dir);
                                closedir(scripts_dir);
                                scnprintf(script_path, MAXPATHLEN, "%s/%s",
                                          lang_path, script_dirent->d_name);
+                               closedir(lang_dir);
                                return strdup(script_path);
                        }
                        free(__script_root);
@@ -3467,6 +3514,7 @@ int cmd_script(int argc, const char **argv)
                        .mmap2           = perf_event__process_mmap2,
                        .comm            = perf_event__process_comm,
                        .namespaces      = perf_event__process_namespaces,
+                       .cgroup          = perf_event__process_cgroup,
                        .exit            = perf_event__process_exit,
                        .fork            = perf_event__process_fork,
                        .attr            = process_attr,
@@ -3555,6 +3603,7 @@ int cmd_script(int argc, const char **argv)
                     "anything beyond the specified depth will be ignored. "
                     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"),
+       OPT_BOOLEAN(0, "deltatime", &deltatime, "Show time stamps relative to previous event"),
        OPT_BOOLEAN('I', "show-info", &show_full_info,
                    "display extended information from perf.data file"),
        OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -3567,6 +3616,8 @@ int cmd_script(int argc, const char **argv)
                    "Show context switch events (if recorded)"),
        OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
                    "Show namespace events (if recorded)"),
+       OPT_BOOLEAN('\0', "show-cgroup-events", &script.show_cgroup_events,
+                   "Show cgroup events (if recorded)"),
        OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
                    "Show lost events (if recorded)"),
        OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
@@ -3651,6 +3702,13 @@ int cmd_script(int argc, const char **argv)
                }
        }
 
+       if (reltime && deltatime) {
+               fprintf(stderr,
+                       "reltime and deltatime - the two don't get along well. "
+                       "Please limit to --reltime or --deltatime.\n");
+               return -1;
+       }
+
        if (itrace_synth_opts.callchain &&
            itrace_synth_opts.callchain_sz > scripting_max_stack)
                scripting_max_stack = itrace_synth_opts.callchain_sz;
index d2539b7..289cf83 100644 (file)
@@ -616,6 +616,7 @@ static void *display_thread_tui(void *arg)
                .arg            = top,
                .refresh        = top->delay_secs,
        };
+       int ret;
 
        /* In order to read symbols from other namespaces perf to  needs to call
         * setns(2).  This isn't permitted if the struct_fs has multiple users.
@@ -626,6 +627,7 @@ static void *display_thread_tui(void *arg)
 
        prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
 
+repeat:
        perf_top__sort_new_samples(top);
 
        /*
@@ -638,13 +640,18 @@ static void *display_thread_tui(void *arg)
                hists->uid_filter_str = top->record_opts.target.uid_str;
        }
 
-       perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+       ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
                                      top->min_percent,
                                      &top->session->header.env,
                                      !top->record_opts.overwrite,
                                      &top->annotation_opts);
 
-       stop_top();
+       if (ret == K_RELOAD) {
+               top->zero = true;
+               goto repeat;
+       } else
+               stop_top();
+
        return NULL;
 }
 
@@ -1246,6 +1253,14 @@ static int __cmd_top(struct perf_top *top)
 
        if (opts->record_namespaces)
                top->tool.namespace_events = true;
+       if (opts->record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+               top->tool.cgroup_events = true;
+#else
+               pr_err("cgroup tracking is not supported.\n");
+               return -1;
+#endif
+       }
 
        ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
                                                &top->session->machines.host,
@@ -1253,6 +1268,11 @@ static int __cmd_top(struct perf_top *top)
        if (ret < 0)
                pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
 
+       ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process,
+                                            &top->session->machines.host);
+       if (ret < 0)
+               pr_debug("Couldn't synthesize cgroup events.\n");
+
        machine__synthesize_threads(&top->session->machines.host, &opts->target,
                                    top->evlist->core.threads, false,
                                    top->nr_threads_synthesize);
@@ -1545,6 +1565,12 @@ int cmd_top(int argc, const char **argv)
                        "number of thread to run event synthesize"),
        OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
                    "Record namespaces events"),
+       OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup,
+                   "Record cgroup events"),
+       OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+                   "Sort the output by the event at the index n in group. "
+                   "If n is invalid, sort by the first event. "
+                   "WARNING: should be used on grouped events."),
        OPTS_EVSWITCH(&top.evswitch),
        OPT_END()
        };
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/branch.json b/tools/perf/pmu-events/arch/test/test_cpu/branch.json
new file mode 100644 (file)
index 0000000..93ddfd8
--- /dev/null
@@ -0,0 +1,12 @@
+[
+  {
+    "EventName": "bp_l1_btb_correct",
+    "EventCode": "0x8a",
+    "BriefDescription": "L1 BTB Correction."
+  },
+  {
+    "EventName": "bp_l2_btb_correct",
+    "EventCode": "0x8b",
+    "BriefDescription": "L2 BTB Correction."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_cpu/other.json
new file mode 100644 (file)
index 0000000..7d53d7e
--- /dev/null
@@ -0,0 +1,26 @@
+[
+    {
+        "EventCode": "0x6",
+        "Counter": "0,1",
+        "UMask": "0x80",
+        "EventName": "SEGMENT_REG_LOADS.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of segment register loads."
+    },
+    {
+        "EventCode": "0x9",
+        "Counter": "0,1",
+        "UMask": "0x20",
+        "EventName": "DISPATCH_BLOCKED.ANY",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason"
+    },
+    {
+        "EventCode": "0x3A",
+        "Counter": "0,1",
+        "UMask": "0x0",
+        "EventName": "EIST_TRANS",
+        "SampleAfterValue": "200000",
+        "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions"
+    }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
new file mode 100644 (file)
index 0000000..d0a890c
--- /dev/null
@@ -0,0 +1,21 @@
+[
+ {
+           "EventCode": "0x02",
+           "EventName": "uncore_hisi_ddrc.flux_wcmd",
+           "BriefDescription": "DDRC write commands",
+           "PublicDescription": "DDRC write commands",
+           "Unit": "hisi_sccl,ddrc"
+   },
+   {
+           "Unit": "CBO",
+           "EventCode": "0x22",
+           "UMask": "0x81",
+           "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+           "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+           "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+           "Counter": "0,1",
+           "CounterMask": "0",
+           "Invert": "0",
+           "EdgeDetect": "0"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json
deleted file mode 100644 (file)
index 93ddfd8..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-[
-  {
-    "EventName": "bp_l1_btb_correct",
-    "EventCode": "0x8a",
-    "BriefDescription": "L1 BTB Correction."
-  },
-  {
-    "EventName": "bp_l2_btb_correct",
-    "EventCode": "0x8b",
-    "BriefDescription": "L2 BTB Correction."
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
deleted file mode 100644 (file)
index 6221a84..0000000
+++ /dev/null
@@ -1,329 +0,0 @@
-[
-  {
-    "EventName": "ic_fw32",
-    "EventCode": "0x80",
-    "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
-  },
-  {
-    "EventName": "ic_fw32_miss",
-    "EventCode": "0x81",
-    "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
-  },
-  {
-    "EventName": "ic_cache_fill_l2",
-    "EventCode": "0x82",
-    "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
-  },
-  {
-    "EventName": "ic_cache_fill_sys",
-    "EventCode": "0x83",
-    "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
-  },
-  {
-    "EventName": "bp_l1_tlb_miss_l2_hit",
-    "EventCode": "0x84",
-    "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
-  },
-  {
-    "EventName": "bp_l1_tlb_miss_l2_miss",
-    "EventCode": "0x85",
-    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
-  },
-  {
-    "EventName": "bp_snp_re_sync",
-    "EventCode": "0x86",
-    "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
-  },
-  {
-    "EventName": "ic_fetch_stall.ic_stall_any",
-    "EventCode": "0x87",
-    "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
-    "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ic_fetch_stall.ic_stall_dq_empty",
-    "EventCode": "0x87",
-    "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
-    "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ic_fetch_stall.ic_stall_back_pressure",
-    "EventCode": "0x87",
-    "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
-    "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ic_cache_inval.l2_invalidating_probe",
-    "EventCode": "0x8c",
-    "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).",
-    "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ic_cache_inval.fill_invalidated",
-    "EventCode": "0x8c",
-    "BriefDescription": "IC line invalidated due to overwriting fill response.",
-    "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "bp_tlb_rel",
-    "EventCode": "0x99",
-    "BriefDescription": "The number of ITLB reload requests."
-  },
-  {
-    "EventName": "l2_request_g1.rd_blk_l",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x80"
-  },
-  {
-    "EventName": "l2_request_g1.rd_blk_x",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "l2_request_g1.ls_rd_blk_c_s",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "l2_request_g1.cacheable_ic_read",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "l2_request_g1.change_to_x",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "l2_request_g1.prefetch_l2",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "l2_request_g1.l2_hw_pf",
-    "EventCode": "0x60",
-    "BriefDescription": "Requests to L2 Group1.",
-    "PublicDescription": "Requests to L2 Group1.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "l2_request_g1.other_requests",
-    "EventCode": "0x60",
-    "BriefDescription": "Events covered by l2_request_g2.",
-    "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l2_request_g2.group1",
-    "EventCode": "0x61",
-    "BriefDescription": "All Group 1 commands not in unit0.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.",
-    "UMask": "0x80"
-  },
-  {
-    "EventName": "l2_request_g2.ls_rd_sized",
-    "EventCode": "0x61",
-    "BriefDescription": "RdSized, RdSized32, RdSized64.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "l2_request_g2.ls_rd_sized_nc",
-    "EventCode": "0x61",
-    "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "l2_request_g2.ic_rd_sized",
-    "EventCode": "0x61",
-    "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "l2_request_g2.ic_rd_sized_nc",
-    "EventCode": "0x61",
-    "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "l2_request_g2.smc_inval",
-    "EventCode": "0x61",
-    "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "l2_request_g2.bus_locks_originator",
-    "EventCode": "0x61",
-    "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "l2_request_g2.bus_locks_responses",
-    "EventCode": "0x61",
-    "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l2_latency.l2_cycles_waiting_on_fills",
-    "EventCode": "0x62",
-    "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
-    "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l2_wcb_req.wcb_write",
-    "EventCode": "0x63",
-    "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
-    "BriefDescription": "LS to L2 WCB write requests.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "l2_wcb_req.wcb_close",
-    "EventCode": "0x63",
-    "BriefDescription": "LS to L2 WCB close requests.",
-    "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "l2_wcb_req.zero_byte_store",
-    "EventCode": "0x63",
-    "BriefDescription": "LS to L2 WCB zero byte store requests.",
-    "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "l2_wcb_req.cl_zero",
-    "EventCode": "0x63",
-    "PublicDescription": "LS to L2 WCB cache line zeroing requests.",
-    "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
-    "EventCode": "0x64",
-    "BriefDescription": "LS ReadBlock C/S Hit.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.",
-    "UMask": "0x80"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
-    "EventCode": "0x64",
-    "BriefDescription": "LS Read Block L Hit X.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
-    "EventCode": "0x64",
-    "BriefDescription": "LsRdBlkL Hit Shared.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ls_rd_blk_x",
-    "EventCode": "0x64",
-    "BriefDescription": "LsRdBlkX/ChgToX Hit X.  Count RdBlkX finding Shared as a Miss.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X.  Count RdBlkX finding Shared as a Miss.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ls_rd_blk_c",
-    "EventCode": "0x64",
-    "BriefDescription": "LS Read Block C S L X Change to X Miss.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ic_fill_hit_x",
-    "EventCode": "0x64",
-    "BriefDescription": "IC Fill Hit Exclusive Stale.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ic_fill_hit_s",
-    "EventCode": "0x64",
-    "BriefDescription": "IC Fill Hit Shared.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "l2_cache_req_stat.ic_fill_miss",
-    "EventCode": "0x64",
-    "BriefDescription": "IC Fill Miss.",
-    "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l2_fill_pending.l2_fill_busy",
-    "EventCode": "0x6d",
-    "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
-    "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "l3_request_g1.caching_l3_cache_accesses",
-    "EventCode": "0x01",
-    "BriefDescription": "Caching: L3 cache accesses",
-    "UMask": "0x80",
-    "Unit": "L3PMC"
-  },
-  {
-    "EventName": "l3_lookup_state.all_l3_req_typs",
-    "EventCode": "0x04",
-    "BriefDescription": "All L3 Request Types",
-    "UMask": "0xff",
-    "Unit": "L3PMC"
-  },
-  {
-    "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
-    "EventCode": "0x06",
-    "BriefDescription": "Other L3 Miss Request Types",
-    "UMask": "0xfe",
-    "Unit": "L3PMC"
-  },
-  {
-    "EventName": "l3_comb_clstr_state.request_miss",
-    "EventCode": "0x06",
-    "BriefDescription": "L3 cache misses",
-    "UMask": "0x01",
-    "Unit": "L3PMC"
-  },
-  {
-    "EventName": "xi_sys_fill_latency",
-    "EventCode": "0x90",
-    "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
-    "UMask": "0x00",
-    "Unit": "L3PMC"
-  },
-  {
-    "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
-    "EventCode": "0x9a",
-    "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
-    "UMask": "0x3f",
-    "Unit": "L3PMC"
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json
deleted file mode 100644 (file)
index 1079544..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-[
-  {
-    "EventName": "ex_ret_instr",
-    "EventCode": "0xc0",
-    "BriefDescription": "Retired Instructions."
-  },
-  {
-    "EventName": "ex_ret_cops",
-    "EventCode": "0xc1",
-    "BriefDescription": "Retired Uops.",
-    "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4."
-  },
-  {
-    "EventName": "ex_ret_brn",
-    "EventCode": "0xc2",
-    "BriefDescription": "Retired Branch Instructions.",
-    "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
-  },
-  {
-    "EventName": "ex_ret_brn_misp",
-    "EventCode": "0xc3",
-    "BriefDescription": "Retired Branch Instructions Mispredicted.",
-    "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
-  },
-  {
-    "EventName": "ex_ret_brn_tkn",
-    "EventCode": "0xc4",
-    "BriefDescription": "Retired Taken Branch Instructions.",
-    "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
-  },
-  {
-    "EventName": "ex_ret_brn_tkn_misp",
-    "EventCode": "0xc5",
-    "BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
-    "PublicDescription": "The number of retired taken branch instructions that were mispredicted."
-  },
-  {
-    "EventName": "ex_ret_brn_far",
-    "EventCode": "0xc6",
-    "BriefDescription": "Retired Far Control Transfers.",
-    "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
-  },
-  {
-    "EventName": "ex_ret_brn_resync",
-    "EventCode": "0xc7",
-    "BriefDescription": "Retired Branch Resyncs.",
-    "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
-  },
-  {
-    "EventName": "ex_ret_near_ret",
-    "EventCode": "0xc8",
-    "BriefDescription": "Retired Near Returns.",
-    "PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
-  },
-  {
-    "EventName": "ex_ret_near_ret_mispred",
-    "EventCode": "0xc9",
-    "BriefDescription": "Retired Near Returns Mispredicted.",
-    "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
-  },
-  {
-    "EventName": "ex_ret_brn_ind_misp",
-    "EventCode": "0xca",
-    "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
-    "PublicDescription": "Retired Indirect Branch Instructions Mispredicted."
-  },
-  {
-    "EventName": "ex_ret_mmx_fp_instr.sse_instr",
-    "EventCode": "0xcb",
-    "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
-    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ex_ret_mmx_fp_instr.mmx_instr",
-    "EventCode": "0xcb",
-    "BriefDescription": "MMX instructions.",
-    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ex_ret_mmx_fp_instr.x87_instr",
-    "EventCode": "0xcb",
-    "BriefDescription": "x87 instructions.",
-    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ex_ret_cond",
-    "EventCode": "0xd1",
-    "BriefDescription": "Retired Conditional Branch Instructions."
-  },
-  {
-    "EventName": "ex_ret_cond_misp",
-    "EventCode": "0xd2",
-    "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
-  },
-  {
-    "EventName": "ex_div_busy",
-    "EventCode": "0xd3",
-    "BriefDescription": "Div Cycles Busy count."
-  },
-  {
-    "EventName": "ex_div_count",
-    "EventCode": "0xd4",
-    "BriefDescription": "Div Op Count."
-  },
-  {
-    "EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
-    "EventCode": "0x1cf",
-    "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
-    "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
-    "EventCode": "0x1cf",
-    "BriefDescription": "Number of Ops tagged by IBS that retired.",
-    "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
-    "EventCode": "0x1cf",
-    "BriefDescription": "Number of Ops tagged by IBS.",
-    "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ex_ret_fus_brnch_inst",
-    "EventCode": "0x1d0",
-    "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3."
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json
deleted file mode 100644 (file)
index ea47119..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-[
-  {
-    "EventName": "fpu_pipe_assignment.dual",
-    "EventCode": "0x00",
-    "BriefDescription": "Total number multi-pipe uOps.",
-    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.",
-    "UMask": "0xf0"
-  },
-  {
-    "EventName": "fpu_pipe_assignment.total",
-    "EventCode": "0x00",
-    "BriefDescription": "Total number uOps.",
-    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.",
-    "UMask": "0xf"
-  },
-  {
-    "EventName": "fp_sched_empty",
-    "EventCode": "0x01",
-    "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler."
-  },
-  {
-    "EventName": "fp_retx87_fp_ops.all",
-    "EventCode": "0x02",
-    "BriefDescription": "All Ops.",
-    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.",
-    "UMask": "0x7"
-  },
-  {
-    "EventName": "fp_retx87_fp_ops.div_sqr_r_ops",
-    "EventCode": "0x02",
-    "BriefDescription": "Divide and square root Ops.",
-    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "fp_retx87_fp_ops.mul_ops",
-    "EventCode": "0x02",
-    "BriefDescription": "Multiply Ops.",
-    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "fp_retx87_fp_ops.add_sub_ops",
-    "EventCode": "0x02",
-    "BriefDescription": "Add/subtract Ops.",
-    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.all",
-    "EventCode": "0x03",
-    "BriefDescription": "All FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
-    "UMask": "0xff"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
-    "UMask": "0x80"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.dp_div_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Double precision divide/square root FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.dp_mult_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Double precision multiply FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Double precision add/subtract FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.sp_div_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Single-precision divide/square root FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.sp_mult_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Single-precision multiply FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops",
-    "EventCode": "0x03",
-    "BriefDescription": "Single-precision add/subtract FLOPS.",
-    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "fp_num_mov_elim_scal_op.optimized",
-    "EventCode": "0x04",
-    "BriefDescription": "Number of Scalar Ops optimized.",
-    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "fp_num_mov_elim_scal_op.opt_potential",
-    "EventCode": "0x04",
-    "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
-    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
-    "EventCode": "0x04",
-    "BriefDescription": "Number of SSE Move Ops eliminated.",
-    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
-    "EventCode": "0x04",
-    "BriefDescription": "Number of SSE Move Ops.",
-    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "fp_retired_ser_ops.x87_ctrl_ret",
-    "EventCode": "0x05",
-    "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
-    "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "fp_retired_ser_ops.x87_bot_ret",
-    "EventCode": "0x05",
-    "BriefDescription": "x87 bottom-executing uOps retired.",
-    "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "fp_retired_ser_ops.sse_ctrl_ret",
-    "EventCode": "0x05",
-    "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
-    "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "fp_retired_ser_ops.sse_bot_ret",
-    "EventCode": "0x05",
-    "BriefDescription": "SSE bottom-executing uOps retired.",
-    "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.",
-    "UMask": "0x1"
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json
deleted file mode 100644 (file)
index fa2d60d..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-[
-  {
-    "EventName": "ls_locks.bus_lock",
-    "EventCode": "0x25",
-    "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
-    "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ls_dispatch.ld_st_dispatch",
-    "EventCode": "0x29",
-    "BriefDescription": "Load-op-Stores.",
-    "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ls_dispatch.store_dispatch",
-    "EventCode": "0x29",
-    "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
-    "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ls_dispatch.ld_dispatch",
-    "EventCode": "0x29",
-    "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
-    "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ls_stlf",
-    "EventCode": "0x35",
-    "BriefDescription": "Number of STLF hits."
-  },
-  {
-    "EventName": "ls_dc_accesses",
-    "EventCode": "0x40",
-    "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.all",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Miss or Reload off all sizes.",
-    "PublicDescription": "L1 DTLB Miss or Reload off all sizes.",
-    "UMask": "0xff"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Miss of a page of 1G size.",
-    "PublicDescription": "L1 DTLB Miss of a page of 1G size.",
-    "UMask": "0x80"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Miss of a page of 2M size.",
-    "PublicDescription": "L1 DTLB Miss of a page of 2M size.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Miss of a page of 32K size.",
-    "PublicDescription": "L1 DTLB Miss of a page of 32K size.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Miss of a page of 4K size.",
-    "PublicDescription": "L1 DTLB Miss of a page of 4K size.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Reload of a page of 1G size.",
-    "PublicDescription": "L1 DTLB Reload of a page of 1G size.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Reload of a page of 2M size.",
-    "PublicDescription": "L1 DTLB Reload of a page of 2M size.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Reload of a page of 32K size.",
-    "PublicDescription": "L1 DTLB Reload of a page of 32K size.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
-    "EventCode": "0x45",
-    "BriefDescription": "L1 DTLB Reload of a page of 4K size.",
-    "PublicDescription": "L1 DTLB Reload of a page of 4K size.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside",
-    "EventCode": "0x46",
-    "BriefDescription": "Tablewalker allocation.",
-    "PublicDescription": "Tablewalker allocation.",
-    "UMask": "0xc"
-  },
-  {
-    "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside",
-    "EventCode": "0x46",
-    "BriefDescription": "Tablewalker allocation.",
-    "PublicDescription": "Tablewalker allocation.",
-    "UMask": "0x3"
-  },
-  {
-    "EventName": "ls_misal_accesses",
-    "EventCode": "0x47",
-    "BriefDescription": "Misaligned loads."
-  },
-  {
-    "EventName": "ls_pref_instr_disp.prefetch_nta",
-    "EventCode": "0x4b",
-    "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
-    "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "ls_pref_instr_disp.store_prefetch_w",
-    "EventCode": "0x4b",
-    "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
-    "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ls_pref_instr_disp.load_prefetch_w",
-    "EventCode": "0x4b",
-    "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.",
-    "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ls_inef_sw_pref.mab_mch_cnt",
-    "EventCode": "0x52",
-    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
-    "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
-    "EventCode": "0x52",
-    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
-    "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "ls_not_halted_cyc",
-    "EventCode": "0x76",
-    "BriefDescription": "Cycles not in Halt."
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json
deleted file mode 100644 (file)
index b26a00d..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-[
-  {
-    "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
-    "EventCode": "0x28a",
-    "BriefDescription": "OC to IC mode switch.",
-    "PublicDescription": "OC Mode Switch. OC to IC mode switch.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
-    "EventCode": "0x28a",
-    "BriefDescription": "IC to OC mode switch.",
-    "PublicDescription": "OC Mode Switch. IC to OC mode switch.",
-    "UMask": "0x1"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "RETIRE Tokens unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
-    "UMask": "0x40"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "AGSQ Tokens unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
-    "UMask": "0x20"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "ALU tokens total unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
-    "UMask": "0x10"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
-    "UMask": "0x8"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "ALSQ 3 Tokens unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
-    "UMask": "0x4"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "ALSQ 2 Tokens unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
-    "UMask": "0x2"
-  },
-  {
-    "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
-    "EventCode": "0xaf",
-    "BriefDescription": "ALSQ 1 Tokens unavailable.",
-    "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
-    "UMask": "0x1"
-  }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
new file mode 100644 (file)
index 0000000..a9943ee
--- /dev/null
@@ -0,0 +1,23 @@
+[
+  {
+    "EventName": "bp_l1_btb_correct",
+    "EventCode": "0x8a",
+    "BriefDescription": "L1 BTB Correction."
+  },
+  {
+    "EventName": "bp_l2_btb_correct",
+    "EventCode": "0x8b",
+    "BriefDescription": "L2 BTB Correction."
+  },
+  {
+    "EventName": "bp_dyn_ind_pred",
+    "EventCode": "0x8e",
+    "BriefDescription": "Dynamic Indirect Predictions.",
+    "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+  },
+  {
+    "EventName": "bp_de_redirect",
+    "EventCode": "0x91",
+    "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
new file mode 100644 (file)
index 0000000..404d4c5
--- /dev/null
@@ -0,0 +1,294 @@
+[
+  {
+    "EventName": "ic_fw32",
+    "EventCode": "0x80",
+    "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+  },
+  {
+    "EventName": "ic_fw32_miss",
+    "EventCode": "0x81",
+    "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+  },
+  {
+    "EventName": "ic_cache_fill_l2",
+    "EventCode": "0x82",
+    "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
+  },
+  {
+    "EventName": "ic_cache_fill_sys",
+    "EventCode": "0x83",
+    "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_hit",
+    "EventCode": "0x84",
+    "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_miss",
+    "EventCode": "0x85",
+    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
+  },
+  {
+    "EventName": "bp_snp_re_sync",
+    "EventCode": "0x86",
+    "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_any",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ic_cache_inval.l2_invalidating_probe",
+    "EventCode": "0x8c",
+    "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_cache_inval.fill_invalidated",
+    "EventCode": "0x8c",
+    "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "bp_tlb_rel",
+    "EventCode": "0x99",
+    "BriefDescription": "The number of ITLB reload requests."
+  },
+  {
+    "EventName": "l2_request_g1.rd_blk_l",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_request_g1.rd_blk_x",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_request_g1.ls_rd_blk_c_s",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_request_g1.cacheable_ic_read",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_request_g1.change_to_x",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_request_g1.prefetch_l2_cmd",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_request_g1.l2_hw_pf",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_request_g1.group2",
+    "EventCode": "0x60",
+    "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_request_g2.group1",
+    "EventCode": "0x61",
+    "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_request_g2.ls_rd_sized",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_request_g2.ls_rd_sized_nc",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_request_g2.ic_rd_sized",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_request_g2.ic_rd_sized_nc",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_request_g2.smc_inval",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_request_g2.bus_locks_originator",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_request_g2.bus_locks_responses",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+    "EventCode": "0x62",
+    "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_wcb_req.wcb_write",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_wcb_req.wcb_close",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_wcb_req.zero_byte_store",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_wcb_req.cl_zero",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_miss",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_fill_pending.l2_fill_busy",
+    "EventCode": "0x6d",
+    "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l3_request_g1.caching_l3_cache_accesses",
+    "EventCode": "0x01",
+    "BriefDescription": "Caching: L3 cache accesses",
+    "UMask": "0x80",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_lookup_state.all_l3_req_typs",
+    "EventCode": "0x04",
+    "BriefDescription": "All L3 Request Types",
+    "UMask": "0xff",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+    "EventCode": "0x06",
+    "BriefDescription": "Other L3 Miss Request Types",
+    "UMask": "0xfe",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.request_miss",
+    "EventCode": "0x06",
+    "BriefDescription": "L3 cache misses",
+    "UMask": "0x01",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_sys_fill_latency",
+    "EventCode": "0x90",
+    "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x00",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+    "EventCode": "0x9a",
+    "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x3f",
+    "Unit": "L3PMC"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
new file mode 100644 (file)
index 0000000..7e1aa82
--- /dev/null
@@ -0,0 +1,125 @@
+[
+  {
+    "EventName": "ex_ret_instr",
+    "EventCode": "0xc0",
+    "BriefDescription": "Retired Instructions."
+  },
+  {
+    "EventName": "ex_ret_cops",
+    "EventCode": "0xc1",
+    "BriefDescription": "Retired Uops.",
+    "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4."
+  },
+  {
+    "EventName": "ex_ret_brn",
+    "EventCode": "0xc2",
+    "BriefDescription": "Retired Branch Instructions.",
+    "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+  },
+  {
+    "EventName": "ex_ret_brn_misp",
+    "EventCode": "0xc3",
+    "BriefDescription": "Retired Branch Instructions Mispredicted.",
+    "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
+  },
+  {
+    "EventName": "ex_ret_brn_tkn",
+    "EventCode": "0xc4",
+    "BriefDescription": "Retired Taken Branch Instructions.",
+    "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+  },
+  {
+    "EventName": "ex_ret_brn_tkn_misp",
+    "EventCode": "0xc5",
+    "BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
+    "PublicDescription": "The number of retired taken branch instructions that were mispredicted."
+  },
+  {
+    "EventName": "ex_ret_brn_far",
+    "EventCode": "0xc6",
+    "BriefDescription": "Retired Far Control Transfers.",
+    "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
+  },
+  {
+    "EventName": "ex_ret_brn_resync",
+    "EventCode": "0xc7",
+    "BriefDescription": "Retired Branch Resyncs.",
+    "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
+  },
+  {
+    "EventName": "ex_ret_near_ret",
+    "EventCode": "0xc8",
+    "BriefDescription": "Retired Near Returns.",
+    "PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
+  },
+  {
+    "EventName": "ex_ret_near_ret_mispred",
+    "EventCode": "0xc9",
+    "BriefDescription": "Retired Near Returns Mispredicted.",
+    "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
+  },
+  {
+    "EventName": "ex_ret_brn_ind_misp",
+    "EventCode": "0xca",
+    "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.sse_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.mmx_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "MMX instructions.",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.x87_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "x87 instructions.",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ex_ret_cond",
+    "EventCode": "0xd1",
+    "BriefDescription": "Retired Conditional Branch Instructions."
+  },
+  {
+    "EventName": "ex_div_busy",
+    "EventCode": "0xd3",
+    "BriefDescription": "Div Cycles Busy count."
+  },
+  {
+    "EventName": "ex_div_count",
+    "EventCode": "0xd4",
+    "BriefDescription": "Div Op Count."
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ex_ret_fus_brnch_inst",
+    "EventCode": "0x1d0",
+    "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
new file mode 100644 (file)
index 0000000..a35542b
--- /dev/null
@@ -0,0 +1,224 @@
+[
+  {
+    "EventName": "fpu_pipe_assignment.dual",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.",
+    "UMask": "0xf0"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.dual3",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.dual2",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.dual1",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.dual0",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number uOps assigned to all fpu pipes.",
+    "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.",
+    "UMask": "0xf"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total3",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps on pipe 3.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total2",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps on pipe 2.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total1",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps on pipe 1.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total0",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps  on pipe 0.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_sched_empty",
+    "EventCode": "0x01",
+    "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler."
+  },
+  {
+    "EventName": "fp_retx87_fp_ops.all",
+    "EventCode": "0x02",
+    "BriefDescription": "All Ops.",
+    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.",
+    "UMask": "0x7"
+  },
+  {
+    "EventName": "fp_retx87_fp_ops.div_sqr_r_ops",
+    "EventCode": "0x02",
+    "BriefDescription": "Divide and square root Ops.",
+    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_retx87_fp_ops.mul_ops",
+    "EventCode": "0x02",
+    "BriefDescription": "Multiply Ops.",
+    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_retx87_fp_ops.add_sub_ops",
+    "EventCode": "0x02",
+    "BriefDescription": "Add/subtract Ops.",
+    "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.all",
+    "EventCode": "0x03",
+    "BriefDescription": "All FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.dp_div_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Double precision divide/square root FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.dp_mult_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Double precision multiply FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Double precision add/subtract FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.sp_div_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Single-precision divide/square root FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.sp_mult_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Single-precision multiply FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Single-precision add/subtract FLOPS.",
+    "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.optimized",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of Scalar Ops optimized.",
+    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.opt_potential",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
+    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of SSE Move Ops eliminated.",
+    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of SSE Move Ops.",
+    "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.x87_ctrl_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
+    "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.x87_bot_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "x87 bottom-executing uOps retired.",
+    "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.sse_ctrl_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
+    "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.sse_bot_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "SSE bottom-executing uOps retired.",
+    "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.",
+    "UMask": "0x1"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json
new file mode 100644 (file)
index 0000000..b33a3c3
--- /dev/null
@@ -0,0 +1,184 @@
+[
+  {
+    "EventName": "ls_locks.bus_lock",
+    "EventCode": "0x25",
+    "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_dispatch.ld_st_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_dispatch.store_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_dispatch.ld_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_stlf",
+    "EventCode": "0x35",
+    "BriefDescription": "Number of STLF hits."
+  },
+  {
+    "EventName": "ls_dc_accesses",
+    "EventCode": "0x40",
+    "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
+  },
+  {
+    "EventName": "ls_mab_alloc.dc_prefetcher",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB allocates by type - DC prefetcher.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_mab_alloc.stores",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB allocates by type - stores.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_mab_alloc.loads",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB allocates by type - loads.",
+    "UMask": "0x01"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.all",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss or Reload off all sizes.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss of a page of 1G size.",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss of a page of 2M size.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss of a page of 32K size.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss of a page of 4K size.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Reload of a page of 1G size.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Reload of a page of 2M size.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Reload of a page of 32K size.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Reload of a page of 4K size.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_tablewalker.iside",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks on I-side.",
+    "UMask": "0xc"
+  },
+  {
+    "EventName": "ls_tablewalker.ic_type1",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks IC Type 1.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_tablewalker.ic_type0",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks IC Type 0.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_tablewalker.dside",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks on D-side.",
+    "UMask": "0x3"
+  },
+  {
+    "EventName": "ls_tablewalker.dc_type1",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks DC Type 1.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_tablewalker.dc_type0",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks DC Type 0.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_misal_accesses",
+    "EventCode": "0x47",
+    "BriefDescription": "Misaligned loads."
+  },
+  {
+    "EventName": "ls_pref_instr_disp.prefetch_nta",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_pref_instr_disp.store_prefetch_w",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_pref_instr_disp.load_prefetch_w",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_inef_sw_pref.mab_mch_cnt",
+    "EventCode": "0x52",
+    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
+    "EventCode": "0x52",
+    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_not_halted_cyc",
+    "EventCode": "0x76",
+    "BriefDescription": "Cycles not in Halt."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json
new file mode 100644 (file)
index 0000000..ff78009
--- /dev/null
@@ -0,0 +1,56 @@
+[
+  {
+    "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+    "EventCode": "0x28a",
+    "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+    "EventCode": "0x28a",
+    "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+    "UMask": "0x1"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json
new file mode 100644 (file)
index 0000000..ef4166a
--- /dev/null
@@ -0,0 +1,52 @@
+[
+  {
+    "EventName": "bp_l1_btb_correct",
+    "EventCode": "0x8a",
+    "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)."
+  },
+  {
+    "EventName": "bp_l2_btb_correct",
+    "EventCode": "0x8b",
+    "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)."
+  },
+  {
+    "EventName": "bp_dyn_ind_pred",
+    "EventCode": "0x8e",
+    "BriefDescription": "Dynamic Indirect Predictions.",
+    "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+  },
+  {
+    "EventName": "bp_de_redirect",
+    "EventCode": "0x91",
+    "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+  },
+  {
+    "EventName": "bp_l1_tlb_fetch_hit",
+    "EventCode": "0x94",
+    "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.",
+    "UMask": "0xFF"
+  },
+  {
+    "EventName": "bp_l1_tlb_fetch_hit.if1g",
+    "EventCode": "0x94",
+    "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "bp_l1_tlb_fetch_hit.if2m",
+    "EventCode": "0x94",
+    "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "bp_l1_tlb_fetch_hit.if4k",
+    "EventCode": "0x94",
+    "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "bp_tlb_rel",
+    "EventCode": "0x99",
+    "BriefDescription": "The number of ITLB reload requests."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
new file mode 100644 (file)
index 0000000..1c60bfa
--- /dev/null
@@ -0,0 +1,338 @@
+[
+  {
+    "EventName": "l2_request_g1.rd_blk_l",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_request_g1.rd_blk_x",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_request_g1.ls_rd_blk_c_s",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_request_g1.cacheable_ic_read",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_request_g1.change_to_x",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_request_g1.prefetch_l2_cmd",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_request_g1.l2_hw_pf",
+    "EventCode": "0x60",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_request_g1.group2",
+    "EventCode": "0x60",
+    "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_request_g2.group1",
+    "EventCode": "0x61",
+    "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_request_g2.ls_rd_sized",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_request_g2.ls_rd_sized_nc",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_request_g2.ic_rd_sized",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_request_g2.ic_rd_sized_nc",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_request_g2.smc_inval",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_request_g2.bus_locks_originator",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_request_g2.bus_locks_responses",
+    "EventCode": "0x61",
+    "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+    "EventCode": "0x62",
+    "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_wcb_req.wcb_write",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_wcb_req.wcb_close",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_wcb_req.zero_byte_store",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_wcb_req.cl_zero",
+    "EventCode": "0x63",
+    "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "l2_cache_req_stat.ic_fill_miss",
+    "EventCode": "0x64",
+    "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_fill_pending.l2_fill_busy",
+    "EventCode": "0x6d",
+    "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l2_pf_hit_l2",
+    "EventCode": "0x70",
+    "BriefDescription": "L2 prefetch hit in L2.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "l2_pf_miss_l2_hit_l3",
+    "EventCode": "0x71",
+    "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "l2_pf_miss_l2_l3",
+    "EventCode": "0x72",
+    "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "ic_fw32",
+    "EventCode": "0x80",
+    "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+  },
+  {
+    "EventName": "ic_fw32_miss",
+    "EventCode": "0x81",
+    "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+  },
+  {
+    "EventName": "ic_cache_fill_l2",
+    "EventCode": "0x82",
+    "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
+  },
+  {
+    "EventName": "ic_cache_fill_sys",
+    "EventCode": "0x83",
+    "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_hit",
+    "EventCode": "0x84",
+    "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_tlb_miss",
+    "EventCode": "0x85",
+    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g",
+    "EventCode": "0x85",
+    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m",
+    "EventCode": "0x85",
+    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k",
+    "EventCode": "0x85",
+    "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "bp_snp_re_sync",
+    "EventCode": "0x86",
+    "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_any",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+    "EventCode": "0x87",
+    "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ic_cache_inval.l2_invalidating_probe",
+    "EventCode": "0x8c",
+    "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_cache_inval.fill_invalidated",
+    "EventCode": "0x8c",
+    "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+    "EventCode": "0x28a",
+    "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+    "EventCode": "0x28a",
+    "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "l3_request_g1.caching_l3_cache_accesses",
+    "EventCode": "0x01",
+    "BriefDescription": "Caching: L3 cache accesses",
+    "UMask": "0x80",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_lookup_state.all_l3_req_typs",
+    "EventCode": "0x04",
+    "BriefDescription": "All L3 Request Types",
+    "UMask": "0xff",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+    "EventCode": "0x06",
+    "BriefDescription": "Other L3 Miss Request Types",
+    "UMask": "0xfe",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "l3_comb_clstr_state.request_miss",
+    "EventCode": "0x06",
+    "BriefDescription": "L3 cache misses",
+    "UMask": "0x01",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_sys_fill_latency",
+    "EventCode": "0x90",
+    "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x00",
+    "Unit": "L3PMC"
+  },
+  {
+    "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+    "EventCode": "0x9A",
+    "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+    "UMask": "0x3f",
+    "Unit": "L3PMC"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
new file mode 100644 (file)
index 0000000..de89e5a
--- /dev/null
@@ -0,0 +1,130 @@
+[
+  {
+    "EventName": "ex_ret_instr",
+    "EventCode": "0xc0",
+    "BriefDescription": "Retired Instructions."
+  },
+  {
+    "EventName": "ex_ret_cops",
+    "EventCode": "0xc1",
+    "BriefDescription": "Retired Uops.",
+    "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8."
+  },
+  {
+    "EventName": "ex_ret_brn",
+    "EventCode": "0xc2",
+    "BriefDescription": "Retired Branch Instructions.",
+    "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+  },
+  {
+    "EventName": "ex_ret_brn_misp",
+    "EventCode": "0xc3",
+    "BriefDescription": "Retired Branch Instructions Mispredicted.",
+    "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
+  },
+  {
+    "EventName": "ex_ret_brn_tkn",
+    "EventCode": "0xc4",
+    "BriefDescription": "Retired Taken Branch Instructions.",
+    "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+  },
+  {
+    "EventName": "ex_ret_brn_tkn_misp",
+    "EventCode": "0xc5",
+    "BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
+    "PublicDescription": "The number of retired taken branch instructions that were mispredicted."
+  },
+  {
+    "EventName": "ex_ret_brn_far",
+    "EventCode": "0xc6",
+    "BriefDescription": "Retired Far Control Transfers.",
+    "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
+  },
+  {
+    "EventName": "ex_ret_brn_resync",
+    "EventCode": "0xc7",
+    "BriefDescription": "Retired Branch Resyncs.",
+    "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
+  },
+  {
+    "EventName": "ex_ret_near_ret",
+    "EventCode": "0xc8",
+    "BriefDescription": "Retired Near Returns.",
+    "PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
+  },
+  {
+    "EventName": "ex_ret_near_ret_mispred",
+    "EventCode": "0xc9",
+    "BriefDescription": "Retired Near Returns Mispredicted.",
+    "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
+  },
+  {
+    "EventName": "ex_ret_brn_ind_misp",
+    "EventCode": "0xca",
+    "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.sse_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.mmx_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "MMX instructions.",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ex_ret_mmx_fp_instr.x87_instr",
+    "EventCode": "0xcb",
+    "BriefDescription": "x87 instructions.",
+    "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ex_ret_cond",
+    "EventCode": "0xd1",
+    "BriefDescription": "Retired Conditional Branch Instructions."
+  },
+  {
+    "EventName": "ex_ret_cond_misp",
+    "EventCode": "0xd2",
+    "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
+  },
+  {
+    "EventName": "ex_div_busy",
+    "EventCode": "0xd3",
+    "BriefDescription": "Div Cycles Busy count."
+  },
+  {
+    "EventName": "ex_div_count",
+    "EventCode": "0xd4",
+    "BriefDescription": "Div Op Count."
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
+    "EventCode": "0x1cf",
+    "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ex_ret_fus_brnch_inst",
+    "EventCode": "0x1d0",
+    "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.",
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
new file mode 100644 (file)
index 0000000..622a0c4
--- /dev/null
@@ -0,0 +1,140 @@
+[
+  {
+    "EventName": "fpu_pipe_assignment.total",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps.",
+    "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.",
+    "UMask": "0xf"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total3",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number uOps assigned to pipe 3.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total2",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number uOps assigned to pipe 2.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total1",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number uOps assigned to pipe 1.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fpu_pipe_assignment.total0",
+    "EventCode": "0x00",
+    "BriefDescription": "Total number of fp uOps  on pipe 0.",
+    "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.all",
+    "EventCode": "0x03",
+    "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.mac_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "PublicDescription": "",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.div_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.mult_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_ret_sse_avx_ops.add_sub_flops",
+    "EventCode": "0x03",
+    "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.optimized",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.opt_potential",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
+    "EventCode": "0x04",
+    "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.sse_bot_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.sse_ctrl_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.x87_bot_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_retired_ser_ops.x87_ctrl_ret",
+    "EventCode": "0x05",
+    "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "fp_disp_faults.ymm_spill_fault",
+    "EventCode": "0x0e",
+    "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "fp_disp_faults.ymm_fill_fault",
+    "EventCode": "0x0e",
+    "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "fp_disp_faults.xmm_fill_fault",
+    "EventCode": "0x0e",
+    "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "fp_disp_faults.x87_fill_fault",
+    "EventCode": "0x0e",
+    "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.",
+    "UMask": "0x1"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json
new file mode 100644 (file)
index 0000000..715046b
--- /dev/null
@@ -0,0 +1,341 @@
+[
+  {
+    "EventName": "ls_bad_status2.stli_other",
+    "EventCode": "0x24",
+    "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.",
+    "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_locks.spec_lock_hi_spec",
+    "EventCode": "0x25",
+    "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_locks.spec_lock_lo_spec",
+    "EventCode": "0x25",
+    "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_locks.non_spec_lock",
+    "EventCode": "0x25",
+    "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_locks.bus_lock",
+    "EventCode": "0x25",
+    "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_ret_cl_flush",
+    "EventCode": "0x26",
+    "BriefDescription": "Number of retired CLFLUSH instructions."
+  },
+  {
+    "EventName": "ls_ret_cpuid",
+    "EventCode": "0x27",
+    "BriefDescription": "Number of retired CPUID instructions."
+  },
+  {
+    "EventName": "ls_dispatch.ld_st_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_dispatch.store_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_dispatch.ld_dispatch",
+    "EventCode": "0x29",
+    "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_smi_rx",
+    "EventCode": "0x2B",
+    "BriefDescription": "Number of SMIs received."
+  },
+  {
+    "EventName": "ls_int_taken",
+    "EventCode": "0x2C",
+    "BriefDescription": "Number of interrupts taken."
+  },
+  {
+    "EventName": "ls_rdtsc",
+    "EventCode": "0x2D",
+    "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative."
+  },
+  {
+    "EventName": "ls_stlf",
+    "EventCode": "0x35",
+    "BriefDescription": "Number of STLF hits."
+  },
+  {
+    "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full",
+    "EventCode": "0x37",
+    "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full."
+  },
+  {
+    "EventName": "ls_dc_accesses",
+    "EventCode": "0x40",
+    "BriefDescription": "Number of accesses to the dcache for load/store references.",
+    "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
+  },
+  {
+    "EventName": "ls_mab_alloc.dc_prefetcher",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_mab_alloc.stores",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB Allocates by Type. Stores.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_mab_alloc.loads",
+    "EventCode": "0x41",
+    "BriefDescription": "LS MAB Allocates by Type. Loads.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram",
+    "EventCode": "0x43",
+    "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache",
+    "EventCode": "0x43",
+    "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram",
+    "EventCode": "0x43",
+    "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache",
+    "EventCode": "0x43",
+    "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2",
+    "EventCode": "0x43",
+    "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.all",
+    "EventCode": "0x45",
+    "BriefDescription": "All L1 DTLB Misses or Reloads.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that miss in the L2 TLB.",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that miss in the L2 TLB.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that miss the L2 TLB.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
+    "EventCode": "0x45",
+    "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_tablewalker.iside",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks on I-side.",
+    "UMask": "0xc"
+  },
+  {
+    "EventName": "ls_tablewalker.ic_type1",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks IC Type 1.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_tablewalker.ic_type0",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks IC Type 0.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_tablewalker.dside",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks on D-side.",
+    "UMask": "0x3"
+  },
+  {
+    "EventName": "ls_tablewalker.dc_type1",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks DC Type 1.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_tablewalker.dc_type0",
+    "EventCode": "0x46",
+    "BriefDescription": "Total Page Table Walks DC Type 0.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_misal_accesses",
+    "EventCode": "0x47",
+    "BriefDescription": "Misaligned loads."
+  },
+  {
+    "EventName": "ls_pref_instr_disp",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "ls_pref_instr_disp.prefetch_nta",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "ls_pref_instr_disp.prefetch_w",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_pref_instr_disp.prefetch",
+    "EventCode": "0x4b",
+    "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_inef_sw_pref.mab_mch_cnt",
+    "EventCode": "0x52",
+    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
+    "EventCode": "0x52",
+    "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram",
+    "EventCode": "0x59",
+    "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache",
+    "EventCode": "0x59",
+    "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram",
+    "EventCode": "0x59",
+    "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die.  From DRAM (home node local).",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache",
+    "EventCode": "0x59",
+    "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2",
+    "EventCode": "0x59",
+    "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram",
+    "EventCode": "0x5A",
+    "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache",
+    "EventCode": "0x5A",
+    "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram",
+    "EventCode": "0x5A",
+    "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache",
+    "EventCode": "0x5A",
+    "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2",
+    "EventCode": "0x5A",
+    "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "ls_not_halted_cyc",
+    "EventCode": "0x76",
+    "BriefDescription": "Cycles not in Halt."
+  },
+  {
+    "EventName": "ls_tlb_flush",
+    "EventCode": "0x78",
+    "BriefDescription": "All TLB Flushes"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json
new file mode 100644 (file)
index 0000000..e94994d
--- /dev/null
@@ -0,0 +1,115 @@
+[
+  {
+    "EventName": "de_dis_uop_queue_empty_di0",
+    "EventCode": "0xa9",
+    "BriefDescription": "Cycles where the Micro-Op Queue is empty."
+  },
+  {
+    "EventName": "de_dis_uops_from_decoder",
+    "EventCode": "0xaa",
+    "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.",
+    "UMask": "0xff"
+  },
+  {
+    "EventName": "de_dis_uops_from_decoder.opcache_dispatched",
+    "EventCode": "0xaa",
+    "BriefDescription": "Count of dispatched Ops from OpCache.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "de_dis_uops_from_decoder.decoder_dispatched",
+    "EventCode": "0xaa",
+    "BriefDescription": "Count of dispatched Ops from Decoder.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.",
+    "UMask": "0x80"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Taken branch buffer resource stall.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall",
+    "EventCode": "0xae",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.",
+    "UMask": "0x1"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.",
+    "UMask": "0x40"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+    "UMask": "0x20"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+    "UMask": "0x10"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+    "UMask": "0x8"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.",
+    "UMask": "0x4"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+    "UMask": "0x2"
+  },
+  {
+    "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+    "EventCode": "0xaf",
+    "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+    "UMask": "0x1"
+  }
+]
index 45a34ce..8cdc7c1 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 961fe43..16fd8a7 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 746734c..1eb0415 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index a728c6e..7fde0d2 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 5402cd3..f57c5f3 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 832f3cb..311a005 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index d69b2a8..28e2544 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 5f465fd..db23db2 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 3e909b3..dbb33e0 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 745ced0..25b06cf 100644 (file)
@@ -36,4 +36,5 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core
 GenuineIntel-6-7D,v1,icelake,core
 GenuineIntel-6-7E,v1,icelake,core
 GenuineIntel-6-86,v1,tremontx,core
-AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core
+AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
+AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
index 50c0532..fb2d7b8 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index f97e831..8704efe 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 35f5db1..b4f9113 100644 (file)
     },
     {
         "BriefDescription": "Fraction of cycles spent in Kernel mode",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
         "MetricGroup": "Summary",
         "MetricName": "Kernel_Utilization"
     },
index 3c4236a..fa86c5f 100644 (file)
@@ -771,6 +771,19 @@ static void print_mapping_table_suffix(FILE *outfp)
        fprintf(outfp, "};\n");
 }
 
+static void print_mapping_test_table(FILE *outfp)
+{
+       /*
+        * Print the terminating, NULL entry.
+        */
+       fprintf(outfp, "{\n");
+       fprintf(outfp, "\t.cpuid = \"testcpu\",\n");
+       fprintf(outfp, "\t.version = \"v1\",\n");
+       fprintf(outfp, "\t.type = \"core\",\n");
+       fprintf(outfp, "\t.table = pme_test_cpu,\n");
+       fprintf(outfp, "},\n");
+}
+
 static int process_mapfile(FILE *outfp, char *fpath)
 {
        int n = 16384;
@@ -848,6 +861,7 @@ static int process_mapfile(FILE *outfp, char *fpath)
        }
 
 out:
+       print_mapping_test_table(outfp);
        print_mapping_table_suffix(outfp);
        fclose(mapfp);
        free(line);
@@ -1168,6 +1182,22 @@ int main(int argc, char *argv[])
                goto empty_map;
        }
 
+       sprintf(ldirname, "%s/test", start_dirname);
+
+       rc = nftw(ldirname, process_one_file, maxfds, 0);
+       if (rc && verbose) {
+               pr_info("%s: Error walking file tree %s rc=%d for test\n",
+                       prog, ldirname, rc);
+               goto empty_map;
+       } else if (rc < 0) {
+               /* Make build fail */
+               free_arch_std_events();
+               ret = 1;
+               goto out_free_mapfile;
+       } else if (rc) {
+               goto empty_map;
+       }
+
        if (close_table)
                print_events_table_suffix(eventsfp);
 
index 1692529..b3d1bf1 100644 (file)
@@ -14,6 +14,7 @@ perf-y += evsel-roundtrip-name.o
 perf-y += evsel-tp-sched.o
 perf-y += fdarray.o
 perf-y += pmu.o
+perf-y += pmu-events.o
 perf-y += hists_common.o
 perf-y += hists_link.o
 perf-y += hists_filter.o
index 54d9516..b6322eb 100644 (file)
@@ -72,6 +72,10 @@ static struct test generic_tests[] = {
                .desc = "Parse perf pmu format",
                .func = test__pmu,
        },
+       {
+               .desc = "PMU events",
+               .func = test__pmu_events,
+       },
        {
                .desc = "DSO data read",
                .func = test__dso_data,
index c850d16..5d0c3a9 100644 (file)
@@ -28,9 +28,13 @@ endif
 
 PARALLEL_OPT=
 ifeq ($(SET_PARALLEL),1)
-  cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
-  ifeq ($(cores),0)
-    cores := 1
+  ifeq ($(JOBS),)
+    cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+    ifeq ($(cores),0)
+      cores := 1
+    endif
+  else
+    cores=$(JOBS)
   endif
   PARALLEL_OPT="-j$(cores)"
 endif
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
new file mode 100644 (file)
index 0000000..d64261d
--- /dev/null
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "pmu.h"
+#include "tests.h"
+#include <errno.h>
+#include <stdio.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "../pmu-events/pmu-events.h"
+
+struct perf_pmu_test_event {
+       struct pmu_event event;
+
+       /* extra events for aliases */
+       const char *alias_str;
+
+       /*
+        * Note: For when PublicDescription does not exist in the JSON, we
+        * will have no long_desc in pmu_event.long_desc, but long_desc may
+        * be set in the alias.
+        */
+       const char *alias_long_desc;
+};
+
+static struct perf_pmu_test_event test_cpu_events[] = {
+       {
+               .event = {
+                       .name = "bp_l1_btb_correct",
+                       .event = "event=0x8a",
+                       .desc = "L1 BTB Correction",
+                       .topic = "branch",
+               },
+               .alias_str = "event=0x8a",
+               .alias_long_desc = "L1 BTB Correction",
+       },
+       {
+               .event = {
+                       .name = "bp_l2_btb_correct",
+                       .event = "event=0x8b",
+                       .desc = "L2 BTB Correction",
+                       .topic = "branch",
+               },
+               .alias_str = "event=0x8b",
+               .alias_long_desc = "L2 BTB Correction",
+       },
+       {
+               .event = {
+                       .name = "segment_reg_loads.any",
+                       .event = "umask=0x80,period=200000,event=0x6",
+                       .desc = "Number of segment register loads",
+                       .topic = "other",
+               },
+               .alias_str = "umask=0x80,(null)=0x30d40,event=0x6",
+               .alias_long_desc = "Number of segment register loads",
+       },
+       {
+               .event = {
+                       .name = "dispatch_blocked.any",
+                       .event = "umask=0x20,period=200000,event=0x9",
+                       .desc = "Memory cluster signals to block micro-op dispatch for any reason",
+                       .topic = "other",
+               },
+               .alias_str = "umask=0x20,(null)=0x30d40,event=0x9",
+               .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason",
+       },
+       {
+               .event = {
+                       .name = "eist_trans",
+                       .event = "umask=0x0,period=200000,event=0x3a",
+                       .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+                       .topic = "other",
+               },
+               .alias_str = "umask=0,(null)=0x30d40,event=0x3a",
+               .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+       },
+       { /* sentinel */
+               .event = {
+                       .name = NULL,
+               },
+       },
+};
+
+static struct perf_pmu_test_event test_uncore_events[] = {
+       {
+               .event = {
+                       .name = "uncore_hisi_ddrc.flux_wcmd",
+                       .event = "event=0x2",
+                       .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
+                       .topic = "uncore",
+                       .long_desc = "DDRC write commands",
+                       .pmu = "hisi_sccl,ddrc",
+               },
+               .alias_str = "event=0x2",
+               .alias_long_desc = "DDRC write commands",
+       },
+       {
+               .event = {
+                       .name = "unc_cbo_xsnp_response.miss_eviction",
+                       .event = "umask=0x81,event=0x22",
+                       .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+                       .topic = "uncore",
+                       .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+                       .pmu = "uncore_cbox",
+               },
+               .alias_str = "umask=0x81,event=0x22",
+               .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+       },
+       { /* sentinel */
+               .event = {
+                       .name = NULL,
+               },
+       }
+};
+
+const int total_test_events_size = ARRAY_SIZE(test_uncore_events);
+
+static bool is_same(const char *reference, const char *test)
+{
+       if (!reference && !test)
+               return true;
+
+       if (reference && !test)
+               return false;
+
+       if (!reference && test)
+               return false;
+
+       return !strcmp(reference, test);
+}
+
+static struct pmu_events_map *__test_pmu_get_events_map(void)
+{
+       struct pmu_events_map *map;
+
+       for (map = &pmu_events_map[0]; map->cpuid; map++) {
+               if (!strcmp(map->cpuid, "testcpu"))
+                       return map;
+       }
+
+       pr_err("could not find test events map\n");
+
+       return NULL;
+}
+
+/* Verify generated events from pmu-events.c is as expected */
+static int __test_pmu_event_table(void)
+{
+       struct pmu_events_map *map = __test_pmu_get_events_map();
+       struct pmu_event *table;
+       int map_events = 0, expected_events;
+
+       /* ignore 2x sentinels */
+       expected_events = ARRAY_SIZE(test_cpu_events) +
+                         ARRAY_SIZE(test_uncore_events) - 2;
+
+       if (!map)
+               return -1;
+
+       for (table = map->table; table->name; table++) {
+               struct perf_pmu_test_event *test;
+               struct pmu_event *te;
+               bool found = false;
+
+               if (table->pmu)
+                       test = &test_uncore_events[0];
+               else
+                       test = &test_cpu_events[0];
+
+               te = &test->event;
+
+               for (; te->name; test++, te = &test->event) {
+                       if (strcmp(table->name, te->name))
+                               continue;
+                       found = true;
+                       map_events++;
+
+                       if (!is_same(table->desc, te->desc)) {
+                               pr_debug2("testing event table %s: mismatched desc, %s vs %s\n",
+                                         table->name, table->desc, te->desc);
+                               return -1;
+                       }
+
+                       if (!is_same(table->topic, te->topic)) {
+                               pr_debug2("testing event table %s: mismatched topic, %s vs %s\n",
+                                         table->name, table->topic,
+                                         te->topic);
+                               return -1;
+                       }
+
+                       if (!is_same(table->long_desc, te->long_desc)) {
+                               pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n",
+                                         table->name, table->long_desc,
+                                         te->long_desc);
+                               return -1;
+                       }
+
+                       if (!is_same(table->unit, te->unit)) {
+                               pr_debug2("testing event table %s: mismatched unit, %s vs %s\n",
+                                         table->name, table->unit,
+                                         te->unit);
+                               return -1;
+                       }
+
+                       if (!is_same(table->perpkg, te->perpkg)) {
+                               pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n",
+                                         table->name, table->perpkg,
+                                         te->perpkg);
+                               return -1;
+                       }
+
+                       if (!is_same(table->metric_expr, te->metric_expr)) {
+                               pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n",
+                                         table->name, table->metric_expr,
+                                         te->metric_expr);
+                               return -1;
+                       }
+
+                       if (!is_same(table->metric_name, te->metric_name)) {
+                               pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n",
+                                         table->name,  table->metric_name,
+                                         te->metric_name);
+                               return -1;
+                       }
+
+                       if (!is_same(table->deprecated, te->deprecated)) {
+                               pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n",
+                                         table->name, table->deprecated,
+                                         te->deprecated);
+                               return -1;
+                       }
+
+                       pr_debug("testing event table %s: pass\n", table->name);
+               }
+
+               if (!found) {
+                       pr_err("testing event table: could not find event %s\n",
+                              table->name);
+                       return -1;
+               }
+       }
+
+       if (map_events != expected_events) {
+               pr_err("testing event table: found %d, but expected %d\n",
+                      map_events, expected_events);
+               return -1;
+       }
+
+       return 0;
+}
+
+static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases)
+{
+       struct perf_pmu_alias *alias;
+
+       list_for_each_entry(alias, aliases, list)
+               if (!strcmp(test_event, alias->name))
+                       return alias;
+
+       return NULL;
+}
+
+/* Verify aliases are as expected */
+static int __test__pmu_event_aliases(char *pmu_name, int *count)
+{
+       struct perf_pmu_test_event *test;
+       struct pmu_event *te;
+       struct perf_pmu *pmu;
+       LIST_HEAD(aliases);
+       int res = 0;
+       bool use_uncore_table;
+       struct pmu_events_map *map = __test_pmu_get_events_map();
+
+       if (!map)
+               return -1;
+
+       if (is_pmu_core(pmu_name)) {
+               test = &test_cpu_events[0];
+               use_uncore_table = false;
+       } else {
+               test = &test_uncore_events[0];
+               use_uncore_table = true;
+       }
+
+       pmu = zalloc(sizeof(*pmu));
+       if (!pmu)
+               return -1;
+
+       pmu->name = pmu_name;
+
+       pmu_add_cpu_aliases_map(&aliases, pmu, map);
+
+       for (te = &test->event; te->name; test++, te = &test->event) {
+               struct perf_pmu_alias *alias = find_alias(te->name, &aliases);
+
+               if (!alias) {
+                       bool uncore_match = pmu_uncore_alias_match(pmu_name,
+                                                                  te->pmu);
+
+                       if (use_uncore_table && !uncore_match) {
+                               pr_debug3("testing aliases PMU %s: skip matching alias %s\n",
+                                         pmu_name, te->name);
+                               continue;
+                       }
+
+                       pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n",
+                                 pmu_name, te->name);
+                       res = -1;
+                       break;
+               }
+
+               if (!is_same(alias->desc, te->desc)) {
+                       pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n",
+                                 pmu_name, alias->desc, te->desc);
+                       res = -1;
+                       break;
+               }
+
+               if (!is_same(alias->long_desc, test->alias_long_desc)) {
+                       pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n",
+                                 pmu_name, alias->long_desc,
+                                 test->alias_long_desc);
+                       res = -1;
+                       break;
+               }
+
+               if (!is_same(alias->str, test->alias_str)) {
+                       pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n",
+                                 pmu_name, alias->str, test->alias_str);
+                       res = -1;
+                       break;
+               }
+
+               if (!is_same(alias->topic, te->topic)) {
+                       pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n",
+                                 pmu_name, alias->topic, te->topic);
+                       res = -1;
+                       break;
+               }
+
+               (*count)++;
+               pr_debug2("testing aliases PMU %s: matched event %s\n",
+                         pmu_name, alias->name);
+       }
+
+       free(pmu);
+       return res;
+}
+
+int test__pmu_events(struct test *test __maybe_unused,
+                    int subtest __maybe_unused)
+{
+       struct perf_pmu *pmu = NULL;
+
+       if (__test_pmu_event_table())
+               return -1;
+
+       while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+               int count = 0;
+
+               if (list_empty(&pmu->format)) {
+                       pr_debug2("skipping testing PMU %s\n", pmu->name);
+                       continue;
+               }
+
+               if (__test__pmu_event_aliases(pmu->name, &count)) {
+                       pr_debug("testing PMU %s aliases: failed\n", pmu->name);
+                       return -1;
+               }
+
+               if (count == 0)
+                       pr_debug3("testing PMU %s aliases: no events to match\n",
+                                 pmu->name);
+               else
+                       pr_debug("testing PMU %s aliases: pass\n", pmu->name);
+       }
+
+       return 0;
+}
index 14239e4..6186569 100644 (file)
@@ -151,6 +151,9 @@ static bool samples_same(const struct perf_sample *s1,
        if (type & PERF_SAMPLE_PHYS_ADDR)
                COMP(phys_addr);
 
+       if (type & PERF_SAMPLE_CGROUP)
+               COMP(cgroup);
+
        if (type & PERF_SAMPLE_AUX) {
                COMP(aux_sample.size);
                if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -230,6 +233,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
                        .regs   = regs,
                },
                .phys_addr      = 113,
+               .cgroup         = 114,
                .aux_sample     = {
                        .size   = sizeof(aux_data),
                        .data   = (void *)aux_data,
@@ -336,7 +340,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
         * were added.  Please actually update the test rather than just change
         * the condition below.
         */
-       if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) {
+       if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) {
                pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
                return -1;
        }
index 9a160fe..61a1ab0 100644 (file)
@@ -49,6 +49,7 @@ int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
 int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
 int test__syscall_openat_tp_fields(struct test *test, int subtest);
 int test__pmu(struct test *test, int subtest);
+int test__pmu_events(struct test *test, int subtest);
 int test__attr(struct test *test, int subtest);
 int test__dso_data(struct test *test, int subtest);
 int test__dso_data_cache(struct test *test, int subtest);
index f36dee4..487e54e 100644 (file)
@@ -677,7 +677,7 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
        return browser->title ? browser->title(browser, bf, size) : 0;
 }
 
-static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, int key)
+static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, size_t size, int key)
 {
        switch (key) {
        case K_TIMER: {
@@ -703,7 +703,7 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
                        ui_browser__warn_lost_events(&browser->b);
                }
 
-               hist_browser__title(browser, title, sizeof(title));
+               hist_browser__title(browser, title, size);
                ui_browser__show_title(&browser->b, title);
                break;
        }
@@ -764,13 +764,13 @@ int hist_browser__run(struct hist_browser *browser, const char *help,
        if (ui_browser__show(&browser->b, title, "%s", help) < 0)
                return -1;
 
-       if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, key))
+       if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
                goto out;
 
        while (1) {
                key = ui_browser__run(&browser->b, delay_secs);
 
-               if (hist_browser__handle_hotkey(browser, warn_lost_event, title, key))
+               if (hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
                        break;
        }
 out:
@@ -2465,13 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
        return 0;
 }
 
+static struct symbol *symbol__new_unresolved(u64 addr, struct map *map)
+{
+       struct annotated_source *src;
+       struct symbol *sym;
+       char name[64];
+
+       snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr);
+
+       sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name);
+       if (sym) {
+               src = symbol__hists(sym, 1);
+               if (!src) {
+                       symbol__delete(sym);
+                       return NULL;
+               }
+
+               dso__insert_symbol(map->dso, sym);
+       }
+
+       return sym;
+}
+
 static int
 add_annotate_opt(struct hist_browser *browser __maybe_unused,
                 struct popup_action *act, char **optstr,
-                struct map_symbol *ms)
+                struct map_symbol *ms,
+                u64 addr)
 {
-       if (ms->sym == NULL || ms->map->dso->annotate_warned ||
-           symbol__annotation(ms->sym)->src == NULL)
+       if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned)
+               return 0;
+
+       if (!ms->sym)
+               ms->sym = symbol__new_unresolved(addr, ms->map);
+
+       if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL)
                return 0;
 
        if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0)
@@ -2964,7 +2992,8 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
        "s             Switch to another data file in PWD\n"
        "t             Zoom into current Thread\n"
        "V             Verbose (DSO names in callchains, etc)\n"
-       "/             Filter symbol by name";
+       "/             Filter symbol by name\n"
+       "0-9           Sort by event n in group";
        static const char top_help[] = HIST_BROWSER_HELP_COMMON
        "P             Print histograms to perf.hist.N\n"
        "t             Zoom into current Thread\n"
@@ -3025,6 +3054,31 @@ do_hotkey:                // key came straight from options ui__popup_menu()
                         * go to the next or previous
                         */
                        goto out_free_stack;
+               case '0' ... '9':
+                       if (!symbol_conf.event_group ||
+                           evsel->core.nr_members < 2) {
+                               snprintf(buf, sizeof(buf),
+                                        "Sort by index only available with group events!");
+                               helpline = buf;
+                               continue;
+                       }
+
+                       if (key - '0' == symbol_conf.group_sort_idx)
+                               continue;
+
+                       symbol_conf.group_sort_idx = key - '0';
+
+                       if (symbol_conf.group_sort_idx >= evsel->core.nr_members) {
+                               snprintf(buf, sizeof(buf),
+                                        "Max event group index to sort is %d (index from 0 to %d)",
+                                        evsel->core.nr_members - 1,
+                                        evsel->core.nr_members - 1);
+                               helpline = buf;
+                               continue;
+                       }
+
+                       key = K_RELOAD;
+                       goto out_free_stack;
                case 'a':
                        if (!hists__has(hists, sym)) {
                                ui_browser__warning(&browser->b, delay_secs * 2,
@@ -3033,21 +3087,45 @@ do_hotkey:               // key came straight from options ui__popup_menu()
                                continue;
                        }
 
-                       if (browser->selection == NULL ||
-                           browser->selection->sym == NULL ||
-                           browser->selection->map->dso->annotate_warned)
+                       if (!browser->selection ||
+                           !browser->selection->map ||
+                           !browser->selection->map->dso ||
+                           browser->selection->map->dso->annotate_warned) {
                                continue;
+                       }
 
-                       if (symbol__annotation(browser->selection->sym)->src == NULL) {
-                               ui_browser__warning(&browser->b, delay_secs * 2,
-                                                   "No samples for the \"%s\" symbol.\n\n"
-                                                   "Probably appeared just in a callchain",
-                                                   browser->selection->sym->name);
-                               continue;
+                       if (!browser->selection->sym) {
+                               if (!browser->he_selection)
+                                       continue;
+
+                               if (sort__mode == SORT_MODE__BRANCH) {
+                                       bi = browser->he_selection->branch_info;
+                                       if (!bi || !bi->to.ms.map)
+                                               continue;
+
+                                       actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map);
+                                       actions->ms.map = bi->to.ms.map;
+                               } else {
+                                       actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip,
+                                                                                browser->selection->map);
+                                       actions->ms.map = browser->selection->map;
+                               }
+
+                               if (!actions->ms.sym)
+                                       continue;
+                       } else {
+                               if (symbol__annotation(browser->selection->sym)->src == NULL) {
+                                       ui_browser__warning(&browser->b, delay_secs * 2,
+                                               "No samples for the \"%s\" symbol.\n\n"
+                                               "Probably appeared just in a callchain",
+                                               browser->selection->sym->name);
+                                       continue;
+                               }
+
+                               actions->ms.map = browser->selection->map;
+                               actions->ms.sym = browser->selection->sym;
                        }
 
-                       actions->ms.map = browser->selection->map;
-                       actions->ms.sym = browser->selection->sym;
                        do_annotate(browser, actions);
                        continue;
                case 'P':
@@ -3219,17 +3297,20 @@ do_hotkey:               // key came straight from options ui__popup_menu()
                        nr_options += add_annotate_opt(browser,
                                                       &actions[nr_options],
                                                       &options[nr_options],
-                                                      &bi->from.ms);
+                                                      &bi->from.ms,
+                                                      bi->from.al_addr);
                        if (bi->to.ms.sym != bi->from.ms.sym)
                                nr_options += add_annotate_opt(browser,
                                                        &actions[nr_options],
                                                        &options[nr_options],
-                                                       &bi->to.ms);
+                                                       &bi->to.ms,
+                                                       bi->to.al_addr);
                } else {
                        nr_options += add_annotate_opt(browser,
                                                       &actions[nr_options],
                                                       &options[nr_options],
-                                                      browser->selection);
+                                                      browser->selection,
+                                                      browser->he_selection->ip);
                }
 skip_annotation:
                nr_options += add_thread_opt(browser, &actions[nr_options],
@@ -3440,6 +3521,7 @@ browse_hists:
                                        pos = perf_evsel__prev(pos);
                                goto browse_hists;
                        case K_SWITCH_INPUT_DATA:
+                       case K_RELOAD:
                        case 'q':
                        case CTRL('c'):
                                goto out;
index f736755..025f4c7 100644 (file)
@@ -151,15 +151,90 @@ static int field_cmp(u64 field_a, u64 field_b)
        return 0;
 }
 
+static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b,
+                               hpp_field_fn get_field, int nr_members,
+                               u64 **fields_a, u64 **fields_b)
+{
+       u64 *fa = calloc(nr_members, sizeof(*fa)),
+           *fb = calloc(nr_members, sizeof(*fb));
+       struct hist_entry *pair;
+
+       if (!fa || !fb)
+               goto out_free;
+
+       list_for_each_entry(pair, &a->pairs.head, pairs.node) {
+               struct evsel *evsel = hists_to_evsel(pair->hists);
+               fa[perf_evsel__group_idx(evsel)] = get_field(pair);
+       }
+
+       list_for_each_entry(pair, &b->pairs.head, pairs.node) {
+               struct evsel *evsel = hists_to_evsel(pair->hists);
+               fb[perf_evsel__group_idx(evsel)] = get_field(pair);
+       }
+
+       *fields_a = fa;
+       *fields_b = fb;
+       return 0;
+out_free:
+       free(fa);
+       free(fb);
+       *fields_a = *fields_b = NULL;
+       return -1;
+}
+
+static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b,
+                                hpp_field_fn get_field, int idx)
+{
+       struct evsel *evsel = hists_to_evsel(a->hists);
+       u64 *fields_a, *fields_b;
+       int cmp, nr_members, ret, i;
+
+       cmp = field_cmp(get_field(a), get_field(b));
+       if (!perf_evsel__is_group_event(evsel))
+               return cmp;
+
+       nr_members = evsel->core.nr_members;
+       if (idx < 1 || idx >= nr_members)
+               return cmp;
+
+       ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+       if (ret) {
+               ret = cmp;
+               goto out;
+       }
+
+       ret = field_cmp(fields_a[idx], fields_b[idx]);
+       if (ret)
+               goto out;
+
+       for (i = 1; i < nr_members; i++) {
+               if (i != idx) {
+                       ret = field_cmp(fields_a[i], fields_b[i]);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+out:
+       free(fields_a);
+       free(fields_b);
+
+       return ret;
+}
+
 static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
                       hpp_field_fn get_field)
 {
        s64 ret;
        int i, nr_members;
        struct evsel *evsel;
-       struct hist_entry *pair;
        u64 *fields_a, *fields_b;
 
+       if (symbol_conf.group_sort_idx && symbol_conf.event_group) {
+               return __hpp__group_sort_idx(a, b, get_field,
+                                            symbol_conf.group_sort_idx);
+       }
+
        ret = field_cmp(get_field(a), get_field(b));
        if (ret || !symbol_conf.event_group)
                return ret;
@@ -169,22 +244,10 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
                return ret;
 
        nr_members = evsel->core.nr_members;
-       fields_a = calloc(nr_members, sizeof(*fields_a));
-       fields_b = calloc(nr_members, sizeof(*fields_b));
-
-       if (!fields_a || !fields_b)
+       i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+       if (i)
                goto out;
 
-       list_for_each_entry(pair, &a->pairs.head, pairs.node) {
-               evsel = hists_to_evsel(pair->hists);
-               fields_a[perf_evsel__group_idx(evsel)] = get_field(pair);
-       }
-
-       list_for_each_entry(pair, &b->pairs.head, pairs.node) {
-               evsel = hists_to_evsel(pair->hists);
-               fields_b[perf_evsel__group_idx(evsel)] = get_field(pair);
-       }
-
        for (i = 1; i < nr_members; i++) {
                ret = field_cmp(fields_a[i], fields_b[i]);
                if (ret)
index fbfac29..04cc4e5 100644 (file)
@@ -25,5 +25,6 @@
 #define K_ERROR         -2
 #define K_RESIZE -3
 #define K_SWITCH_INPUT_DATA -4
+#define K_RELOAD -5
 
 #endif /* _PERF_KEYSYMS_H_ */
index 07c7759..2d88069 100644 (file)
@@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
 #define ANNOTATION__CYCLES_WIDTH 6
 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19
 #define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION_DUMMY_LEN   256
 
 struct annotation_options {
        bool hide_src_code,
index 5bc9d3b..b73fb78 100644 (file)
@@ -191,3 +191,83 @@ int parse_cgroups(const struct option *opt, const char *str,
        }
        return 0;
 }
+
+static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
+                                       bool create, const char *path)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct cgroup *cgrp;
+
+       while (*p != NULL) {
+               parent = *p;
+               cgrp = rb_entry(parent, struct cgroup, node);
+
+               if (cgrp->id == id)
+                       return cgrp;
+
+               if (cgrp->id < id)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
+       }
+
+       if (!create)
+               return NULL;
+
+       cgrp = malloc(sizeof(*cgrp));
+       if (cgrp == NULL)
+               return NULL;
+
+       cgrp->name = strdup(path);
+       if (cgrp->name == NULL) {
+               free(cgrp);
+               return NULL;
+       }
+
+       cgrp->fd = -1;
+       cgrp->id = id;
+       refcount_set(&cgrp->refcnt, 1);
+
+       rb_link_node(&cgrp->node, parent, p);
+       rb_insert_color(&cgrp->node, root);
+
+       return cgrp;
+}
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+                              const char *path)
+{
+       struct cgroup *cgrp;
+
+       down_write(&env->cgroups.lock);
+       cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path);
+       up_write(&env->cgroups.lock);
+       return cgrp;
+}
+
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id)
+{
+       struct cgroup *cgrp;
+
+       down_read(&env->cgroups.lock);
+       cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL);
+       up_read(&env->cgroups.lock);
+       return cgrp;
+}
+
+void perf_env__purge_cgroups(struct perf_env *env)
+{
+       struct rb_node *node;
+       struct cgroup *cgrp;
+
+       down_write(&env->cgroups.lock);
+       while (!RB_EMPTY_ROOT(&env->cgroups.tree)) {
+               node = rb_first(&env->cgroups.tree);
+               cgrp = rb_entry(node, struct cgroup, node);
+
+               rb_erase(node, &env->cgroups.tree);
+               cgroup__put(cgrp);
+       }
+       up_write(&env->cgroups.lock);
+}
index 2ec11f0..e98d597 100644 (file)
@@ -3,16 +3,19 @@
 #define __CGROUP_H__
 
 #include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include "util/env.h"
 
 struct option;
 
 struct cgroup {
-       char *name;
-       int fd;
-       refcount_t refcnt;
+       struct rb_node          node;
+       u64                     id;
+       char                    *name;
+       int                     fd;
+       refcount_t              refcnt;
 };
 
-
 extern int nr_cgroups; /* number of explicit cgroups defined */
 
 struct cgroup *cgroup__get(struct cgroup *cgroup);
@@ -26,4 +29,10 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
 
 int parse_cgroups(const struct option *opt, const char *str, int unset);
 
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+                              const char *path);
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
+
+void perf_env__purge_cgroups(struct perf_env *env);
+
 #endif /* __CGROUP_H__ */
index 983b738..dc5c5e6 100644 (file)
@@ -317,7 +317,7 @@ static void set_max_cpu_num(void)
 
        /* get the highest possible cpu number for a sparse allocation */
        ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
-       if (ret == PATH_MAX) {
+       if (ret >= PATH_MAX) {
                pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
                goto out;
        }
@@ -328,7 +328,7 @@ static void set_max_cpu_num(void)
 
        /* get the highest present cpu number for a sparse allocation */
        ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
-       if (ret == PATH_MAX) {
+       if (ret >= PATH_MAX) {
                pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
                goto out;
        }
@@ -356,7 +356,7 @@ static void set_max_node_num(void)
 
        /* get the highest possible cpu number for a sparse allocation */
        ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
-       if (ret == PATH_MAX) {
+       if (ret >= PATH_MAX) {
                pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
                goto out;
        }
@@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void)
                return 0;
 
        n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
-       if (n == PATH_MAX) {
+       if (n >= PATH_MAX) {
                pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
                return -1;
        }
@@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void)
                        continue;
 
                n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
-               if (n == PATH_MAX) {
+               if (n >= PATH_MAX) {
                        pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
                        continue;
                }
index 591707c..9394717 100644 (file)
@@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
        return 0;
 }
 
+static bool dso_id__empty(struct dso_id *id)
+{
+       if (!id)
+               return true;
+
+       return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+static void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+       dso->id.maj = id->maj;
+       dso->id.min = id->min;
+       dso->id.ino = id->ino;
+       dso->id.ino_generation = id->ino_generation;
+}
+
 static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
 {
        /*
         * The second is always dso->id, so zeroes if not set, assume passing
         * NULL for a means a zeroed id
         */
-       if (a == NULL)
+       if (dso_id__empty(a) || dso_id__empty(b))
                return 0;
 
        return __dso_id__cmp(a, b);
@@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
 {
        struct dso *dso = __dsos__find_id(dsos, name, id, false);
+
+       if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id))
+               dso__inject_id(dso, id);
+
        return dso ? dso : __dsos__addnew_id(dsos, name, id);
 }
 
index 4154f94..fadc597 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
 #include "bpf-event.h"
+#include "cgroup.h"
 #include <errno.h>
 #include <sys/utsname.h>
 #include <bpf/libbpf.h>
@@ -168,6 +169,7 @@ void perf_env__exit(struct perf_env *env)
        int i;
 
        perf_env__purge_bpf(env);
+       perf_env__purge_cgroups(env);
        zfree(&env->hostname);
        zfree(&env->os_release);
        zfree(&env->version);
index 11d05ae..7632075 100644 (file)
@@ -88,6 +88,12 @@ struct perf_env {
                u32                     btfs_cnt;
        } bpf_progs;
 
+       /* same reason as above (for perf-top) */
+       struct {
+               struct rw_semaphore     lock;
+               struct rb_root          tree;
+       } cgroups;
+
        /* For fast cpu to numa node lookup via perf_env__numa_node */
        int                     *numa_map;
        int                      nr_numa_map;
index c5447ff..dc0e112 100644 (file)
@@ -54,6 +54,7 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_NAMESPACES]                = "NAMESPACES",
        [PERF_RECORD_KSYMBOL]                   = "KSYMBOL",
        [PERF_RECORD_BPF_EVENT]                 = "BPF_EVENT",
+       [PERF_RECORD_CGROUP]                    = "CGROUP",
        [PERF_RECORD_HEADER_ATTR]               = "ATTR",
        [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
@@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
        return ret;
 }
 
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
+{
+       return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
+                      event->cgroup.id, event->cgroup.path);
+}
+
 int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct perf_sample *sample,
@@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
        return machine__process_namespaces_event(machine, event, sample);
 }
 
+int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+                              union perf_event *event,
+                              struct perf_sample *sample,
+                              struct machine *machine)
+{
+       return machine__process_cgroup_event(machine, event, sample);
+}
+
 int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct perf_sample *sample,
@@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
        case PERF_RECORD_NAMESPACES:
                ret += perf_event__fprintf_namespaces(event, fp);
                break;
+       case PERF_RECORD_CGROUP:
+               ret += perf_event__fprintf_cgroup(event, fp);
+               break;
        case PERF_RECORD_MMAP2:
                ret += perf_event__fprintf_mmap2(event, fp);
                break;
@@ -599,10 +617,23 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
                al->sym = map__find_symbol(al->map, al->addr);
        }
 
-       if (symbol_conf.sym_list &&
-               (!al->sym || !strlist__has_entry(symbol_conf.sym_list,
-                                               al->sym->name))) {
-               al->filtered |= (1 << HIST_FILTER__SYMBOL);
+       if (symbol_conf.sym_list) {
+               int ret = 0;
+               char al_addr_str[32];
+               size_t sz = sizeof(al_addr_str);
+
+               if (al->sym) {
+                       ret = strlist__has_entry(symbol_conf.sym_list,
+                                               al->sym->name);
+               }
+               if (!(ret && al->sym)) {
+                       snprintf(al_addr_str, sz, "0x%"PRIx64,
+                               al->map->unmap_ip(al->map, al->sym->start));
+                       ret = strlist__has_entry(symbol_conf.sym_list,
+                                               al_addr_str);
+               }
+               if (!ret)
+                       al->filtered |= (1 << HIST_FILTER__SYMBOL);
        }
 
        return 0;
index 3cda40a..b8289f1 100644 (file)
@@ -135,6 +135,7 @@ struct perf_sample {
        u32 raw_size;
        u64 data_src;
        u64 phys_addr;
+       u64 cgroup;
        u32 flags;
        u16 insn_len;
        u8  cpumode;
@@ -322,6 +323,10 @@ int perf_event__process_namespaces(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
                                   struct machine *machine);
+int perf_event__process_cgroup(struct perf_tool *tool,
+                              union perf_event *event,
+                              struct perf_sample *sample,
+                              struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
                             union perf_event *event,
                             struct perf_sample *sample,
@@ -377,6 +382,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
index 816d930..eb880ef 100644 (file)
@@ -1104,6 +1104,11 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
        if (opts->record_namespaces)
                attr->namespaces  = track;
 
+       if (opts->record_cgroup) {
+               attr->cgroup = track && !perf_missing_features.cgroup;
+               perf_evsel__set_sample_bit(evsel, CGROUP);
+       }
+
        if (opts->record_switch_events)
                attr->context_switch = track;
 
@@ -1287,6 +1292,7 @@ void perf_evsel__exit(struct evsel *evsel)
        perf_thread_map__put(evsel->core.threads);
        zfree(&evsel->group_name);
        zfree(&evsel->name);
+       zfree(&evsel->pmu_name);
        perf_evsel__object.fini(evsel);
 }
 
@@ -1788,7 +1794,11 @@ try_fallback:
         * Must probe features in the order they were added to the
         * perf_event_attr interface.
         */
-       if (!perf_missing_features.branch_hw_idx &&
+        if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+               perf_missing_features.cgroup = true;
+               pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
+               goto out_close;
+        } else if (!perf_missing_features.branch_hw_idx &&
            (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
                perf_missing_features.branch_hw_idx = true;
                pr_debug2("switching off branch HW index support\n");
@@ -2266,6 +2276,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
                array++;
        }
 
+       data->cgroup = 0;
+       if (type & PERF_SAMPLE_CGROUP) {
+               data->cgroup = *array;
+               array++;
+       }
+
        if (type & PERF_SAMPLE_AUX) {
                OVERFLOW_CHECK_u64(array);
                sz = *array++;
index 3380474..53187c5 100644 (file)
@@ -120,6 +120,7 @@ struct perf_missing_features {
        bool bpf;
        bool aux_output;
        bool branch_hw_idx;
+       bool cgroup;
 };
 
 extern struct perf_missing_features perf_missing_features;
index e74a5ac..283a69f 100644 (file)
@@ -10,6 +10,7 @@
 #include "mem-events.h"
 #include "session.h"
 #include "namespaces.h"
+#include "cgroup.h"
 #include "sort.h"
 #include "units.h"
 #include "evlist.h"
@@ -194,6 +195,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
        }
 
+       hists__new_col_len(hists, HISTC_CGROUP, 6);
        hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
        hists__new_col_len(hists, HISTC_CPU, 3);
        hists__new_col_len(hists, HISTC_SOCKET, 6);
@@ -222,6 +224,16 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
        if (h->trace_output)
                hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+
+       if (h->cgroup) {
+               const char *cgrp_name = "unknown";
+               struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env,
+                                                  h->cgroup);
+               if (cgrp != NULL)
+                       cgrp_name = cgrp->name;
+
+               hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name));
+       }
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -691,6 +703,7 @@ __hists__add_entry(struct hists *hists,
                        .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
                        .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
                },
+               .cgroup = sample->cgroup,
                .ms = {
                        .maps   = al->maps,
                        .map    = al->map,
index 0aa63ae..4141295 100644 (file)
@@ -38,6 +38,7 @@ enum hist_column {
        HISTC_THREAD,
        HISTC_COMM,
        HISTC_CGROUP_ID,
+       HISTC_CGROUP,
        HISTC_PARENT,
        HISTC_CPU,
        HISTC_SOCKET,
@@ -536,6 +537,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
 #define K_LEFT  -1000
 #define K_RIGHT -2000
 #define K_SWITCH_INPUT_DATA -3000
+#define K_RELOAD -4000
 #endif
 
 unsigned int hists__sort_list_width(struct hists *hists);
index fd14f14..97142e9 100644 (file)
@@ -33,6 +33,7 @@
 #include "asm/bug.h"
 #include "bpf-event.h"
 #include <internal/lib.h> // page_size
+#include "cgroup.h"
 
 #include <linux/ctype.h>
 #include <symbol/kallsyms.h>
@@ -654,6 +655,22 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused,
        return err;
 }
 
+int machine__process_cgroup_event(struct machine *machine,
+                                 union perf_event *event,
+                                 struct perf_sample *sample __maybe_unused)
+{
+       struct cgroup *cgrp;
+
+       if (dump_trace)
+               perf_event__fprintf_cgroup(event, stdout);
+
+       cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path);
+       if (cgrp == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
+
 int machine__process_lost_event(struct machine *machine __maybe_unused,
                                union perf_event *event, struct perf_sample *sample __maybe_unused)
 {
@@ -1878,6 +1895,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
                ret = machine__process_mmap_event(machine, event, sample); break;
        case PERF_RECORD_NAMESPACES:
                ret = machine__process_namespaces_event(machine, event, sample); break;
+       case PERF_RECORD_CGROUP:
+               ret = machine__process_cgroup_event(machine, event, sample); break;
        case PERF_RECORD_MMAP2:
                ret = machine__process_mmap2_event(machine, event, sample); break;
        case PERF_RECORD_FORK:
index be0a930..fa1be9e 100644 (file)
@@ -128,6 +128,9 @@ int machine__process_switch_event(struct machine *machine,
 int machine__process_namespaces_event(struct machine *machine,
                                      union perf_event *event,
                                      struct perf_sample *sample);
+int machine__process_cgroup_event(struct machine *machine,
+                                 union perf_event *event,
+                                 struct perf_sample *sample);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
                                struct perf_sample *sample);
 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
index c3a8c70..926449a 100644 (file)
@@ -95,13 +95,16 @@ struct egroup {
 static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                                      const char **ids,
                                      int idnum,
-                                     struct evsel **metric_events)
+                                     struct evsel **metric_events,
+                                     bool *evlist_used)
 {
        struct evsel *ev;
-       int i = 0;
+       int i = 0, j = 0;
        bool leader_found;
 
        evlist__for_each_entry (perf_evlist, ev) {
+               if (evlist_used[j++])
+                       continue;
                if (!strcmp(ev->name, ids[i])) {
                        if (!metric_events[i])
                                metric_events[i] = ev;
@@ -109,22 +112,17 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                        if (i == idnum)
                                break;
                } else {
-                       if (i + 1 == idnum) {
-                               /* Discard the whole match and start again */
-                               i = 0;
-                               memset(metric_events, 0,
-                                      sizeof(struct evsel *) * idnum);
-                               continue;
-                       }
-
-                       if (!strcmp(ev->name, ids[i]))
-                               metric_events[i] = ev;
-                       else {
-                               /* Discard the whole match and start again */
-                               i = 0;
-                               memset(metric_events, 0,
-                                      sizeof(struct evsel *) * idnum);
-                               continue;
+                       /* Discard the whole match and start again */
+                       i = 0;
+                       memset(metric_events, 0,
+                               sizeof(struct evsel *) * idnum);
+
+                       if (!strcmp(ev->name, ids[i])) {
+                               if (!metric_events[i])
+                                       metric_events[i] = ev;
+                               i++;
+                               if (i == idnum)
+                                       break;
                        }
                }
        }
@@ -146,7 +144,10 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                            !strcmp(ev->name, metric_events[i]->name)) {
                                ev->metric_leader = metric_events[i];
                        }
+                       j++;
                }
+               ev = metric_events[i];
+               evlist_used[ev->idx] = true;
        }
 
        return metric_events[0];
@@ -162,6 +163,13 @@ static int metricgroup__setup_events(struct list_head *groups,
        int ret = 0;
        struct egroup *eg;
        struct evsel *evsel;
+       bool *evlist_used;
+
+       evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool));
+       if (!evlist_used) {
+               ret = -ENOMEM;
+               return ret;
+       }
 
        list_for_each_entry (eg, groups, nd) {
                struct evsel **metric_events;
@@ -172,7 +180,7 @@ static int metricgroup__setup_events(struct list_head *groups,
                        break;
                }
                evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
-                                        metric_events);
+                                        metric_events, evlist_used);
                if (!evsel) {
                        pr_debug("Cannot resolve %s: %s\n",
                                        eg->metric_name, eg->metric_expr);
@@ -196,6 +204,9 @@ static int metricgroup__setup_events(struct list_head *groups,
                expr->metric_events = metric_events;
                list_add(&expr->nd, &me->head);
        }
+
+       free(evlist_used);
+
        return ret;
 }
 
index a7dc0b0..1010774 100644 (file)
@@ -1449,7 +1449,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
                evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL,
                                    auto_merge_stats, NULL);
                if (evsel) {
-                       evsel->pmu_name = name;
+                       evsel->pmu_name = name ? strdup(name) : NULL;
                        evsel->use_uncore_alias = use_uncore_alias;
                        return 0;
                } else {
@@ -1497,7 +1497,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
                evsel->snapshot = info.snapshot;
                evsel->metric_expr = info.metric_expr;
                evsel->metric_name = info.metric_name;
-               evsel->pmu_name = name;
+               evsel->pmu_name = name ? strdup(name) : NULL;
                evsel->use_uncore_alias = use_uncore_alias;
                evsel->percore = config_term_percore(&evsel->config_terms);
        }
@@ -1547,7 +1547,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                                if (!parse_events_add_pmu(parse_state, list,
                                                          pmu->name, head,
                                                          true, true)) {
-                                       pr_debug("%s -> %s/%s/\n", config,
+                                       pr_debug("%s -> %s/%s/\n", str,
                                                 pmu->name, alias->str);
                                        ok++;
                                }
index 7b1c8ee..baa48f2 100644 (file)
@@ -342,11 +342,13 @@ bpf-output                                        { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT
         * Because the prefix cycles is mixed up with cpu-cycles.
         * loads and stores are mixed up with cache event
         */
-cycles-ct                                      { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-cycles-t                                       { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-loads                                      { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-stores                                     { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-topdown-[a-z-]+                                        { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+cycles-ct                              |
+cycles-t                               |
+mem-loads                              |
+mem-stores                             |
+topdown-[a-z-]+                                |
+tx-capacity-[a-z-]+                    |
+el-capacity-[a-z-]+                    { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
 
 L1-dcache|l1-d|l1d|L1-data             |
 L1-icache|l1-i|l1i|L1-instruction      |
index 355d345..b94fa07 100644 (file)
@@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
                bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
                bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
                bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
+               bit_name(CGROUP),
                { .name = NULL, }
        };
 #undef bit_name
@@ -132,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(ksymbol, p_unsigned);
        PRINT_ATTRf(bpf_event, p_unsigned);
        PRINT_ATTRf(aux_output, p_unsigned);
+       PRINT_ATTRf(cgroup, p_unsigned);
 
        PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
        PRINT_ATTRf(bp_type, p_unsigned);
index 8b99fd3..ef6a63f 100644 (file)
@@ -21,7 +21,6 @@
 #include "pmu.h"
 #include "parse-events.h"
 #include "header.h"
-#include "pmu-events/pmu-events.h"
 #include "string2.h"
 #include "strbuf.h"
 #include "fncache.h"
@@ -699,7 +698,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
        return map;
 }
 
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
 {
        char *tmp = NULL, *tok, *str;
        bool res;
@@ -744,16 +743,11 @@ out:
  * to the current running CPU. Then, add all PMU events from that table
  * as aliases.
  */
-static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+                            struct pmu_events_map *map)
 {
        int i;
-       struct pmu_events_map *map;
        const char *name = pmu->name;
-
-       map = perf_pmu__find_map(pmu);
-       if (!map)
-               return;
-
        /*
         * Found a matching PMU events table. Create aliases
         */
@@ -788,6 +782,17 @@ new_alias:
        }
 }
 
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+       struct pmu_events_map *map;
+
+       map = perf_pmu__find_map(pmu);
+       if (!map)
+               return;
+
+       pmu_add_cpu_aliases_map(head, pmu, map);
+}
+
 struct perf_event_attr * __weak
 perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 {
@@ -979,12 +984,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
        struct parse_events_term *t;
 
        list_for_each_entry(t, head_terms, list) {
-               if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
-                       if (!strcmp(t->config, term->config)) {
-                               t->used = true;
-                               *value = t->val.num;
-                               return 0;
-                       }
+               if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM &&
+                   t->config && !strcmp(t->config, term->config)) {
+                       t->used = true;
+                       *value = t->val.num;
+                       return 0;
                }
        }
 
@@ -1395,6 +1399,11 @@ static void wordwrap(char *s, int start, int max, int corr)
        }
 }
 
+bool is_pmu_core(const char *name)
+{
+       return !strcmp(name, "cpu") || is_arm_pmu_core(name);
+}
+
 void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
                        bool long_desc, bool details_flag, bool deprecated)
 {
index 6737e3d..5fb3f16 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/perf_event.h>
 #include <stdbool.h>
 #include "parse-events.h"
+#include "pmu-events/pmu-events.h"
 
 struct perf_evsel_config_term;
 
@@ -87,6 +88,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
 
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
+bool is_pmu_core(const char *name);
 void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
                      bool long_desc, bool details_flag,
                      bool deprecated);
@@ -97,8 +99,11 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
 int perf_pmu__test(void);
 
 struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+                            struct pmu_events_map *map);
 
 struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
 
 int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
index e7279ea..a9d9c14 100644 (file)
@@ -34,3 +34,4 @@ util/string.c
 util/symbol_fprintf.c
 util/units.c
 util/affinity.c
+util/rwsem.c
index 5421fd2..2431645 100644 (file)
@@ -34,6 +34,7 @@ struct record_opts {
        bool          auxtrace_snapshot_on_exit;
        bool          auxtrace_sample_mode;
        bool          record_namespaces;
+       bool          record_cgroup;
        bool          record_switch_events;
        bool          all_kernel;
        bool          all_user;
index 8c1b27c..2c372cf 100644 (file)
@@ -694,6 +694,9 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
 
        bf[0] = 0;
 
+       if (!regs || !regs->regs)
+               return 0;
+
        for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
                u64 val = regs->regs[i++];
 
index 055b00a..0b0bfe5 100644 (file)
@@ -471,6 +471,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->comm = process_event_stub;
        if (tool->namespaces == NULL)
                tool->namespaces = process_event_stub;
+       if (tool->cgroup == NULL)
+               tool->cgroup = process_event_stub;
        if (tool->fork == NULL)
                tool->fork = process_event_stub;
        if (tool->exit == NULL)
@@ -1436,6 +1438,8 @@ static int machines__deliver_event(struct machines *machines,
                return tool->comm(tool, event, sample, machine);
        case PERF_RECORD_NAMESPACES:
                return tool->namespaces(tool, event, sample, machine);
+       case PERF_RECORD_CGROUP:
+               return tool->cgroup(tool, event, sample, machine);
        case PERF_RECORD_FORK:
                return tool->fork(tool, event, sample, machine);
        case PERF_RECORD_EXIT:
index 8a065a6..347b2c0 100644 (file)
@@ -3,7 +3,7 @@ from subprocess import Popen, PIPE
 from re import sub
 
 cc = getenv("CC")
-cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
 
 def clang_has_option(option):
     return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
index ab0cfd7..f14cc72 100644 (file)
@@ -12,6 +12,7 @@
 #include "cacheline.h"
 #include "comm.h"
 #include "map.h"
+#include "maps.h"
 #include "symbol.h"
 #include "map_symbol.h"
 #include "branch.h"
@@ -25,6 +26,8 @@
 #include "mem-events.h"
 #include "annotate.h"
 #include "time-utils.h"
+#include "cgroup.h"
+#include "machine.h"
 #include <linux/kernel.h>
 #include <linux/string.h>
 
@@ -634,6 +637,39 @@ struct sort_entry sort_cgroup_id = {
        .se_width_idx   = HISTC_CGROUP_ID,
 };
 
+/* --sort cgroup */
+
+static int64_t
+sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return right->cgroup - left->cgroup;
+}
+
+static int hist_entry__cgroup_snprintf(struct hist_entry *he,
+                                      char *bf, size_t size,
+                                      unsigned int width __maybe_unused)
+{
+       const char *cgrp_name = "N/A";
+
+       if (he->cgroup) {
+               struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env,
+                                                  he->cgroup);
+               if (cgrp != NULL)
+                       cgrp_name = cgrp->name;
+               else
+                       cgrp_name = "unknown";
+       }
+
+       return repsep_snprintf(bf, size, "%s", cgrp_name);
+}
+
+struct sort_entry sort_cgroup = {
+       .se_header      = "Cgroup",
+       .se_cmp         = sort__cgroup_cmp,
+       .se_snprintf    = hist_entry__cgroup_snprintf,
+       .se_width_idx   = HISTC_CGROUP,
+};
+
 /* --sort socket */
 
 static int64_t
@@ -869,7 +905,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
        if (he->branch_info) {
                struct addr_map_symbol *from = &he->branch_info->from;
 
-               return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width);
+               return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
+                                                he->level, bf, size, width);
        }
 
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -881,7 +918,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
        if (he->branch_info) {
                struct addr_map_symbol *to = &he->branch_info->to;
 
-               return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width);
+               return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
+                                                he->level, bf, size, width);
        }
 
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -1658,6 +1696,7 @@ static struct sort_dimension common_sort_dimensions[] = {
        DIM(SORT_TRACE, "trace", sort_trace),
        DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
        DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+       DIM(SORT_CGROUP, "cgroup", sort_cgroup),
        DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
        DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
        DIM(SORT_TIME, "time", sort_time),
index 6c862d6..cfa6ac6 100644 (file)
@@ -101,6 +101,7 @@ struct hist_entry {
        struct thread           *thread;
        struct comm             *comm;
        struct namespace_id     cgroup_id;
+       u64                     cgroup;
        u64                     ip;
        u64                     transaction;
        s32                     socket;
@@ -224,6 +225,7 @@ enum sort_type {
        SORT_TRACE,
        SORT_SYM_SIZE,
        SORT_DSO_SIZE,
+       SORT_CGROUP,
        SORT_CGROUP_ID,
        SORT_SYM_IPC_NULL,
        SORT_TIME,
index 76c6052..9e757d1 100644 (file)
@@ -115,11 +115,11 @@ static void aggr_printout(struct perf_stat_config *config,
                        fprintf(config->output, "S%d-D%d-C%*d%s",
                                cpu_map__id_to_socket(id),
                                cpu_map__id_to_die(id),
-                               config->csv_output ? 0 : -5,
+                               config->csv_output ? 0 : -3,
                                cpu_map__id_to_cpu(id), config->csv_sep);
                } else {
-                       fprintf(config->output, "CPU%*d%s ",
-                               config->csv_output ? 0 : -5,
+                       fprintf(config->output, "CPU%*d%s",
+                               config->csv_output ? 0 : -7,
                                evsel__cpus(evsel)->map[id],
                                config->csv_sep);
                }
index 1965aef..be5b493 100644 (file)
@@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss)
        close(ss->fd);
 }
 
-bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 {
-       return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+       /*
+        * Usually vmlinux is an ELF file with type ET_EXEC for most
+        * architectures; except Arm64 kernel is linked with option
+        * '-share', so need to check type ET_DYN.
+        */
+       return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL ||
+              ehdr.e_type == ET_DYN;
 }
 
 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
index 10f1ec3..b916afb 100644 (file)
@@ -73,6 +73,7 @@ struct symbol_conf {
        const char      *symfs;
        int             res_sample;
        int             pad_output_len_dso;
+       int             group_sort_idx;
 };
 
 extern struct symbol_conf symbol_conf;
index 3f28af3..a661b12 100644 (file)
@@ -16,6 +16,7 @@
 #include "util/synthetic-events.h"
 #include "util/target.h"
 #include "util/time-utils.h"
+#include "util/cgroup.h"
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -414,6 +415,127 @@ out:
        return rc;
 }
 
+#ifdef HAVE_FILE_HANDLE
+static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+                                        union perf_event *event,
+                                        char *path, size_t mount_len,
+                                        perf_event__handler_t process,
+                                        struct machine *machine)
+{
+       size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path);
+       size_t path_len = strlen(path) - mount_len + 1;
+       struct {
+               struct file_handle fh;
+               uint64_t cgroup_id;
+       } handle;
+       int mount_id;
+
+       while (path_len % sizeof(u64))
+               path[mount_len + path_len++] = '\0';
+
+       memset(&event->cgroup, 0, event_size);
+
+       event->cgroup.header.type = PERF_RECORD_CGROUP;
+       event->cgroup.header.size = event_size + path_len + machine->id_hdr_size;
+
+       handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+       if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) {
+               pr_debug("stat failed: %s\n", path);
+               return -1;
+       }
+
+       event->cgroup.id = handle.cgroup_id;
+       strncpy(event->cgroup.path, path + mount_len, path_len);
+       memset(event->cgroup.path + path_len, 0, machine->id_hdr_size);
+
+       if (perf_tool__process_synth_event(tool, event, machine, process) < 0) {
+               pr_debug("process synth event failed\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+                                       union perf_event *event,
+                                       char *path, size_t mount_len,
+                                       perf_event__handler_t process,
+                                       struct machine *machine)
+{
+       size_t pos = strlen(path);
+       DIR *d;
+       struct dirent *dent;
+       int ret = 0;
+
+       if (perf_event__synthesize_cgroup(tool, event, path, mount_len,
+                                         process, machine) < 0)
+               return -1;
+
+       d = opendir(path);
+       if (d == NULL) {
+               pr_debug("failed to open directory: %s\n", path);
+               return -1;
+       }
+
+       while ((dent = readdir(d)) != NULL) {
+               if (dent->d_type != DT_DIR)
+                       continue;
+               if (!strcmp(dent->d_name, ".") ||
+                   !strcmp(dent->d_name, ".."))
+                       continue;
+
+               /* any sane path should be less than PATH_MAX */
+               if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX)
+                       continue;
+
+               if (path[pos - 1] != '/')
+                       strcat(path, "/");
+               strcat(path, dent->d_name);
+
+               ret = perf_event__walk_cgroup_tree(tool, event, path,
+                                                  mount_len, process, machine);
+               if (ret < 0)
+                       break;
+
+               path[pos] = '\0';
+       }
+
+       closedir(d);
+       return ret;
+}
+
+int perf_event__synthesize_cgroups(struct perf_tool *tool,
+                                  perf_event__handler_t process,
+                                  struct machine *machine)
+{
+       union perf_event event;
+       char cgrp_root[PATH_MAX];
+       size_t mount_len;  /* length of mount point in the path */
+
+       if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
+               pr_debug("cannot find cgroup mount point\n");
+               return -1;
+       }
+
+       mount_len = strlen(cgrp_root);
+       /* make sure the path starts with a slash (after mount point) */
+       strcat(cgrp_root, "/");
+
+       if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len,
+                                        process, machine) < 0)
+               return -1;
+
+       return 0;
+}
+#else
+int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+                                  perf_event__handler_t process __maybe_unused,
+                                  struct machine *machine __maybe_unused)
+{
+       return -1;
+}
+#endif
+
 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
                                   struct machine *machine)
 {
@@ -1230,6 +1352,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
        if (type & PERF_SAMPLE_PHYS_ADDR)
                result += sizeof(u64);
 
+       if (type & PERF_SAMPLE_CGROUP)
+               result += sizeof(u64);
+
        if (type & PERF_SAMPLE_AUX) {
                result += sizeof(u64);
                result += sample->aux_sample.size;
@@ -1404,6 +1529,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
                array++;
        }
 
+       if (type & PERF_SAMPLE_CGROUP) {
+               *array = sample->cgroup;
+               array++;
+       }
+
        if (type & PERF_SAMPLE_AUX) {
                sz = sample->aux_sample.size;
                *array++ = sz;
index baead0c..e7a3e95 100644 (file)
@@ -45,6 +45,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handl
 int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
 int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
 int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
index 2abbf66..3fb67bd 100644 (file)
@@ -46,6 +46,7 @@ struct perf_tool {
                        mmap2,
                        comm,
                        namespaces,
+                       cgroup,
                        fork,
                        exit,
                        lost,
@@ -78,6 +79,7 @@ struct perf_tool {
        bool            ordered_events;
        bool            ordering_requires_timestamps;
        bool            namespace_events;
+       bool            cgroup_events;
        bool            no_warn;
        enum show_feature_header show_feat_hdr;
 };