Merge remote-tracking branch 'torvalds/master' into perf/core
authorArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 29 Mar 2021 13:39:10 +0000 (10:39 -0300)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 29 Mar 2021 13:39:10 +0000 (10:39 -0300)
To pick up fixes sent via perf/urgent and in the BPF tools/ directories.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
139 files changed:
MAINTAINERS
tools/include/linux/types.h
tools/perf/Documentation/perf-buildid-cache.txt
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm64/util/machine.c
tools/perf/arch/arm64/util/perf_regs.c
tools/perf/arch/mips/Makefile [new file with mode: 0644]
tools/perf/arch/mips/entry/syscalls/mksyscalltbl [new file with mode: 0644]
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl [new file with mode: 0644]
tools/perf/arch/mips/include/dwarf-regs-table.h [new file with mode: 0644]
tools/perf/arch/mips/include/perf_regs.h [new file with mode: 0644]
tools/perf/arch/mips/util/Build [new file with mode: 0644]
tools/perf/arch/mips/util/dwarf-regs.c [new file with mode: 0644]
tools/perf/arch/mips/util/perf_regs.c [new file with mode: 0644]
tools/perf/arch/mips/util/unwind-libunwind.c [new file with mode: 0644]
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/event.c [new file with mode: 0644]
tools/perf/arch/powerpc/util/evsel.c [new file with mode: 0644]
tools/perf/arch/powerpc/util/kvm-stat.c
tools/perf/arch/powerpc/util/utils_header.h
tools/perf/arch/x86/tests/bp-modify.c
tools/perf/arch/x86/util/perf_regs.c
tools/perf/bench/epoll-wait.c
tools/perf/bench/inject-buildid.c
tools/perf/bench/numa.c
tools/perf/builtin-annotate.c
tools/perf/builtin-daemon.c
tools/perf/builtin-diff.c
tools/perf/builtin-lock.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/check-headers.sh
tools/perf/examples/bpf/augmented_raw_syscalls.c
tools/perf/jvmti/jvmti_agent.c
tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/pmu-events/arch/powerpc/power8/metrics.json
tools/perf/pmu-events/arch/powerpc/power9/metrics.json
tools/perf/pmu-events/jevents.c
tools/perf/scripts/python/netdev-times.py
tools/perf/tests/bp_signal.c
tools/perf/tests/code-reading.c
tools/perf/tests/demangle-ocaml-test.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/parse-events.c
tools/perf/tests/parse-metric.c
tools/perf/tests/shell/buildid.sh
tools/perf/tests/shell/daemon.sh
tools/perf/tests/shell/stat+csv_summary.sh [new file with mode: 0755]
tools/perf/tests/shell/stat_bpf_counters.sh [new file with mode: 0755]
tools/perf/tests/topology.c
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/hists.c
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_counter.c
tools/perf/util/bpf_counter.h
tools/perf/util/bpf_skel/bperf.h [new file with mode: 0644]
tools/perf/util/bpf_skel/bperf_follower.bpf.c [new file with mode: 0644]
tools/perf/util/bpf_skel/bperf_leader.bpf.c [new file with mode: 0644]
tools/perf/util/bpf_skel/bperf_u.h [new file with mode: 0644]
tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
tools/perf/util/call-path.h
tools/perf/util/callchain.c
tools/perf/util/config.c
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
tools/perf/util/cs-etm.c
tools/perf/util/cs-etm.h
tools/perf/util/data-convert-bt.c
tools/perf/util/demangle-java.c
tools/perf/util/dso.h
tools/perf/util/dwarf-aux.c
tools/perf/util/dwarf-aux.h
tools/perf/util/dwarf-regs.c
tools/perf/util/event.h
tools/perf/util/events_stats.h
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/expr.h
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/intel-pt.c
tools/perf/util/levenshtein.c
tools/perf/util/libunwind/arm64.c
tools/perf/util/libunwind/x86_32.c
tools/perf/util/llvm-utils.c
tools/perf/util/machine.c
tools/perf/util/map.h
tools/perf/util/mem-events.h
tools/perf/util/metricgroup.c
tools/perf/util/metricgroup.h
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/s390-cpumsf.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-display.c
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/strbuf.h
tools/perf/util/strfilter.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol_fprintf.c
tools/perf/util/synthetic-events.c
tools/perf/util/syscalltbl.c
tools/perf/util/target.h
tools/perf/util/thread-stack.h
tools/perf/util/units.c
tools/perf/util/units.h
tools/perf/util/unwind-libunwind-local.c

index fb2a363..ab2edbb 100644 (file)
@@ -14017,8 +14017,10 @@ R:     Mark Rutland <mark.rutland@arm.com>
 R:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
 R:     Jiri Olsa <jolsa@redhat.com>
 R:     Namhyung Kim <namhyung@kernel.org>
+L:     linux-perf-users@vger.kernel.org
 L:     linux-kernel@vger.kernel.org
 S:     Supported
+W:     https://perf.wiki.kernel.org/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 F:     arch/*/events/*
 F:     arch/*/events/*/*
index e9c5a21..6e14a53 100644 (file)
@@ -61,6 +61,9 @@ typedef __u32 __bitwise __be32;
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
 typedef struct {
        int counter;
 } atomic_t;
index bb167e3..cd8ce6e 100644 (file)
@@ -57,7 +57,7 @@ OPTIONS
 -u::
 --update=::
        Update specified file of the cache. Note that this doesn't remove
-       older entires since those may be still needed for annotating old
+       older entries since those may be still needed for annotating old
        (or remote) perf.data. Only if there is already a cache which has
        exactly same build-id, that is replaced by new one. It can be used
        to update kallsyms and kernel dso to vmlinux in order to support
index 153bde1..154a1ce 100644 (file)
@@ -393,6 +393,12 @@ annotate.*::
 
                This option works with tui, stdio2 browsers.
 
+       annotate.demangle::
+               Demangle symbol names to human readable form. Default is 'true'.
+
+       annotate.demangle_kernel::
+               Demangle kernel symbol names to human readable form. Default is 'true'.
+
 hist.*::
        hist.percentage::
                This option control the way to calculate overhead of filtered entries -
index f546b5e..f51f000 100644 (file)
@@ -112,6 +112,8 @@ OPTIONS
        - ins_lat: Instruction latency in core cycles. This is the global instruction
          latency
        - local_ins_lat: Local instruction latency version
+       - p_stage_cyc: The number of cycles spent in a pipeline stage.
+         Currently supported only on powerpc.
 
        By default, comm, dso and symbol keys are used.
        (i.e. --sort comm,dso,symbol)
@@ -224,6 +226,9 @@ OPTIONS
 --dump-raw-trace::
         Dump raw trace in ASCII.
 
+--disable-order::
+       Disable raw trace ordering.
+
 -g::
 --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
         Display call chains using type, min percent threshold, print limit,
@@ -472,7 +477,7 @@ OPTIONS
        but probably we'll make the default not to show the switch-on/off events
         on the --group mode and if there is only one event besides the off/on ones,
        go straight to the histogram browser, just like 'perf report' with no events
-       explicitely specified does.
+       explicitly specified does.
 
 --itrace::
        Options for decoding instruction tracing data. The options are:
index 08a1714..6ec5960 100644 (file)
@@ -93,6 +93,17 @@ report::
 
         1.102235068 seconds time elapsed
 
+--bpf-counters::
+       Use BPF programs to aggregate readings from perf_events.  This
+       allows multiple perf-stat sessions that are counting the same metric (cycles,
+       instructions, etc.) to share hardware counters.
+
+--bpf-attr-map::
+       With option "--bpf-counters", different perf-stat sessions share
+       information about shared BPF programs and maps via a pinned hashmap.
+       Use "--bpf-attr-map" to specify the path of this pinned hashmap.
+       The default path is /sys/fs/bpf/perf_attr_map.
+
 ifdef::HAVE_LIBPFM[]
 --pfm-events events::
 Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
@@ -142,7 +153,10 @@ Do not aggregate counts across all monitored CPUs.
 
 -n::
 --null::
-        null run - don't start any counters
+null run - Don't start any counters.
+
+This can be useful to measure just elapsed wall-clock time - or to assess the
+raw overhead of perf stat itself, without running any counters.
 
 -v::
 --verbose::
@@ -468,6 +482,15 @@ convenient for post processing.
 --summary::
 Print summary for interval mode (-I).
 
+--no-csv-summary::
+Don't print 'summary' at the first column for CSV summary output.
+This option must be used with -x and --summary.
+
+This option can be enabled in perf config by setting the variable
+'stat.no-csv-summary'.
+
+$ perf config stat.no-csv-summary=true
+
 EXAMPLES
 --------
 
index ee20246..bba5ffb 100644 (file)
@@ -317,7 +317,7 @@ Default is to monitor all CPUS.
        but probably we'll make the default not to show the switch-on/off events
         on the --group mode and if there is only one event besides the off/on ones,
        go straight to the histogram browser, just like 'perf top' with no events
-       explicitely specified does.
+       explicitly specified does.
 
 --stitch-lbr::
        Show callgraph with stitched LBRs, which may have more complete
index c130a3c..9c330cd 100644 (file)
@@ -76,3 +76,15 @@ SEE ALSO
 linkperf:perf-stat[1], linkperf:perf-top[1],
 linkperf:perf-record[1], linkperf:perf-report[1],
 linkperf:perf-list[1]
+
+linkperf:perf-annotate[1], linkperf:perf-archive[1],
+linkperf:perf-bench[1], linkperf:perf-buildid-cache[1],
+linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
+linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
+linkperf:perf-evlist[1], linkperf:perf-ftrace[1],
+linkperf:perf-help[1], linkperf:perf-inject[1],
+linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1],
+linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1],
+linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1],
+linkperf:perf-script[1], linkperf:perf-test[1],
+linkperf:perf-trace[1], linkperf:perf-version[1]
index d8e59d3..3514fe9 100644 (file)
@@ -32,7 +32,7 @@ ifneq ($(NO_SYSCALL_TABLE),1)
       NO_SYSCALL_TABLE := 0
     endif
   else
-    ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390))
+    ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips))
       NO_SYSCALL_TABLE := 0
     endif
   endif
@@ -87,6 +87,13 @@ ifeq ($(ARCH),s390)
   CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
 endif
 
+ifeq ($(ARCH),mips)
+  NO_PERF_REGS := 0
+  CFLAGS += -I$(OUTPUT)arch/mips/include/generated
+  CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi
+  LIBUNWIND_LIBS = -lunwind -lunwind-mips
+endif
+
 ifeq ($(NO_PERF_REGS),0)
   $(call detected,CONFIG_PERF_REGS)
 endif
index f6e6096..090fb9d 100644 (file)
@@ -1007,6 +1007,7 @@ python-clean:
 SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
 SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
 SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 
 ifdef BUILD_BPF_SKEL
 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
@@ -1021,7 +1022,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT)
                OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
 
 $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
-       $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \
+       $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \
          -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
 
 $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
index c25c878..d942f11 100644 (file)
@@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,
        char path[PATH_MAX];
        int err = -EINVAL;
        u32 val;
+       u64 contextid;
 
        ptr = container_of(itr, struct cs_etm_recording, itr);
        cs_etm_pmu = ptr->cs_etm_pmu;
@@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,
                goto out;
        }
 
+       /* If the user has configured PID tracing, respect it. */
+       contextid = evsel->core.attr.config &
+                       (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2));
+
        /*
-        * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
-        * is supported:
-        *  0b00000 Context ID tracing is not supported.
-        *  0b00100 Maximum of 32-bit Context ID size.
-        *  All other values are reserved.
+        * If user doesn't configure the contextid format, parse PMU format and
+        * enable PID tracing according to the "contextid" format bits:
+        *
+        *   If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1;
+        *   If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2.
         */
-       val = BMVAL(val, 5, 9);
-       if (!val || val != 0x4) {
-               err = -EINVAL;
-               goto out;
+       if (!contextid)
+               contextid = perf_pmu__format_bits(&cs_etm_pmu->format,
+                                                 "contextid");
+
+       if (contextid & BIT(ETM_OPT_CTXTID)) {
+               /*
+                * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID
+                * tracing is supported:
+                *  0b00000 Context ID tracing is not supported.
+                *  0b00100 Maximum of 32-bit Context ID size.
+                *  All other values are reserved.
+                */
+               val = BMVAL(val, 5, 9);
+               if (!val || val != 0x4) {
+                       pr_err("%s: CONTEXTIDR_EL1 isn't supported\n",
+                              CORESIGHT_ETM_PMU_NAME);
+                       err = -EINVAL;
+                       goto out;
+               }
+       }
+
+       if (contextid & BIT(ETM_OPT_CTXTID2)) {
+               /*
+                * TRCIDR2.VMIDOPT[30:29] != 0 and
+                * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid)
+                * We can't support CONTEXTIDR in VMID if the size of the
+                * virtual context id is < 32bit.
+                * Any value of VMIDSIZE >= 4 (i.e., >= 32bit) is fine for us.
+                */
+               if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) {
+                       pr_err("%s: CONTEXTIDR_EL2 isn't supported\n",
+                              CORESIGHT_ETM_PMU_NAME);
+                       err = -EINVAL;
+                       goto out;
+               }
        }
 
        /* All good, let the kernel know */
-       evsel->core.attr.config |= (1 << ETM_OPT_CTXTID);
+       evsel->core.attr.config |= contextid;
        err = 0;
 
 out:
-
        return err;
 }
 
@@ -173,17 +208,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
                    !cpu_map__has(online_cpus, i))
                        continue;
 
-               if (option & ETM_SET_OPT_CTXTID) {
+               if (option & BIT(ETM_OPT_CTXTID)) {
                        err = cs_etm_set_context_id(itr, evsel, i);
                        if (err)
                                goto out;
                }
-               if (option & ETM_SET_OPT_TS) {
+               if (option & BIT(ETM_OPT_TS)) {
                        err = cs_etm_set_timestamp(itr, evsel, i);
                        if (err)
                                goto out;
                }
-               if (option & ~(ETM_SET_OPT_MASK))
+               if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)))
                        /* Nothing else is currently supported */
                        goto out;
        }
@@ -343,7 +378,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                        opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
                }
 
-               /* Snapshost size can't be bigger than the auxtrace area */
+               /* Snapshot size can't be bigger than the auxtrace area */
                if (opts->auxtrace_snapshot_size >
                                opts->auxtrace_mmap_pages * (size_t)page_size) {
                        pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
@@ -410,7 +445,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                evsel__set_sample_bit(cs_etm_evsel, CPU);
 
                err = cs_etm_set_option(itr, cs_etm_evsel,
-                                       ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
+                                       BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS));
                if (err)
                        goto out;
        }
@@ -489,7 +524,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
                config |= BIT(ETM4_CFG_BIT_TS);
        if (config_opts & BIT(ETM_OPT_RETSTK))
                config |= BIT(ETM4_CFG_BIT_RETSTK);
-
+       if (config_opts & BIT(ETM_OPT_CTXTID2))
+               config |= BIT(ETM4_CFG_BIT_VMID) |
+                         BIT(ETM4_CFG_BIT_VMID_OPT);
        return config;
 }
 
@@ -576,7 +613,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
                                struct auxtrace_record *itr,
                                struct perf_record_auxtrace_info *info)
 {
-       u32 increment;
+       u32 increment, nr_trc_params;
        u64 magic;
        struct cs_etm_recording *ptr =
                        container_of(itr, struct cs_etm_recording, itr);
@@ -611,6 +648,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
 
                /* How much space was used */
                increment = CS_ETMV4_PRIV_MAX;
+               nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR;
        } else {
                magic = __perf_cs_etmv3_magic;
                /* Get configuration register */
@@ -628,11 +666,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
 
                /* How much space was used */
                increment = CS_ETM_PRIV_MAX;
+               nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR;
        }
 
        /* Build generic header portion */
        info->priv[*offset + CS_ETM_MAGIC] = magic;
        info->priv[*offset + CS_ETM_CPU] = cpu;
+       info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params;
        /* Where the next CPU entry should start from */
        *offset += increment;
 }
@@ -678,7 +718,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
 
        /* First fill out the session header */
        info->type = PERF_AUXTRACE_CS_ETM;
-       info->priv[CS_HEADER_VERSION_0] = 0;
+       info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION;
        info->priv[CS_PMU_TYPE_CPUS] = type << 32;
        info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
        info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
index 40c5e0b..7e77142 100644 (file)
@@ -6,11 +6,11 @@
 #include "debug.h"
 #include "symbol.h"
 
-/* On arm64, kernel text segment start at high memory address,
+/* On arm64, kernel text segment starts at high memory address,
  * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only samll amount of
+ * address, like 0xffff 0000 00ax xxxx. When only small amount of
  * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may be reach 2G.
+ * and start of kernel text segment may reach 2G.
  * Therefore do not fill this gap and do not assign it to the kernel dso map.
  */
 
index 2518cde..476b037 100644 (file)
@@ -108,7 +108,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
                /* [sp], [sp, NUM] or [sp,NUM] */
                new_len = 7;    /* + ( % s p ) NULL */
 
-               /* If the arugment is [sp], need to fill offset '0' */
+               /* If the argument is [sp], need to fill offset '0' */
                if (rm[2].rm_so == -1)
                        new_len += 1;
                else
diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile
new file mode 100644 (file)
index 0000000..8bc0907
--- /dev/null
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+
+# Syscall table generation for perf
+out    := $(OUTPUT)arch/mips/include/generated/asm
+header := $(out)/syscalls_n64.c
+sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls
+sysdef := $(sysprf)/syscall_n64.tbl
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+       $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
+
+clean::
+       $(call QUIET_CLEAN, mips) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
new file mode 100644 (file)
index 0000000..fb1f494
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# s390 script.
+#
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+SYSCALL_TBL=$1
+
+if ! test -r $SYSCALL_TBL; then
+       echo "Could not read input file" >&2
+       exit 1
+fi
+
+create_table()
+{
+       local max_nr nr abi sc discard
+
+       echo 'static const char *syscalltbl_mips_n64[] = {'
+       while read nr abi sc discard; do
+               printf '\t[%d] = "%s",\n' $nr $sc
+               max_nr=$nr
+       done
+       echo '};'
+       echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr"
+}
+
+grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL  \
+       |sort -k1 -n                                    \
+       |create_table
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
new file mode 100644 (file)
index 0000000..9164969
--- /dev/null
@@ -0,0 +1,358 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for mips
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The <abi> is always "n64" for this file.
+#
+0      n64     read                            sys_read
+1      n64     write                           sys_write
+2      n64     open                            sys_open
+3      n64     close                           sys_close
+4      n64     stat                            sys_newstat
+5      n64     fstat                           sys_newfstat
+6      n64     lstat                           sys_newlstat
+7      n64     poll                            sys_poll
+8      n64     lseek                           sys_lseek
+9      n64     mmap                            sys_mips_mmap
+10     n64     mprotect                        sys_mprotect
+11     n64     munmap                          sys_munmap
+12     n64     brk                             sys_brk
+13     n64     rt_sigaction                    sys_rt_sigaction
+14     n64     rt_sigprocmask                  sys_rt_sigprocmask
+15     n64     ioctl                           sys_ioctl
+16     n64     pread64                         sys_pread64
+17     n64     pwrite64                        sys_pwrite64
+18     n64     readv                           sys_readv
+19     n64     writev                          sys_writev
+20     n64     access                          sys_access
+21     n64     pipe                            sysm_pipe
+22     n64     _newselect                      sys_select
+23     n64     sched_yield                     sys_sched_yield
+24     n64     mremap                          sys_mremap
+25     n64     msync                           sys_msync
+26     n64     mincore                         sys_mincore
+27     n64     madvise                         sys_madvise
+28     n64     shmget                          sys_shmget
+29     n64     shmat                           sys_shmat
+30     n64     shmctl                          sys_old_shmctl
+31     n64     dup                             sys_dup
+32     n64     dup2                            sys_dup2
+33     n64     pause                           sys_pause
+34     n64     nanosleep                       sys_nanosleep
+35     n64     getitimer                       sys_getitimer
+36     n64     setitimer                       sys_setitimer
+37     n64     alarm                           sys_alarm
+38     n64     getpid                          sys_getpid
+39     n64     sendfile                        sys_sendfile64
+40     n64     socket                          sys_socket
+41     n64     connect                         sys_connect
+42     n64     accept                          sys_accept
+43     n64     sendto                          sys_sendto
+44     n64     recvfrom                        sys_recvfrom
+45     n64     sendmsg                         sys_sendmsg
+46     n64     recvmsg                         sys_recvmsg
+47     n64     shutdown                        sys_shutdown
+48     n64     bind                            sys_bind
+49     n64     listen                          sys_listen
+50     n64     getsockname                     sys_getsockname
+51     n64     getpeername                     sys_getpeername
+52     n64     socketpair                      sys_socketpair
+53     n64     setsockopt                      sys_setsockopt
+54     n64     getsockopt                      sys_getsockopt
+55     n64     clone                           __sys_clone
+56     n64     fork                            __sys_fork
+57     n64     execve                          sys_execve
+58     n64     exit                            sys_exit
+59     n64     wait4                           sys_wait4
+60     n64     kill                            sys_kill
+61     n64     uname                           sys_newuname
+62     n64     semget                          sys_semget
+63     n64     semop                           sys_semop
+64     n64     semctl                          sys_old_semctl
+65     n64     shmdt                           sys_shmdt
+66     n64     msgget                          sys_msgget
+67     n64     msgsnd                          sys_msgsnd
+68     n64     msgrcv                          sys_msgrcv
+69     n64     msgctl                          sys_old_msgctl
+70     n64     fcntl                           sys_fcntl
+71     n64     flock                           sys_flock
+72     n64     fsync                           sys_fsync
+73     n64     fdatasync                       sys_fdatasync
+74     n64     truncate                        sys_truncate
+75     n64     ftruncate                       sys_ftruncate
+76     n64     getdents                        sys_getdents
+77     n64     getcwd                          sys_getcwd
+78     n64     chdir                           sys_chdir
+79     n64     fchdir                          sys_fchdir
+80     n64     rename                          sys_rename
+81     n64     mkdir                           sys_mkdir
+82     n64     rmdir                           sys_rmdir
+83     n64     creat                           sys_creat
+84     n64     link                            sys_link
+85     n64     unlink                          sys_unlink
+86     n64     symlink                         sys_symlink
+87     n64     readlink                        sys_readlink
+88     n64     chmod                           sys_chmod
+89     n64     fchmod                          sys_fchmod
+90     n64     chown                           sys_chown
+91     n64     fchown                          sys_fchown
+92     n64     lchown                          sys_lchown
+93     n64     umask                           sys_umask
+94     n64     gettimeofday                    sys_gettimeofday
+95     n64     getrlimit                       sys_getrlimit
+96     n64     getrusage                       sys_getrusage
+97     n64     sysinfo                         sys_sysinfo
+98     n64     times                           sys_times
+99     n64     ptrace                          sys_ptrace
+100    n64     getuid                          sys_getuid
+101    n64     syslog                          sys_syslog
+102    n64     getgid                          sys_getgid
+103    n64     setuid                          sys_setuid
+104    n64     setgid                          sys_setgid
+105    n64     geteuid                         sys_geteuid
+106    n64     getegid                         sys_getegid
+107    n64     setpgid                         sys_setpgid
+108    n64     getppid                         sys_getppid
+109    n64     getpgrp                         sys_getpgrp
+110    n64     setsid                          sys_setsid
+111    n64     setreuid                        sys_setreuid
+112    n64     setregid                        sys_setregid
+113    n64     getgroups                       sys_getgroups
+114    n64     setgroups                       sys_setgroups
+115    n64     setresuid                       sys_setresuid
+116    n64     getresuid                       sys_getresuid
+117    n64     setresgid                       sys_setresgid
+118    n64     getresgid                       sys_getresgid
+119    n64     getpgid                         sys_getpgid
+120    n64     setfsuid                        sys_setfsuid
+121    n64     setfsgid                        sys_setfsgid
+122    n64     getsid                          sys_getsid
+123    n64     capget                          sys_capget
+124    n64     capset                          sys_capset
+125    n64     rt_sigpending                   sys_rt_sigpending
+126    n64     rt_sigtimedwait                 sys_rt_sigtimedwait
+127    n64     rt_sigqueueinfo                 sys_rt_sigqueueinfo
+128    n64     rt_sigsuspend                   sys_rt_sigsuspend
+129    n64     sigaltstack                     sys_sigaltstack
+130    n64     utime                           sys_utime
+131    n64     mknod                           sys_mknod
+132    n64     personality                     sys_personality
+133    n64     ustat                           sys_ustat
+134    n64     statfs                          sys_statfs
+135    n64     fstatfs                         sys_fstatfs
+136    n64     sysfs                           sys_sysfs
+137    n64     getpriority                     sys_getpriority
+138    n64     setpriority                     sys_setpriority
+139    n64     sched_setparam                  sys_sched_setparam
+140    n64     sched_getparam                  sys_sched_getparam
+141    n64     sched_setscheduler              sys_sched_setscheduler
+142    n64     sched_getscheduler              sys_sched_getscheduler
+143    n64     sched_get_priority_max          sys_sched_get_priority_max
+144    n64     sched_get_priority_min          sys_sched_get_priority_min
+145    n64     sched_rr_get_interval           sys_sched_rr_get_interval
+146    n64     mlock                           sys_mlock
+147    n64     munlock                         sys_munlock
+148    n64     mlockall                        sys_mlockall
+149    n64     munlockall                      sys_munlockall
+150    n64     vhangup                         sys_vhangup
+151    n64     pivot_root                      sys_pivot_root
+152    n64     _sysctl                         sys_ni_syscall
+153    n64     prctl                           sys_prctl
+154    n64     adjtimex                        sys_adjtimex
+155    n64     setrlimit                       sys_setrlimit
+156    n64     chroot                          sys_chroot
+157    n64     sync                            sys_sync
+158    n64     acct                            sys_acct
+159    n64     settimeofday                    sys_settimeofday
+160    n64     mount                           sys_mount
+161    n64     umount2                         sys_umount
+162    n64     swapon                          sys_swapon
+163    n64     swapoff                         sys_swapoff
+164    n64     reboot                          sys_reboot
+165    n64     sethostname                     sys_sethostname
+166    n64     setdomainname                   sys_setdomainname
+167    n64     create_module                   sys_ni_syscall
+168    n64     init_module                     sys_init_module
+169    n64     delete_module                   sys_delete_module
+170    n64     get_kernel_syms                 sys_ni_syscall
+171    n64     query_module                    sys_ni_syscall
+172    n64     quotactl                        sys_quotactl
+173    n64     nfsservctl                      sys_ni_syscall
+174    n64     getpmsg                         sys_ni_syscall
+175    n64     putpmsg                         sys_ni_syscall
+176    n64     afs_syscall                     sys_ni_syscall
+# 177 reserved for security
+177    n64     reserved177                     sys_ni_syscall
+178    n64     gettid                          sys_gettid
+179    n64     readahead                       sys_readahead
+180    n64     setxattr                        sys_setxattr
+181    n64     lsetxattr                       sys_lsetxattr
+182    n64     fsetxattr                       sys_fsetxattr
+183    n64     getxattr                        sys_getxattr
+184    n64     lgetxattr                       sys_lgetxattr
+185    n64     fgetxattr                       sys_fgetxattr
+186    n64     listxattr                       sys_listxattr
+187    n64     llistxattr                      sys_llistxattr
+188    n64     flistxattr                      sys_flistxattr
+189    n64     removexattr                     sys_removexattr
+190    n64     lremovexattr                    sys_lremovexattr
+191    n64     fremovexattr                    sys_fremovexattr
+192    n64     tkill                           sys_tkill
+193    n64     reserved193                     sys_ni_syscall
+194    n64     futex                           sys_futex
+195    n64     sched_setaffinity               sys_sched_setaffinity
+196    n64     sched_getaffinity               sys_sched_getaffinity
+197    n64     cacheflush                      sys_cacheflush
+198    n64     cachectl                        sys_cachectl
+199    n64     sysmips                         __sys_sysmips
+200    n64     io_setup                        sys_io_setup
+201    n64     io_destroy                      sys_io_destroy
+202    n64     io_getevents                    sys_io_getevents
+203    n64     io_submit                       sys_io_submit
+204    n64     io_cancel                       sys_io_cancel
+205    n64     exit_group                      sys_exit_group
+206    n64     lookup_dcookie                  sys_lookup_dcookie
+207    n64     epoll_create                    sys_epoll_create
+208    n64     epoll_ctl                       sys_epoll_ctl
+209    n64     epoll_wait                      sys_epoll_wait
+210    n64     remap_file_pages                sys_remap_file_pages
+211    n64     rt_sigreturn                    sys_rt_sigreturn
+212    n64     set_tid_address                 sys_set_tid_address
+213    n64     restart_syscall                 sys_restart_syscall
+214    n64     semtimedop                      sys_semtimedop
+215    n64     fadvise64                       sys_fadvise64_64
+216    n64     timer_create                    sys_timer_create
+217    n64     timer_settime                   sys_timer_settime
+218    n64     timer_gettime                   sys_timer_gettime
+219    n64     timer_getoverrun                sys_timer_getoverrun
+220    n64     timer_delete                    sys_timer_delete
+221    n64     clock_settime                   sys_clock_settime
+222    n64     clock_gettime                   sys_clock_gettime
+223    n64     clock_getres                    sys_clock_getres
+224    n64     clock_nanosleep                 sys_clock_nanosleep
+225    n64     tgkill                          sys_tgkill
+226    n64     utimes                          sys_utimes
+227    n64     mbind                           sys_mbind
+228    n64     get_mempolicy                   sys_get_mempolicy
+229    n64     set_mempolicy                   sys_set_mempolicy
+230    n64     mq_open                         sys_mq_open
+231    n64     mq_unlink                       sys_mq_unlink
+232    n64     mq_timedsend                    sys_mq_timedsend
+233    n64     mq_timedreceive                 sys_mq_timedreceive
+234    n64     mq_notify                       sys_mq_notify
+235    n64     mq_getsetattr                   sys_mq_getsetattr
+236    n64     vserver                         sys_ni_syscall
+237    n64     waitid                          sys_waitid
+# 238 was sys_setaltroot
+239    n64     add_key                         sys_add_key
+240    n64     request_key                     sys_request_key
+241    n64     keyctl                          sys_keyctl
+242    n64     set_thread_area                 sys_set_thread_area
+243    n64     inotify_init                    sys_inotify_init
+244    n64     inotify_add_watch               sys_inotify_add_watch
+245    n64     inotify_rm_watch                sys_inotify_rm_watch
+246    n64     migrate_pages                   sys_migrate_pages
+247    n64     openat                          sys_openat
+248    n64     mkdirat                         sys_mkdirat
+249    n64     mknodat                         sys_mknodat
+250    n64     fchownat                        sys_fchownat
+251    n64     futimesat                       sys_futimesat
+252    n64     newfstatat                      sys_newfstatat
+253    n64     unlinkat                        sys_unlinkat
+254    n64     renameat                        sys_renameat
+255    n64     linkat                          sys_linkat
+256    n64     symlinkat                       sys_symlinkat
+257    n64     readlinkat                      sys_readlinkat
+258    n64     fchmodat                        sys_fchmodat
+259    n64     faccessat                       sys_faccessat
+260    n64     pselect6                        sys_pselect6
+261    n64     ppoll                           sys_ppoll
+262    n64     unshare                         sys_unshare
+263    n64     splice                          sys_splice
+264    n64     sync_file_range                 sys_sync_file_range
+265    n64     tee                             sys_tee
+266    n64     vmsplice                        sys_vmsplice
+267    n64     move_pages                      sys_move_pages
+268    n64     set_robust_list                 sys_set_robust_list
+269    n64     get_robust_list                 sys_get_robust_list
+270    n64     kexec_load                      sys_kexec_load
+271    n64     getcpu                          sys_getcpu
+272    n64     epoll_pwait                     sys_epoll_pwait
+273    n64     ioprio_set                      sys_ioprio_set
+274    n64     ioprio_get                      sys_ioprio_get
+275    n64     utimensat                       sys_utimensat
+276    n64     signalfd                        sys_signalfd
+277    n64     timerfd                         sys_ni_syscall
+278    n64     eventfd                         sys_eventfd
+279    n64     fallocate                       sys_fallocate
+280    n64     timerfd_create                  sys_timerfd_create
+281    n64     timerfd_gettime                 sys_timerfd_gettime
+282    n64     timerfd_settime                 sys_timerfd_settime
+283    n64     signalfd4                       sys_signalfd4
+284    n64     eventfd2                        sys_eventfd2
+285    n64     epoll_create1                   sys_epoll_create1
+286    n64     dup3                            sys_dup3
+287    n64     pipe2                           sys_pipe2
+288    n64     inotify_init1                   sys_inotify_init1
+289    n64     preadv                          sys_preadv
+290    n64     pwritev                         sys_pwritev
+291    n64     rt_tgsigqueueinfo               sys_rt_tgsigqueueinfo
+292    n64     perf_event_open                 sys_perf_event_open
+293    n64     accept4                         sys_accept4
+294    n64     recvmmsg                        sys_recvmmsg
+295    n64     fanotify_init                   sys_fanotify_init
+296    n64     fanotify_mark                   sys_fanotify_mark
+297    n64     prlimit64                       sys_prlimit64
+298    n64     name_to_handle_at               sys_name_to_handle_at
+299    n64     open_by_handle_at               sys_open_by_handle_at
+300    n64     clock_adjtime                   sys_clock_adjtime
+301    n64     syncfs                          sys_syncfs
+302    n64     sendmmsg                        sys_sendmmsg
+303    n64     setns                           sys_setns
+304    n64     process_vm_readv                sys_process_vm_readv
+305    n64     process_vm_writev               sys_process_vm_writev
+306    n64     kcmp                            sys_kcmp
+307    n64     finit_module                    sys_finit_module
+308    n64     getdents64                      sys_getdents64
+309    n64     sched_setattr                   sys_sched_setattr
+310    n64     sched_getattr                   sys_sched_getattr
+311    n64     renameat2                       sys_renameat2
+312    n64     seccomp                         sys_seccomp
+313    n64     getrandom                       sys_getrandom
+314    n64     memfd_create                    sys_memfd_create
+315    n64     bpf                             sys_bpf
+316    n64     execveat                        sys_execveat
+317    n64     userfaultfd                     sys_userfaultfd
+318    n64     membarrier                      sys_membarrier
+319    n64     mlock2                          sys_mlock2
+320    n64     copy_file_range                 sys_copy_file_range
+321    n64     preadv2                         sys_preadv2
+322    n64     pwritev2                        sys_pwritev2
+323    n64     pkey_mprotect                   sys_pkey_mprotect
+324    n64     pkey_alloc                      sys_pkey_alloc
+325    n64     pkey_free                       sys_pkey_free
+326    n64     statx                           sys_statx
+327    n64     rseq                            sys_rseq
+328    n64     io_pgetevents                   sys_io_pgetevents
+# 329 through 423 are reserved to sync up with other architectures
+424    n64     pidfd_send_signal               sys_pidfd_send_signal
+425    n64     io_uring_setup                  sys_io_uring_setup
+426    n64     io_uring_enter                  sys_io_uring_enter
+427    n64     io_uring_register               sys_io_uring_register
+428    n64     open_tree                       sys_open_tree
+429    n64     move_mount                      sys_move_mount
+430    n64     fsopen                          sys_fsopen
+431    n64     fsconfig                        sys_fsconfig
+432    n64     fsmount                         sys_fsmount
+433    n64     fspick                          sys_fspick
+434    n64     pidfd_open                      sys_pidfd_open
+435    n64     clone3                          __sys_clone3
+436    n64     close_range                     sys_close_range
+437    n64     openat2                         sys_openat2
+438    n64     pidfd_getfd                     sys_pidfd_getfd
+439    n64     faccessat2                      sys_faccessat2
+440    n64     process_madvise                 sys_process_madvise
+441    n64     epoll_pwait2                    sys_epoll_pwait2
diff --git a/tools/perf/arch/mips/include/dwarf-regs-table.h b/tools/perf/arch/mips/include/dwarf-regs-table.h
new file mode 100644 (file)
index 0000000..5badbcd
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dwarf-regs-table.h : Mapping of DWARF debug register numbers into
+ * register names.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+#undef REG_DWARFNUM_NAME
+#define REG_DWARFNUM_NAME(reg, idx)    [idx] = "$" #reg
+static const char * const mips_regstr_tbl[] = {        /* indexed by DWARF register number */
+       "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+       "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19",
+       "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29",
+       "$30", "$31",
+       REG_DWARFNUM_NAME(hi, 64),      /* expands to [64] = "$hi" */
+       REG_DWARFNUM_NAME(lo, 65),      /* expands to [65] = "$lo" */
+};
+#endif
diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h
new file mode 100644 (file)
index 0000000..ee73b36
--- /dev/null
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MAX PERF_REG_MIPS_MAX
+#define PERF_REG_IP PERF_REG_MIPS_PC
+#define PERF_REG_SP PERF_REG_MIPS_R29
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
+
+static inline const char *__perf_reg_name(int id)
+{
+       switch (id) {   /* map a PERF_REG_MIPS_* id to its display name */
+       case PERF_REG_MIPS_PC:
+               return "PC";
+       case PERF_REG_MIPS_R1:
+               return "$1";
+       case PERF_REG_MIPS_R2:
+               return "$2";
+       case PERF_REG_MIPS_R3:
+               return "$3";
+       case PERF_REG_MIPS_R4:
+               return "$4";
+       case PERF_REG_MIPS_R5:
+               return "$5";
+       case PERF_REG_MIPS_R6:
+               return "$6";
+       case PERF_REG_MIPS_R7:
+               return "$7";
+       case PERF_REG_MIPS_R8:
+               return "$8";
+       case PERF_REG_MIPS_R9:
+               return "$9";
+       case PERF_REG_MIPS_R10:
+               return "$10";
+       case PERF_REG_MIPS_R11:
+               return "$11";
+       case PERF_REG_MIPS_R12:
+               return "$12";
+       case PERF_REG_MIPS_R13:
+               return "$13";
+       case PERF_REG_MIPS_R14:
+               return "$14";
+       case PERF_REG_MIPS_R15:
+               return "$15";
+       case PERF_REG_MIPS_R16:
+               return "$16";
+       case PERF_REG_MIPS_R17:
+               return "$17";
+       case PERF_REG_MIPS_R18:
+               return "$18";
+       case PERF_REG_MIPS_R19:
+               return "$19";
+       case PERF_REG_MIPS_R20:
+               return "$20";
+       case PERF_REG_MIPS_R21:
+               return "$21";
+       case PERF_REG_MIPS_R22:
+               return "$22";
+       case PERF_REG_MIPS_R23:
+               return "$23";
+       case PERF_REG_MIPS_R24:
+               return "$24";
+       case PERF_REG_MIPS_R25:
+               return "$25";
+       case PERF_REG_MIPS_R28:         /* note: there is no case for $26 or $27 */
+               return "$28";
+       case PERF_REG_MIPS_R29:
+               return "$29";
+       case PERF_REG_MIPS_R30:
+               return "$30";
+       case PERF_REG_MIPS_R31:
+               return "$31";
+       default:        /* any id without a case above has no name */
+               break;
+       }
+       return NULL;    /* callers get NULL for an unnamed id */
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build
new file mode 100644 (file)
index 0000000..51c8900
--- /dev/null
@@ -0,0 +1,3 @@
+perf-y += perf_regs.o
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c
new file mode 100644 (file)
index 0000000..25c13a9
--- /dev/null
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <dwarf-regs.h>
+
+static const char *mips_gpr_names[32] = {
+       "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+       "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19",
+       "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29",
+       "$30", "$31"
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+       const char *name = NULL;        /* stays NULL for unmapped DWARF numbers */
+
+       if (n < 32)
+               name = mips_gpr_names[n];
+       else if (n == 64 || n == 65)
+               name = (n == 64) ? "hi" : "lo";
+       return name;
+}
diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c
new file mode 100644 (file)
index 0000000..2864e2e
--- /dev/null
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+       SMPL_REG_END    /* sole entry; presumably the list terminator (see util/perf_regs.h) */
+};
diff --git a/tools/perf/arch/mips/util/unwind-libunwind.c b/tools/perf/arch/mips/util/unwind-libunwind.c
new file mode 100644 (file)
index 0000000..0d8c99c
--- /dev/null
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+       /* Translate a libunwind MIPS register number into its perf register id. */
+       if (regnum == UNW_MIPS_PC)
+               return PERF_REG_MIPS_PC;
+       if (regnum >= UNW_MIPS_R1 && regnum <= UNW_MIPS_R25)
+               return PERF_REG_MIPS_R1 + (regnum - UNW_MIPS_R1);
+       if (regnum >= UNW_MIPS_R28 && regnum <= UNW_MIPS_R31)
+               return PERF_REG_MIPS_R28 + (regnum - UNW_MIPS_R28);
+
+       /* Every register number outside the ranges above is rejected. */
+       pr_err("unwind: invalid reg id %d\n", regnum);
+       return -EINVAL;
+}
index b7945e5..8a79c41 100644 (file)
@@ -4,6 +4,8 @@ perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
 perf-y += sym-handling.o
+perf-y += evsel.o
+perf-y += event.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
new file mode 100644 (file)
index 0000000..3bf4412
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+                                  const __u64 *array, u64 type)
+{
+       union perf_sample_weight weight = { .full = *array };
+
+       if (type & PERF_SAMPLE_WEIGHT) {
+               /* Plain weight sample: the whole 64-bit word is the weight. */
+               data->weight = weight.full;
+               return;
+       }
+       data->weight = weight.var1_dw;  /* otherwise unpack the three fields */
+       data->ins_lat = weight.var2_w;
+       data->p_stage_cyc = weight.var3_w;
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+                                       __u64 *array, u64 type)
+{
+       u64 packed = data->weight;
+
+       /* Struct form: weight in low 32 bits, ins_lat from bit 32; p_stage_cyc is not packed. */
+       if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+               packed = (packed & 0xffffffff) | ((u64)data->ins_lat << 32);
+       *array = packed;
+}
+
+const char *arch_perf_header_entry(const char *se_header)
+{
+       if (strcmp(se_header, "Local INSTR Latency") == 0)
+               return "Finish Cyc";
+       if (strcmp(se_header, "Pipeline Stage Cycle") == 0)
+               return "Dispatch Cyc";
+       return se_header;       /* every other header passes through unchanged */
+}
+
+int arch_support_sort_key(const char *sort_key)
+{
+       static const char supported_key[] = "p_stage_cyc";
+
+       return strcmp(sort_key, supported_key) == 0;    /* 1 if supported, else 0 */
+}
diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c
new file mode 100644 (file)
index 0000000..2f733cd
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+       evsel__set_sample_bit(evsel, WEIGHT_STRUCT);    /* powerpc asks for the WEIGHT_STRUCT sample bit on this evsel */
+}
index eed9e5a..1651068 100644 (file)
@@ -176,7 +176,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
 }
 
 /*
- * Incase of powerpc architecture, pmu registers are programmable
+ * In case of powerpc architecture, pmu registers are programmable
  * by guest kernel. So monitoring guest via host may not provide
  * valid samples with default 'cycles' event. It is better to use
  * 'trace_imc/trace_cycles' event for guest profiling, since it
index 5788eb1..2baeb1c 100644 (file)
@@ -10,6 +10,6 @@
 
 #define SPRN_PVR        0x11F   /* Processor Version Register */
 #define PVR_VER(pvr)    (((pvr) >>  16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr)    (((pvr) >>   0) & 0xFFFF) /* Revison field */
+#define PVR_REV(pvr)    (((pvr) >>   0) & 0xFFFF) /* Revision field */
 
 #endif /* __PERF_UTIL_HEADER_H */
index adcacf1..dffcf9b 100644 (file)
@@ -73,7 +73,7 @@ static int bp_modify1(void)
        /*
         * The parent does following steps:
         *  - creates a new breakpoint (id 0) for bp_2 function
-        *  - changes that breakponit to bp_1 function
+        *  - changes that breakpoint to bp_1 function
         *  - waits for the breakpoint to hit and checks
         *    it has proper rip of bp_1 function
         *  - detaches the child
index fca81b3..207c568 100644 (file)
@@ -165,7 +165,7 @@ static int sdt_init_op_regex(void)
 /*
  * Max x86 register name length is 5(ex: %r15d). So, 6th char
  * should always contain NULL. This helps to find register name
- * length using strlen, insted of maintaing one more variable.
+ * length using strlen, instead of maintaining one more variable.
  */
 #define SDT_REG_NAME_SIZE  6
 
@@ -207,7 +207,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
         * and displacement 0 (Both sign and displacement 0 are
         * optional so it may be empty). Use one more character
         * to hold last NULL so that strlen can be used to find
-        * prefix length, instead of maintaing one more variable.
+        * prefix length, instead of maintaining one more variable.
         */
        char prefix[3] = {0};
 
index 0a0ff12..79d13db 100644 (file)
@@ -17,7 +17,7 @@
  * While the second model, enabled via --multiq option, uses multiple
  * queueing (which refers to one epoll instance per worker). For example,
  * short lived tcp connections in a high throughput httpd server will
- * ditribute the accept()'ing  connections across CPUs. In this case each
+ * distribute the accept()'ing  connections across CPUs. In this case each
  * worker does a limited  amount of processing.
  *
  *             [queue A]  ---> [worker]
@@ -198,7 +198,7 @@ static void *workerfn(void *arg)
 
        do {
                /*
-                * Block undefinitely waiting for the IN event.
+                * Block indefinitely waiting for the IN event.
                 * In order to stress the epoll_wait(2) syscall,
                 * call it event per event, instead of a larger
                 * batch (max)limit.
index 280227e..55d373b 100644 (file)
@@ -372,7 +372,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss)
                        len += synthesize_flush(data);
        }
 
-       /* tihs makes the child to finish */
+       /* this makes the child finish */
        close(data->input_pipe[1]);
 
        wait4(data->pid, &status, 0, &rusage);
index 20b87e2..f264017 100644 (file)
@@ -42,7 +42,7 @@
 #endif
 
 /*
- * Regular printout to the terminal, supressed if -q is specified:
+ * Regular printout to the terminal, suppressed if -q is specified:
  */
 #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
 
index a23ba6b..0f3a196 100644 (file)
@@ -239,7 +239,7 @@ static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
        }
 
        /*
-        * XXX filtered samples can still have branch entires pointing into our
+        * XXX filtered samples can still have branch entries pointing into our
         * symbol and are missed.
         */
        process_branch_stack(sample->branch_stack, al, sample);
index 7c4a9d4..61929f6 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/zalloc.h>
 #include <linux/string.h>
 #include <linux/limits.h>
-#include <linux/string.h>
 #include <string.h>
 #include <sys/file.h>
 #include <signal.h>
@@ -24,8 +23,6 @@
 #include <sys/signalfd.h>
 #include <sys/wait.h>
 #include <poll.h>
-#include <sys/stat.h>
-#include <time.h>
 #include "builtin.h"
 #include "perf.h"
 #include "debug.h"
index 878e04b..f52b3a7 100644 (file)
@@ -1796,7 +1796,7 @@ static int ui_init(void)
        data__for_each_file(i, d) {
 
                /*
-                * Baseline or compute realted columns:
+                * Baseline or compute related columns:
                 *
                 *   PERF_HPP_DIFF__BASELINE
                 *   PERF_HPP_DIFF__DELTA
index a2f1e53..01326e3 100644 (file)
@@ -49,7 +49,7 @@ struct lock_stat {
 
        /*
         * FIXME: evsel__intval() returns u64,
-        * so address of lockdep_map should be dealed as 64bit.
+        * so address of lockdep_map should be treated as 64bit.
         * Is there a better solution?
         */
        void                    *addr;          /* address of lockdep_map, used as ID */
index 2a845d6..0d65c98 100644 (file)
@@ -84,6 +84,7 @@ struct report {
        bool                    nonany_branch_mode;
        bool                    group_set;
        bool                    stitch_lbr;
+       bool                    disable_order;
        int                     max_stack;
        struct perf_read_values show_threads_values;
        struct annotation_options annotation_opts;
@@ -1296,6 +1297,8 @@ int cmd_report(int argc, const char **argv)
        OPTS_EVSWITCH(&report.evswitch),
        OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode,
                    "Sort all blocks by 'Sampled Cycles%'"),
+       OPT_BOOLEAN(0, "disable-order", &report.disable_order,
+                   "Disable raw trace ordering"),
        OPT_END()
        };
        struct perf_data data = {
@@ -1329,7 +1332,7 @@ int cmd_report(int argc, const char **argv)
        if (report.mmaps_mode)
                report.tasks_mode = true;
 
-       if (dump_trace)
+       if (dump_trace && report.disable_order)
                report.tool.ordered_events = false;
 
        if (quiet)
index 69c769b..954ce2f 100644 (file)
@@ -1712,7 +1712,7 @@ static int perf_sched__process_fork_event(struct perf_tool *tool,
 {
        struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
-       /* run the fork event through the perf machineruy */
+       /* run the fork event through the perf machinery */
        perf_event__process_fork(tool, event, sample, machine);
 
        /* and then run additional processing needed for this command */
index 5915f19..1280cbf 100644 (file)
@@ -314,8 +314,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel)
        return (struct evsel_script *)evsel->priv;
 }
 
-static struct evsel_script *perf_evsel_script__new(struct evsel *evsel,
-                                                       struct perf_data *data)
+static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
 {
        struct evsel_script *es = zalloc(sizeof(*es));
 
@@ -335,7 +334,7 @@ out_free:
        return NULL;
 }
 
-static void perf_evsel_script__delete(struct evsel_script *es)
+static void evsel_script__delete(struct evsel_script *es)
 {
        zfree(&es->filename);
        fclose(es->fp);
@@ -343,7 +342,7 @@ static void perf_evsel_script__delete(struct evsel_script *es)
        free(es);
 }
 
-static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp)
+static int evsel_script__fprintf(struct evsel_script *es, FILE *fp)
 {
        struct stat st;
 
@@ -2219,8 +2218,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 
        if (!evsel->priv) {
                if (scr->per_event_dump) {
-                       evsel->priv = perf_evsel_script__new(evsel,
-                                               scr->session->data);
+                       evsel->priv = evsel_script__new(evsel, scr->session->data);
                } else {
                        es = zalloc(sizeof(*es));
                        if (!es)
@@ -2475,7 +2473,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script)
        evlist__for_each_entry(evlist, evsel) {
                if (!evsel->priv)
                        break;
-               perf_evsel_script__delete(evsel->priv);
+               evsel_script__delete(evsel->priv);
                evsel->priv = NULL;
        }
 }
@@ -2488,14 +2486,14 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script)
                /*
                 * Already setup? I.e. we may be called twice in cases like
                 * Intel PT, one for the intel_pt// and dummy events, then
-                * for the evsels syntheized from the auxtrace info.
+                * for the evsels synthesized from the auxtrace info.
                 *
                 * See perf_script__process_auxtrace_info.
                 */
                if (evsel->priv != NULL)
                        continue;
 
-               evsel->priv = perf_evsel_script__new(evsel, script->session->data);
+               evsel->priv = evsel_script__new(evsel, script->session->data);
                if (evsel->priv == NULL)
                        goto out_err_fclose;
        }
@@ -2530,8 +2528,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script)
        evlist__for_each_entry(script->session->evlist, evsel) {
                struct evsel_script *es = evsel->priv;
 
-               perf_evsel_script__fprintf(es, stdout);
-               perf_evsel_script__delete(es);
+               evsel_script__fprintf(es, stdout);
+               evsel_script__delete(es);
                evsel->priv = NULL;
        }
 }
@@ -3085,7 +3083,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
  *
  * Fixme: All existing "xxx-record" are all in good formats "-e event ",
  * which is covered well now. And new parsing code should be added to
- * cover the future complexing formats like event groups etc.
+ * cover the future complex formats like event groups etc.
  */
 static int check_ev_match(char *dir_name, char *scriptname,
                        struct perf_session *session)
index 2e2e4a8..2a2c15c 100644 (file)
@@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
        }
 
        evlist__for_each_cpu (evsel_list, i, cpu) {
+               /*
+                * bperf calls evsel__open_per_cpu() in bperf__load(), so
+                * no need to call it again here.
+                */
+               if (target.use_bpf)
+                       break;
                affinity__set(&affinity, cpu);
 
                evlist__for_each_entry(evsel_list, counter) {
@@ -925,15 +931,15 @@ try_again_reset:
        /*
         * Enable counters and exec the command:
         */
-       t0 = rdclock();
-       clock_gettime(CLOCK_MONOTONIC, &ref_time);
-
        if (forks) {
                evlist__start_workload(evsel_list);
                err = enable_counters();
                if (err)
                        return -1;
 
+               t0 = rdclock();
+               clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
                if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
                        status = dispatch_events(forks, timeout, interval, &times);
                if (child_pid != -1) {
@@ -954,6 +960,10 @@ try_again_reset:
                err = enable_counters();
                if (err)
                        return -1;
+
+               t0 = rdclock();
+               clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
                status = dispatch_events(forks, timeout, interval, &times);
        }
 
@@ -1083,6 +1093,11 @@ void perf_stat__set_big_num(int set)
        stat_config.big_num = (set != 0);
 }
 
+void perf_stat__set_no_csv_summary(int set)
+{
+       stat_config.no_csv_summary = (set != 0);
+}
+
 static int stat__set_big_num(const struct option *opt __maybe_unused,
                             const char *s __maybe_unused, int unset)
 {
@@ -1146,6 +1161,10 @@ static struct option stat_options[] = {
 #ifdef HAVE_BPF_SKEL
        OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
                   "stat events on existing bpf program id"),
+       OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
+                   "use bpf program to count events"),
+       OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
+                  "path to perf_event_attr map"),
 #endif
        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
                    "system-wide collection from all CPUs"),
@@ -1235,6 +1254,8 @@ static struct option stat_options[] = {
                    "threads of same physical core"),
        OPT_BOOLEAN(0, "summary", &stat_config.summary,
                       "print summary for interval mode"),
+       OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
+                      "don't print 'summary' for CSV summary output"),
        OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
                        "don't print output (useful with record)"),
 #ifdef HAVE_LIBPFM
@@ -1705,7 +1726,7 @@ static int add_default_attributes(void)
        bzero(&errinfo, sizeof(errinfo));
        if (transaction_run) {
                /* Handle -T as -M transaction. Once platform specific metrics
-                * support has been added to the json files, all archictures
+                * support has been added to the json files, all architectures
                 * will use this approach. To determine transaction support
                 * on an architecture test for such a metric name.
                 */
@@ -2459,7 +2480,7 @@ int cmd_stat(int argc, const char **argv)
                /*
                 * We synthesize the kernel mmap record just so that older tools
                 * don't emit warnings about not being able to resolve symbols
-                * due to /proc/sys/kernel/kptr_restrict settings and instear provide
+                * due to /proc/sys/kernel/kptr_restrict settings and instead provide
                 * a saner message about no samples being in the perf.data file.
                 *
                 * This also serves to suppress a warning about f_header.data.size == 0
index 3673c04..173ace4 100644 (file)
@@ -1607,7 +1607,7 @@ int cmd_top(int argc, const char **argv)
        if (status) {
                /*
                 * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise
-                * warn the user explicitely.
+                * warn the user explicitly.
                 */
                eprintf(status == ENOSYS ? 1 : 0, verbose,
                        "Couldn't read the cpuid for this machine: %s\n",
index dded93a..39eada9 100755 (executable)
@@ -146,6 +146,7 @@ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
 check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
 check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl
 
 for i in $BEAUTY_FILES; do
   beauty_check $i -B
index b804379..a262dcd 100644 (file)
@@ -262,7 +262,7 @@ int sys_enter(struct syscall_enter_args *args)
        /*
         * Jump to syscall specific augmenter, even if the default one,
         * "!raw_syscalls:unaugmented" that will just return 1 to return the
-        * unagmented tracepoint payload.
+        * unaugmented tracepoint payload.
         */
        bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
 
@@ -282,7 +282,7 @@ int sys_exit(struct syscall_exit_args *args)
        /*
         * Jump to syscall specific return augmenter, even if the default one,
         * "!raw_syscalls:unaugmented" that will just return 1 to return the
-        * unagmented tracepoint payload.
+        * unaugmented tracepoint payload.
         */
        bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
        /*
index 8810859..526dcaf 100644 (file)
@@ -390,7 +390,7 @@ jvmti_write_code(void *agent, char const *sym,
                rec.p.total_size += size;
 
        /*
-        * If JVM is multi-threaded, nultiple concurrent calls to agent
+        * If JVM is multi-threaded, multiple concurrent calls to agent
         * may be possible, so protect file writes
         */
        flockfile(fp);
@@ -457,7 +457,7 @@ jvmti_write_debug_info(void *agent, uint64_t code,
        rec.p.total_size = size;
 
        /*
-        * If JVM is multi-threaded, nultiple concurrent calls to agent
+        * If JVM is multi-threaded, multiple concurrent calls to agent
         * may be possible, so protect file writes
         */
        flockfile(fp);
index 75376c7..913fb20 100644 (file)
         "EventName": "L2D_TLB_REFILL",
         "BriefDescription": "Attributable Level 2 data TLB refill"
     },
+    {
+        "PublicDescription": "Attributable Level 2 instruction TLB refill.",
+        "EventCode": "0x2E",
+        "EventName": "L2I_TLB_REFILL",
+        "BriefDescription": "Attributable Level 2 instruction TLB refill."
+    },
     {
         "PublicDescription": "Attributable Level 2 data or unified TLB access",
         "EventCode": "0x2F",
         "EventName": "L2D_TLB",
         "BriefDescription": "Attributable Level 2 data or unified TLB access"
     },
+    {
+        "PublicDescription": "Attributable Level 2 instruction TLB access.",
+        "EventCode": "0x30",
+        "EventName": "L2I_TLB",
+        "BriefDescription": "Attributable Level 2 instruction TLB access."
+    },
     {
         "PublicDescription": "Access to another socket in a multi-socket system",
         "EventCode": "0x31",
         "EventCode": "0x37",
         "EventName": "LL_CACHE_MISS_RD",
         "BriefDescription": "Last level cache miss, read"
+    },
+    {
+        "PublicDescription": "SIMD Instruction architecturally executed.",
+        "EventCode": "0x8000",
+        "EventName": "SIMD_INST_RETIRED",
+        "BriefDescription": "SIMD Instruction architecturally executed."
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, SVE.",
+        "EventCode": "0x8002",
+        "EventName": "SVE_INST_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, SVE."
+    },
+    {
+        "PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
+        "EventCode": "0x8008",
+        "EventName": "UOP_SPEC",
+        "BriefDescription": "Microarchitectural operation, Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE Math accelerator Operations speculatively executed.",
+        "EventCode": "0x800E",
+        "EventName": "SVE_MATH_SPEC",
+        "BriefDescription": "SVE Math accelerator Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Floating-point Operations speculatively executed.",
+        "EventCode": "0x8010",
+        "EventName": "FP_SPEC",
+        "BriefDescription": "Floating-point Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Floating-point FMA Operations speculatively executed.",
+        "EventCode": "0x8028",
+        "EventName": "FP_FMA_SPEC",
+        "BriefDescription": "Floating-point FMA Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.",
+        "EventCode": "0x8034",
+        "EventName": "FP_RECPE_SPEC",
+        "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Floating-point convert Operations speculatively executed.",
+        "EventCode": "0x8038",
+        "EventName": "FP_CVT_SPEC",
+        "BriefDescription": "Floating-point convert Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.",
+        "EventCode": "0x8043",
+        "EventName": "ASE_SVE_INT_SPEC",
+        "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE predicated Operations speculatively executed.",
+        "EventCode": "0x8074",
+        "EventName": "SVE_PRED_SPEC",
+        "BriefDescription": "SVE predicated Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
+        "EventCode": "0x807C",
+        "EventName": "SVE_MOVPRFX_SPEC",
+        "BriefDescription": "SVE MOVPRFX Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.",
+        "EventCode": "0x807F",
+        "EventName": "SVE_MOVPRFX_U_SPEC",
+        "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.",
+        "EventCode": "0x8085",
+        "EventName": "ASE_SVE_LD_SPEC",
+        "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.",
+        "EventCode": "0x8086",
+        "EventName": "ASE_SVE_ST_SPEC",
+        "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Prefetch Operations speculatively executed.",
+        "EventCode": "0x8087",
+        "EventName": "PRF_SPEC",
+        "BriefDescription": "Prefetch Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "General-purpose register load Operations speculatively executed.",
+        "EventCode": "0x8089",
+        "EventName": "BASE_LD_REG_SPEC",
+        "BriefDescription": "General-purpose register load Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "General-purpose register store Operations speculatively executed.",
+        "EventCode": "0x808A",
+        "EventName": "BASE_ST_REG_SPEC",
+        "BriefDescription": "General-purpose register store Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE unpredicated load register Operations speculatively executed.",
+        "EventCode": "0x8091",
+        "EventName": "SVE_LDR_REG_SPEC",
+        "BriefDescription": "SVE unpredicated load register Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE unpredicated store register Operations speculatively executed.",
+        "EventCode": "0x8092",
+        "EventName": "SVE_STR_REG_SPEC",
+        "BriefDescription": "SVE unpredicated store register Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE load predicate register Operations speculatively executed.",
+        "EventCode": "0x8095",
+        "EventName": "SVE_LDR_PREG_SPEC",
+        "BriefDescription": "SVE load predicate register Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE store predicate register Operations speculatively executed.",
+        "EventCode": "0x8096",
+        "EventName": "SVE_STR_PREG_SPEC",
+        "BriefDescription": "SVE store predicate register Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.",
+        "EventCode": "0x809F",
+        "EventName": "SVE_PRF_CONTIG_SPEC",
+        "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.",
+        "EventCode": "0x80A5",
+        "EventName": "ASE_SVE_LD_MULTI_SPEC",
+        "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.",
+        "EventCode": "0x80A6",
+        "EventName": "ASE_SVE_ST_MULTI_SPEC",
+        "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE gather-load Operations speculatively executed.",
+        "EventCode": "0x80AD",
+        "EventName": "SVE_LD_GATHER_SPEC",
+        "BriefDescription": "SVE gather-load Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE scatter-store Operations speculatively executed.",
+        "EventCode": "0x80AE",
+        "EventName": "SVE_ST_SCATTER_SPEC",
+        "BriefDescription": "SVE scatter-store Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE gather-prefetch Operations speculatively executed.",
+        "EventCode": "0x80AF",
+        "EventName": "SVE_PRF_GATHER_SPEC",
+        "BriefDescription": "SVE gather-prefetch Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "SVE First-fault load Operations speculatively executed.",
+        "EventCode": "0x80BC",
+        "EventName": "SVE_LDFF_SPEC",
+        "BriefDescription": "SVE First-fault load Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Scalable floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C0",
+        "EventName": "FP_SCALE_OPS_SPEC",
+        "BriefDescription": "Scalable floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C1",
+        "EventName": "FP_FIXED_OPS_SPEC",
+        "BriefDescription": "Non-scalable floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C2",
+        "EventName": "FP_HP_SCALE_OPS_SPEC",
+        "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C3",
+        "EventName": "FP_HP_FIXED_OPS_SPEC",
+        "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C4",
+        "EventName": "FP_SP_SCALE_OPS_SPEC",
+        "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C5",
+        "EventName": "FP_SP_FIXED_OPS_SPEC",
+        "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C6",
+        "EventName": "FP_DP_SCALE_OPS_SPEC",
+        "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed."
+    },
+    {
+        "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.",
+        "EventCode": "0x80C7",
+        "EventName": "FP_DP_FIXED_OPS_SPEC",
+        "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json
new file mode 100644 (file)
index 0000000..b011af1
--- /dev/null
@@ -0,0 +1,8 @@
+[
+  {
+    "ArchStdEvent": "BR_MIS_PRED"
+  },
+  {
+    "ArchStdEvent": "BR_PRED"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json
new file mode 100644 (file)
index 0000000..084e88d
--- /dev/null
@@ -0,0 +1,62 @@
+[
+  {
+    "PublicDescription": "This event counts read transactions from tofu controller to measured CMG.",
+    "EventCode": "0x314",
+    "EventName": "BUS_READ_TOTAL_TOFU",
+    "BriefDescription": "This event counts read transactions from tofu controller to measured CMG."
+  },
+  {
+    "PublicDescription": "This event counts read transactions from PCI controller to measured CMG.",
+    "EventCode": "0x315",
+    "EventName": "BUS_READ_TOTAL_PCI",
+    "BriefDescription": "This event counts read transactions from PCI controller to measured CMG."
+  },
+  {
+    "PublicDescription": "This event counts read transactions from measured CMG local memory to measured CMG.",
+    "EventCode": "0x316",
+    "EventName": "BUS_READ_TOTAL_MEM",
+    "BriefDescription": "This event counts read transactions from measured CMG local memory to measured CMG."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0.",
+    "EventCode": "0x318",
+    "EventName": "BUS_WRITE_TOTAL_CMG0",
+    "BriefDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1.",
+    "EventCode": "0x319",
+    "EventName": "BUS_WRITE_TOTAL_CMG1",
+    "BriefDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2.",
+    "EventCode": "0x31A",
+    "EventName": "BUS_WRITE_TOTAL_CMG2",
+    "BriefDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3.",
+    "EventCode": "0x31B",
+    "EventName": "BUS_WRITE_TOTAL_CMG3",
+    "BriefDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to tofu controller.",
+    "EventCode": "0x31C",
+    "EventName": "BUS_WRITE_TOTAL_TOFU",
+    "BriefDescription": "This event counts write transactions from measured CMG to tofu controller."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to PCI controller.",
+    "EventCode": "0x31D",
+    "EventName": "BUS_WRITE_TOTAL_PCI",
+    "BriefDescription": "This event counts write transactions from measured CMG to PCI controller."
+  },
+  {
+    "PublicDescription": "This event counts write transactions from measured CMG to measured CMG local memory.",
+    "EventCode": "0x31E",
+    "EventName": "BUS_WRITE_TOTAL_MEM",
+    "BriefDescription": "This event counts write transactions from measured CMG to measured CMG local memory."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json
new file mode 100644 (file)
index 0000000..2e341a9
--- /dev/null
@@ -0,0 +1,128 @@
+[
+  {
+    "ArchStdEvent": "L1I_CACHE_REFILL"
+  },
+  {
+    "ArchStdEvent": "L1I_TLB_REFILL"
+  },
+  {
+    "ArchStdEvent": "L1D_CACHE_REFILL"
+  },
+  {
+    "ArchStdEvent": "L1D_CACHE"
+  },
+  {
+    "ArchStdEvent": "L1D_TLB_REFILL"
+  },
+  {
+    "ArchStdEvent": "L1I_CACHE"
+  },
+  {
+    "ArchStdEvent": "L1D_CACHE_WB"
+  },
+  {
+    "ArchStdEvent": "L2D_CACHE"
+  },
+  {
+    "ArchStdEvent": "L2D_CACHE_REFILL"
+  },
+  {
+    "ArchStdEvent": "L2D_CACHE_WB"
+  },
+  {
+    "ArchStdEvent": "L2D_TLB_REFILL"
+  },
+  {
+    "ArchStdEvent": "L2I_TLB_REFILL"
+  },
+  {
+    "ArchStdEvent": "L2D_TLB"
+  },
+  {
+    "ArchStdEvent": "L2I_TLB"
+  },
+  {
+    "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.",
+    "EventCode": "0x49",
+    "EventName": "L1D_CACHE_REFILL_PRF",
+    "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.",
+    "EventCode": "0x59",
+    "EventName": "L2D_CACHE_REFILL_PRF",
+    "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.",
+    "EventCode": "0x200",
+    "EventName": "L1D_CACHE_REFILL_DM",
+    "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access."
+  },
+  {
+    "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.",
+    "EventCode": "0x202",
+    "EventName": "L1D_CACHE_REFILL_HWPRF",
+    "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.",
+    "EventCode": "0x208",
+    "EventName": "L1_MISS_WAIT",
+    "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle."
+  },
+  {
+    "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.",
+    "EventCode": "0x209",
+    "EventName": "L1I_MISS_WAIT",
+    "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle."
+  },
+  {
+    "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.",
+    "EventCode": "0x300",
+    "EventName": "L2D_CACHE_REFILL_DM",
+    "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access."
+  },
+  {
+    "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.",
+    "EventCode": "0x302",
+    "EventName": "L2D_CACHE_REFILL_HWPRF",
+    "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.",
+    "EventCode": "0x308",
+    "EventName": "L2_MISS_WAIT",
+    "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle."
+  },
+  {
+    "PublicDescription": "This event counts the number of times of L2 cache miss.",
+    "EventCode": "0x309",
+    "EventName": "L2_MISS_COUNT",
+    "BriefDescription": "This event counts the number of times of L2 cache miss."
+  },
+  {
+    "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.",
+    "EventCode": "0x325",
+    "EventName": "L2D_SWAP_DM",
+    "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.",
+    "EventCode": "0x326",
+    "EventName": "L2D_CACHE_MIBMCH_PRF",
+    "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access."
+  },
+  {
+    "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.",
+    "EventCode": "0x396",
+    "EventName": "L2D_CACHE_SWAP_LOCAL",
+    "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch."
+  },
+  {
+    "PublicDescription": "This event counts energy consumption per cycle of L2 cache.",
+    "EventCode": "0x3E0",
+    "EventName": "EA_L2",
+    "BriefDescription": "This event counts energy consumption per cycle of L2 cache."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json
new file mode 100644 (file)
index 0000000..b164846
--- /dev/null
@@ -0,0 +1,5 @@
+[
+  {
+    "ArchStdEvent": "CPU_CYCLES"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json
new file mode 100644 (file)
index 0000000..348749c
--- /dev/null
@@ -0,0 +1,29 @@
+[
+  {
+    "ArchStdEvent": "EXC_TAKEN"
+  },
+  {
+    "ArchStdEvent": "EXC_UNDEF"
+  },
+  {
+    "ArchStdEvent": "EXC_SVC"
+  },
+  {
+    "ArchStdEvent": "EXC_PABORT"
+  },
+  {
+    "ArchStdEvent": "EXC_DABORT"
+  },
+  {
+    "ArchStdEvent": "EXC_IRQ"
+  },
+  {
+    "ArchStdEvent": "EXC_FIQ"
+  },
+  {
+    "ArchStdEvent": "EXC_SMC"
+  },
+  {
+    "ArchStdEvent": "EXC_HVC"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json
new file mode 100644 (file)
index 0000000..6d258b1
--- /dev/null
@@ -0,0 +1,131 @@
+[
+  {
+    "ArchStdEvent": "SW_INCR"
+  },
+  {
+    "ArchStdEvent": "INST_RETIRED"
+  },
+  {
+    "ArchStdEvent": "EXC_RETURN"
+  },
+  {
+    "ArchStdEvent": "CID_WRITE_RETIRED"
+  },
+  {
+    "ArchStdEvent": "INST_SPEC"
+  },
+  {
+    "ArchStdEvent": "LDREX_SPEC"
+  },
+  {
+    "ArchStdEvent": "STREX_SPEC"
+  },
+  {
+    "ArchStdEvent": "LD_SPEC"
+  },
+  {
+    "ArchStdEvent": "ST_SPEC"
+  },
+  {
+    "ArchStdEvent": "LDST_SPEC"
+  },
+  {
+    "ArchStdEvent": "DP_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SPEC"
+  },
+  {
+    "ArchStdEvent": "VFP_SPEC"
+  },
+  {
+    "ArchStdEvent": "PC_WRITE_SPEC"
+  },
+  {
+    "ArchStdEvent": "CRYPTO_SPEC"
+  },
+  {
+    "ArchStdEvent": "BR_IMMED_SPEC"
+  },
+  {
+    "ArchStdEvent": "BR_RETURN_SPEC"
+  },
+  {
+    "ArchStdEvent": "BR_INDIRECT_SPEC"
+  },
+  {
+    "ArchStdEvent": "ISB_SPEC"
+  },
+  {
+    "ArchStdEvent": "DSB_SPEC"
+  },
+  {
+    "ArchStdEvent": "DMB_SPEC"
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.",
+    "EventCode": "0x9F",
+    "EventName": "DCZVA_SPEC",
+    "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed floating-point move operations.",
+    "EventCode": "0x105",
+    "EventName": "FP_MV_SPEC",
+    "BriefDescription": "This event counts architecturally executed floating-point move operations."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed operations that use predicate registers.",
+    "EventCode": "0x108",
+    "EventName": "PRD_SPEC",
+    "BriefDescription": "This event counts architecturally executed operations that use predicate registers."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.",
+    "EventCode": "0x109",
+    "EventName": "IEL_SPEC",
+    "BriefDescription": "This event counts architecturally executed inter-element manipulation operations."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.",
+    "EventCode": "0x10A",
+    "EventName": "IREG_SPEC",
+    "BriefDescription": "This event counts architecturally executed inter-register manipulation operations."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed NOSIMD load operations that use SIMD&FP registers.",
+    "EventCode": "0x112",
+    "EventName": "FP_LD_SPEC",
+    "BriefDescription": "This event counts architecturally executed NOSIMD load operations that use SIMD&FP registers."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed NOSIMD store operations that use SIMD&FP registers.",
+    "EventCode": "0x113",
+    "EventName": "FP_ST_SPEC",
+    "BriefDescription": "This event counts architecturally executed NOSIMD store operations that use SIMD&FP registers."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.",
+    "EventCode": "0x11A",
+    "EventName": "BC_LD_SPEC",
+    "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.",
+    "EventCode": "0x121",
+    "EventName": "EFFECTIVE_INST_SPEC",
+    "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed operations that use 'pre-index' as their addressing mode.",
+    "EventCode": "0x123",
+    "EventName": "PRE_INDEX_SPEC",
+    "BriefDescription": "This event counts architecturally executed operations that use 'pre-index' as their addressing mode."
+  },
+  {
+    "PublicDescription": "This event counts architecturally executed operations that use 'post-index' as their addressing mode.",
+    "EventCode": "0x124",
+    "EventName": "POST_INDEX_SPEC",
+    "BriefDescription": "This event counts architecturally executed operations that use 'post-index' as their addressing mode."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json
new file mode 100644 (file)
index 0000000..c1f6479
--- /dev/null
@@ -0,0 +1,8 @@
+[
+  {
+    "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.",
+    "EventCode": "0x3E8",
+    "EventName": "EA_MEMORY",
+    "BriefDescription": "This event counts energy consumption per cycle of CMG local memory."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json
new file mode 100644 (file)
index 0000000..10c823a
--- /dev/null
@@ -0,0 +1,188 @@
+[
+  {
+    "PublicDescription": "This event counts the occurrence count of the micro-operation split.",
+    "EventCode": "0x139",
+    "EventName": "UOP_SPLIT",
+    "BriefDescription": "This event counts the occurrence count of the micro-operation split."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.",
+    "EventCode": "0x180",
+    "EventName": "LD_COMP_WAIT_L2_MISS",
+    "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.",
+    "EventCode": "0x181",
+    "EventName": "LD_COMP_WAIT_L2_MISS_EX",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.",
+    "EventCode": "0x182",
+    "EventName": "LD_COMP_WAIT_L1_MISS",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.",
+    "EventCode": "0x183",
+    "EventName": "LD_COMP_WAIT_L1_MISS_EX",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.",
+    "EventCode": "0x184",
+    "EventName": "LD_COMP_WAIT",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.",
+    "EventCode": "0x185",
+    "EventName": "LD_COMP_WAIT_EX",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.",
+    "EventCode": "0x186",
+    "EventName": "LD_COMP_WAIT_PFP_BUSY",
+    "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port."
+  },
+  {
+    "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.",
+    "EventCode": "0x187",
+    "EventName": "LD_COMP_WAIT_PFP_BUSY_EX",
+    "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation."
+  },
+  {
+    "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.",
+    "EventCode": "0x188",
+    "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF",
+    "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.",
+    "EventCode": "0x189",
+    "EventName": "EU_COMP_WAIT",
+    "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.",
+    "EventCode": "0x18A",
+    "EventName": "FL_COMP_WAIT",
+    "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.",
+    "EventCode": "0x18B",
+    "EventName": "BR_COMP_WAIT",
+    "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.",
+    "EventCode": "0x18C",
+    "EventName": "ROB_EMPTY",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.",
+    "EventCode": "0x18D",
+    "EventName": "ROB_EMPTY_STQ_BUSY",
+    "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.",
+    "EventCode": "0x18E",
+    "EventName": "WFE_WFI_CYCLE",
+    "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only.",
+    "EventCode": "0x190",
+    "EventName": "_0INST_COMMIT",
+    "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that one instruction is committed.",
+    "EventCode": "0x191",
+    "EventName": "_1INST_COMMIT",
+    "BriefDescription": "This event counts every cycle that one instruction is committed."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that two instructions are committed.",
+    "EventCode": "0x192",
+    "EventName": "_2INST_COMMIT",
+    "BriefDescription": "This event counts every cycle that two instructions are committed."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that three instructions are committed.",
+    "EventCode": "0x193",
+    "EventName": "_3INST_COMMIT",
+    "BriefDescription": "This event counts every cycle that three instructions are committed."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that four instructions are committed.",
+    "EventCode": "0x194",
+    "EventName": "_4INST_COMMIT",
+    "BriefDescription": "This event counts every cycle that four instructions are committed."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that only any micro-operations are committed.",
+    "EventCode": "0x198",
+    "EventName": "UOP_ONLY_COMMIT",
+    "BriefDescription": "This event counts every cycle that only any micro-operations are committed."
+  },
+  {
+    "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.",
+    "EventCode": "0x199",
+    "EventName": "SINGLE_MOVPRFX_COMMIT",
+    "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed."
+  },
+  {
+    "PublicDescription": "This event counts energy consumption per cycle of core.",
+    "EventCode": "0x1E0",
+    "EventName": "EA_CORE",
+    "BriefDescription": "This event counts energy consumption per cycle of core."
+  },
+  {
+    "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.",
+    "EventCode": "0x230",
+    "EventName": "L1HWPF_STREAM_PF",
+    "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.",
+    "EventCode": "0x231",
+    "EventName": "L1HWPF_INJ_ALLOC_PF",
+    "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.",
+    "EventCode": "0x232",
+    "EventName": "L1HWPF_INJ_NOALLOC_PF",
+    "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefetcher.",
+    "EventCode": "0x233",
+    "EventName": "L2HWPF_STREAM_PF",
+    "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.",
+    "EventCode": "0x234",
+    "EventName": "L2HWPF_INJ_ALLOC_PF",
+    "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.",
+    "EventCode": "0x235",
+    "EventName": "L2HWPF_INJ_NOALLOC_PF",
+    "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher."
+  },
+  {
+    "PublicDescription": "This event counts prefetch requests to L2 cache generated by the other causes.",
+    "EventCode": "0x236",
+    "EventName": "L2HWPF_OTHER",
+    "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json
new file mode 100644 (file)
index 0000000..dd7c97a
--- /dev/null
@@ -0,0 +1,194 @@
+[
+  {
+    "ArchStdEvent": "STALL_FRONTEND"
+  },
+  {
+    "ArchStdEvent": "STALL_BACKEND"
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of EAGA pipeline.",
+    "EventCode": "0x1A0",
+    "EventName": "EAGA_VAL",
+    "BriefDescription": "This event counts valid cycles of EAGA pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of EAGB pipeline.",
+    "EventCode": "0x1A1",
+    "EventName": "EAGB_VAL",
+    "BriefDescription": "This event counts valid cycles of EAGB pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of EXA pipeline.",
+    "EventCode": "0x1A2",
+    "EventName": "EXA_VAL",
+    "BriefDescription": "This event counts valid cycles of EXA pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of EXB pipeline.",
+    "EventCode": "0x1A3",
+    "EventName": "EXB_VAL",
+    "BriefDescription": "This event counts valid cycles of EXB pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of FLA pipeline.",
+    "EventCode": "0x1A4",
+    "EventName": "FLA_VAL",
+    "BriefDescription": "This event counts valid cycles of FLA pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of FLB pipeline.",
+    "EventCode": "0x1A5",
+    "EventName": "FLB_VAL",
+    "BriefDescription": "This event counts valid cycles of FLB pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of PRX pipeline.",
+    "EventCode": "0x1A6",
+    "EventName": "PRX_VAL",
+    "BriefDescription": "This event counts valid cycles of PRX pipeline."
+  },
+  {
+    "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1.",
+    "EventCode": "0x1B4",
+    "EventName": "FLA_VAL_PRD_CNT",
+    "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1."
+  },
+  {
+    "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1.",
+    "EventCode": "0x1B5",
+    "EventName": "FLB_VAL_PRD_CNT",
+    "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.",
+    "EventCode": "0x240",
+    "EventName": "L1_PIPE0_VAL",
+    "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.",
+    "EventCode": "0x241",
+    "EventName": "L1_PIPE1_VAL",
+    "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.",
+    "EventCode": "0x250",
+    "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.",
+    "EventCode": "0x251",
+    "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.",
+    "EventCode": "0x252",
+    "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.",
+    "EventCode": "0x253",
+    "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1."
+  },
+  {
+    "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.",
+    "EventCode": "0x260",
+    "EventName": "L1_PIPE0_COMP",
+    "BriefDescription": "This event counts completed requests in L1D cache pipeline#0."
+  },
+  {
+    "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.",
+    "EventCode": "0x261",
+    "EventName": "L1_PIPE1_COMP",
+    "BriefDescription": "This event counts completed requests in L1D cache pipeline#1."
+  },
+  {
+    "PublicDescription": "This event counts completed requests in L1I cache pipeline.",
+    "EventCode": "0x268",
+    "EventName": "L1I_PIPE_COMP",
+    "BriefDescription": "This event counts completed requests in L1I cache pipeline."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of L1I cache pipeline.",
+    "EventCode": "0x269",
+    "EventName": "L1I_PIPE_VAL",
+    "BriefDescription": "This event counts valid cycles of L1I cache pipeline."
+  },
+  {
+    "PublicDescription": "This event counts aborted requests in L1D pipelines due to store-load interlock.",
+    "EventCode": "0x274",
+    "EventName": "L1_PIPE_ABORT_STLD_INTLK",
+    "BriefDescription": "This event counts aborted requests in L1D pipelines due to store-load interlock."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.",
+    "EventCode": "0x2A0",
+    "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0."
+  },
+  {
+    "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.",
+    "EventCode": "0x2A1",
+    "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0",
+    "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0."
+  },
+  {
+    "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.",
+    "EventCode": "0x2B0",
+    "EventName": "L1_PIPE_COMP_GATHER_2FLOW",
+    "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined."
+  },
+  {
+    "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.",
+    "EventCode": "0x2B1",
+    "EventName": "L1_PIPE_COMP_GATHER_1FLOW",
+    "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined."
+  },
+  {
+    "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.",
+    "EventCode": "0x2B2",
+    "EventName": "L1_PIPE_COMP_GATHER_0FLOW",
+    "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0."
+  },
+  {
+    "PublicDescription": "This event counts the number of flows of the scatter instructions.",
+    "EventCode": "0x2B3",
+    "EventName": "L1_PIPE_COMP_SCATTER_1FLOW",
+    "BriefDescription": "This event counts the number of flows of the scatter instructions."
+  },
+  {
+    "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.",
+    "EventCode": "0x2B8",
+    "EventName": "L1_PIPE0_COMP_PRD_CNT",
+    "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1."
+  },
+  {
+    "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.",
+    "EventCode": "0x2B9",
+    "EventName": "L1_PIPE1_COMP_PRD_CNT",
+    "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1."
+  },
+  {
+    "PublicDescription": "This event counts valid cycles of L2 cache pipeline.",
+    "EventCode": "0x330",
+    "EventName": "L2_PIPE_VAL",
+    "BriefDescription": "This event counts valid cycles of L2 cache pipeline."
+  },
+  {
+    "PublicDescription": "This event counts completed requests in L2 cache pipeline.",
+    "EventCode": "0x350",
+    "EventName": "L2_PIPE_COMP_ALL",
+    "BriefDescription": "This event counts completed requests in L2 cache pipeline."
+  },
+  {
+    "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.",
+    "EventCode": "0x370",
+    "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH",
+    "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access."
+  }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json
new file mode 100644 (file)
index 0000000..dc1b95e
--- /dev/null
@@ -0,0 +1,110 @@
+[
+  {
+    "ArchStdEvent": "SIMD_INST_RETIRED"
+  },
+  {
+    "ArchStdEvent": "SVE_INST_RETIRED"
+  },
+  {
+    "ArchStdEvent": "UOP_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_MATH_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_FMA_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_RECPE_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_CVT_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SVE_INT_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_PRED_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_MOVPRFX_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_MOVPRFX_U_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SVE_LD_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SVE_ST_SPEC"
+  },
+  {
+    "ArchStdEvent": "PRF_SPEC"
+  },
+  {
+    "ArchStdEvent": "BASE_LD_REG_SPEC"
+  },
+  {
+    "ArchStdEvent": "BASE_ST_REG_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_LDR_REG_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_STR_REG_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_LDR_PREG_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_STR_PREG_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_PRF_CONTIG_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC"
+  },
+  {
+    "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_LD_GATHER_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_ST_SCATTER_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_PRF_GATHER_SPEC"
+  },
+  {
+    "ArchStdEvent": "SVE_LDFF_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_SCALE_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_FIXED_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC"
+  },
+  {
+    "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC"
+  }
+]
index 0d60914..c43591d 100644 (file)
@@ -20,5 +20,6 @@
 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
+0x00000000460f0010,v1,fujitsu/a64fx,core
 0x00000000480fd010,v1,hisilicon/hip08,core
 0x00000000500f0000,v1,ampere/emag,core
index fc4aa6c..4e25525 100644 (file)
         "MetricName": "flush_rate_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_11to14_slots_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_15to17_slots_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_18plus_slots_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES /  ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_1to2_slots_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_3to6_slots_percent"
     },
     {
-        "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid",
+        "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid",
         "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
         "MetricGroup": "general",
         "MetricName": "gct_util_7to10_slots_percent"
index f8784c6..db86ba3 100644 (file)
         "MetricGroup": "instruction_stats_percent_per_ref",
         "MetricName": "inst_from_rmem_percent"
     },
-    {
-        "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)",
-        "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_co_m_rd_util"
-    },
-    {
-        "BriefDescription": "L2 dcache invalidates per run inst (per core)",
-        "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_dc_inv_rate_percent"
-    },
     {
         "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)",
         "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100",
         "MetricGroup": "l2_stats",
         "MetricName": "l2_dem_ld_disp_percent"
     },
-    {
-        "BriefDescription": "L2 Icache invalidates per run inst (per core)",
-        "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ic_inv_rate_percent"
-    },
-    {
-        "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)",
-        "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_inst_miss_ratio_percent"
-    },
-    {
-        "BriefDescription": "Average number of cycles between L2 Load hits",
-        "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ld_hit_frequency"
-    },
-    {
-        "BriefDescription": "Average number of cycles between L2 Load misses",
-        "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ld_miss_frequency"
-    },
-    {
-        "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)",
-        "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ld_miss_ratio_percent"
-    },
-    {
-        "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)",
-        "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ld_rd_util"
-    },
-    {
-        "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks",
-        "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_ldmiss_wr_util"
-    },
-    {
-        "BriefDescription": "L2 local pump prediction success",
-        "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_local_pred_correct_percent"
-    },
-    {
-        "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs",
-        "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_mod_co_percent"
-    },
-    {
-        "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts",
-        "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_rc_ld_disp_addr_fail_percent"
-    },
-    {
-        "BriefDescription": "% of L2 Load RC dispatch attempts that failed",
-        "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_rc_ld_disp_fail_percent"
-    },
-    {
-        "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts",
-        "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_rc_st_disp_addr_fail_percent"
-    },
-    {
-        "BriefDescription": "% of L2 Store RC dispatch attempts that failed",
-        "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_rc_st_disp_fail_percent"
-    },
-    {
-        "BriefDescription": "L2 Cache Read Utilization (per core)",
-        "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_rd_util_percent"
-    },
-    {
-        "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs",
-        "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_shr_co_percent"
-    },
     {
         "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
         "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
         "MetricGroup": "l2_stats",
         "MetricName": "l2_st_miss_ratio_percent"
     },
-    {
-        "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)",
-        "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_st_rd_util"
-    },
     {
         "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks",
         "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100",
         "MetricGroup": "l2_stats",
         "MetricName": "l2_st_wr_util"
     },
-    {
-        "BriefDescription": "L2 Cache Write Utilization (per core)",
-        "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)",
-        "MetricGroup": "l2_stats",
-        "MetricName": "l2_wr_util_percent"
-    },
-    {
-        "BriefDescription": "Average number of cycles between L3 Load hits",
-        "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2",
-        "MetricGroup": "l3_stats",
-        "MetricName": "l3_ld_hit_frequency"
-    },
-    {
-        "BriefDescription": "Average number of cycles between L3 Load misses",
-        "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2",
-        "MetricGroup": "l3_stats",
-        "MetricName": "l3_ld_miss_frequency"
-    },
-    {
-        "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle",
-        "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8",
-        "MetricGroup": "l3_stats",
-        "MetricName": "l3_wi_usage"
-    },
     {
         "BriefDescription": "Average icache miss latency",
         "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ",
         "MetricName": "custom_secs"
     },
     {
-        "BriefDescription": "Percentage Cycles atleast one instruction dispatched",
+        "BriefDescription": "Percentage Cycles at least one instruction dispatched",
         "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
         "MetricName": "cycles_atleast_one_inst_dispatched_percent"
     },
index e1f3f5c..33aa3c8 100644 (file)
@@ -1149,7 +1149,7 @@ static int process_one_file(const char *fpath, const struct stat *sb,
         * and directory tree could result in build failure due to table
         * names not being found.
         *
-        * Atleast for now, be strict with processing JSON file names.
+        * At least for now, be strict with processing JSON file names.
         * i.e. if JSON file name cannot be mapped to C-style table name,
         * fail.
         */
index ea0c8b9..a0cfc7f 100644 (file)
@@ -356,7 +356,7 @@ def handle_irq_softirq_exit(event_info):
                return
        rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
                        'irq_list':irq_list, 'event_list':event_list}
-       # merge information realted to a NET_RX softirq
+       # merge information related to a NET_RX softirq
        receive_hunk_list.append(rec_data)
 
 def handle_napi_poll(event_info):
index cc9fbce..ef37353 100644 (file)
@@ -225,11 +225,11 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
         *
         * The test case check following error conditions:
         * - we get stuck in signal handler because of debug
-        *   exception being triggered receursively due to
+        *   exception being triggered recursively due to
         *   the wrong RF EFLAG management
         *
         * - we never trigger the sig_handler breakpoint due
-        *   to the rong RF EFLAG management
+        *   to the wrong RF EFLAG management
         *
         */
 
@@ -242,7 +242,7 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
        ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
 
        /*
-        * Kick off the test by trigering 'fd1'
+        * Kick off the test by triggering 'fd1'
         * breakpoint.
         */
        test_function();
index 2fdc7b2..9866cdd 100644 (file)
@@ -658,7 +658,7 @@ static int do_test_code_reading(bool try_kcore)
                                /*
                                 * Both cpus and threads are now owned by evlist
                                 * and will be freed by following perf_evlist__set_maps
-                                * call. Getting refference to keep them alive.
+                                * call. Getting reference to keep them alive.
                                 */
                                perf_cpu_map__get(cpus);
                                perf_thread_map__get(threads);
index a273ed5..2fac7d7 100644 (file)
@@ -26,7 +26,7 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u
                  "Stdlib.bytes.++" },
        };
 
-       for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
                buf = ocaml_demangle_sym(test_cases[i].mangled);
                if ((buf == NULL && test_cases[i].demangled != NULL)
                                || (buf != NULL && test_cases[i].demangled == NULL)
index 3f2e1a5..890cb1f 100644 (file)
@@ -47,7 +47,7 @@ static struct sample fake_samples[] = {
 };
 
 /*
- * Will be casted to struct ip_callchain which has all 64 bit entries
+ * Will be cast to struct ip_callchain which has all 64 bit entries
  * of nr and ips[].
  */
 static u64 fake_callchains[][10] = {
@@ -297,7 +297,7 @@ out:
        return err;
 }
 
-/* callcain + NO children */
+/* callchain + NO children */
 static int test2(struct evsel *evsel, struct machine *machine)
 {
        int err;
index a7f6661..026c547 100644 (file)
@@ -20,7 +20,7 @@
 
 #if defined(__s390x__)
 /* Return true if kvm module is available and loaded. Test this
- * and retun success when trace point kvm_s390_create_vm
+ * and return success when trace point kvm_s390_create_vm
  * exists. Otherwise this test always fails.
  */
 static bool kvm_s390_create_vm_valid(void)
index 6dc1db1..4968c41 100644 (file)
@@ -98,7 +98,7 @@ static u64 find_value(const char *name, struct value *values)
                if (!strcmp(name, v->event))
                        return v->val;
                v++;
-       };
+       }
        return 0;
 }
 
@@ -186,7 +186,7 @@ static int __compute_metric(const char *name, struct value *vals,
                *ratio2 = compute_single(&metric_events, evlist, &st, name2);
 
 out:
-       /* ... clenup. */
+       /* ... cleanup. */
        metricgroup__rblist_exit(&metric_events);
        runtime_stat__exit(&st);
        evlist__free_stats(evlist);
index 416af61..f05670d 100755 (executable)
@@ -14,18 +14,56 @@ if ! [ -x "$(command -v cc)" ]; then
        exit 2
 fi
 
+# check what we need to test windows binaries
+add_pe=1
+run_pe=1
+if ! perf version --build-options | grep -q 'libbfd: .* on '; then
+       echo "WARNING: perf not built with libbfd. PE binaries will not be tested."
+       add_pe=0
+       run_pe=0
+fi
+if ! which wine > /dev/null; then
+       echo "WARNING: wine not found. PE binaries will not be run."
+       run_pe=0
+fi
+
+# set up wine
+if [ ${run_pe} -eq 1 ]; then
+       wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX)
+       export WINEPREFIX=${wineprefix}
+       # clear display variables to prevent wine from popping up dialogs
+       unset DISPLAY
+       unset WAYLAND_DISPLAY
+fi
+
 ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX)
 ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX)
+ex_pe=$(dirname $0)/../pe-file.exe
 
 echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c -
 echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c -
 
-echo "test binaries: ${ex_sha1} ${ex_md5}"
+echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}"
 
 check()
 {
-       id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
-
+       case $1 in
+       *.exe)
+               # We don't have a tool that can pull a nicely formatted build-id out of
+               # a PE file, but we can extract the whole section with objcopy and
+               # format it ourselves. The .buildid section is a Debug Directory
+               # containing a CodeView entry:
+               #     https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only
+               #     https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md
+               # The build-id starts at byte 33 and must be rearranged into a GUID.
+               id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \
+                       cut -c 33-48 | hexdump -ve '/1 "%02x"' | \
+                       sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'`
+               ;;
+       *)
+               id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
+               ;;
+       esac
        echo "build id: ${id}"
 
        link=${build_id_dir}/.build-id/${id:0:2}/${id:2}
@@ -50,7 +88,7 @@ check()
                exit 1
        fi
 
-       ${perf} buildid-cache -l | grep $id
+       ${perf} buildid-cache -l | grep ${id}
        if [ $? -ne 0 ]; then
                echo "failed: ${id} is not reported by \"perf buildid-cache -l\""
                exit 1
@@ -79,16 +117,20 @@ test_record()
 {
        data=$(mktemp /tmp/perf.data.XXX)
        build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+       log=$(mktemp /tmp/perf.log.XXX)
        perf="perf --buildid-dir ${build_id_dir}"
 
-       ${perf} record --buildid-all -o ${data} ${1}
+       echo "running: perf record $@"
+       ${perf} record --buildid-all -o ${data} $@ &> ${log}
        if [ $? -ne 0 ]; then
-               echo "failed: record ${1}"
+               echo "failed: record $@"
+               echo "see log: ${log}"
                exit 1
        fi
 
-       check ${1}
+       check ${@: -1}
 
+       rm -f ${log}
        rm -rf ${build_id_dir}
        rm -rf ${data}
 }
@@ -96,12 +138,21 @@ test_record()
 # add binaries manual via perf buildid-cache -a
 test_add ${ex_sha1}
 test_add ${ex_md5}
+if [ ${add_pe} -eq 1 ]; then
+       test_add ${ex_pe}
+fi
 
 # add binaries via perf record post processing
 test_record ${ex_sha1}
 test_record ${ex_md5}
+if [ ${run_pe} -eq 1 ]; then
+       test_record wine ${ex_pe}
+fi
 
 # cleanup
 rm ${ex_sha1} ${ex_md5}
+if [ ${run_pe} -eq 1 ]; then
+       rm -r ${wineprefix}
+fi
 
 exit ${err}
index 5898438..45fc24a 100755 (executable)
@@ -98,6 +98,23 @@ check_line_other()
        fi
 }
 
+daemon_exit()
+{
+       local config=$1
+
+       local line=`perf daemon --config ${config} -x: | head -1`
+       local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` # first ':'-separated field of the first output line
+
+       # Reset the SIGINT/SIGTERM traps to their default action before shutting down.
+       trap - SIGINT SIGTERM
+
+       # ask the daemon (selected by its config file) to stop
+       perf daemon stop --config ${config}
+
+       # ... and wait for the pid to go away (tail --pid exits once that process is gone)
+       tail --pid=${pid} -f /dev/null
+}
+
 daemon_start()
 {
        local config=$1
@@ -105,29 +122,24 @@ daemon_start()
 
        perf daemon start --config ${config}
 
+       # Clean up daemon if interrupted.
+       trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM
+
        # wait for the session to ping
        local state="FAIL"
+       local retries=0
        while [ "${state}" != "OK" ]; do
                state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'`
                sleep 0.05
+               retries=$((${retries} +1))
+               if [ ${retries} -ge 600 ]; then
+                       echo "FAILED: Timeout waiting for daemon to ping"
+                       daemon_exit ${config}
+                       exit 1
+               fi
        done
 }
 
-daemon_exit()
-{
-       local base=$1
-       local config=$2
-
-       local line=`perf daemon --config ${config} -x: | head -1`
-       local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
-
-       # stop daemon
-       perf daemon stop --config ${config}
-
-       # ... and wait for the pid to go away
-       tail --pid=${pid} -f /dev/null
-}
-
 test_list()
 {
        echo "test daemon list"
@@ -171,7 +183,7 @@ EOF
                         ${base}/session-time/ack "0"
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        rm -rf ${base}
        rm -f ${config}
@@ -288,7 +300,7 @@ EOF
        done
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        rm -rf ${base}
        rm -f ${config}
@@ -333,7 +345,7 @@ EOF
        fi
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        # check that sessions are gone
        if [ -d "/proc/${pid_size}" ]; then
@@ -374,7 +386,7 @@ EOF
        perf daemon signal --config ${config}
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        # count is 2 perf.data for signals and 1 for perf record finished
        count=`ls ${base}/session-test/ | grep perf.data | wc -l`
@@ -420,7 +432,7 @@ EOF
        fi
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        rm -rf ${base}
        rm -f ${config}
@@ -457,7 +469,7 @@ EOF
        fi
 
        # stop daemon
-       daemon_exit ${base} ${config}
+       daemon_exit ${config}
 
        rm -rf ${base}
        rm -f ${config}
diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh
new file mode 100755 (executable)
index 0000000..5571ff7
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/sh
+# perf stat csv summary test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# With --summary, every output line containing "summary" must have it as its first field:
+#     1.001364330 9224197  cycles 8012885033 100.00
+#         summary 9224197  cycles 8012885033 100.00
+# NOTE(review): if grep matches no line the loop body never runs, so this check still passes.
+perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary 2>&1 | \
+grep -e summary | \
+while read summary num event run pct
+do
+       if [ $summary != "summary" ]; then
+               exit 1
+       fi
+done
+
+# With --no-csv-summary, no output line may contain "summary"; any match fails the test:
+#     1.001360298 9148534  cycles 8012853854 100.00
+#9148534  cycles 8012853854 100.00
+# (sample above: the final row is printed without a leading "summary" column)
+perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \
+grep -e summary | \
+while read num event run pct
+do
+       exit 1
+done
+
+exit 0
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
new file mode 100755 (executable)
index 0000000..22eb31e
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/sh
+# perf stat --bpf-counters test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# check whether $2 is within +/- 10% of $1; prints a message and exits 1 if it is not
+compare_number()
+{
+       first_num=$1
+       second_num=$2
+
+       # upper bound is first_num * 110% (integer arithmetic: first_num / 10 is truncated)
+       upper=$(( $first_num + $first_num / 10 ))
+       # lower bound is first_num * 90% (same truncated 10% as the upper bound)
+       lower=$(( $first_num - $first_num / 10 ))
+
+       if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
+               echo "The difference between $first_num and $second_num are greater than 10%."
+               exit 1
+       fi
+}
+
+# skip (exit 2) if --bpf-counters is not supported, i.e. a trivial run using it fails
+perf stat --bpf-counters true > /dev/null 2>&1 || exit 2
+
+base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+
+compare_number $base_cycles $bpf_cycles
+exit 0
index 74748ed..0504898 100644 (file)
@@ -80,7 +80,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
         *   CPU 1 is on core_id 1 and physical_package_id 3
         *
         *   Core_id and physical_package_id are platform and architecture
-        *   dependend and might have higher numbers than the CPU id.
+        *   dependent and might have higher numbers than the CPU id.
         *   This actually depends on the configuration.
         *
         *  In this case process_cpu_topology() prints error message:
index 385894b..b8fc5c5 100644 (file)
@@ -85,7 +85,7 @@ struct mmsghdr {
 
 /*
  *     POSIX 1003.1g - ancillary data object information
- *     Ancillary data consits of a sequence of pairs of
+ *     Ancillary data consists of a sequence of pairs of
  *     (cmsghdr, cmsg_data[])
  */
 
index 35b82ca..ad0a70f 100644 (file)
@@ -381,6 +381,25 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
        return true;
 }
 
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static void annotate_browser__show_full_location(struct ui_browser *browser)
+{
+       struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+       struct disasm_line *cursor = disasm_line(ab->selection);
+       struct annotation_line *al = &cursor->al;
+       /* Show the selected line's source file location on the help line, or say why there is none. */
+       if (al->offset != -1)
+               ui_helpline__puts("Only available for source code lines.");
+       else if (al->fileloc == NULL)
+               ui_helpline__puts("No source file location.");
+       else {
+               char help_line[SYM_TITLE_MAX_SIZE]; /* fileloc length is not bounded by this size */
+               snprintf(help_line, sizeof(help_line), "Source file location: %s", al->fileloc);
+               ui_helpline__puts(help_line);
+       }
+}
+
 static void ui_browser__init_asm_mode(struct ui_browser *browser)
 {
        struct annotation *notes = browser__annotation(browser);
@@ -388,8 +407,6 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser)
        browser->nr_entries = notes->nr_asm_entries;
 }
 
-#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
-
 static int sym_title(struct symbol *sym, struct map *map, char *title,
                     size_t sz, int percent_type)
 {
@@ -398,7 +415,7 @@ static int sym_title(struct symbol *sym, struct map *map, char *title,
 }
 
 /*
- * This can be called from external jumps, i.e. jumps from one functon
+ * This can be called from external jumps, i.e. jumps from one function
  * to another, like from the kernel's entry_SYSCALL_64 function to the
  * swapgs_restore_regs_and_return_to_usermode() function.
  *
@@ -747,6 +764,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
                "c             Show min/max cycle\n"
                "/             Search string\n"
                "k             Toggle line numbers\n"
+               "l             Show full source file location\n"
                "P             Print to [symbol_name].annotation file.\n"
                "r             Run available scripts\n"
                "p             Toggle percent type [local/global]\n"
@@ -760,6 +778,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
                case 'k':
                        notes->options->show_linenr = !notes->options->show_linenr;
                        continue;
+               case 'l':
+                       annotate_browser__show_full_location (&browser->b);
+                       continue;
                case 'H':
                        nd = browser->curr_hot;
                        break;
index 3b9818e..bcfd0a4 100644 (file)
@@ -117,7 +117,7 @@ static void hist_browser__update_rows(struct hist_browser *hb)
        browser->rows -= browser->extra_title_lines;
        /*
         * Verify if we were at the last line and that line isn't
-        * visibe because we now show the header line(s).
+        * visible because we now show the header line(s).
         */
        index_row = browser->index - browser->top_idx;
        if (index_row >= browser->rows)
index e60841b..18eee25 100644 (file)
@@ -1161,6 +1161,7 @@ struct annotate_args {
        s64                       offset;
        char                      *line;
        int                       line_nr;
+       char                      *fileloc;
 };
 
 static void annotation_line__init(struct annotation_line *al,
@@ -1170,6 +1171,7 @@ static void annotation_line__init(struct annotation_line *al,
        al->offset = args->offset;
        al->line = strdup(args->line);
        al->line_nr = args->line_nr;
+       al->fileloc = args->fileloc;
        al->data_nr = nr;
 }
 
@@ -1482,7 +1484,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
  */
 static int symbol__parse_objdump_line(struct symbol *sym,
                                      struct annotate_args *args,
-                                     char *parsed_line, int *line_nr)
+                                     char *parsed_line, int *line_nr, char **fileloc)
 {
        struct map *map = args->ms.map;
        struct annotation *notes = symbol__annotation(sym);
@@ -1494,6 +1496,7 @@ static int symbol__parse_objdump_line(struct symbol *sym,
        /* /filename:linenr ? Save line number and ignore. */
        if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
                *line_nr = atoi(parsed_line + match[1].rm_so);
+               *fileloc = strdup(parsed_line);
                return 0;
        }
 
@@ -1513,6 +1516,7 @@ static int symbol__parse_objdump_line(struct symbol *sym,
        args->offset  = offset;
        args->line    = parsed_line;
        args->line_nr = *line_nr;
+       args->fileloc = *fileloc;
        args->ms.sym  = sym;
 
        dl = disasm_line__new(args);
@@ -1807,6 +1811,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
                        args->offset = -1;
                        args->line = strdup(srcline);
                        args->line_nr = 0;
+                       args->fileloc = NULL;
                        args->ms.sym  = sym;
                        dl = disasm_line__new(args);
                        if (dl) {
@@ -1818,6 +1823,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
                args->offset = pc;
                args->line = buf + prev_buf_size;
                args->line_nr = 0;
+               args->fileloc = NULL;
                args->ms.sym  = sym;
                dl = disasm_line__new(args);
                if (dl)
@@ -1852,6 +1858,7 @@ symbol__disassemble_bpf_image(struct symbol *sym,
        args->offset = -1;
        args->line = strdup("to be implemented");
        args->line_nr = 0;
+       args->fileloc = NULL;
        dl = disasm_line__new(args);
        if (dl)
                annotation_line__add(&dl->al, &notes->src->source);
@@ -1933,6 +1940,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
        bool delete_extract = false;
        bool decomp = false;
        int lineno = 0;
+       char *fileloc = NULL;
        int nline;
        char *line;
        size_t line_len;
@@ -2060,7 +2068,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
                 * See disasm_line__new() and struct disasm_line::line_nr.
                 */
                if (symbol__parse_objdump_line(sym, args, expanded_line,
-                                              &lineno) < 0)
+                                              &lineno, &fileloc) < 0)
                        break;
                nline++;
        }
@@ -3144,6 +3152,10 @@ static int annotation__config(const char *var, const char *value, void *data)
                opt->use_offset = perf_config_bool("use_offset", value);
        } else if (!strcmp(var, "annotate.disassembler_style")) {
                opt->disassembler_style = value;
+       } else if (!strcmp(var, "annotate.demangle")) {
+               symbol_conf.demangle = perf_config_bool("demangle", value);
+       } else if (!strcmp(var, "annotate.demangle_kernel")) {
+               symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value);
        } else {
                pr_debug("%s variable unknown, ignoring...", var);
        }
index 096cdaf..3757416 100644 (file)
@@ -84,6 +84,7 @@ struct annotation_options {
             print_lines,
             full_path,
             show_linenr,
+            show_fileloc,
             show_nr_jumps,
             show_minmax_cycle,
             show_asm_raw,
@@ -136,6 +137,7 @@ struct annotation_line {
        s64                      offset;
        char                    *line;
        int                      line_nr;
+       char                    *fileloc;
        int                      jump_sources;
        float                    ipc;
        u64                      cycles;
index 9087f1b..fbb3c40 100644 (file)
@@ -671,7 +671,7 @@ int bpf__probe(struct bpf_object *obj)
                 * After probing, let's consider prologue, which
                 * adds program fetcher to BPF programs.
                 *
-                * hook_load_preprocessorr() hooks pre-processor
+                * hook_load_preprocessor() hooks pre-processor
                 * to bpf_program, let it generate prologue
                 * dynamically during loading.
                 */
index 04f8912..81d1df3 100644 (file)
@@ -5,6 +5,7 @@
 #include <assert.h>
 #include <limits.h>
 #include <unistd.h>
+#include <sys/file.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <linux/err.h>
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
 #include <bpf/libbpf.h>
+#include <api/fs/fs.h>
 
 #include "bpf_counter.h"
 #include "counts.h"
 #include "debug.h"
 #include "evsel.h"
+#include "evlist.h"
 #include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
 
 #include "bpf_skel/bpf_prog_profiler.skel.h"
+#include "bpf_skel/bperf_u.h"
+#include "bpf_skel/bperf_leader.skel.h"
+#include "bpf_skel/bperf_follower.skel.h"
+
+/*
+ * bperf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map.  The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs: the ID of the
+ * bpf_link of the leader prog and the ID of the diff_map. Each perf-stat
+ * session holds a reference to the bpf_link to make sure the leader prog
+ * stays attached to the sched_switch tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+       __u32 link_id;
+       __u32 diff_map_id;
+};
+
+#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map"
+#define ATTR_MAP_SIZE 16
 
 static inline void *u64_to_ptr(__u64 ptr)
 {
@@ -274,17 +306,494 @@ struct bpf_counter_ops bpf_program_profiler_ops = {
        .install_pe = bpf_program_profiler__install_pe,
 };
 
+static __u32 bpf_link_get_id(int fd)
+{
+       struct bpf_link_info link_info = {0};
+       __u32 link_info_len = sizeof(link_info);
+
+       bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+       return link_info.id;
+}
+
+static __u32 bpf_link_get_prog_id(int fd)
+{
+       struct bpf_link_info link_info = {0};
+       __u32 link_info_len = sizeof(link_info);
+
+       bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+       return link_info.prog_id;
+}
+
+static __u32 bpf_map_get_id(int fd)
+{
+       struct bpf_map_info map_info = {0};
+       __u32 map_info_len = sizeof(map_info);
+
+       bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+       return map_info.id;
+}
+
+static int bperf_lock_attr_map(struct target *target)
+{
+       char path[PATH_MAX];
+       int map_fd, err;
+
+       if (target->attr_map) {
+               scnprintf(path, PATH_MAX, "%s", target->attr_map);
+       } else {
+               scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(),
+                         DEFAULT_ATTR_MAP_PATH);
+       }
+
+       if (access(path, F_OK)) {
+               map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+                                       sizeof(struct perf_event_attr),
+                                       sizeof(struct perf_event_attr_map_entry),
+                                       ATTR_MAP_SIZE, 0);
+               if (map_fd < 0)
+                       return -1;
+
+               err = bpf_obj_pin(map_fd, path);
+               if (err) {
+                       /* someone pinned the map in parallel? */
+                       close(map_fd);
+                       map_fd = bpf_obj_get(path);
+                       if (map_fd < 0)
+                               return -1;
+               }
+       } else {
+               map_fd = bpf_obj_get(path);
+               if (map_fd < 0)
+                       return -1;
+       }
+
+       err = flock(map_fd, LOCK_EX);
+       if (err) {
+               close(map_fd);
+               return -1;
+       }
+       return map_fd;
+}
+
+/* trigger the leader program on a cpu */
+static int bperf_trigger_reading(int prog_fd, int cpu)
+{
+       DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+                           .ctx_in = NULL,
+                           .ctx_size_in = 0,
+                           .flags = BPF_F_TEST_RUN_ON_CPU,
+                           .cpu = cpu,
+                           .retval = 0,
+               );
+
+       return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
+static int bperf_check_target(struct evsel *evsel,
+                             struct target *target,
+                             enum bperf_filter_type *filter_type,
+                             __u32 *filter_entry_cnt)
+{
+       if (evsel->leader->core.nr_members > 1) {
+               pr_err("bpf managed perf events do not yet support groups.\n");
+               return -1;
+       }
+
+       /* determine filter type based on target */
+       if (target->system_wide) {
+               *filter_type = BPERF_FILTER_GLOBAL;
+               *filter_entry_cnt = 1;
+       } else if (target->cpu_list) {
+               *filter_type = BPERF_FILTER_CPU;
+               *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
+       } else if (target->tid) {
+               *filter_type = BPERF_FILTER_PID;
+               *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+       } else if (target->pid || evsel->evlist->workload.pid != -1) {
+               *filter_type = BPERF_FILTER_TGID;
+               *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+       } else {
+               pr_err("bpf managed perf events do not yet support these targets.\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static struct perf_cpu_map *all_cpu_map;
+
+static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
+                                      struct perf_event_attr_map_entry *entry)
+{
+       struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
+       int link_fd, diff_map_fd, err;
+       struct bpf_link *link = NULL;
+
+       if (!skel) {
+               pr_err("Failed to open leader skeleton\n");
+               return -1;
+       }
+
+       bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
+       err = bperf_leader_bpf__load(skel);
+       if (err) {
+               pr_err("Failed to load leader skeleton\n");
+               goto out;
+       }
+
+       err = -1; /* attach may fail with NULL or with an ERR_PTR() */
+       link = bpf_program__attach(skel->progs.on_switch);
+       if (IS_ERR_OR_NULL(link)) {
+               pr_err("Failed to attach leader program\n");
+               goto out;
+       }
+
+       link_fd = bpf_link__fd(link);
+       diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
+       entry->link_id = bpf_link_get_id(link_fd);
+       entry->diff_map_id = bpf_map_get_id(diff_map_fd);
+       err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+       assert(err == 0);
+
+       evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+       assert(evsel->bperf_leader_link_fd >= 0);
+
+       /*
+        * save leader_skel for install_pe, which is called within
+        * following evsel__open_per_cpu call
+        */
+       evsel->leader_skel = skel;
+       evsel__open_per_cpu(evsel, all_cpu_map, -1);
+
+out:
+       bperf_leader_bpf__destroy(skel);
+       bpf_link__destroy(link);
+       return err;
+}
+
+static int bperf__load(struct evsel *evsel, struct target *target)
+{
+       struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+       int attr_map_fd, diff_map_fd = -1, err;
+       enum bperf_filter_type filter_type;
+       __u32 filter_entry_cnt, i;
+
+       if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
+               return -1;
+
+       if (!all_cpu_map) {
+               all_cpu_map = perf_cpu_map__new(NULL);
+               if (!all_cpu_map)
+                       return -1;
+       }
+
+       evsel->bperf_leader_prog_fd = -1;
+       evsel->bperf_leader_link_fd = -1;
+
+       /*
+        * Step 1: hold a fd on the leader program and the bpf_link; if
+        * the program is already gone, reload it.
+        * Use flock() to ensure exclusive access to the perf_event_attr
+        * map.
+        */
+       attr_map_fd = bperf_lock_attr_map(target);
+       if (attr_map_fd < 0) {
+               pr_err("Failed to lock perf_event_attr map\n");
+               return -1;
+       }
+
+       err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
+       if (err) {
+               err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY);
+               if (err)
+                       goto out;
+       }
+       err = -1; /* a failed reload below must not return 0 */
+       evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
+       if (evsel->bperf_leader_link_fd < 0 &&
+           bperf_reload_leader_program(evsel, attr_map_fd, &entry))
+               goto out;
+
+       /*
+        * The bpf_link holds reference to the leader program, and the
+        * leader program holds reference to the maps. Therefore, if
+        * link_id is valid, diff_map_id should also be valid.
+        */
+       evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
+               bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
+       assert(evsel->bperf_leader_prog_fd >= 0);
+
+       diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
+       assert(diff_map_fd >= 0);
+
+       /*
+        * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+        * whether the kernel supports it
+        */
+       err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0);
+       if (err) {
+               pr_err("The kernel does not support test_run for raw_tp BPF programs.\n"
+                      "Therefore, --use-bpf might show inaccurate readings\n");
+               goto out;
+       }
+       err = -1; /* a failed skeleton open below must not return 0 */
+       /* Step 2: load the follower skeleton */
+       evsel->follower_skel = bperf_follower_bpf__open();
+       if (!evsel->follower_skel) {
+               pr_err("Failed to open follower skeleton\n");
+               goto out;
+       }
+
+       /* attach fexit program to the leader program */
+       bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX,
+                                      evsel->bperf_leader_prog_fd, "on_switch");
+
+       /* connect to leader diff_reading map */
+       bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd);
+
+       /* set up reading map */
+       bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
+                                filter_entry_cnt);
+       /* set up follower filter based on target */
+       bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
+                                filter_entry_cnt);
+       err = bperf_follower_bpf__load(evsel->follower_skel);
+       if (err) {
+               pr_err("Failed to load follower skeleton\n");
+               bperf_follower_bpf__destroy(evsel->follower_skel);
+               evsel->follower_skel = NULL;
+               goto out;
+       }
+
+       for (i = 0; i < filter_entry_cnt; i++) {
+               int filter_map_fd;
+               __u32 key;
+
+               if (filter_type == BPERF_FILTER_PID ||
+                   filter_type == BPERF_FILTER_TGID)
+                       key = evsel->core.threads->map[i].pid;
+               else if (filter_type == BPERF_FILTER_CPU)
+                       key = evsel->core.cpus->map[i];
+               else
+                       break;
+
+               filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
+               bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+       }
+
+       evsel->follower_skel->bss->type = filter_type;
+
+       err = bperf_follower_bpf__attach(evsel->follower_skel);
+
+out:
+       if (err && evsel->bperf_leader_link_fd >= 0)
+               close(evsel->bperf_leader_link_fd);
+       if (err && evsel->bperf_leader_prog_fd >= 0)
+               close(evsel->bperf_leader_prog_fd);
+       if (diff_map_fd >= 0)
+               close(diff_map_fd);
+
+       flock(attr_map_fd, LOCK_UN);
+       close(attr_map_fd);
+
+       return err;
+}
+
+static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+{
+       struct bperf_leader_bpf *skel = evsel->leader_skel;
+
+       return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+                                  &cpu, &fd, BPF_ANY);
+}
+
+/*
+ * trigger the leader prog on each cpu, so the accum_readings map can get
+ * the latest readings.
+ */
+static int bperf_sync_counters(struct evsel *evsel)
+{
+       int num_cpu, i, cpu;
+
+       num_cpu = all_cpu_map->nr;
+       for (i = 0; i < num_cpu; i++) {
+               cpu = all_cpu_map->map[i];
+               bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
+       }
+       return 0;
+}
+
+static int bperf__enable(struct evsel *evsel)
+{
+       evsel->follower_skel->bss->enabled = 1;
+       return 0;
+}
+
+static int bperf__read(struct evsel *evsel)
+{
+       struct bperf_follower_bpf *skel = evsel->follower_skel;
+       __u32 num_cpu_bpf = cpu__max_cpu();
+       struct bpf_perf_event_value values[num_cpu_bpf];
+       int reading_map_fd, err = 0;
+       __u32 i, j, num_cpu;
+
+       bperf_sync_counters(evsel);
+       reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+       for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+               __u32 cpu;
+
+               err = bpf_map_lookup_elem(reading_map_fd, &i, values);
+               if (err)
+                       goto out;
+               switch (evsel->follower_skel->bss->type) {
+               case BPERF_FILTER_GLOBAL:
+                       assert(i == 0);
+
+                       num_cpu = all_cpu_map->nr;
+                       for (j = 0; j < num_cpu; j++) {
+                               cpu = all_cpu_map->map[j];
+                               perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
+                               perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
+                               perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+                       }
+                       break;
+               case BPERF_FILTER_CPU:
+                       cpu = evsel->core.cpus->map[i];
+                       perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
+                       perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
+                       perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
+                       break;
+               case BPERF_FILTER_PID:
+               case BPERF_FILTER_TGID:
+                       perf_counts(evsel->counts, 0, i)->val = 0;
+                       perf_counts(evsel->counts, 0, i)->ena = 0;
+                       perf_counts(evsel->counts, 0, i)->run = 0;
+
+                       for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
+                               perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
+                               perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
+                               perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
+                       }
+                       break;
+               default:
+                       break;
+               }
+       }
+out:
+       return err;
+}
+
+static int bperf__destroy(struct evsel *evsel)
+{
+       bperf_follower_bpf__destroy(evsel->follower_skel);
+       close(evsel->bperf_leader_prog_fd);
+       close(evsel->bperf_leader_link_fd);
+       return 0;
+}
+
+/*
+ * bperf: share hardware PMCs with BPF
+ *
+ * perf uses performance monitoring counters (PMC) to monitor system
+ * performance. The PMCs are limited hardware resources. For example,
+ * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
+ *
+ * Modern data center systems use these PMCs in many different ways:
+ * system level monitoring, (maybe nested) container level monitoring, per
+ * process monitoring, profiling (in sample mode), etc. In some cases,
+ * there are more active perf_events than available hardware PMCs. To allow
+ * all perf_events to have a chance to run, it is necessary to do expensive
+ * time multiplexing of events.
+ *
+ * On the other hand, many monitoring tools count the common metrics
+ * (cycles, instructions). It is a waste to have multiple tools create
+ * multiple perf_events of "cycles" and occupy multiple PMCs.
+ *
+ * bperf tries to reduce such waste by allowing multiple perf_events of
+ * "cycles" or "instructions" (at different scopes) to share PMCs. Instead
+ * of having each perf-stat session read its own perf_events, bperf uses
+ * BPF programs to read the perf_events and aggregate readings to BPF maps.
+ * Then, the perf-stat session(s) reads the values from these BPF maps.
+ *
+ *                                ||
+ *       shared progs and maps <- || -> per session progs and maps
+ *                                ||
+ *   ---------------              ||
+ *   | perf_events |              ||
+ *   ---------------       fexit  ||      -----------------
+ *          |             --------||----> | follower prog |
+ *       --------------- /        || ---  -----------------
+ * cs -> | leader prog |/         ||/        |         |
+ *   --> ---------------         /||  --------------  ------------------
+ *  /       |         |         / ||  | filter map |  | accum_readings |
+ * /  ------------  ------------  ||  --------------  ------------------
+ * |  | prev map |  | diff map |  ||                        |
+ * |  ------------  ------------  ||                        |
+ *  \                             ||                        |
+ * = \ ==================================================== | ============
+ *    \                                                    /   user space
+ *     \                                                  /
+ *      \                                                /
+ *    BPF_PROG_TEST_RUN                    BPF_MAP_LOOKUP_ELEM
+ *        \                                            /
+ *         \                                          /
+ *          \------  perf-stat ----------------------/
+ *
+ * The figure above shows the architecture of bperf. Note that the figure
+ * is divided into 3 regions: shared progs and maps (top left), per session
+ * progs and maps (top right), and user space (bottom).
+ *
+ * The leader prog is triggered on each context switch (cs). The leader
+ * prog reads perf_events and stores the difference (current_reading -
+ * previous_reading) to the diff map. For the same metric, e.g. "cycles",
+ * multiple perf-stat sessions share the same leader prog.
+ *
+ * Each perf-stat session creates a follower prog as fexit program to the
+ * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38)
+ * follower progs to the same leader prog. The follower prog checks current
+ * task and processor ID to decide whether to add the value from the diff
+ * map to its accumulated reading map (accum_readings).
+ *
+ * Finally, perf-stat user space reads the value from the accum_readings map.
+ *
+ * Besides context switches, it is also necessary to trigger the leader prog
+ * before perf-stat reads the value. Otherwise, the accum_readings map may
+ * not have the latest reading from the perf_events. This is achieved by
+ * running the leader prog via sys_bpf(BPF_PROG_TEST_RUN) on each CPU.
+ *
+ * Comment before the definition of struct perf_event_attr_map_entry
+ * describes how different sessions of perf-stat share information about
+ * the leader prog.
+ */
+
+struct bpf_counter_ops bperf_ops = {
+       .load       = bperf__load,
+       .enable     = bperf__enable,
+       .read       = bperf__read,
+       .install_pe = bperf__install_pe,
+       .destroy    = bperf__destroy,
+};
+
+static inline bool bpf_counter_skip(struct evsel *evsel)
+{
+       return list_empty(&evsel->bpf_counter_list) &&
+               evsel->follower_skel == NULL;
+}
+
 int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
 {
-       if (list_empty(&evsel->bpf_counter_list))
+       if (bpf_counter_skip(evsel))
                return 0;
        return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
 }
 
 int bpf_counter__load(struct evsel *evsel, struct target *target)
 {
-       if (target__has_bpf(target))
+       if (target->bpf_str)
                evsel->bpf_counter_ops = &bpf_program_profiler_ops;
+       else if (target->use_bpf)
+               evsel->bpf_counter_ops = &bperf_ops;
 
        if (evsel->bpf_counter_ops)
                return evsel->bpf_counter_ops->load(evsel, target);
@@ -293,21 +802,21 @@ int bpf_counter__load(struct evsel *evsel, struct target *target)
 
 int bpf_counter__enable(struct evsel *evsel)
 {
-       if (list_empty(&evsel->bpf_counter_list))
+       if (bpf_counter_skip(evsel))
                return 0;
        return evsel->bpf_counter_ops->enable(evsel);
 }
 
 int bpf_counter__read(struct evsel *evsel)
 {
-       if (list_empty(&evsel->bpf_counter_list))
+       if (bpf_counter_skip(evsel))
                return -EAGAIN;
        return evsel->bpf_counter_ops->read(evsel);
 }
 
 void bpf_counter__destroy(struct evsel *evsel)
 {
-       if (list_empty(&evsel->bpf_counter_list))
+       if (bpf_counter_skip(evsel))
                return;
        evsel->bpf_counter_ops->destroy(evsel);
        evsel->bpf_counter_ops = NULL;
index 2eca210..cb9c532 100644 (file)
@@ -38,7 +38,7 @@ int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
 
 #else /* HAVE_BPF_SKEL */
 
-#include<linux/err.h>
+#include <linux/err.h>
 
 static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
                                    struct target *target __maybe_unused)
diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h
new file mode 100644 (file)
index 0000000..186a555
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_H
+#define __BPERF_STAT_H
+
+typedef struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(struct bpf_perf_event_value));
+       __uint(max_entries, 1);
+} reading_map;
+
+#endif /* __BPERF_STAT_H */
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
new file mode 100644 (file)
index 0000000..b8fa3cb
--- /dev/null
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf.h"
+#include "bperf_u.h"
+
+reading_map diff_readings SEC(".maps");
+reading_map accum_readings SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(__u32));
+} filter SEC(".maps");
+
+enum bperf_filter_type type = 0;
+int enabled = 0;
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+       struct bpf_perf_event_value *diff_val, *accum_val;
+       __u32 filter_key, zero = 0;
+       __u32 *accum_key;
+
+       if (!enabled)
+               return 0;
+
+       switch (type) {
+       case BPERF_FILTER_GLOBAL:
+               accum_key = &zero;
+               goto do_add;
+       case BPERF_FILTER_CPU:
+               filter_key = bpf_get_smp_processor_id();
+               break;
+       case BPERF_FILTER_PID:
+               filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
+               break;
+       case BPERF_FILTER_TGID:
+               filter_key = bpf_get_current_pid_tgid() >> 32;
+               break;
+       default:
+               return 0;
+       }
+
+       accum_key = bpf_map_lookup_elem(&filter, &filter_key);
+       if (!accum_key)
+               return 0;
+
+do_add:
+       diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+       if (!diff_val)
+               return 0;
+
+       accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+       if (!accum_val)
+               return 0;
+
+       accum_val->counter += diff_val->counter;
+       accum_val->enabled += diff_val->enabled;
+       accum_val->running += diff_val->running;
+
+       return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c
new file mode 100644 (file)
index 0000000..4f70d14
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(int));
+       __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+reading_map prev_readings SEC(".maps");
+reading_map diff_readings SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch)
+{
+       struct bpf_perf_event_value val, *prev_val, *diff_val;
+       __u32 key = bpf_get_smp_processor_id();
+       __u32 zero = 0;
+       long err;
+
+       prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+       if (!prev_val)
+               return 0;
+
+       diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+       if (!diff_val)
+               return 0;
+
+       err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+       if (err)
+               return 0;
+
+       diff_val->counter = val.counter - prev_val->counter;
+       diff_val->enabled = val.enabled - prev_val->enabled;
+       diff_val->running = val.running - prev_val->running;
+       *prev_val = val;
+       return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
new file mode 100644 (file)
index 0000000..1ce0c2c
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_U_H
+#define __BPERF_STAT_U_H
+
+enum bperf_filter_type {
+       BPERF_FILTER_GLOBAL = 1,
+       BPERF_FILTER_CPU,
+       BPERF_FILTER_PID,
+       BPERF_FILTER_TGID,
+};
+
+#endif /* __BPERF_STAT_U_H */
index c7cec92..ab12b4c 100644 (file)
@@ -52,7 +52,7 @@ int BPF_PROG(fentry_XXX)
 static inline void
 fexit_update_maps(struct bpf_perf_event_value *after)
 {
-       struct bpf_perf_event_value *before, diff, *accum;
+       struct bpf_perf_event_value *before, diff;
        __u32 zero = 0;
 
        before = bpf_map_lookup_elem(&fentry_readings, &zero);
@@ -78,7 +78,6 @@ int BPF_PROG(fexit_XXX)
 {
        struct bpf_perf_event_value reading;
        __u32 cpu = bpf_get_smp_processor_id();
-       __u32 one = 1, zero = 0;
        int err;
 
        /* read all events before updating the maps, to reduce error */
index 6b32291..5875cfc 100644 (file)
@@ -23,7 +23,7 @@
  * @children: tree of call paths of functions called
  *
  * In combination with the call_return structure, the call_path structure
- * defines a context-sensitve call-graph.
+ * defines a context-sensitive call-graph.
  */
 struct call_path {
        struct call_path *parent;
index 1b60985..8e27771 100644 (file)
@@ -877,7 +877,7 @@ append_chain_children(struct callchain_node *root,
        if (!node)
                return -1;
 
-       /* lookup in childrens */
+       /* lookup in children */
        while (*p) {
                enum match_result ret;
 
index 6984c77..6bcb5ef 100644 (file)
@@ -457,6 +457,9 @@ static int perf_stat_config(const char *var, const char *value)
        if (!strcmp(var, "stat.big-num"))
                perf_stat__set_big_num(perf_config_bool(var, value));
 
+       if (!strcmp(var, "stat.no-csv-summary"))
+               perf_stat__set_no_csv_summary(perf_config_bool(var, value));
+
        /* Add other config variables here. */
        return 0;
 }
@@ -699,7 +702,7 @@ static int collect_config(const char *var, const char *value,
        /* perf_config_set can contain both user and system config items.
         * So we should know where each value is from.
         * The classification would be needed when a particular config file
-        * is overwrited by setting feature i.e. set_config().
+        * is overwritten by setting feature i.e. set_config().
         */
        if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
                section->from_system_config = true;
index 3f4bc40..059bcec 100644 (file)
@@ -6,6 +6,7 @@
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/coresight-pmu.h>
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/zalloc.h>
@@ -316,7 +317,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
         * This is the first timestamp we've seen since the beginning of traces
         * or a discontinuity.  Since timestamps packets are generated *after*
         * range packets have been generated, we need to estimate the time at
-        * which instructions started by substracting the number of instructions
+        * which instructions started by subtracting the number of instructions
         * executed to the timestamp.
         */
        packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
@@ -491,13 +492,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
                        const ocsd_generic_trace_elem *elem,
                        const uint8_t trace_chan_id)
 {
-       pid_t tid;
+       pid_t tid = -1;
+       static u64 pid_fmt;
+       int ret;
 
-       /* Ignore PE_CONTEXT packets that don't have a valid contextID */
-       if (!elem->context.ctxt_id_valid)
+       /*
+        * As all the ETMs run at the same exception level, the system should
+        * have the same PID format crossing CPUs.  So cache the PID format
+        * and reuse it for sequential decoding.
+        */
+       if (!pid_fmt) {
+               ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+               if (ret)
+                       return OCSD_RESP_FATAL_SYS_ERR;
+       }
+
+       /*
+        * Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+        * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+        * as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+        */
+       switch (pid_fmt) {
+       case BIT(ETM_OPT_CTXTID):
+               if (elem->context.ctxt_id_valid)
+                       tid = elem->context.context_id;
+               break;
+       case BIT(ETM_OPT_CTXTID2):
+               if (elem->context.vmid_valid)
+                       tid = elem->context.vmid;
+               break;
+       default:
+               break;
+       }
+
+       if (tid == -1)
                return OCSD_RESP_CONT;
 
-       tid =  elem->context.context_id;
        if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
                return OCSD_RESP_FATAL_SYS_ERR;
 
index a2a369e..7e63e7d 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -156,11 +157,52 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
        return 0;
 }
 
+/*
+ * Report which context ID register carries the PID for @trace_chan_id.
+ *
+ * The returned PID format is presented by two bits:
+ *
+ *   Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
+ *   Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ *
+ * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
+ * are enabled at the same time when the session runs on an EL2 kernel.
+ * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
+ * recorded in the trace data, the tool will selectively use
+ * CONTEXTIDR_EL2 as PID.
+ *
+ * @pid_fmt is always written on success; it is 0 when the recorded
+ * configuration has none of the context ID bits set.
+ *
+ * Returns 0 on success, or -EINVAL when @trace_chan_id has no entry in
+ * traceid_list (@pid_fmt is left untouched in that case).
+ */
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+       struct int_node *inode;
+       u64 *metadata, val, fmt = 0;
+
+       inode = intlist__find(traceid_list, trace_chan_id);
+       if (!inode)
+               return -EINVAL;
+
+       metadata = inode->priv;
+
+       if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+               val = metadata[CS_ETM_ETMCR];
+               /* CONTEXTIDR is traced */
+               if (val & BIT(ETM_OPT_CTXTID))
+                       fmt = BIT(ETM_OPT_CTXTID);
+       } else {
+               /* every block that is not ETMv3 is read with the ETMv4 layout */
+               val = metadata[CS_ETMV4_TRCCONFIGR];
+               /* CONTEXTIDR_EL2 is traced, it wins over CONTEXTIDR_EL1 */
+               if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+                       fmt = BIT(ETM_OPT_CTXTID2);
+               /* CONTEXTIDR_EL1 is traced */
+               else if (val & BIT(ETM4_CFG_BIT_CTXTID))
+                       fmt = BIT(ETM_OPT_CTXTID);
+       }
+
+       /* never leave the output unwritten on a successful return */
+       *pid_fmt = fmt;
+       return 0;
+}
+
 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
                                              u8 trace_chan_id)
 {
        /*
-        * Wnen a timestamp packet is encountered the backend code
+        * When a timestamp packet is encountered the backend code
         * is stopped so that the front end has time to process packets
         * that were accumulated in the traceID queue.  Since there can
         * be more than one channel per cs_etm_queue, we need to specify
@@ -1655,7 +1697,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
                 * | 1 1 0 1 1 1 1 1 |  imm8  |
                 * +-----------------+--------+
                 *
-                * According to the specifiction, it only defines SVC for T32
+                * According to the specification, it only defines SVC for T32
                 * with 16 bits instruction and has no definition for 32bits;
                 * so below only read 2 bytes as instruction size for T32.
                 */
@@ -1887,7 +1929,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
 
                /*
                 * If the previous packet is an exception return packet
-                * and the return address just follows SVC instuction,
+                * and the return address just follows SVC instruction,
                 * it needs to calibrate the previous packet sample flags
                 * as PERF_IP_FLAG_SYSCALLRET.
                 */
@@ -1961,7 +2003,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
                 * contain exception type related info so we cannot decide
                 * the exception type purely based on exception return packet.
                 * If we record the exception number from exception packet and
-                * reuse it for excpetion return packet, this is not reliable
+                * reuse it for exception return packet, this is not reliable
                 * due the trace can be discontinuity or the interrupt can
                 * be nested, thus the recorded exception number cannot be
                 * used for exception return packet for these two cases.
@@ -2435,7 +2477,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
 }
 
 static const char * const cs_etm_global_header_fmts[] = {
-       [CS_HEADER_VERSION_0]   = "     Header version                 %llx\n",
+       [CS_HEADER_VERSION]     = "     Header version                 %llx\n",
        [CS_PMU_TYPE_CPUS]      = "     PMU type/num cpus              %llx\n",
        [CS_ETM_SNAPSHOT]       = "     Snapshot                       %llx\n",
 };
@@ -2443,6 +2485,7 @@ static const char * const cs_etm_global_header_fmts[] = {
 static const char * const cs_etm_priv_fmts[] = {
        [CS_ETM_MAGIC]          = "     Magic number                   %llx\n",
        [CS_ETM_CPU]            = "     CPU                            %lld\n",
+       [CS_ETM_NR_TRC_PARAMS]  = "     NR_TRC_PARAMS                  %llx\n",
        [CS_ETM_ETMCR]          = "     ETMCR                          %llx\n",
        [CS_ETM_ETMTRACEIDR]    = "     ETMTRACEIDR                    %llx\n",
        [CS_ETM_ETMCCER]        = "     ETMCCER                        %llx\n",
@@ -2452,6 +2495,7 @@ static const char * const cs_etm_priv_fmts[] = {
 static const char * const cs_etmv4_priv_fmts[] = {
        [CS_ETM_MAGIC]          = "     Magic number                   %llx\n",
        [CS_ETM_CPU]            = "     CPU                            %lld\n",
+       [CS_ETM_NR_TRC_PARAMS]  = "     NR_TRC_PARAMS                  %llx\n",
        [CS_ETMV4_TRCCONFIGR]   = "     TRCCONFIGR                     %llx\n",
        [CS_ETMV4_TRCTRACEIDR]  = "     TRCTRACEIDR                    %llx\n",
        [CS_ETMV4_TRCIDR0]      = "     TRCIDR0                        %llx\n",
@@ -2461,26 +2505,167 @@ static const char * const cs_etmv4_priv_fmts[] = {
        [CS_ETMV4_TRCAUTHSTATUS] = "    TRCAUTHSTATUS                  %llx\n",
 };
 
-static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+static const char * const param_unk_fmt = /* parameter index without a named format */
+       "       Unknown parameter [%d]         %llx\n";
+static const char * const magic_unk_fmt = /* magic is neither the ETMv3 nor the ETMv4 one */
+       "       Magic number Unknown           %llx\n";
+
+/*
+ * Print one per-cpu block of a version 0 info section.
+ *
+ * @val:    start of the auxtrace_info priv area.
+ * @offset: index of the block to print; moved past the block on success.
+ *
+ * A version 0 block has no NR_TRC_PARAMS word, so the number of ETM
+ * specific words is the fixed *_NR_TRC_PARAMS_V0 value for the magic found.
+ *
+ * Returns 0 on success, -EINVAL (after reporting it) on an unknown magic;
+ * @offset is not updated in that case.
+ */
+static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset)
+{
+       const char * const *fmts;
+       int pos = *offset, idx, first, last;
+       __u64 magic = val[pos + CS_ETM_MAGIC];
+
+       /* pick the format table and the range of entries to use from it */
+       if (magic == __perf_cs_etmv3_magic) {
+               fmts = cs_etm_priv_fmts;
+               first = CS_ETM_ETMCR;
+               last = first + CS_ETM_NR_TRC_PARAMS_V0;
+       } else if (magic == __perf_cs_etmv4_magic) {
+               fmts = cs_etmv4_priv_fmts;
+               first = CS_ETMV4_TRCCONFIGR;
+               last = first + CS_ETMV4_NR_TRC_PARAMS_V0;
+       } else {
+               /* failure - note bad magic value */
+               fprintf(stdout, magic_unk_fmt, magic);
+               return -EINVAL;
+       }
+
+       /* common header block: magic, then cpu */
+       fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[pos++]);
+       fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[pos++]);
+
+       /* format index starts past NR_TRC_PARAMS, the data index does not */
+       for (idx = first; idx < last; idx++)
+               fprintf(stdout, fmts[idx], val[pos++]);
+
+       *offset = pos;
+       return 0;
+}
+
+/*
+ * Print one per-cpu block of a version 1 info section.
+ *
+ * @val:    start of the auxtrace_info priv area.
+ * @offset: index of the block to print; moved past the block on success.
+ *
+ * The block states its own length in the NR_TRC_PARAMS word. Words beyond
+ * the entries this build has a format for are printed with param_unk_fmt.
+ *
+ * Returns 0 on success, -EINVAL (after reporting it) on an unknown magic;
+ * @offset is not updated in that case.
+ */
+static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
+{
+       const char * const *fmts;
+       int pos = *offset, idx, known, total;
+       __u64 magic = val[pos + CS_ETM_MAGIC];
+
+       /* total params to print is NR_PARAMS + common block size for v1 */
+       total = val[pos + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
+
+       if (magic == __perf_cs_etmv3_magic) {
+               fmts = cs_etm_priv_fmts;
+               known = CS_ETM_PRIV_MAX;
+       } else if (magic == __perf_cs_etmv4_magic) {
+               fmts = cs_etmv4_priv_fmts;
+               known = CS_ETMV4_PRIV_MAX;
+       } else {
+               /* failure - note bad magic value and error out */
+               fprintf(stdout, magic_unk_fmt, magic);
+               return -EINVAL;
+       }
+
+       for (idx = 0; idx < total; idx++, pos++) {
+               if (idx < known)
+                       fprintf(stdout, fmts[idx], val[pos]);
+               else
+                       /* newer record - excess params have no name here */
+                       fprintf(stdout, param_unk_fmt, idx, val[pos]);
+       }
+
+       *offset = pos;
+       return 0;
+}
+
+/*
+ * Dump the auxtrace_info priv area to stdout.
+ *
+ * @val: start of the priv area; val[0] is the header version.
+ * @num: number of per-cpu blocks to print after the global header.
+ *
+ * Printing stops silently at the first per-cpu block the version specific
+ * printer rejects. An unsupported header version is reported and nothing
+ * else is printed.
+ */
+static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+{
+       int i, cpu = 0, err;
+       /* keep all 64 bits: a truncated value could pass the check below */
+       __u64 version = val[0];
+
+       /* bail out early on bad header version */
+       if (version > CS_HEADER_CURRENT_VERSION) {
+               /* failure.. return */
+               fprintf(stdout, "       Unknown Header Version = %llx, ", version);
+               fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
+               return;
+       }
+
+       for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+               fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+       for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
+               if (version == 0)
+                       err = cs_etm__print_cpu_metadata_v0(val, &i);
+               else if (version == 1)
+                       err = cs_etm__print_cpu_metadata_v1(val, &i);
+               else
+                       /* accepted version without a printer: stop here */
+                       err = -EINVAL;
+               if (err)
                        return;
        }
 }
 
+/*
+ * Read a single cpu parameter block from the auxtrace_info priv block.
+ *
+ * For version 1 there is a per cpu nr_params entry. If we are handling
+ * version 1 file, then there may be less, the same, or more params
+ * indicated by this value than the compile time number we understand.
+ *
+ * For a version 0 info block, there are a fixed number, and we need to
+ * fill out the nr_param value in the metadata we create.
+ *
+ * @buff_in:        start of the priv area; the header version is read from
+ *                  buff_in[CS_HEADER_VERSION].
+ * @buff_in_offset: index of this cpu's block in @buff_in; advanced past the
+ *                  block when a block is returned.
+ * @out_blk_size:   number of u64 words allocated for the returned block.
+ * @nr_params_v0:   number of ETM specific params in a version 0 block.
+ *
+ * Returns a zalloc()ed block in the version 1 layout, or NULL if the
+ * allocation fails (@buff_in_offset is then left untouched).
+ */
+static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
+                                   int out_blk_size, int nr_params_v0)
+{
+       u64 *metadata = NULL;
+       int hdr_version;
+       int nr_in_params, nr_out_params, nr_cmn_params;
+       int i, k;
+
+       metadata = zalloc(sizeof(*metadata) * out_blk_size);
+       if (!metadata)
+               return NULL;
+
+       /* read block current index & version */
+       i = *buff_in_offset;
+       hdr_version = buff_in[CS_HEADER_VERSION];
+
+       if (!hdr_version) {
+               /* read version 0 info block into a version 1 metadata block */
+               /* version 0 has 2 common params */
+               nr_cmn_params = 2;
+               nr_in_params = nr_params_v0;
+               metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
+               metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
+               metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
+               /*
+                * The ETM specific words follow the common ones in the
+                * source, so the last one is at nr_cmn_params +
+                * nr_in_params - 1. Each lands one slot later in the output,
+                * after the inserted NR_TRC_PARAMS word. Never write past
+                * the allocation.
+                */
+               nr_out_params = nr_in_params + nr_cmn_params;
+               for (k = nr_cmn_params;
+                    k < nr_out_params && k + 1 < out_blk_size; k++)
+                       metadata[k + 1] = buff_in[i + k];
+       } else {
+               /* read version 1 info block - input and output nr_params may differ */
+               /* version 1 has 3 common params */
+               nr_cmn_params = 3;
+               nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
+
+               /* if input has more params than output - skip excess */
+               nr_out_params = nr_in_params + nr_cmn_params;
+               if (nr_out_params > out_blk_size)
+                       nr_out_params = out_blk_size;
+
+               for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
+                       metadata[k] = buff_in[i + k];
+
+               /* record the actual nr params we copied */
+               metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
+       }
+
+       /* adjust in offset by number of in params used */
+       i += nr_in_params + nr_cmn_params;
+       *buff_in_offset = i;
+       return metadata;
+}
+
 int cs_etm__process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session)
 {
@@ -2492,11 +2677,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
        int info_header_size;
        int total_size = auxtrace_info->header.size;
        int priv_size = 0;
-       int num_cpu;
-       int err = 0, idx = -1;
-       int i, j, k;
+       int num_cpu, trcidr_idx;
+       int err = 0;
+       int i, j;
        u64 *ptr, *hdr = NULL;
        u64 **metadata = NULL;
+       u64 hdr_version;
 
        /*
         * sizeof(auxtrace_info_event::type) +
@@ -2512,16 +2698,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
        /* First the global part */
        ptr = (u64 *) auxtrace_info->priv;
 
-       /* Look for version '0' of the header */
-       if (ptr[0] != 0)
+       /* Look for version of the header */
+       hdr_version = ptr[0];
+       if (hdr_version > CS_HEADER_CURRENT_VERSION) {
+               /* print routine will print an error on bad version */
+               if (dump_trace)
+                       cs_etm__print_auxtrace_info(auxtrace_info->priv, 0);
                return -EINVAL;
+       }
 
-       hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
+       hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX);
        if (!hdr)
                return -ENOMEM;
 
        /* Extract header information - see cs-etm.h for format */
-       for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+       for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
                hdr[i] = ptr[i];
        num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
        pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
@@ -2552,35 +2743,31 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
         */
        for (j = 0; j < num_cpu; j++) {
                if (ptr[i] == __perf_cs_etmv3_magic) {
-                       metadata[j] = zalloc(sizeof(*metadata[j]) *
-                                            CS_ETM_PRIV_MAX);
-                       if (!metadata[j]) {
-                               err = -ENOMEM;
-                               goto err_free_metadata;
-                       }
-                       for (k = 0; k < CS_ETM_PRIV_MAX; k++)
-                               metadata[j][k] = ptr[i + k];
+                       metadata[j] =
+                               cs_etm__create_meta_blk(ptr, &i,
+                                                       CS_ETM_PRIV_MAX,
+                                                       CS_ETM_NR_TRC_PARAMS_V0);
 
                        /* The traceID is our handle */
-                       idx = metadata[j][CS_ETM_ETMTRACEIDR];
-                       i += CS_ETM_PRIV_MAX;
+                       trcidr_idx = CS_ETM_ETMTRACEIDR;
+
                } else if (ptr[i] == __perf_cs_etmv4_magic) {
-                       metadata[j] = zalloc(sizeof(*metadata[j]) *
-                                            CS_ETMV4_PRIV_MAX);
-                       if (!metadata[j]) {
-                               err = -ENOMEM;
-                               goto err_free_metadata;
-                       }
-                       for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
-                               metadata[j][k] = ptr[i + k];
+                       metadata[j] =
+                               cs_etm__create_meta_blk(ptr, &i,
+                                                       CS_ETMV4_PRIV_MAX,
+                                                       CS_ETMV4_NR_TRC_PARAMS_V0);
 
                        /* The traceID is our handle */
-                       idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
-                       i += CS_ETMV4_PRIV_MAX;
+                       trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+               }
+
+               if (!metadata[j]) {
+                       err = -ENOMEM;
+                       goto err_free_metadata;
                }
 
                /* Get an RB node for this CPU */
-               inode = intlist__findnew(traceid_list, idx);
+               inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]);
 
                /* Something went wrong, no need to continue */
                if (!inode) {
@@ -2601,7 +2788,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
        }
 
        /*
-        * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
+        * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
         * CS_ETMV4_PRIV_MAX mark how many double words are in the
         * global metadata, and each cpu's metadata respectively.
         * The following tests if the correct number of double words was
@@ -2703,6 +2890,12 @@ err_free_traceid_list:
        intlist__delete(traceid_list);
 err_free_hdr:
        zfree(&hdr);
-
+       /*
+        * At this point, as a minimum we have valid header. Dump the rest of
+        * the info section - the print routines will error out on structural
+        * issues.
+        */
+       if (dump_trace)
+               cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
        return err;
 }
index 4ad925d..3642891 100644 (file)
 
 struct perf_session;
 
-/* Versionning header in case things need tro change in the future.  That way
+/*
+ * Versioning header in case things need to change in the future.  That way
  * decoding of old snapshot is still possible.
  */
 enum {
        /* Starting with 0x0 */
-       CS_HEADER_VERSION_0,
+       CS_HEADER_VERSION,
        /* PMU->type (32 bit), total # of CPUs (32 bit) */
        CS_PMU_TYPE_CPUS,
        CS_ETM_SNAPSHOT,
-       CS_HEADER_VERSION_0_MAX,
+       CS_HEADER_VERSION_MAX,
 };
 
+/*
+ * Update the version for new format.
+ *
+ * New version 1 format adds a param count to the per cpu metadata.
+ * This allows easy adding of new metadata parameters.
+ * Requires that new params always added after current ones.
+ * Also allows client reader to handle file versions that are different by
+ * checking the number of params in the file vs the number expected.
+ */
+#define CS_HEADER_CURRENT_VERSION 1
+
 /* Beginning of header common to both ETMv3 and V4 */
 enum {
        CS_ETM_MAGIC,
        CS_ETM_CPU,
+       /* Number of trace config params in following ETM specific block */
+       CS_ETM_NR_TRC_PARAMS,
+       CS_ETM_COMMON_BLK_MAX_V1,
 };
 
 /* ETMv3/PTM metadata */
 enum {
        /* Dynamic, configurable parameters */
-       CS_ETM_ETMCR = CS_ETM_CPU + 1,
+       CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1,
        CS_ETM_ETMTRACEIDR,
        /* RO, taken from sysFS */
        CS_ETM_ETMCCER,
@@ -41,10 +56,13 @@ enum {
        CS_ETM_PRIV_MAX,
 };
 
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1)
+
 /* ETMv4 metadata */
 enum {
        /* Dynamic, configurable parameters */
-       CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+       CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
        CS_ETMV4_TRCTRACEIDR,
        /* RO, taken from sysFS */
        CS_ETMV4_TRCIDR0,
@@ -55,9 +73,12 @@ enum {
        CS_ETMV4_PRIV_MAX,
 };
 
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
+
 /*
  * ETMv3 exception encoding number:
- * See Embedded Trace Macrocell spcification (ARM IHI 0014Q)
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
  * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
  */
 enum {
@@ -162,7 +183,7 @@ struct cs_etm_packet_queue {
 
 #define BMVAL(val, lsb, msb)   ((val & GENMASK(msb, lsb)) >> lsb)
 
-#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64))
 
 #define __perf_cs_etmv3_magic 0x3030303030303030ULL
 #define __perf_cs_etmv4_magic 0x4040404040404040ULL
@@ -173,6 +194,7 @@ struct cs_etm_packet_queue {
 int cs_etm__process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session);
 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
                         pid_t tid, u8 trace_chan_id);
 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
index 8b67bd9..a9c375e 100644 (file)
@@ -949,7 +949,7 @@ static char *change_name(char *name, char *orig_name, int dup)
        /*
         * Add '_' prefix to potential keywork.  According to
         * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
-        * futher CTF spec updating may require us to use '$'.
+        * further CTF spec updating may require us to use '$'.
         */
        if (dup < 0)
                len = strlen(name) + sizeof("_");
index 39c0520..ddf33d5 100644 (file)
@@ -147,7 +147,7 @@ error:
  * Demangle Java function signature (openJDK, not GCJ)
  * input:
  *     str: string to parse. String is not modified
- *    flags: comobination of JAVA_DEMANGLE_* flags to modify demangling
+ *    flags: combination of JAVA_DEMANGLE_* flags to modify demangling
  * return:
  *     if input can be demangled, then a newly allocated string is returned.
  *     if input cannot be demangled, then NULL is returned
@@ -164,7 +164,7 @@ java_demangle_sym(const char *str, int flags)
        if (!str)
                return NULL;
 
-       /* find start of retunr type */
+       /* find start of return type */
        p = strrchr(str, ')');
        if (!p)
                return NULL;
index cd2fe64..52e7101 100644 (file)
@@ -216,7 +216,7 @@ struct dso {
 
 /* dso__for_each_symbol - iterate over the symbols of given type
  *
- * @dso: the 'struct dso *' in which symbols itereated
+ * @dso: the 'struct dso *' in which symbols are iterated
  * @pos: the 'struct symbol *' to use as a loop cursor
  * @n: the 'struct rb_node *' to use as a temporary storage
  */
index 7b2d471..b2f4920 100644 (file)
@@ -91,7 +91,7 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr)
                        return NULL;
        } while (laddr == addr);
        l++;
-       /* Going foward to find the statement line */
+       /* Going forward to find the statement line */
        do {
                line = dwarf_onesrcline(lines, l++);
                if (!line || dwarf_lineaddr(line, &laddr) != 0 ||
@@ -177,7 +177,7 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
  * die_get_linkage_name - Get the linkage name of the object
  * @dw_die: A DIE of the object
  *
- * Get the linkage name attiribute of given @dw_die.
+ * Get the linkage name attribute of given @dw_die.
  * For C++ binary, the linkage name will be the mangled symbol.
  */
 const char *die_get_linkage_name(Dwarf_Die *dw_die)
@@ -739,7 +739,7 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
  * @data: user data
  *
  * Walk on the instances of give @in_die. @in_die must be an inlined function
- * declartion. This returns the return value of @callback if it returns
+ * declaration. This returns the return value of @callback if it returns
  * non-zero value, or -ENOENT if there is no instance.
  */
 int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
index 506006e..cb99646 100644 (file)
@@ -22,7 +22,7 @@ const char *cu_get_comp_dir(Dwarf_Die *cu_die);
 int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
                     const char **fname, int *lineno);
 
-/* Walk on funcitons at given address */
+/* Walk on functions at given address */
 int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
                         int (*callback)(Dwarf_Die *, void *), void *data);
 
index 1b49ece..3fa4486 100644 (file)
@@ -24,6 +24,7 @@
 #include "../arch/s390/include/dwarf-regs-table.h"
 #include "../arch/sparc/include/dwarf-regs-table.h"
 #include "../arch/xtensa/include/dwarf-regs-table.h"
+#include "../arch/mips/include/dwarf-regs-table.h"
 
 #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
 
@@ -53,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
                return __get_dwarf_regstr(sparc_regstr_tbl, n);
        case EM_XTENSA:
                return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+       case EM_MIPS:
+               return __get_dwarf_regstr(mips_regstr_tbl, n);
        default:
                pr_err("ELF MACHINE %x is not supported.\n", machine);
        }
index f603edb..8a62fb3 100644 (file)
@@ -147,6 +147,7 @@ struct perf_sample {
        u8  cpumode;
        u16 misc;
        u16 ins_lat;
+       u16 p_stage_cyc;
        bool no_hw_idx;         /* No hw_idx collected in branch_stack */
        char insn[MAX_INSN];
        void *raw_data;
@@ -427,5 +428,7 @@ char *get_page_size_name(u64 size, char *str);
 
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
+const char *arch_perf_header_entry(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
 
 #endif /* __PERF_RECORD_H */
index 859cb34..631a4af 100644 (file)
@@ -21,7 +21,7 @@
  * all struct perf_record_lost_samples.lost fields reported.
  *
  * The total_period is needed because by default auto-freq is used, so
- * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
+ * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
  * the total number of low level events, it is necessary to to sum all struct
  * perf_record_sample.period and stash the result in total_period.
  */
index 882cd1f..f1c79ec 100644 (file)
@@ -36,6 +36,7 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/prctl.h>
 
 #include <linux/bitops.h>
 #include <linux/hash.h>
@@ -1209,7 +1210,7 @@ bool evlist__valid_read_format(struct evlist *evlist)
                }
        }
 
-       /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
+       /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
        if ((sample_type & PERF_SAMPLE_READ) &&
            !(read_format & PERF_FORMAT_ID)) {
                return false;
@@ -1405,6 +1406,13 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const
                close(go_pipe[1]);
                fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
 
+               /*
+                * Change the name of this process so as not to confuse --exclude-perf
+                * users who see 'perf' in the window up to the execvp() and think that
+                * perf samples are not being excluded.
+                */
+               prctl(PR_SET_NAME, "perf-exec");
+
                /*
                 * Tell the parent we're ready to go
                 */
index 7ecbc8e..2d2614e 100644 (file)
@@ -621,7 +621,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL
 #define COP(x)         (1 << x)
 
 /*
- * cache operartion stat
+ * cache operation stat
  * L1I : Read and prefetch only
  * ITLB and BPU : Read-only
  */
@@ -2275,7 +2275,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
                /*
                 * Undo swap of u64, then swap on individual u32s,
                 * get the size of the raw area and undo all of the
-                * swap. The pevent interface handles endianity by
+                * swap. The pevent interface handles endianness by
                 * itself.
                 */
                if (swapped) {
index 6026487..dd4f56f 100644 (file)
@@ -20,6 +20,8 @@ union perf_event;
 struct bpf_counter_ops;
 struct target;
 struct hashmap;
+struct bperf_leader_bpf;
+struct bperf_follower_bpf;
 
 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -130,8 +132,24 @@ struct evsel {
         * See also evsel__has_callchain().
         */
        __u64                   synth_sample_type;
-       struct list_head        bpf_counter_list;
+
+       /*
+        * bpf_counter_ops serves two use cases:
+        *   1. perf-stat -b          counting events used by BPF programs
+        *   2. perf-stat --use-bpf   use BPF programs to aggregate counts
+        */
        struct bpf_counter_ops  *bpf_counter_ops;
+
+       /* for perf-stat -b */
+       struct list_head        bpf_counter_list;
+
+       /* for perf-stat --use-bpf */
+       int                     bperf_leader_prog_fd;
+       int                     bperf_leader_link_fd;
+       union {
+               struct bperf_leader_bpf *leader_skel;
+               struct bperf_follower_bpf *follower_skel;
+       };
 };
 
 struct perf_missing_features {
index dcf8d19..85df3e4 100644 (file)
@@ -3,7 +3,7 @@
 #define PARSE_CTX_H 1
 
 // There are fixes that need to land upstream before we can use libbpf's headers,
-// for now use our copy uncoditionally, since the data structures at this point
+// for now use our copy unconditionally, since the data structures at this point
 // are exactly the same, no problem.
 //#ifdef HAVE_LIBBPF_SUPPORT
 //#include <bpf/hashmap.h>
index 20effdf..aa1e425 100644 (file)
@@ -127,7 +127,7 @@ static int __do_write_buf(struct feat_fd *ff,  const void *buf, size_t size)
        return 0;
 }
 
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
 int do_write(struct feat_fd *ff, const void *buf, size_t size)
 {
        if (!ff->buf)
@@ -135,7 +135,7 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size)
        return __do_write_buf(ff, buf, size);
 }
 
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
 static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
 {
        u64 *p = (u64 *) set;
@@ -154,7 +154,7 @@ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
        return 0;
 }
 
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
 int write_padded(struct feat_fd *ff, const void *bf,
                 size_t count, size_t count_aligned)
 {
@@ -170,7 +170,7 @@ int write_padded(struct feat_fd *ff, const void *bf,
 #define string_size(str)                                               \
        (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
 
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
 static int do_write_string(struct feat_fd *ff, const char *str)
 {
        u32 len, olen;
@@ -266,7 +266,7 @@ static char *do_read_string(struct feat_fd *ff)
        return NULL;
 }
 
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
 static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
 {
        unsigned long *set;
@@ -2874,7 +2874,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
        int err = -1;
 
        if (ff->ph->needs_swap) {
-               pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n");
+               pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n");
                return 0;
        }
 
@@ -2942,7 +2942,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
        int err = -1;
 
        if (ff->ph->needs_swap) {
-               pr_warning("interpreting btf from systems with endianity is not yet supported\n");
+               pr_warning("interpreting btf from systems with endianness is not yet supported\n");
                return 0;
        }
 
@@ -3481,11 +3481,11 @@ static const size_t attr_pipe_abi_sizes[] = {
 };
 
 /*
- * In the legacy pipe format, there is an implicit assumption that endiannesss
+ * In the legacy pipe format, there is an implicit assumption that endianness
  * between host recording the samples, and host parsing the samples is the
  * same. This is not always the case given that the pipe output may always be
  * redirected into a file and analyzed on a different machine with possibly a
- * different endianness and perf_event ABI revsions in the perf tool itself.
+ * different endianness and perf_event ABI revisions in the perf tool itself.
  */
 static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
 {
index c82f5fc..9299ee5 100644 (file)
@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
        hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
        hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+       hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
        if (symbol_conf.nanosecs)
                hists__new_col_len(hists, HISTC_TIME, 16);
        else
@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime)
 }
 
 static void he_stat__add_period(struct he_stat *he_stat, u64 period,
-                               u64 weight, u64 ins_lat)
+                               u64 weight, u64 ins_lat, u64 p_stage_cyc)
 {
 
        he_stat->period         += period;
        he_stat->weight         += weight;
        he_stat->nr_events      += 1;
        he_stat->ins_lat        += ins_lat;
+       he_stat->p_stage_cyc    += p_stage_cyc;
 }
 
 static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
        dest->nr_events         += src->nr_events;
        dest->weight            += src->weight;
        dest->ins_lat           += src->ins_lat;
+       dest->p_stage_cyc               += src->p_stage_cyc;
 }
 
 static void he_stat__decay(struct he_stat *he_stat)
@@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
        u64 period = entry->stat.period;
        u64 weight = entry->stat.weight;
        u64 ins_lat = entry->stat.ins_lat;
+       u64 p_stage_cyc = entry->stat.p_stage_cyc;
        bool leftmost = true;
 
        p = &hists->entries_in->rb_root.rb_node;
@@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 
                if (!cmp) {
                        if (sample_self) {
-                               he_stat__add_period(&he->stat, period, weight, ins_lat);
+                               he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
                                hist_entry__add_callchain_period(he, period);
                        }
                        if (symbol_conf.cumulate_callchain)
-                               he_stat__add_period(he->stat_acc, period, weight, ins_lat);
+                               he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
 
                        /*
                         * This mem info was allocated from sample__resolve_mem
@@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists,
                        .period = sample->period,
                        .weight = sample->weight,
                        .ins_lat = sample->ins_lat,
+                       .p_stage_cyc = sample->p_stage_cyc,
                },
                .parent = sym_parent,
                .filtered = symbol__parent_filter(sym_parent) | al->filtered,
index 3c53723..e2faa74 100644 (file)
@@ -75,6 +75,7 @@ enum hist_column {
        HISTC_MEM_BLOCKED,
        HISTC_LOCAL_INS_LAT,
        HISTC_GLOBAL_INS_LAT,
+       HISTC_P_STAGE_CYC,
        HISTC_NR_COLS, /* Last entry */
 };
 
index f6e28ac..8658d42 100644 (file)
@@ -3569,7 +3569,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
        /*
         * Since this thread will not be kept in any rbtree not in a
         * list, initialize its list node so that at thread__put() the
-        * current thread lifetime assuption is kept and we don't segfault
+        * current thread lifetime assumption is kept and we don't segfault
         * at list_del_init().
         */
        INIT_LIST_HEAD(&pt->unknown_thread->node);
index a217ecf..6a67126 100644 (file)
@@ -30,7 +30,7 @@
  *
  * It does so by calculating the costs of the path ending in characters
  * i (in string1) and j (in string2), respectively, given that the last
- * operation is a substition, a swap, a deletion, or an insertion.
+ * operation is a substitution, a swap, a deletion, or an insertion.
  *
  * This implementation allows the costs to be weighted:
  *
index 6b4e5a0..c397be0 100644 (file)
@@ -4,7 +4,7 @@
  * generic one.
  *
  * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
- * name and the defination of this function is included directly from
+ * name and the definition of this function is included directly from
  * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
  * is defined no matter what arch the host is.
  *
index 21c216c..b2b92d0 100644 (file)
@@ -4,7 +4,7 @@
  * generic one.
  *
  * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
- * name and the defination of this function is included directly from
+ * name and the definition of this function is included directly from
  * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
  * is defined no matter what arch the host is.
  *
index dbdffb6..3ceaf7e 100644 (file)
@@ -471,7 +471,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 
        /*
         * This is an optional work. Even it fail we can continue our
-        * work. Needn't to check error return.
+        * work. Needn't check error return.
         */
        llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
 
index b5c2d8b..3ff4936 100644 (file)
@@ -905,7 +905,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start
 
        maps__insert(&machine->kmaps, map);
 
-       /* Put the map here because maps__insert alread got it */
+       /* Put the map here because maps__insert already got it */
        map__put(map);
 out:
        /* put the dso here, corresponding to  machine__findnew_module_dso */
@@ -1952,7 +1952,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
         * maps because that is what the kernel just did.
         *
         * But when synthesizing, this should not be done.  If we do, we end up
-        * with overlapping maps as we process the sythesized MMAP2 events that
+        * with overlapping maps as we process the synthesized MMAP2 events that
         * get delivered shortly thereafter.
         *
         * Use the FORK event misc flags in an internal way to signal this
@@ -2038,8 +2038,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
        if (!regexec(regex, sym->name, 0, NULL, 0))
-               return 1;
-       return 0;
+               return true;
+       return false;
 }
 
 static void ip__resolve_ams(struct thread *thread,
@@ -2518,7 +2518,7 @@ static bool has_stitched_lbr(struct thread *thread,
 
        /*
         * Check if there are identical LBRs between two samples.
-        * Identicall LBRs must have same from, to and flags values. Also,
+        * Identical LBRs must have same from, to and flags values. Also,
         * they have to be saved in the same LBR registers (same physical
         * index).
         *
@@ -2588,7 +2588,7 @@ err:
 }
 
 /*
- * Recolve LBR callstack chain sample
+ * Resolve LBR callstack chain sample
  * Return:
  * 1 on success get LBR callchain information
  * 0 no available LBR callchain information, should try fp
index 9f32825..d32f5b2 100644 (file)
@@ -75,7 +75,7 @@ struct thread;
 
 /* map__for_each_symbol - iterate over the symbols in the given map
  *
- * @map: the 'struct map *' in which symbols itereated
+ * @map: the 'struct map *' in which symbols are iterated
  * @pos: the 'struct symbol *' to use as a loop cursor
  * @n: the 'struct rb_node *' to use as a temporary storage
  * Note: caller must ensure map->dso is not NULL (map is loaded).
@@ -86,7 +86,7 @@ struct thread;
 /* map__for_each_symbol_with_name - iterate over the symbols in the given map
  *                                  that have the given name
  *
- * @map: the 'struct map *' in which symbols itereated
+ * @map: the 'struct map *' in which symbols are iterated
  * @sym_name: the symbol name
  * @pos: the 'struct symbol *' to use as a loop cursor
  */
index 755cef7..63dd383 100644 (file)
@@ -81,7 +81,7 @@ struct c2c_stats {
        u32     rmt_dram;            /* count of loads miss to remote DRAM */
        u32     blk_data;            /* count of loads blocked by data */
        u32     blk_addr;            /* count of loads blocked by address conflict */
-       u32     nomap;               /* count of load/stores with no phys adrs */
+       u32     nomap;               /* count of load/stores with no phys addrs */
        u32     noparse;             /* count of unparsable data sources */
 };
 
index 26c990e..6acb44a 100644 (file)
@@ -181,7 +181,7 @@ static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
  * @pctx: the parse context for the metric expression.
  * @metric_no_merge: don't attempt to share events for the metric with other
  * metrics.
- * @has_constraint: is there a contraint on the group of events? In which case
+ * @has_constraint: is there a constraint on the group of events? In which case
  * the events won't be grouped.
  * @metric_events: out argument, null terminated array of evsel's associated
  * with the metric.
index ed1b939..026bbf4 100644 (file)
@@ -9,7 +9,6 @@
 
 struct evlist;
 struct evsel;
-struct evlist;
 struct option;
 struct rblist;
 struct pmu_events_map;
index c0c0fab..8123d21 100644 (file)
@@ -846,9 +846,9 @@ split_bpf_config_terms(struct list_head *evt_head_config,
        struct parse_events_term *term, *temp;
 
        /*
-        * Currectly, all possible user config term
+        * Currently, all possible user config terms
         * belong to bpf object. parse_events__is_hardcoded_term()
-        * happends to be a good flag.
+        * happens to be a good flag.
         *
         * See parse_events_config_bpf() and
         * config_term_tracepoint().
@@ -898,7 +898,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
 
        /*
         * Caller doesn't know anything about obj_head_config,
-        * so combine them together again before returnning.
+        * so combine them together again before returning.
         */
        if (head_config)
                list_splice_tail(&obj_head_config, head_config);
@@ -1185,10 +1185,10 @@ do {                                                                       \
        }
 
        /*
-        * Check term availbility after basic checking so
+        * Check term availability after basic checking so
         * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
         *
-        * If check availbility at the entry of this function,
+        * If availability is checked at the entry of this function,
         * user will see "'<sysfs term>' is not usable in 'perf stat'"
         * if an invalid config term is provided for legacy events
         * (for example, instructions/badterm/...), which is confusing.
index 46fd0f9..4e52b94 100644 (file)
@@ -1069,7 +1069,7 @@ int perf_pmu__format_type(struct list_head *formats, const char *name)
 
 /*
  * Sets value based on the format definition (format parameter)
- * and unformated value (value parameter).
+ * and unformatted value (value parameter).
  */
 static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
                             bool zero)
@@ -1408,7 +1408,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
        }
 
        /*
-        * if no unit or scale foundin aliases, then
+        * if no unit or scale found in aliases, then
         * set defaults as for evsel
         * unit cannot left to NULL
         */
index a9cff3a..a78c8d5 100644 (file)
@@ -3228,7 +3228,7 @@ errout:
        return err;
 }
 
-/* Concatinate two arrays */
+/* Concatenate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
        void *ret;
@@ -3258,7 +3258,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs,
        if (*ntevs + ntevs2 > probe_conf.max_probes)
                ret = -E2BIG;
        else {
-               /* Concatinate the array of probe_trace_event */
+               /* Concatenate the array of probe_trace_event */
                new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs),
                                  *tevs2, ntevs2 * sizeof(**tevs2));
                if (!new_tevs)
index 1b118c9..866f2d5 100644 (file)
@@ -164,7 +164,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
 /*
  * Convert a location into trace_arg.
  * If tvar == NULL, this just checks variable can be converted.
- * If fentry == true and vr_die is a parameter, do huristic search
+ * If fentry == true and vr_die is a parameter, do heuristic search
  * for the location fuzzed by function entry mcount.
  */
 static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
@@ -498,7 +498,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                               " nor array.\n", varname);
                        return -EINVAL;
                }
-               /* While prcessing unnamed field, we don't care about this */
+               /* While processing unnamed field, we don't care about this */
                if (field->ref && dwarf_diename(vr_die)) {
                        pr_err("Semantic error: %s must be referred by '.'\n",
                               field->name);
@@ -1832,7 +1832,7 @@ static int line_range_walk_cb(const char *fname, int lineno,
            (lf->lno_s > lineno || lf->lno_e < lineno))
                return 0;
 
-       /* Make sure this line can be reversable */
+       /* Make sure this line is reversible */
        if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0
            && (lineno != __lineno || strcmp(fname, __fname)))
                return 0;
index 078a717..8130b56 100644 (file)
@@ -45,7 +45,7 @@
  * the data portion is mmap()'ed.
  *
  * To sort the queues in chronological order, all queue access is controlled
- * by the auxtrace_heap. This is basicly a stack, each stack element has two
+ * by the auxtrace_heap. This is basically a stack, each stack element has two
  * entries, the queue number and a time stamp. However the stack is sorted by
  * the time stamps. The highest time stamp is at the bottom the lowest
  * (nearest) time stamp is at the top. That sort order is maintained at all
  * stamp of the last processed entry of the auxtrace_buffer replaces the
  * current auxtrace_heap top.
  *
- * 3. Auxtrace_queues might run of out data and are feeded by the
+ * 3. Auxtrace_queues might run out of data and are fed by the
  * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
  *
  * Event Generation
- * Each sampling-data entry in the auxilary trace data generates a perf sample.
+ * Each sampling-data entry in the auxiliary trace data generates a perf sample.
  * This sample is filled
  * with data from the auxtrace such as PID/TID, instruction address, CPU state,
  * etc. This sample is processed with perf_session__deliver_synth_event() to
@@ -575,7 +575,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf)
  * pointer to the queue, the second parameter is the time stamp. This
  * is the time stamp:
  * - of the event that triggered this processing.
- * - or the time stamp when the last proccesing of this queue stopped.
+ * - or the time stamp when the last processing of this queue stopped.
  *   In this case it stopped at a 4KB page boundary and record the
  *   position on where to continue processing on the next invocation
  *   (see buffer->use_data and buffer->use_size).
@@ -640,7 +640,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
                        goto out;
                }
 
-               pos += dsdes;   /* Skip diagnositic entry */
+               pos += dsdes;   /* Skip diagnostic entry */
 
                /* Check for trailer entry */
                if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
index c83c2c6..4e4aa4c 100644 (file)
@@ -1531,7 +1531,7 @@ static void set_table_handlers(struct tables *tables)
                 * Attempt to use the call path root from the call return
                 * processor, if the call return processor is in use. Otherwise,
                 * we allocate a new call path root. This prevents exporting
-                * duplicate call path ids when both are in use simultaniously.
+                * duplicate call path ids when both are in use simultaneously.
                 */
                if (tables->dbe.crp)
                        tables->dbe.cpr = tables->dbe.crp->cpr;
index 859832a..eba3769 100644 (file)
@@ -1069,7 +1069,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
                 * in "to" register.
                 * For example, there is a call stack
                 * "A"->"B"->"C"->"D".
-                * The LBR registers will recorde like
+                * The LBR registers will be recorded like
                 * "C"->"D", "B"->"C", "A"->"B".
                 * So only the first "to" register and all "from"
                 * registers are needed to construct the whole stack.
@@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
 
        if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
                printf("... weight: %" PRIu64 "", sample->weight);
-                       if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+                       if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
                                printf(",0x%"PRIx16"", sample->ins_lat);
+                               printf(",0x%"PRIx16"", sample->p_stage_cyc);
+                       }
                printf("\n");
        }
 
@@ -1584,7 +1586,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
                return tool->event_update(tool, event, &session->evlist);
        case PERF_RECORD_HEADER_EVENT_TYPE:
                /*
-                * Depreceated, but we need to handle it for sake
+                * Deprecated, but we need to handle it for sake
                 * of old data files create in pipe mode.
                 */
                return 0;
index 552b590..88ce47f 100644 (file)
@@ -25,6 +25,7 @@
 #include <traceevent/event-parse.h>
 #include "mem-events.h"
 #include "annotate.h"
+#include "event.h"
 #include "time-utils.h"
 #include "cgroup.h"
 #include "machine.h"
@@ -36,7 +37,7 @@ const char    default_parent_pattern[] = "^sys_|^do_page_fault";
 const char     *parent_pattern = default_parent_pattern;
 const char     *default_sort_order = "comm,dso,symbol";
 const char     default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char     default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat";
+const char     default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
 const char     default_top_sort_order[] = "dso,symbol";
 const char     default_diff_sort_order[] = "dso,symbol";
 const char     default_tracepoint_sort_order[] = "trace";
@@ -45,6 +46,8 @@ const char    *field_order;
 regex_t                ignore_callees_regex;
 int            have_ignore_callees = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
+const char     *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
+const char     *arch_specific_sort_keys[] = {"p_stage_cyc"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1408,6 +1411,25 @@ struct sort_entry sort_global_ins_lat = {
        .se_width_idx   = HISTC_GLOBAL_INS_LAT,
 };
 
+static int64_t
+sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return left->stat.p_stage_cyc - right->stat.p_stage_cyc; /* difference of the two u64 counters, converted to the int64_t result */
+}
+
+static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+                                       size_t size, unsigned int width)
+{      /* Prints he->stat.p_stage_cyc left-aligned in 'width' columns; the field is u64, so use %llu with a cast (%u mismatched the argument). */
+       return repsep_snprintf(bf, size, "%-*llu", width, (unsigned long long)he->stat.p_stage_cyc);
+}
+
+struct sort_entry sort_p_stage_cyc = { /* sort entry behind the "p_stage_cyc" sort key */
+       .se_header      = "Pipeline Stage Cycle", /* default header; "p_stage_cyc" is listed in dynamic_headers[], so it may be replaced via arch_perf_header_entry() */
+       .se_cmp         = sort__global_p_stage_cyc_cmp, /* comparison callback */
+       .se_snprintf    = hist_entry__p_stage_cyc_snprintf, /* formatting callback */
+       .se_width_idx   = HISTC_P_STAGE_CYC, /* enum hist_column slot used for this entry */
+};
+
 struct sort_entry sort_mem_daddr_sym = {
        .se_header      = "Data Symbol",
        .se_cmp         = sort__daddr_cmp,
@@ -1816,6 +1838,21 @@ struct sort_dimension {
        int                     taken;
 };
 
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+       return 0; /* __weak fallback: reports every key as unsupported; presumably overridden by arch code (TODO confirm) */
+}
+
+const char * __weak arch_perf_header_entry(const char *se_header)
+{
+       return se_header; /* __weak fallback: hands the header back unchanged */
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+       sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); /* replace the entry's header with whatever arch_perf_header_entry() returns for it */
+}
+
 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
 
 static struct sort_dimension common_sort_dimensions[] = {
@@ -1841,6 +1878,7 @@ static struct sort_dimension common_sort_dimensions[] = {
        DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
        DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
        DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
+       DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
 };
 
 #undef DIM
@@ -2739,7 +2777,20 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                        struct evlist *evlist,
                        int level)
 {
-       unsigned int i;
+       unsigned int i, j;
+
+       /*
+        * Check to see if there are any arch specific
+        * sort dimensions not applicable for the current
+        * architecture. If so, skip that sort key since
+        * we don't want to display it in the output fields.
+        */
+       for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+               if (!strcmp(arch_specific_sort_keys[j], tok) &&
+                               !arch_support_sort_key(tok)) {
+                       return 0;
+               }
+       }
 
        for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
                struct sort_dimension *sd = &common_sort_dimensions[i];
@@ -2747,6 +2798,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
+               for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+                       if (!strcmp(dynamic_headers[j], sd->name))
+                               sort_dimension_add_dynamic_header(sd);
+               }
+
                if (sd->entry == &sort_parent) {
                        int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
                        if (ret) {
index 63f67a3..87a0926 100644 (file)
@@ -51,6 +51,7 @@ struct he_stat {
        u64                     period_guest_us;
        u64                     weight;
        u64                     ins_lat;
+       u64                     p_stage_cyc;
        u32                     nr_events;
 };
 
@@ -234,6 +235,7 @@ enum sort_type {
        SORT_CODE_PAGE_SIZE,
        SORT_LOCAL_INS_LAT,
        SORT_GLOBAL_INS_LAT,
+       SORT_PIPELINE_STAGE_CYC,
 
        /* branch stack specific sort keys */
        __SORT_BRANCH_STACK,
index 7f09cda..d3137bc 100644 (file)
@@ -439,6 +439,12 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
                if (counter->cgrp)
                        os.nfields++;
        }
+
+       if (!config->no_csv_summary && config->csv_output &&
+           config->summary && !config->interval) {
+               fprintf(config->output, "%16s%s", "summary", config->csv_sep);
+       }
+
        if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
                if (config->metric_only) {
                        pm(config, &os, NULL, "", "", 0);
index 6ccf21a..3f800e7 100644 (file)
@@ -9,6 +9,7 @@
 #include "expr.h"
 #include "metricgroup.h"
 #include "cgroup.h"
+#include "units.h"
 #include <linux/zalloc.h>
 
 /*
@@ -1270,18 +1271,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
                                evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
        } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
-               char unit = 'M';
-               char unit_buf[10];
+               char unit = ' ';
+               char unit_buf[10] = "/sec";
 
                total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
-
                if (total)
-                       ratio = 1000.0 * avg / total;
-               if (ratio < 0.001) {
-                       ratio *= 1000;
-                       unit = 'K';
-               }
-               snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+                       ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
+
+               if (unit != ' ')
+                       snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
                print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
                print_smi_cost(config, cpu, out, st, &rsd);
index c400f8d..2db46b9 100644 (file)
@@ -76,8 +76,7 @@ double rel_stddev_stats(double stddev, double avg)
        return pct;
 }
 
-bool __perf_evsel_stat__is(struct evsel *evsel,
-                          enum perf_stat_evsel_id id)
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id)
 {
        struct perf_stat_evsel *ps = evsel->stats;
 
index d85c292..48e6a06 100644 (file)
@@ -128,6 +128,7 @@ struct perf_stat_config {
        bool                     all_user;
        bool                     percore_show_thread;
        bool                     summary;
+       bool                     no_csv_summary;
        bool                     metric_no_group;
        bool                     metric_no_merge;
        bool                     stop_read_counter;
@@ -160,6 +161,7 @@ struct perf_stat_config {
 };
 
 void perf_stat__set_big_num(int set);
+void perf_stat__set_no_csv_summary(int set);
 
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
@@ -187,11 +189,10 @@ struct perf_aggr_thread_value {
        u64 ena;
 };
 
-bool __perf_evsel_stat__is(struct evsel *evsel,
-                          enum perf_stat_evsel_id id);
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
 
 #define perf_stat_evsel__is(evsel, id) \
-       __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+       __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
 
 extern struct runtime_stat rt_stat;
 extern struct stats walltime_nsecs_stats;
index ea94d86..be94d70 100644 (file)
@@ -12,7 +12,7 @@
  *    build complex strings/buffers whose final size isn't easily known.
  *
  *    It is NOT legal to copy the ->buf pointer away.
- *    `strbuf_detach' is the operation that detachs a buffer from its shell
+ *    `strbuf_detach' is the operation that detaches a buffer from its shell
  *    while keeping the shell valid wrt its invariants.
  *
  * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
index e0c25a4..c05aca9 100644 (file)
@@ -8,8 +8,8 @@
 
 /* A node of string filter */
 struct strfilter_node {
-       struct strfilter_node *l;       /* Tree left branche (for &,|) */
-       struct strfilter_node *r;       /* Tree right branche (for !,&,|) */
+       struct strfilter_node *l;       /* Tree left branch (for &,|) */
+       struct strfilter_node *r;       /* Tree right branch (for !,&,|) */
        const char *p;          /* Operator or rule */
 };
 
index 6dff843..4c56aa8 100644 (file)
@@ -1058,7 +1058,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
                curr_dso->symtab_type = dso->symtab_type;
                maps__insert(kmaps, curr_map);
                /*
-                * Add it before we drop the referece to curr_map, i.e. while
+                * Add it before we drop the reference to curr_map, i.e. while
                 * we still are sure to have a reference to this DSO via
                 * *curr_map->dso.
                 */
index 35c936c..2664fb6 100644 (file)
@@ -68,7 +68,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso,
 
        for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
                pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
-               fprintf(fp, "%s\n", pos->sym.name);
+               ret += fprintf(fp, "%s\n", pos->sym.name);
        }
 
        return ret;
index dff1781..35aa0c0 100644 (file)
@@ -1211,7 +1211,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max)
        *max = 0;
 
        for (i = 0; i < map->nr; i++) {
-               /* bit possition of the cpu is + 1 */
+               /* bit position of the cpu is + 1 */
                int bit = map->map[i] + 1;
 
                if (bit > *max)
@@ -1237,7 +1237,7 @@ void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int
         *   mask  = size of 'struct perf_record_record_cpu_map' +
         *           maximum cpu bit converted to size of longs
         *
-        * and finaly + the size of 'struct perf_record_cpu_map_data'.
+        * and finally + the size of 'struct perf_record_cpu_map_data'.
         */
        size_cpus = cpus_size(map);
        size_mask = mask_size(map, max);
index 03bd99d..a2e9068 100644 (file)
@@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32;
 #include <asm/syscalls.c>
 const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
 static const char **syscalltbl_native = syscalltbl_arm64;
+#elif defined(__mips__)
+#include <asm/syscalls_n64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_mips_n64;
 #endif
 
 struct syscall {
index f132c6c..1bce3eb 100644 (file)
@@ -16,6 +16,8 @@ struct target {
        bool         uses_mmap;
        bool         default_per_cpu;
        bool         per_thread;
+       bool         use_bpf;
+       const char   *attr_map;
 };
 
 enum target_errno {
@@ -66,7 +68,7 @@ static inline bool target__has_cpu(struct target *target)
 
 static inline bool target__has_bpf(struct target *target)
 {
-       return target->bpf_str;
+       return target->bpf_str || target->use_bpf;
 }
 
 static inline bool target__none(struct target *target)
index 3bc47a4..b3cd09b 100644 (file)
@@ -16,7 +16,6 @@ struct comm;
 struct ip_callchain;
 struct symbol;
 struct dso;
-struct comm;
 struct perf_sample;
 struct addr_location;
 struct call_path;
index a46762a..32c39cf 100644 (file)
@@ -33,28 +33,35 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
        return (unsigned long) -1;
 }
 
-unsigned long convert_unit(unsigned long value, char *unit)
+double convert_unit_double(double value, char *unit)
 {
        *unit = ' ';
 
-       if (value > 1000) {
-               value /= 1000;
+       if (value > 1000.0) {
+               value /= 1000.0;
                *unit = 'K';
        }
 
-       if (value > 1000) {
-               value /= 1000;
+       if (value > 1000.0) {
+               value /= 1000.0;
                *unit = 'M';
        }
 
-       if (value > 1000) {
-               value /= 1000;
+       if (value > 1000.0) {
+               value /= 1000.0;
                *unit = 'G';
        }
 
        return value;
 }
 
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+       double v = convert_unit_double((double)value, unit);
+
+       return (unsigned long)v;
+}
+
 int unit_number__scnprintf(char *buf, size_t size, u64 n)
 {
        char unit[4] = "BKMG";
index 99263b6..ea43e74 100644 (file)
@@ -12,6 +12,7 @@ struct parse_tag {
 
 unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
 
+double convert_unit_double(double value, char *unit);
 unsigned long convert_unit(unsigned long value, char *unit);
 int unit_number__scnprintf(char *buf, size_t size, u64 n);
 
index 9aededc..71a3533 100644 (file)
@@ -82,7 +82,7 @@ UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
 #define DW_EH_PE_funcrel       0x40    /* start-of-procedure-relative */
 #define DW_EH_PE_aligned       0x50    /* aligned pointer */
 
-/* Flags intentionaly not handled, since they're not needed:
+/* Flags intentionally not handled, since they're not needed:
  * #define DW_EH_PE_indirect      0x80
  * #define DW_EH_PE_uleb128       0x01
  * #define DW_EH_PE_udata2        0x02