To pick up fixes sent via perf/urgent and in the BPF tools/ directories.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
R: Jiri Olsa <jolsa@redhat.com>
R: Namhyung Kim <namhyung@kernel.org>
+L: linux-perf-users@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Supported
+W: https://perf.wiki.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
F: arch/*/events/*
F: arch/*/events/*/*
typedef __u64 __bitwise __le64;
typedef __u64 __bitwise __be64;
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
typedef struct {
int counter;
} atomic_t;
-u::
--update=::
Update specified file of the cache. Note that this doesn't remove
- older entires since those may be still needed for annotating old
+ older entries since those may be still needed for annotating old
(or remote) perf.data. Only if there is already a cache which has
exactly same build-id, that is replaced by new one. It can be used
to update kallsyms and kernel dso to vmlinux in order to support
This option works with tui, stdio2 browsers.
+ annotate.demangle::
+ Demangle symbol names to human readable form. Default is 'true'.
+
+ annotate.demangle_kernel::
+ Demangle kernel symbol names to human readable form. Default is 'true'.
+
hist.*::
hist.percentage::
This option control the way to calculate overhead of filtered entries -
- ins_lat: Instruction latency in core cycles. This is the global instruction
latency
- local_ins_lat: Local instruction latency version
+ - p_stage_cyc: On powerpc, this presents the number of cycles spent in a
+ pipeline stage. And currently supported only on powerpc.
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
--dump-raw-trace::
Dump raw trace in ASCII.
+--disable-order::
+ Disable raw trace ordering.
+
-g::
--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
but probably we'll make the default not to show the switch-on/off events
on the --group mode and if there is only one event besides the off/on ones,
go straight to the histogram browser, just like 'perf report' with no events
- explicitely specified does.
+ explicitly specified does.
--itrace::
Options for decoding instruction tracing data. The options are:
1.102235068 seconds time elapsed
+--bpf-counters::
+ Use BPF programs to aggregate readings from perf_events. This
+ allows multiple perf-stat sessions that are counting the same metric (cycles,
+ instructions, etc.) to share hardware counters.
+
+--bpf-attr-map::
+ With option "--bpf-counters", different perf-stat sessions share
+ information about shared BPF programs and maps via a pinned hashmap.
+ Use "--bpf-attr-map" to specify the path of this pinned hashmap.
+ The default path is /sys/fs/bpf/perf_attr_map.
+
ifdef::HAVE_LIBPFM[]
--pfm-events events::
Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
-n::
--null::
- null run - don't start any counters
+null run - Don't start any counters.
+
+This can be useful to measure just elapsed wall-clock time - or to assess the
+raw overhead of perf stat itself, without running any counters.
-v::
--verbose::
--summary::
Print summary for interval mode (-I).
+--no-csv-summary::
+Don't print 'summary' at the first column for CVS summary output.
+This option must be used with -x and --summary.
+
+This option can be enabled in perf config by setting the variable
+'stat.no-csv-summary'.
+
+$ perf config stat.no-csv-summary=true
+
EXAMPLES
--------
but probably we'll make the default not to show the switch-on/off events
on the --group mode and if there is only one event besides the off/on ones,
go straight to the histogram browser, just like 'perf top' with no events
- explicitely specified does.
+ explicitly specified does.
--stitch-lbr::
Show callgraph with stitched LBRs, which may have more complete
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1], linkperf:perf-report[1],
linkperf:perf-list[1]
+
+linkperf:perf-annotate[1],linkperf:perf-archive[1],
+linkperf:perf-bench[1], linkperf:perf-buildid-cache[1],
+linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
+linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
+linkperf:perf-evlist[1], linkperf:perf-ftrace[1],
+linkperf:perf-help[1], linkperf:perf-inject[1],
+linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1],
+linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1],
+linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1],
+linkperf:perf-script[1], linkperf:perf-test[1],
+linkperf:perf-trace[1], linkperf:perf-version[1]
NO_SYSCALL_TABLE := 0
endif
else
- ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390))
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips))
NO_SYSCALL_TABLE := 0
endif
endif
CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
endif
+ifeq ($(ARCH),mips)
+ NO_PERF_REGS := 0
+ CFLAGS += -I$(OUTPUT)arch/mips/include/generated
+ CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi
+ LIBUNWIND_LIBS = -lunwind -lunwind-mips
+endif
+
ifeq ($(NO_PERF_REGS),0)
$(call detected,CONFIG_PERF_REGS)
endif
SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
+SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
- $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \
+ $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \
-c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
char path[PATH_MAX];
int err = -EINVAL;
u32 val;
+ u64 contextid;
ptr = container_of(itr, struct cs_etm_recording, itr);
cs_etm_pmu = ptr->cs_etm_pmu;
goto out;
}
+ /* User has configured for PID tracing, respects it. */
+ contextid = evsel->core.attr.config &
+ (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2));
+
/*
- * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
- * is supported:
- * 0b00000 Context ID tracing is not supported.
- * 0b00100 Maximum of 32-bit Context ID size.
- * All other values are reserved.
+ * If user doesn't configure the contextid format, parse PMU format and
+ * enable PID tracing according to the "contextid" format bits:
+ *
+ * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1;
+ * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2.
*/
- val = BMVAL(val, 5, 9);
- if (!val || val != 0x4) {
- err = -EINVAL;
- goto out;
+ if (!contextid)
+ contextid = perf_pmu__format_bits(&cs_etm_pmu->format,
+ "contextid");
+
+ if (contextid & BIT(ETM_OPT_CTXTID)) {
+ /*
+ * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID
+ * tracing is supported:
+ * 0b00000 Context ID tracing is not supported.
+ * 0b00100 Maximum of 32-bit Context ID size.
+ * All other values are reserved.
+ */
+ val = BMVAL(val, 5, 9);
+ if (!val || val != 0x4) {
+ pr_err("%s: CONTEXTIDR_EL1 isn't supported\n",
+ CORESIGHT_ETM_PMU_NAME);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (contextid & BIT(ETM_OPT_CTXTID2)) {
+ /*
+ * TRCIDR2.VMIDOPT[30:29] != 0 and
+ * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid)
+ * We can't support CONTEXTIDR in VMID if the size of the
+ * virtual context id is < 32bit.
+ * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us.
+ */
+ if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) {
+ pr_err("%s: CONTEXTIDR_EL2 isn't supported\n",
+ CORESIGHT_ETM_PMU_NAME);
+ err = -EINVAL;
+ goto out;
+ }
}
/* All good, let the kernel know */
- evsel->core.attr.config |= (1 << ETM_OPT_CTXTID);
+ evsel->core.attr.config |= contextid;
err = 0;
out:
-
return err;
}
!cpu_map__has(online_cpus, i))
continue;
- if (option & ETM_SET_OPT_CTXTID) {
+ if (option & BIT(ETM_OPT_CTXTID)) {
err = cs_etm_set_context_id(itr, evsel, i);
if (err)
goto out;
}
- if (option & ETM_SET_OPT_TS) {
+ if (option & BIT(ETM_OPT_TS)) {
err = cs_etm_set_timestamp(itr, evsel, i);
if (err)
goto out;
}
- if (option & ~(ETM_SET_OPT_MASK))
+ if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)))
/* Nothing else is currently supported */
goto out;
}
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
- /* Snapshost size can't be bigger than the auxtrace area */
+ /* Snapshot size can't be bigger than the auxtrace area */
if (opts->auxtrace_snapshot_size >
opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
- ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
+ BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS));
if (err)
goto out;
}
config |= BIT(ETM4_CFG_BIT_TS);
if (config_opts & BIT(ETM_OPT_RETSTK))
config |= BIT(ETM4_CFG_BIT_RETSTK);
-
+ if (config_opts & BIT(ETM_OPT_CTXTID2))
+ config |= BIT(ETM4_CFG_BIT_VMID) |
+ BIT(ETM4_CFG_BIT_VMID_OPT);
return config;
}
struct auxtrace_record *itr,
struct perf_record_auxtrace_info *info)
{
- u32 increment;
+ u32 increment, nr_trc_params;
u64 magic;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
/* How much space was used */
increment = CS_ETMV4_PRIV_MAX;
+ nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR;
} else {
magic = __perf_cs_etmv3_magic;
/* Get configuration register */
/* How much space was used */
increment = CS_ETM_PRIV_MAX;
+ nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR;
}
/* Build generic header portion */
info->priv[*offset + CS_ETM_MAGIC] = magic;
info->priv[*offset + CS_ETM_CPU] = cpu;
+ info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params;
/* Where the next CPU entry should start from */
*offset += increment;
}
/* First fill out the session header */
info->type = PERF_AUXTRACE_CS_ETM;
- info->priv[CS_HEADER_VERSION_0] = 0;
+ info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION;
info->priv[CS_PMU_TYPE_CPUS] = type << 32;
info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
#include "debug.h"
#include "symbol.h"
-/* On arm64, kernel text segment start at high memory address,
+/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only samll amount of
+ * address, like 0xffff 0000 00ax xxxx. When only small amount of
* memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may be reach 2G.
+ * and start of kernel text segment may reach 2G.
* Therefore do not fill this gap and do not assign it to the kernel dso map.
*/
/* [sp], [sp, NUM] or [sp,NUM] */
new_len = 7; /* + ( % s p ) NULL */
- /* If the arugment is [sp], need to fill offset '0' */
+ /* If the argument is [sp], need to fill offset '0' */
if (rm[2].rm_so == -1)
new_len += 1;
else
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+
+# Syscall table generation for perf
+out := $(OUTPUT)arch/mips/include/generated/asm
+header := $(out)/syscalls_n64.c
+sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls
+sysdef := $(sysprf)/syscall_n64.tbl
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
+
+clean::
+ $(call QUIET_CLEAN, mips) $(RM) $(header)
+
+archheaders: $(header)
--- /dev/null
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# s390 script.
+#
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Tiezhu Yang <yangtiezhu@loongson.cn>
+
+SYSCALL_TBL=$1
+
+if ! test -r $SYSCALL_TBL; then
+ echo "Could not read input file" >&2
+ exit 1
+fi
+
+create_table()
+{
+ local max_nr nr abi sc discard
+
+ echo 'static const char *syscalltbl_mips_n64[] = {'
+ while read nr abi sc discard; do
+ printf '\t[%d] = "%s",\n' $nr $sc
+ max_nr=$nr
+ done
+ echo '};'
+ echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr"
+}
+
+grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL \
+ |sort -k1 -n \
+ |create_table
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for mips
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The <abi> is always "n64" for this file.
+#
+0 n64 read sys_read
+1 n64 write sys_write
+2 n64 open sys_open
+3 n64 close sys_close
+4 n64 stat sys_newstat
+5 n64 fstat sys_newfstat
+6 n64 lstat sys_newlstat
+7 n64 poll sys_poll
+8 n64 lseek sys_lseek
+9 n64 mmap sys_mips_mmap
+10 n64 mprotect sys_mprotect
+11 n64 munmap sys_munmap
+12 n64 brk sys_brk
+13 n64 rt_sigaction sys_rt_sigaction
+14 n64 rt_sigprocmask sys_rt_sigprocmask
+15 n64 ioctl sys_ioctl
+16 n64 pread64 sys_pread64
+17 n64 pwrite64 sys_pwrite64
+18 n64 readv sys_readv
+19 n64 writev sys_writev
+20 n64 access sys_access
+21 n64 pipe sysm_pipe
+22 n64 _newselect sys_select
+23 n64 sched_yield sys_sched_yield
+24 n64 mremap sys_mremap
+25 n64 msync sys_msync
+26 n64 mincore sys_mincore
+27 n64 madvise sys_madvise
+28 n64 shmget sys_shmget
+29 n64 shmat sys_shmat
+30 n64 shmctl sys_old_shmctl
+31 n64 dup sys_dup
+32 n64 dup2 sys_dup2
+33 n64 pause sys_pause
+34 n64 nanosleep sys_nanosleep
+35 n64 getitimer sys_getitimer
+36 n64 setitimer sys_setitimer
+37 n64 alarm sys_alarm
+38 n64 getpid sys_getpid
+39 n64 sendfile sys_sendfile64
+40 n64 socket sys_socket
+41 n64 connect sys_connect
+42 n64 accept sys_accept
+43 n64 sendto sys_sendto
+44 n64 recvfrom sys_recvfrom
+45 n64 sendmsg sys_sendmsg
+46 n64 recvmsg sys_recvmsg
+47 n64 shutdown sys_shutdown
+48 n64 bind sys_bind
+49 n64 listen sys_listen
+50 n64 getsockname sys_getsockname
+51 n64 getpeername sys_getpeername
+52 n64 socketpair sys_socketpair
+53 n64 setsockopt sys_setsockopt
+54 n64 getsockopt sys_getsockopt
+55 n64 clone __sys_clone
+56 n64 fork __sys_fork
+57 n64 execve sys_execve
+58 n64 exit sys_exit
+59 n64 wait4 sys_wait4
+60 n64 kill sys_kill
+61 n64 uname sys_newuname
+62 n64 semget sys_semget
+63 n64 semop sys_semop
+64 n64 semctl sys_old_semctl
+65 n64 shmdt sys_shmdt
+66 n64 msgget sys_msgget
+67 n64 msgsnd sys_msgsnd
+68 n64 msgrcv sys_msgrcv
+69 n64 msgctl sys_old_msgctl
+70 n64 fcntl sys_fcntl
+71 n64 flock sys_flock
+72 n64 fsync sys_fsync
+73 n64 fdatasync sys_fdatasync
+74 n64 truncate sys_truncate
+75 n64 ftruncate sys_ftruncate
+76 n64 getdents sys_getdents
+77 n64 getcwd sys_getcwd
+78 n64 chdir sys_chdir
+79 n64 fchdir sys_fchdir
+80 n64 rename sys_rename
+81 n64 mkdir sys_mkdir
+82 n64 rmdir sys_rmdir
+83 n64 creat sys_creat
+84 n64 link sys_link
+85 n64 unlink sys_unlink
+86 n64 symlink sys_symlink
+87 n64 readlink sys_readlink
+88 n64 chmod sys_chmod
+89 n64 fchmod sys_fchmod
+90 n64 chown sys_chown
+91 n64 fchown sys_fchown
+92 n64 lchown sys_lchown
+93 n64 umask sys_umask
+94 n64 gettimeofday sys_gettimeofday
+95 n64 getrlimit sys_getrlimit
+96 n64 getrusage sys_getrusage
+97 n64 sysinfo sys_sysinfo
+98 n64 times sys_times
+99 n64 ptrace sys_ptrace
+100 n64 getuid sys_getuid
+101 n64 syslog sys_syslog
+102 n64 getgid sys_getgid
+103 n64 setuid sys_setuid
+104 n64 setgid sys_setgid
+105 n64 geteuid sys_geteuid
+106 n64 getegid sys_getegid
+107 n64 setpgid sys_setpgid
+108 n64 getppid sys_getppid
+109 n64 getpgrp sys_getpgrp
+110 n64 setsid sys_setsid
+111 n64 setreuid sys_setreuid
+112 n64 setregid sys_setregid
+113 n64 getgroups sys_getgroups
+114 n64 setgroups sys_setgroups
+115 n64 setresuid sys_setresuid
+116 n64 getresuid sys_getresuid
+117 n64 setresgid sys_setresgid
+118 n64 getresgid sys_getresgid
+119 n64 getpgid sys_getpgid
+120 n64 setfsuid sys_setfsuid
+121 n64 setfsgid sys_setfsgid
+122 n64 getsid sys_getsid
+123 n64 capget sys_capget
+124 n64 capset sys_capset
+125 n64 rt_sigpending sys_rt_sigpending
+126 n64 rt_sigtimedwait sys_rt_sigtimedwait
+127 n64 rt_sigqueueinfo sys_rt_sigqueueinfo
+128 n64 rt_sigsuspend sys_rt_sigsuspend
+129 n64 sigaltstack sys_sigaltstack
+130 n64 utime sys_utime
+131 n64 mknod sys_mknod
+132 n64 personality sys_personality
+133 n64 ustat sys_ustat
+134 n64 statfs sys_statfs
+135 n64 fstatfs sys_fstatfs
+136 n64 sysfs sys_sysfs
+137 n64 getpriority sys_getpriority
+138 n64 setpriority sys_setpriority
+139 n64 sched_setparam sys_sched_setparam
+140 n64 sched_getparam sys_sched_getparam
+141 n64 sched_setscheduler sys_sched_setscheduler
+142 n64 sched_getscheduler sys_sched_getscheduler
+143 n64 sched_get_priority_max sys_sched_get_priority_max
+144 n64 sched_get_priority_min sys_sched_get_priority_min
+145 n64 sched_rr_get_interval sys_sched_rr_get_interval
+146 n64 mlock sys_mlock
+147 n64 munlock sys_munlock
+148 n64 mlockall sys_mlockall
+149 n64 munlockall sys_munlockall
+150 n64 vhangup sys_vhangup
+151 n64 pivot_root sys_pivot_root
+152 n64 _sysctl sys_ni_syscall
+153 n64 prctl sys_prctl
+154 n64 adjtimex sys_adjtimex
+155 n64 setrlimit sys_setrlimit
+156 n64 chroot sys_chroot
+157 n64 sync sys_sync
+158 n64 acct sys_acct
+159 n64 settimeofday sys_settimeofday
+160 n64 mount sys_mount
+161 n64 umount2 sys_umount
+162 n64 swapon sys_swapon
+163 n64 swapoff sys_swapoff
+164 n64 reboot sys_reboot
+165 n64 sethostname sys_sethostname
+166 n64 setdomainname sys_setdomainname
+167 n64 create_module sys_ni_syscall
+168 n64 init_module sys_init_module
+169 n64 delete_module sys_delete_module
+170 n64 get_kernel_syms sys_ni_syscall
+171 n64 query_module sys_ni_syscall
+172 n64 quotactl sys_quotactl
+173 n64 nfsservctl sys_ni_syscall
+174 n64 getpmsg sys_ni_syscall
+175 n64 putpmsg sys_ni_syscall
+176 n64 afs_syscall sys_ni_syscall
+# 177 reserved for security
+177 n64 reserved177 sys_ni_syscall
+178 n64 gettid sys_gettid
+179 n64 readahead sys_readahead
+180 n64 setxattr sys_setxattr
+181 n64 lsetxattr sys_lsetxattr
+182 n64 fsetxattr sys_fsetxattr
+183 n64 getxattr sys_getxattr
+184 n64 lgetxattr sys_lgetxattr
+185 n64 fgetxattr sys_fgetxattr
+186 n64 listxattr sys_listxattr
+187 n64 llistxattr sys_llistxattr
+188 n64 flistxattr sys_flistxattr
+189 n64 removexattr sys_removexattr
+190 n64 lremovexattr sys_lremovexattr
+191 n64 fremovexattr sys_fremovexattr
+192 n64 tkill sys_tkill
+193 n64 reserved193 sys_ni_syscall
+194 n64 futex sys_futex
+195 n64 sched_setaffinity sys_sched_setaffinity
+196 n64 sched_getaffinity sys_sched_getaffinity
+197 n64 cacheflush sys_cacheflush
+198 n64 cachectl sys_cachectl
+199 n64 sysmips __sys_sysmips
+200 n64 io_setup sys_io_setup
+201 n64 io_destroy sys_io_destroy
+202 n64 io_getevents sys_io_getevents
+203 n64 io_submit sys_io_submit
+204 n64 io_cancel sys_io_cancel
+205 n64 exit_group sys_exit_group
+206 n64 lookup_dcookie sys_lookup_dcookie
+207 n64 epoll_create sys_epoll_create
+208 n64 epoll_ctl sys_epoll_ctl
+209 n64 epoll_wait sys_epoll_wait
+210 n64 remap_file_pages sys_remap_file_pages
+211 n64 rt_sigreturn sys_rt_sigreturn
+212 n64 set_tid_address sys_set_tid_address
+213 n64 restart_syscall sys_restart_syscall
+214 n64 semtimedop sys_semtimedop
+215 n64 fadvise64 sys_fadvise64_64
+216 n64 timer_create sys_timer_create
+217 n64 timer_settime sys_timer_settime
+218 n64 timer_gettime sys_timer_gettime
+219 n64 timer_getoverrun sys_timer_getoverrun
+220 n64 timer_delete sys_timer_delete
+221 n64 clock_settime sys_clock_settime
+222 n64 clock_gettime sys_clock_gettime
+223 n64 clock_getres sys_clock_getres
+224 n64 clock_nanosleep sys_clock_nanosleep
+225 n64 tgkill sys_tgkill
+226 n64 utimes sys_utimes
+227 n64 mbind sys_mbind
+228 n64 get_mempolicy sys_get_mempolicy
+229 n64 set_mempolicy sys_set_mempolicy
+230 n64 mq_open sys_mq_open
+231 n64 mq_unlink sys_mq_unlink
+232 n64 mq_timedsend sys_mq_timedsend
+233 n64 mq_timedreceive sys_mq_timedreceive
+234 n64 mq_notify sys_mq_notify
+235 n64 mq_getsetattr sys_mq_getsetattr
+236 n64 vserver sys_ni_syscall
+237 n64 waitid sys_waitid
+# 238 was sys_setaltroot
+239 n64 add_key sys_add_key
+240 n64 request_key sys_request_key
+241 n64 keyctl sys_keyctl
+242 n64 set_thread_area sys_set_thread_area
+243 n64 inotify_init sys_inotify_init
+244 n64 inotify_add_watch sys_inotify_add_watch
+245 n64 inotify_rm_watch sys_inotify_rm_watch
+246 n64 migrate_pages sys_migrate_pages
+247 n64 openat sys_openat
+248 n64 mkdirat sys_mkdirat
+249 n64 mknodat sys_mknodat
+250 n64 fchownat sys_fchownat
+251 n64 futimesat sys_futimesat
+252 n64 newfstatat sys_newfstatat
+253 n64 unlinkat sys_unlinkat
+254 n64 renameat sys_renameat
+255 n64 linkat sys_linkat
+256 n64 symlinkat sys_symlinkat
+257 n64 readlinkat sys_readlinkat
+258 n64 fchmodat sys_fchmodat
+259 n64 faccessat sys_faccessat
+260 n64 pselect6 sys_pselect6
+261 n64 ppoll sys_ppoll
+262 n64 unshare sys_unshare
+263 n64 splice sys_splice
+264 n64 sync_file_range sys_sync_file_range
+265 n64 tee sys_tee
+266 n64 vmsplice sys_vmsplice
+267 n64 move_pages sys_move_pages
+268 n64 set_robust_list sys_set_robust_list
+269 n64 get_robust_list sys_get_robust_list
+270 n64 kexec_load sys_kexec_load
+271 n64 getcpu sys_getcpu
+272 n64 epoll_pwait sys_epoll_pwait
+273 n64 ioprio_set sys_ioprio_set
+274 n64 ioprio_get sys_ioprio_get
+275 n64 utimensat sys_utimensat
+276 n64 signalfd sys_signalfd
+277 n64 timerfd sys_ni_syscall
+278 n64 eventfd sys_eventfd
+279 n64 fallocate sys_fallocate
+280 n64 timerfd_create sys_timerfd_create
+281 n64 timerfd_gettime sys_timerfd_gettime
+282 n64 timerfd_settime sys_timerfd_settime
+283 n64 signalfd4 sys_signalfd4
+284 n64 eventfd2 sys_eventfd2
+285 n64 epoll_create1 sys_epoll_create1
+286 n64 dup3 sys_dup3
+287 n64 pipe2 sys_pipe2
+288 n64 inotify_init1 sys_inotify_init1
+289 n64 preadv sys_preadv
+290 n64 pwritev sys_pwritev
+291 n64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+292 n64 perf_event_open sys_perf_event_open
+293 n64 accept4 sys_accept4
+294 n64 recvmmsg sys_recvmmsg
+295 n64 fanotify_init sys_fanotify_init
+296 n64 fanotify_mark sys_fanotify_mark
+297 n64 prlimit64 sys_prlimit64
+298 n64 name_to_handle_at sys_name_to_handle_at
+299 n64 open_by_handle_at sys_open_by_handle_at
+300 n64 clock_adjtime sys_clock_adjtime
+301 n64 syncfs sys_syncfs
+302 n64 sendmmsg sys_sendmmsg
+303 n64 setns sys_setns
+304 n64 process_vm_readv sys_process_vm_readv
+305 n64 process_vm_writev sys_process_vm_writev
+306 n64 kcmp sys_kcmp
+307 n64 finit_module sys_finit_module
+308 n64 getdents64 sys_getdents64
+309 n64 sched_setattr sys_sched_setattr
+310 n64 sched_getattr sys_sched_getattr
+311 n64 renameat2 sys_renameat2
+312 n64 seccomp sys_seccomp
+313 n64 getrandom sys_getrandom
+314 n64 memfd_create sys_memfd_create
+315 n64 bpf sys_bpf
+316 n64 execveat sys_execveat
+317 n64 userfaultfd sys_userfaultfd
+318 n64 membarrier sys_membarrier
+319 n64 mlock2 sys_mlock2
+320 n64 copy_file_range sys_copy_file_range
+321 n64 preadv2 sys_preadv2
+322 n64 pwritev2 sys_pwritev2
+323 n64 pkey_mprotect sys_pkey_mprotect
+324 n64 pkey_alloc sys_pkey_alloc
+325 n64 pkey_free sys_pkey_free
+326 n64 statx sys_statx
+327 n64 rseq sys_rseq
+328 n64 io_pgetevents sys_io_pgetevents
+# 329 through 423 are reserved to sync up with other architectures
+424 n64 pidfd_send_signal sys_pidfd_send_signal
+425 n64 io_uring_setup sys_io_uring_setup
+426 n64 io_uring_enter sys_io_uring_enter
+427 n64 io_uring_register sys_io_uring_register
+428 n64 open_tree sys_open_tree
+429 n64 move_mount sys_move_mount
+430 n64 fsopen sys_fsopen
+431 n64 fsconfig sys_fsconfig
+432 n64 fsmount sys_fsmount
+433 n64 fspick sys_fspick
+434 n64 pidfd_open sys_pidfd_open
+435 n64 clone3 __sys_clone3
+436 n64 close_range sys_close_range
+437 n64 openat2 sys_openat2
+438 n64 pidfd_getfd sys_pidfd_getfd
+439 n64 faccessat2 sys_faccessat2
+440 n64 process_madvise sys_process_madvise
+441 n64 epoll_pwait2 sys_epoll_pwait2
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dwarf-regs-table.h : Mapping of DWARF debug register numbers into
+ * register names.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+#undef REG_DWARFNUM_NAME
+#define REG_DWARFNUM_NAME(reg, idx) [idx] = "$" #reg
+static const char * const mips_regstr_tbl[] = {
+ "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+ "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19",
+ "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "%29",
+ "$30", "$31",
+ REG_DWARFNUM_NAME(hi, 64),
+ REG_DWARFNUM_NAME(lo, 65),
+};
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MAX PERF_REG_MIPS_MAX
+#define PERF_REG_IP PERF_REG_MIPS_PC
+#define PERF_REG_SP PERF_REG_MIPS_R29
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
+
+static inline const char *__perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_MIPS_PC:
+ return "PC";
+ case PERF_REG_MIPS_R1:
+ return "$1";
+ case PERF_REG_MIPS_R2:
+ return "$2";
+ case PERF_REG_MIPS_R3:
+ return "$3";
+ case PERF_REG_MIPS_R4:
+ return "$4";
+ case PERF_REG_MIPS_R5:
+ return "$5";
+ case PERF_REG_MIPS_R6:
+ return "$6";
+ case PERF_REG_MIPS_R7:
+ return "$7";
+ case PERF_REG_MIPS_R8:
+ return "$8";
+ case PERF_REG_MIPS_R9:
+ return "$9";
+ case PERF_REG_MIPS_R10:
+ return "$10";
+ case PERF_REG_MIPS_R11:
+ return "$11";
+ case PERF_REG_MIPS_R12:
+ return "$12";
+ case PERF_REG_MIPS_R13:
+ return "$13";
+ case PERF_REG_MIPS_R14:
+ return "$14";
+ case PERF_REG_MIPS_R15:
+ return "$15";
+ case PERF_REG_MIPS_R16:
+ return "$16";
+ case PERF_REG_MIPS_R17:
+ return "$17";
+ case PERF_REG_MIPS_R18:
+ return "$18";
+ case PERF_REG_MIPS_R19:
+ return "$19";
+ case PERF_REG_MIPS_R20:
+ return "$20";
+ case PERF_REG_MIPS_R21:
+ return "$21";
+ case PERF_REG_MIPS_R22:
+ return "$22";
+ case PERF_REG_MIPS_R23:
+ return "$23";
+ case PERF_REG_MIPS_R24:
+ return "$24";
+ case PERF_REG_MIPS_R25:
+ return "$25";
+ case PERF_REG_MIPS_R28:
+ return "$28";
+ case PERF_REG_MIPS_R29:
+ return "$29";
+ case PERF_REG_MIPS_R30:
+ return "$30";
+ case PERF_REG_MIPS_R31:
+ return "$31";
+ default:
+ break;
+ }
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
--- /dev/null
+perf-y += perf_regs.o
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <dwarf-regs.h>
+
+static const char *mips_gpr_names[32] = {
+ "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+ "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19",
+ "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29",
+ "$30", "$31"
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ if (n < 32)
+ return mips_gpr_names[n];
+ if (n == 64)
+ return "hi";
+ if (n == 65)
+ return "lo";
+ return NULL;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG_END
+};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_MIPS_R1 ... UNW_MIPS_R25:
+ return regnum - UNW_MIPS_R1 + PERF_REG_MIPS_R1;
+ case UNW_MIPS_R28 ... UNW_MIPS_R31:
+ return regnum - UNW_MIPS_R28 + PERF_REG_MIPS_R28;
+ case UNW_MIPS_PC:
+ return PERF_REG_MIPS_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+}
perf-y += perf_regs.o
perf-y += mem-events.o
perf-y += sym-handling.o
+perf-y += evsel.o
+perf-y += event.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += skip-callchain-idx.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+ const __u64 *array, u64 type)
+{
+ union perf_sample_weight weight;
+
+ weight.full = *array;
+ if (type & PERF_SAMPLE_WEIGHT)
+ data->weight = weight.full;
+ else {
+ data->weight = weight.var1_dw;
+ data->ins_lat = weight.var2_w;
+ data->p_stage_cyc = weight.var3_w;
+ }
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+ __u64 *array, u64 type)
+{
+ *array = data->weight;
+
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ *array &= 0xffffffff;
+ *array |= ((u64)data->ins_lat << 32);
+ }
+}
+
+const char *arch_perf_header_entry(const char *se_header)
+{
+ if (!strcmp(se_header, "Local INSTR Latency"))
+ return "Finish Cyc";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Dispatch Cyc";
+ return se_header;
+}
+
+int arch_support_sort_key(const char *sort_key)
+{
+ if (!strcmp(sort_key, "p_stage_cyc"))
+ return 1;
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+ evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
}
/*
- * Incase of powerpc architecture, pmu registers are programmable
+ * In case of powerpc architecture, pmu registers are programmable
* by guest kernel. So monitoring guest via host may not provide
* valid samples with default 'cycles' event. It is better to use
* 'trace_imc/trace_cycles' event for guest profiling, since it
#define SPRN_PVR 0x11F /* Processor Version Register */
#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
+#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */
#endif /* __PERF_UTIL_HEADER_H */
/*
* The parent does following steps:
* - creates a new breakpoint (id 0) for bp_2 function
- * - changes that breakponit to bp_1 function
+ * - changes that breakpoint to bp_1 function
* - waits for the breakpoint to hit and checks
* it has proper rip of bp_1 function
* - detaches the child
/*
* Max x86 register name length is 5(ex: %r15d). So, 6th char
* should always contain NULL. This helps to find register name
- * length using strlen, insted of maintaing one more variable.
+ * length using strlen, instead of maintaining one more variable.
*/
#define SDT_REG_NAME_SIZE 6
* and displacement 0 (Both sign and displacement 0 are
* optional so it may be empty). Use one more character
* to hold last NULL so that strlen can be used to find
- * prefix length, instead of maintaing one more variable.
+ * prefix length, instead of maintaining one more variable.
*/
char prefix[3] = {0};
* While the second model, enabled via --multiq option, uses multiple
* queueing (which refers to one epoll instance per worker). For example,
* short lived tcp connections in a high throughput httpd server will
- * ditribute the accept()'ing connections across CPUs. In this case each
+ * distribute the accept()'ing connections across CPUs. In this case each
* worker does a limited amount of processing.
*
* [queue A] ---> [worker]
do {
/*
- * Block undefinitely waiting for the IN event.
+ * Block indefinitely waiting for the IN event.
* In order to stress the epoll_wait(2) syscall,
* call it event per event, instead of a larger
* batch (max)limit.
len += synthesize_flush(data);
}
- /* tihs makes the child to finish */
+ /* this makes the child to finish */
close(data->input_pipe[1]);
wait4(data->pid, &status, 0, &rusage);
#endif
/*
- * Regular printout to the terminal, supressed if -q is specified:
+ * Regular printout to the terminal, suppressed if -q is specified:
*/
#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
}
/*
- * XXX filtered samples can still have branch entires pointing into our
+ * XXX filtered samples can still have branch entries pointing into our
* symbol and are missed.
*/
process_branch_stack(sample->branch_stack, al, sample);
#include <linux/zalloc.h>
#include <linux/string.h>
#include <linux/limits.h>
-#include <linux/string.h>
#include <string.h>
#include <sys/file.h>
#include <signal.h>
#include <sys/signalfd.h>
#include <sys/wait.h>
#include <poll.h>
-#include <sys/stat.h>
-#include <time.h>
#include "builtin.h"
#include "perf.h"
#include "debug.h"
data__for_each_file(i, d) {
/*
- * Baseline or compute realted columns:
+ * Baseline or compute related columns:
*
* PERF_HPP_DIFF__BASELINE
* PERF_HPP_DIFF__DELTA
/*
* FIXME: evsel__intval() returns u64,
- * so address of lockdep_map should be dealed as 64bit.
+ * so address of lockdep_map should be treated as 64bit.
* Is there more better solution?
*/
void *addr; /* address of lockdep_map, used as ID */
bool nonany_branch_mode;
bool group_set;
bool stitch_lbr;
+ bool disable_order;
int max_stack;
struct perf_read_values show_threads_values;
struct annotation_options annotation_opts;
OPTS_EVSWITCH(&report.evswitch),
OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode,
"Sort all blocks by 'Sampled Cycles%'"),
+ OPT_BOOLEAN(0, "disable-order", &report.disable_order,
+ "Disable raw trace ordering"),
OPT_END()
};
struct perf_data data = {
if (report.mmaps_mode)
report.tasks_mode = true;
- if (dump_trace)
+ if (dump_trace && report.disable_order)
report.tool.ordered_events = false;
if (quiet)
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
- /* run the fork event through the perf machineruy */
+ /* run the fork event through the perf machinery */
perf_event__process_fork(tool, event, sample, machine);
/* and then run additional processing needed for this command */
return (struct evsel_script *)evsel->priv;
}
-static struct evsel_script *perf_evsel_script__new(struct evsel *evsel,
- struct perf_data *data)
+static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
{
struct evsel_script *es = zalloc(sizeof(*es));
return NULL;
}
-static void perf_evsel_script__delete(struct evsel_script *es)
+static void evsel_script__delete(struct evsel_script *es)
{
zfree(&es->filename);
fclose(es->fp);
free(es);
}
-static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp)
+static int evsel_script__fprintf(struct evsel_script *es, FILE *fp)
{
struct stat st;
if (!evsel->priv) {
if (scr->per_event_dump) {
- evsel->priv = perf_evsel_script__new(evsel,
- scr->session->data);
+ evsel->priv = evsel_script__new(evsel, scr->session->data);
} else {
es = zalloc(sizeof(*es));
if (!es)
evlist__for_each_entry(evlist, evsel) {
if (!evsel->priv)
break;
- perf_evsel_script__delete(evsel->priv);
+ evsel_script__delete(evsel->priv);
evsel->priv = NULL;
}
}
/*
* Already setup? I.e. we may be called twice in cases like
* Intel PT, one for the intel_pt// and dummy events, then
- * for the evsels syntheized from the auxtrace info.
+ * for the evsels synthesized from the auxtrace info.
*
* Ses perf_script__process_auxtrace_info.
*/
if (evsel->priv != NULL)
continue;
- evsel->priv = perf_evsel_script__new(evsel, script->session->data);
+ evsel->priv = evsel_script__new(evsel, script->session->data);
if (evsel->priv == NULL)
goto out_err_fclose;
}
evlist__for_each_entry(script->session->evlist, evsel) {
struct evsel_script *es = evsel->priv;
- perf_evsel_script__fprintf(es, stdout);
- perf_evsel_script__delete(es);
+ evsel_script__fprintf(es, stdout);
+ evsel_script__delete(es);
evsel->priv = NULL;
}
}
*
* Fixme: All existing "xxx-record" are all in good formats "-e event ",
* which is covered well now. And new parsing code should be added to
- * cover the future complexing formats like event groups etc.
+ * cover the future complex formats like event groups etc.
*/
static int check_ev_match(char *dir_name, char *scriptname,
struct perf_session *session)
}
evlist__for_each_cpu (evsel_list, i, cpu) {
+ /*
+ * bperf calls evsel__open_per_cpu() in bperf__load(), so
+ * no need to call it again here.
+ */
+ if (target.use_bpf)
+ break;
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
/*
* Enable counters and exec the command:
*/
- t0 = rdclock();
- clock_gettime(CLOCK_MONOTONIC, &ref_time);
-
if (forks) {
evlist__start_workload(evsel_list);
err = enable_counters();
if (err)
return -1;
+ t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
status = dispatch_events(forks, timeout, interval, ×);
if (child_pid != -1) {
err = enable_counters();
if (err)
return -1;
+
+ t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
status = dispatch_events(forks, timeout, interval, ×);
}
stat_config.big_num = (set != 0);
}
+void perf_stat__set_no_csv_summary(int set)
+{
+ stat_config.no_csv_summary = (set != 0);
+}
+
static int stat__set_big_num(const struct option *opt __maybe_unused,
const char *s __maybe_unused, int unset)
{
#ifdef HAVE_BPF_SKEL
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
"stat events on existing bpf program id"),
+ OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
+ "use bpf program to count events"),
+ OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
+ "path to perf_event_attr map"),
#endif
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
"system-wide collection from all CPUs"),
"threads of same physical core"),
OPT_BOOLEAN(0, "summary", &stat_config.summary,
"print summary for interval mode"),
+ OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
+ "don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
#ifdef HAVE_LIBPFM
bzero(&errinfo, sizeof(errinfo));
if (transaction_run) {
/* Handle -T as -M transaction. Once platform specific metrics
- * support has been added to the json files, all archictures
+ * support has been added to the json files, all architectures
* will use this approach. To determine transaction support
* on an architecture test for such a metric name.
*/
/*
* We synthesize the kernel mmap record just so that older tools
* don't emit warnings about not being able to resolve symbols
- * due to /proc/sys/kernel/kptr_restrict settings and instear provide
+ * due to /proc/sys/kernel/kptr_restrict settings and instead provide
* a saner message about no samples being in the perf.data file.
*
* This also serves to suppress a warning about f_header.data.size == 0
if (status) {
/*
* Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise
- * warn the user explicitely.
+ * warn the user explicitly.
*/
eprintf(status == ENOSYS ? 1 : 0, verbose,
"Couldn't read the cpuid for this machine: %s\n",
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl
for i in $BEAUTY_FILES; do
beauty_check $i -B
/*
* Jump to syscall specific augmenter, even if the default one,
* "!raw_syscalls:unaugmented" that will just return 1 to return the
- * unagmented tracepoint payload.
+ * unaugmented tracepoint payload.
*/
bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
/*
* Jump to syscall specific return augmenter, even if the default one,
* "!raw_syscalls:unaugmented" that will just return 1 to return the
- * unagmented tracepoint payload.
+ * unaugmented tracepoint payload.
*/
bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr);
/*
rec.p.total_size += size;
/*
- * If JVM is multi-threaded, nultiple concurrent calls to agent
+ * If JVM is multi-threaded, multiple concurrent calls to agent
* may be possible, so protect file writes
*/
flockfile(fp);
rec.p.total_size = size;
/*
- * If JVM is multi-threaded, nultiple concurrent calls to agent
+ * If JVM is multi-threaded, multiple concurrent calls to agent
* may be possible, so protect file writes
*/
flockfile(fp);
"EventName": "L2D_TLB_REFILL",
"BriefDescription": "Attributable Level 2 data TLB refill"
},
+ {
+ "PublicDescription": "Attributable Level 2 instruction TLB refill.",
+ "EventCode": "0x2E",
+ "EventName": "L2I_TLB_REFILL",
+ "BriefDescription": "Attributable Level 2 instruction TLB refill."
+ },
{
"PublicDescription": "Attributable Level 2 data or unified TLB access",
"EventCode": "0x2F",
"EventName": "L2D_TLB",
"BriefDescription": "Attributable Level 2 data or unified TLB access"
},
+ {
+ "PublicDescription": "Attributable Level 2 instruction TLB access.",
+ "EventCode": "0x30",
+ "EventName": "L2I_TLB",
+ "BriefDescription": "Attributable Level 2 instruction TLB access."
+ },
{
"PublicDescription": "Access to another socket in a multi-socket system",
"EventCode": "0x31",
"EventCode": "0x37",
"EventName": "LL_CACHE_MISS_RD",
"BriefDescription": "Last level cache miss, read"
+ },
+ {
+ "PublicDescription": "SIMD Instruction architecturally executed.",
+ "EventCode": "0x8000",
+ "EventName": "SIMD_INST_RETIRED",
+ "BriefDescription": "SIMD Instruction architecturally executed."
+ },
+ {
+ "PublicDescription": "Instruction architecturally executed, SVE.",
+ "EventCode": "0x8002",
+ "EventName": "SVE_INST_RETIRED",
+ "BriefDescription": "Instruction architecturally executed, SVE."
+ },
+ {
+ "PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
+ "EventCode": "0x8008",
+ "EventName": "UOP_SPEC",
+ "BriefDescription": "Microarchitectural operation, Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE Math accelerator Operations speculatively executed.",
+ "EventCode": "0x800E",
+ "EventName": "SVE_MATH_SPEC",
+ "BriefDescription": "SVE Math accelerator Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Floating-point Operations speculatively executed.",
+ "EventCode": "0x8010",
+ "EventName": "FP_SPEC",
+ "BriefDescription": "Floating-point Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Floating-point FMA Operations speculatively executed.",
+ "EventCode": "0x8028",
+ "EventName": "FP_FMA_SPEC",
+ "BriefDescription": "Floating-point FMA Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.",
+ "EventCode": "0x8034",
+ "EventName": "FP_RECPE_SPEC",
+ "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "floating-point convert Operations speculatively executed.",
+ "EventCode": "0x8038",
+ "EventName": "FP_CVT_SPEC",
+ "BriefDescription": "floating-point convert Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.",
+ "EventCode": "0x8043",
+ "EventName": "ASE_SVE_INT_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE predicated Operations speculatively executed.",
+ "EventCode": "0x8074",
+ "EventName": "SVE_PRED_SPEC",
+ "BriefDescription": "SVE predicated Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
+ "EventCode": "0x807C",
+ "EventName": "SVE_MOVPRFX_SPEC",
+ "BriefDescription": "SVE MOVPRFX Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.",
+ "EventCode": "0x807F",
+ "EventName": "SVE_MOVPRFX_U_SPEC",
+ "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.",
+ "EventCode": "0x8085",
+ "EventName": "ASE_SVE_LD_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.",
+ "EventCode": "0x8086",
+ "EventName": "ASE_SVE_ST_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Prefetch Operations speculatively executed.",
+ "EventCode": "0x8087",
+ "EventName": "PRF_SPEC",
+ "BriefDescription": "Prefetch Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "General-purpose register load Operations speculatively executed.",
+ "EventCode": "0x8089",
+ "EventName": "BASE_LD_REG_SPEC",
+ "BriefDescription": "General-purpose register load Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "General-purpose register store Operations speculatively executed.",
+ "EventCode": "0x808A",
+ "EventName": "BASE_ST_REG_SPEC",
+ "BriefDescription": "General-purpose register store Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE unpredicated load register Operations speculatively executed.",
+ "EventCode": "0x8091",
+ "EventName": "SVE_LDR_REG_SPEC",
+ "BriefDescription": "SVE unpredicated load register Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE unpredicated store register Operations speculatively executed.",
+ "EventCode": "0x8092",
+ "EventName": "SVE_STR_REG_SPEC",
+ "BriefDescription": "SVE unpredicated store register Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE load predicate register Operations speculatively executed.",
+ "EventCode": "0x8095",
+ "EventName": "SVE_LDR_PREG_SPEC",
+ "BriefDescription": "SVE load predicate register Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE store predicate register Operations speculatively executed.",
+ "EventCode": "0x8096",
+ "EventName": "SVE_STR_PREG_SPEC",
+ "BriefDescription": "SVE store predicate register Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.",
+ "EventCode": "0x809F",
+ "EventName": "SVE_PRF_CONTIG_SPEC",
+ "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.",
+ "EventCode": "0x80A5",
+ "EventName": "ASE_SVE_LD_MULTI_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.",
+ "EventCode": "0x80A6",
+ "EventName": "ASE_SVE_ST_MULTI_SPEC",
+ "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE gather-load Operations speculatively executed.",
+ "EventCode": "0x80AD",
+ "EventName": "SVE_LD_GATHER_SPEC",
+ "BriefDescription": "SVE gather-load Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE scatter-store Operations speculatively executed.",
+ "EventCode": "0x80AE",
+ "EventName": "SVE_ST_SCATTER_SPEC",
+ "BriefDescription": "SVE scatter-store Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE gather-prefetch Operations speculatively executed.",
+ "EventCode": "0x80AF",
+ "EventName": "SVE_PRF_GATHER_SPEC",
+ "BriefDescription": "SVE gather-prefetch Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "SVE First-fault load Operations speculatively executed.",
+ "EventCode": "0x80BC",
+ "EventName": "SVE_LDFF_SPEC",
+ "BriefDescription": "SVE First-fault load Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Scalable floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C0",
+ "EventName": "FP_SCALE_OPS_SPEC",
+ "BriefDescription": "Scalable floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C1",
+ "EventName": "FP_FIXED_OPS_SPEC",
+ "BriefDescription": "Non-scalable floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C2",
+ "EventName": "FP_HP_SCALE_OPS_SPEC",
+ "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C3",
+ "EventName": "FP_HP_FIXED_OPS_SPEC",
+ "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C4",
+ "EventName": "FP_SP_SCALE_OPS_SPEC",
+ "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C5",
+ "EventName": "FP_SP_FIXED_OPS_SPEC",
+ "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C6",
+ "EventName": "FP_DP_SCALE_OPS_SPEC",
+ "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed."
+ },
+ {
+ "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.",
+ "EventCode": "0x80C7",
+ "EventName": "FP_DP_FIXED_OPS_SPEC",
+ "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
}
]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "BR_MIS_PRED"
+ },
+ {
+ "ArchStdEvent": "BR_PRED"
+ }
+]
--- /dev/null
+[
+ {
+ "PublicDescription": "This event counts read transactions from tofu controller to measured CMG.",
+ "EventCode": "0x314",
+ "EventName": "BUS_READ_TOTAL_TOFU",
+ "BriefDescription": "This event counts read transactions from tofu controller to measured CMG."
+ },
+ {
+ "PublicDescription": "This event counts read transactions from PCI controller to measured CMG.",
+ "EventCode": "0x315",
+ "EventName": "BUS_READ_TOTAL_PCI",
+ "BriefDescription": "This event counts read transactions from PCI controller to measured CMG."
+ },
+ {
+ "PublicDescription": "This event counts read transactions from measured CMG local memory to measured CMG.",
+ "EventCode": "0x316",
+ "EventName": "BUS_READ_TOTAL_MEM",
+ "BriefDescription": "This event counts read transactions from measured CMG local memory to measured CMG."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0.",
+ "EventCode": "0x318",
+ "EventName": "BUS_WRITE_TOTAL_CMG0",
+ "BriefDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1.",
+ "EventCode": "0x319",
+ "EventName": "BUS_WRITE_TOTAL_CMG1",
+ "BriefDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2.",
+ "EventCode": "0x31A",
+ "EventName": "BUS_WRITE_TOTAL_CMG2",
+ "BriefDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3.",
+ "EventCode": "0x31B",
+ "EventName": "BUS_WRITE_TOTAL_CMG3",
+ "BriefDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to tofu controller.",
+ "EventCode": "0x31C",
+ "EventName": "BUS_WRITE_TOTAL_TOFU",
+ "BriefDescription": "This event counts write transactions from measured CMG to tofu controller."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to PCI controller.",
+ "EventCode": "0x31D",
+ "EventName": "BUS_WRITE_TOTAL_PCI",
+ "BriefDescription": "This event counts write transactions from measured CMG to PCI controller."
+ },
+ {
+ "PublicDescription": "This event counts write transactions from measured CMG to measured CMG local memory.",
+ "EventCode": "0x31E",
+ "EventName": "BUS_WRITE_TOTAL_MEM",
+ "BriefDescription": "This event counts write transactions from measured CMG to measured CMG local memory."
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "L1I_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L1I_CACHE"
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2I_TLB_REFILL"
+ },
+ {
+ "ArchStdEvent": "L2D_TLB"
+ },
+ {
+ "ArchStdEvent": "L2I_TLB"
+ },
+ {
+ "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.",
+ "EventCode": "0x49",
+ "EventName": "L1D_CACHE_REFILL_PRF",
+ "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.",
+ "EventCode": "0x59",
+ "EventName": "L2D_CACHE_REFILL_PRF",
+ "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.",
+ "EventCode": "0x200",
+ "EventName": "L1D_CACHE_REFILL_DM",
+ "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access."
+ },
+ {
+ "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.",
+ "EventCode": "0x202",
+ "EventName": "L1D_CACHE_REFILL_HWPRF",
+ "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.",
+ "EventCode": "0x208",
+ "EventName": "L1_MISS_WAIT",
+ "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle."
+ },
+ {
+ "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.",
+ "EventCode": "0x209",
+ "EventName": "L1I_MISS_WAIT",
+ "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle."
+ },
+ {
+ "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.",
+ "EventCode": "0x300",
+ "EventName": "L2D_CACHE_REFILL_DM",
+ "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access."
+ },
+ {
+ "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.",
+ "EventCode": "0x302",
+ "EventName": "L2D_CACHE_REFILL_HWPRF",
+ "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.",
+ "EventCode": "0x308",
+ "EventName": "L2_MISS_WAIT",
+ "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle."
+ },
+ {
+ "PublicDescription": "This event counts the number of times of L2 cache miss.",
+ "EventCode": "0x309",
+ "EventName": "L2_MISS_COUNT",
+ "BriefDescription": "This event counts the number of times of L2 cache miss."
+ },
+ {
+ "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.",
+ "EventCode": "0x325",
+ "EventName": "L2D_SWAP_DM",
+ "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.",
+ "EventCode": "0x326",
+ "EventName": "L2D_CACHE_MIBMCH_PRF",
+ "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access."
+ },
+ {
+ "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.",
+ "EventCode": "0x396",
+ "EventName": "L2D_CACHE_SWAP_LOCAL",
+ "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch."
+ },
+ {
+ "PublicDescription": "This event counts energy consumption per cycle of L2 cache.",
+ "EventCode": "0x3E0",
+ "EventName": "EA_L2",
+ "BriefDescription": "This event counts energy consumption per cycle of L2 cache."
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "CPU_CYCLES"
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "EXC_TAKEN"
+ },
+ {
+ "ArchStdEvent": "EXC_UNDEF"
+ },
+ {
+ "ArchStdEvent": "EXC_SVC"
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT"
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ"
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ"
+ },
+ {
+ "ArchStdEvent": "EXC_SMC"
+ },
+ {
+ "ArchStdEvent": "EXC_HVC"
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "SW_INCR"
+ },
+ {
+ "ArchStdEvent": "INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "EXC_RETURN"
+ },
+ {
+ "ArchStdEvent": "CID_WRITE_RETIRED"
+ },
+ {
+ "ArchStdEvent": "INST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "STREX_SPEC"
+ },
+ {
+ "ArchStdEvent": "LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "LDST_SPEC"
+ },
+ {
+ "ArchStdEvent": "DP_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SPEC"
+ },
+ {
+ "ArchStdEvent": "VFP_SPEC"
+ },
+ {
+ "ArchStdEvent": "PC_WRITE_SPEC"
+ },
+ {
+ "ArchStdEvent": "CRYPTO_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_IMMED_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_RETURN_SPEC"
+ },
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC"
+ },
+ {
+ "ArchStdEvent": "ISB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DSB_SPEC"
+ },
+ {
+ "ArchStdEvent": "DMB_SPEC"
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.",
+ "EventCode": "0x9F",
+ "EventName": "DCZVA_SPEC",
+ "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed floating-point move operations.",
+ "EventCode": "0x105",
+ "EventName": "FP_MV_SPEC",
+ "BriefDescription": "This event counts architecturally executed floating-point move operations."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed operations that using predicate register.",
+ "EventCode": "0x108",
+ "EventName": "PRD_SPEC",
+ "BriefDescription": "This event counts architecturally executed operations that using predicate register."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.",
+ "EventCode": "0x109",
+ "EventName": "IEL_SPEC",
+ "BriefDescription": "This event counts architecturally executed inter-element manipulation operations."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.",
+ "EventCode": "0x10A",
+ "EventName": "IREG_SPEC",
+ "BriefDescription": "This event counts architecturally executed inter-register manipulation operations."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers.",
+ "EventCode": "0x112",
+ "EventName": "FP_LD_SPEC",
+ "BriefDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers.",
+ "EventCode": "0x113",
+ "EventName": "FP_ST_SPEC",
+ "BriefDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.",
+ "EventCode": "0x11A",
+ "EventName": "BC_LD_SPEC",
+ "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.",
+ "EventCode": "0x121",
+ "EventName": "EFFECTIVE_INST_SPEC",
+ "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.",
+ "EventCode": "0x123",
+ "EventName": "PRE_INDEX_SPEC",
+ "BriefDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode."
+ },
+ {
+ "PublicDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.",
+ "EventCode": "0x124",
+ "EventName": "POST_INDEX_SPEC",
+ "BriefDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode."
+ }
+]
--- /dev/null
+[
+ {
+ "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.",
+ "EventCode": "0x3E8",
+ "EventName": "EA_MEMORY",
+ "BriefDescription": "This event counts energy consumption per cycle of CMG local memory."
+ }
+]
--- /dev/null
+[
+ {
+ "PublicDescription": "This event counts the occurrence count of the micro-operation split.",
+ "EventCode": "0x139",
+ "EventName": "UOP_SPLIT",
+ "BriefDescription": "This event counts the occurrence count of the micro-operation split."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.",
+ "EventCode": "0x180",
+ "EventName": "LD_COMP_WAIT_L2_MISS",
+ "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.",
+ "EventCode": "0x181",
+ "EventName": "LD_COMP_WAIT_L2_MISS_EX",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.",
+ "EventCode": "0x182",
+ "EventName": "LD_COMP_WAIT_L1_MISS",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.",
+ "EventCode": "0x183",
+ "EventName": "LD_COMP_WAIT_L1_MISS_EX",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.",
+ "EventCode": "0x184",
+ "EventName": "LD_COMP_WAIT",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.",
+ "EventCode": "0x185",
+ "EventName": "LD_COMP_WAIT_EX",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.",
+ "EventCode": "0x186",
+ "EventName": "LD_COMP_WAIT_PFP_BUSY",
+ "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port."
+ },
+ {
+ "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.",
+ "EventCode": "0x187",
+ "EventName": "LD_COMP_WAIT_PFP_BUSY_EX",
+ "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation."
+ },
+ {
+ "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.",
+ "EventCode": "0x188",
+ "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF",
+ "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.",
+ "EventCode": "0x189",
+ "EventName": "EU_COMP_WAIT",
+ "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.",
+ "EventCode": "0x18A",
+ "EventName": "FL_COMP_WAIT",
+ "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.",
+ "EventCode": "0x18B",
+ "EventName": "BR_COMP_WAIT",
+ "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.",
+ "EventCode": "0x18C",
+ "EventName": "ROB_EMPTY",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.",
+ "EventCode": "0x18D",
+ "EventName": "ROB_EMPTY_STQ_BUSY",
+ "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.",
+ "EventCode": "0x18E",
+ "EventName": "WFE_WFI_CYCLE",
+ "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only.",
+ "EventCode": "0x190",
+ "EventName": "_0INST_COMMIT",
+ "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that one instruction is committed.",
+ "EventCode": "0x191",
+ "EventName": "_1INST_COMMIT",
+ "BriefDescription": "This event counts every cycle that one instruction is committed."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that two instructions are committed.",
+ "EventCode": "0x192",
+ "EventName": "_2INST_COMMIT",
+ "BriefDescription": "This event counts every cycle that two instructions are committed."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that three instructions are committed.",
+ "EventCode": "0x193",
+ "EventName": "_3INST_COMMIT",
+ "BriefDescription": "This event counts every cycle that three instructions are committed."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that four instructions are committed.",
+ "EventCode": "0x194",
+ "EventName": "_4INST_COMMIT",
+ "BriefDescription": "This event counts every cycle that four instructions are committed."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that only any micro-operations are committed.",
+ "EventCode": "0x198",
+ "EventName": "UOP_ONLY_COMMIT",
+ "BriefDescription": "This event counts every cycle that only any micro-operations are committed."
+ },
+ {
+ "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.",
+ "EventCode": "0x199",
+ "EventName": "SINGLE_MOVPRFX_COMMIT",
+ "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed."
+ },
+ {
+ "PublicDescription": "This event counts energy consumption per cycle of core.",
+ "EventCode": "0x1E0",
+ "EventName": "EA_CORE",
+ "BriefDescription": "This event counts energy consumption per cycle of core."
+ },
+ {
+ "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.",
+ "EventCode": "0x230",
+ "EventName": "L1HWPF_STREAM_PF",
+ "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher."
+ },
+ {
+ "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.",
+ "EventCode": "0x231",
+ "EventName": "L1HWPF_INJ_ALLOC_PF",
+ "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher."
+ },
+ {
+ "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.",
+ "EventCode": "0x232",
+ "EventName": "L1HWPF_INJ_NOALLOC_PF",
+ "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher."
+ },
+ {
+ "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.",
+ "EventCode": "0x233",
+ "EventName": "L2HWPF_STREAM_PF",
+ "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher."
+ },
+ {
+ "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.",
+ "EventCode": "0x234",
+ "EventName": "L2HWPF_INJ_ALLOC_PF",
+ "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher."
+ },
+ {
+ "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.",
+ "EventCode": "0x235",
+ "EventName": "L2HWPF_INJ_NOALLOC_PF",
+ "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher."
+ },
+ {
+ "PublicDescription": "This event counts prefetch requests to L2 cache generated by the other causes.",
+ "EventCode": "0x236",
+ "EventName": "L2HWPF_OTHER",
+ "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes."
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "STALL_FRONTEND"
+ },
+ {
+ "ArchStdEvent": "STALL_BACKEND"
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of EAGA pipeline.",
+ "EventCode": "0x1A0",
+ "EventName": "EAGA_VAL",
+ "BriefDescription": "This event counts valid cycles of EAGA pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of EAGB pipeline.",
+ "EventCode": "0x1A1",
+ "EventName": "EAGB_VAL",
+ "BriefDescription": "This event counts valid cycles of EAGB pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of EXA pipeline.",
+ "EventCode": "0x1A2",
+ "EventName": "EXA_VAL",
+ "BriefDescription": "This event counts valid cycles of EXA pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of EXB pipeline.",
+ "EventCode": "0x1A3",
+ "EventName": "EXB_VAL",
+ "BriefDescription": "This event counts valid cycles of EXB pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of FLA pipeline.",
+ "EventCode": "0x1A4",
+ "EventName": "FLA_VAL",
+ "BriefDescription": "This event counts valid cycles of FLA pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of FLB pipeline.",
+ "EventCode": "0x1A5",
+ "EventName": "FLB_VAL",
+ "BriefDescription": "This event counts valid cycles of FLB pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of PRX pipeline.",
+ "EventCode": "0x1A6",
+ "EventName": "PRX_VAL",
+ "BriefDescription": "This event counts valid cycles of PRX pipeline."
+ },
+ {
+ "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1.",
+ "EventCode": "0x1B4",
+ "EventName": "FLA_VAL_PRD_CNT",
+ "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1."
+ },
+ {
+ "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1.",
+ "EventCode": "0x1B5",
+ "EventName": "FLB_VAL_PRD_CNT",
+ "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.",
+ "EventCode": "0x240",
+ "EventName": "L1_PIPE0_VAL",
+ "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.",
+ "EventCode": "0x241",
+ "EventName": "L1_PIPE1_VAL",
+ "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.",
+ "EventCode": "0x250",
+ "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.",
+ "EventCode": "0x251",
+ "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.",
+ "EventCode": "0x252",
+ "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.",
+ "EventCode": "0x253",
+ "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1."
+ },
+ {
+ "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.",
+ "EventCode": "0x260",
+ "EventName": "L1_PIPE0_COMP",
+ "BriefDescription": "This event counts completed requests in L1D cache pipeline#0."
+ },
+ {
+ "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.",
+ "EventCode": "0x261",
+ "EventName": "L1_PIPE1_COMP",
+ "BriefDescription": "This event counts completed requests in L1D cache pipeline#1."
+ },
+ {
+ "PublicDescription": "This event counts completed requests in L1I cache pipeline.",
+ "EventCode": "0x268",
+ "EventName": "L1I_PIPE_COMP",
+ "BriefDescription": "This event counts completed requests in L1I cache pipeline."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of L1I cache pipeline.",
+ "EventCode": "0x269",
+ "EventName": "L1I_PIPE_VAL",
+ "BriefDescription": "This event counts valid cycles of L1I cache pipeline."
+ },
+ {
+ "PublicDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock.",
+ "EventCode": "0x274",
+ "EventName": "L1_PIPE_ABORT_STLD_INTLK",
+ "BriefDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.",
+ "EventCode": "0x2A0",
+ "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0."
+ },
+ {
+ "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.",
+ "EventCode": "0x2A1",
+ "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0",
+ "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0."
+ },
+ {
+ "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.",
+ "EventCode": "0x2B0",
+ "EventName": "L1_PIPE_COMP_GATHER_2FLOW",
+ "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined."
+ },
+ {
+ "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.",
+ "EventCode": "0x2B1",
+ "EventName": "L1_PIPE_COMP_GATHER_1FLOW",
+ "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined."
+ },
+ {
+ "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.",
+ "EventCode": "0x2B2",
+ "EventName": "L1_PIPE_COMP_GATHER_0FLOW",
+ "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0."
+ },
+ {
+ "PublicDescription": "This event counts the number of flows of the scatter instructions.",
+ "EventCode": "0x2B3",
+ "EventName": "L1_PIPE_COMP_SCATTER_1FLOW",
+ "BriefDescription": "This event counts the number of flows of the scatter instructions."
+ },
+ {
+ "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.",
+ "EventCode": "0x2B8",
+ "EventName": "L1_PIPE0_COMP_PRD_CNT",
+ "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1."
+ },
+ {
+ "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.",
+ "EventCode": "0x2B9",
+ "EventName": "L1_PIPE1_COMP_PRD_CNT",
+ "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1."
+ },
+ {
+ "PublicDescription": "This event counts valid cycles of L2 cache pipeline.",
+ "EventCode": "0x330",
+ "EventName": "L2_PIPE_VAL",
+ "BriefDescription": "This event counts valid cycles of L2 cache pipeline."
+ },
+ {
+ "PublicDescription": "This event counts completed requests in L2 cache pipeline.",
+ "EventCode": "0x350",
+ "EventName": "L2_PIPE_COMP_ALL",
+ "BriefDescription": "This event counts completed requests in L2 cache pipeline."
+ },
+ {
+ "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.",
+ "EventCode": "0x370",
+ "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH",
+ "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access."
+ }
+]
--- /dev/null
+[
+ {
+ "ArchStdEvent": "SIMD_INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "SVE_INST_RETIRED"
+ },
+ {
+ "ArchStdEvent": "UOP_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_MATH_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_FMA_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_RECPE_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_CVT_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_INT_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRED_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_MOVPRFX_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_MOVPRFX_U_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_LD_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_ST_SPEC"
+ },
+ {
+ "ArchStdEvent": "PRF_SPEC"
+ },
+ {
+ "ArchStdEvent": "BASE_LD_REG_SPEC"
+ },
+ {
+ "ArchStdEvent": "BASE_ST_REG_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDR_REG_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_STR_REG_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDR_PREG_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_STR_PREG_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRF_CONTIG_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC"
+ },
+ {
+ "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LD_GATHER_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_ST_SCATTER_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_PRF_GATHER_SPEC"
+ },
+ {
+ "ArchStdEvent": "SVE_LDFF_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC"
+ },
+ {
+ "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC"
+ }
+]
0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
+0x00000000460f0010,v1,fujitsu/a64fx,core
0x00000000480fd010,v1,hisilicon/hip08,core
0x00000000500f0000,v1,ampere/emag,core
"MetricName": "flush_rate_percent"
},
{
- "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_11to14_slots_percent"
},
{
- "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_15to17_slots_percent"
},
{
- "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_18plus_slots_percent"
},
{
- "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_1to2_slots_percent"
},
{
- "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_3to6_slots_percent"
},
{
- "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid",
+ "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid",
"MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
"MetricGroup": "general",
"MetricName": "gct_util_7to10_slots_percent"
"MetricGroup": "instruction_stats_percent_per_ref",
"MetricName": "inst_from_rmem_percent"
},
- {
- "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)",
- "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_co_m_rd_util"
- },
- {
- "BriefDescription": "L2 dcache invalidates per run inst (per core)",
- "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_dc_inv_rate_percent"
- },
{
"BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)",
"MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_dem_ld_disp_percent"
},
- {
- "BriefDescription": "L2 Icache invalidates per run inst (per core)",
- "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ic_inv_rate_percent"
- },
- {
- "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)",
- "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_inst_miss_ratio_percent"
- },
- {
- "BriefDescription": "Average number of cycles between L2 Load hits",
- "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ld_hit_frequency"
- },
- {
- "BriefDescription": "Average number of cycles between L2 Load misses",
- "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ld_miss_frequency"
- },
- {
- "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)",
- "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ld_miss_ratio_percent"
- },
- {
- "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)",
- "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ld_rd_util"
- },
- {
- "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks",
- "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_ldmiss_wr_util"
- },
- {
- "BriefDescription": "L2 local pump prediction success",
- "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_local_pred_correct_percent"
- },
- {
- "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs",
- "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_mod_co_percent"
- },
- {
- "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts",
- "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_rc_ld_disp_addr_fail_percent"
- },
- {
- "BriefDescription": "% of L2 Load RC dispatch attempts that failed",
- "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_rc_ld_disp_fail_percent"
- },
- {
- "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts",
- "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_rc_st_disp_addr_fail_percent"
- },
- {
- "BriefDescription": "% of L2 Store RC dispatch attempts that failed",
- "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_rc_st_disp_fail_percent"
- },
- {
- "BriefDescription": "L2 Cache Read Utilization (per core)",
- "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_rd_util_percent"
- },
- {
- "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs",
- "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_shr_co_percent"
- },
{
"BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
"MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_miss_ratio_percent"
},
- {
- "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)",
- "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_st_rd_util"
- },
{
"BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks",
"MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100",
"MetricGroup": "l2_stats",
"MetricName": "l2_st_wr_util"
},
- {
- "BriefDescription": "L2 Cache Write Utilization (per core)",
- "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)",
- "MetricGroup": "l2_stats",
- "MetricName": "l2_wr_util_percent"
- },
- {
- "BriefDescription": "Average number of cycles between L3 Load hits",
- "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2",
- "MetricGroup": "l3_stats",
- "MetricName": "l3_ld_hit_frequency"
- },
- {
- "BriefDescription": "Average number of cycles between L3 Load misses",
- "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2",
- "MetricGroup": "l3_stats",
- "MetricName": "l3_ld_miss_frequency"
- },
- {
- "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle",
- "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8",
- "MetricGroup": "l3_stats",
- "MetricName": "l3_wi_usage"
- },
{
"BriefDescription": "Average icache miss latency",
"MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ",
"MetricName": "custom_secs"
},
{
- "BriefDescription": "Percentage Cycles atleast one instruction dispatched",
+ "BriefDescription": "Percentage Cycles at least one instruction dispatched",
"MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
"MetricName": "cycles_atleast_one_inst_dispatched_percent"
},
* and directory tree could result in build failure due to table
* names not being found.
*
- * Atleast for now, be strict with processing JSON file names.
+ * At least for now, be strict with processing JSON file names.
* i.e. if JSON file name cannot be mapped to C-style table name,
* fail.
*/
return
rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
'irq_list':irq_list, 'event_list':event_list}
- # merge information realted to a NET_RX softirq
+ # merge information related to a NET_RX softirq
receive_hunk_list.append(rec_data)
def handle_napi_poll(event_info):
*
* The test case check following error conditions:
* - we get stuck in signal handler because of debug
- * exception being triggered receursively due to
+ * exception being triggered recursively due to
* the wrong RF EFLAG management
*
* - we never trigger the sig_handler breakpoint due
- * to the rong RF EFLAG management
+ * to the wrong RF EFLAG management
*
*/
ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
/*
- * Kick off the test by trigering 'fd1'
+ * Kick off the test by triggering 'fd1'
* breakpoint.
*/
test_function();
/*
* Both cpus and threads are now owned by evlist
* and will be freed by following perf_evlist__set_maps
- * call. Getting refference to keep them alive.
+ * call. Getting reference to keep them alive.
*/
perf_cpu_map__get(cpus);
perf_thread_map__get(threads);
"Stdlib.bytes.++" },
};
- for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
buf = ocaml_demangle_sym(test_cases[i].mangled);
if ((buf == NULL && test_cases[i].demangled != NULL)
|| (buf != NULL && test_cases[i].demangled == NULL)
};
/*
- * Will be casted to struct ip_callchain which has all 64 bit entries
+ * Will be cast to struct ip_callchain which has all 64 bit entries
* of nr and ips[].
*/
static u64 fake_callchains[][10] = {
return err;
}
-/* callcain + NO children */
+/* callchain + NO children */
static int test2(struct evsel *evsel, struct machine *machine)
{
int err;
#if defined(__s390x__)
/* Return true if kvm module is available and loaded. Test this
- * and retun success when trace point kvm_s390_create_vm
+ * and return success when trace point kvm_s390_create_vm
* exists. Otherwise this test always fails.
*/
static bool kvm_s390_create_vm_valid(void)
if (!strcmp(name, v->event))
return v->val;
v++;
- };
+ }
return 0;
}
*ratio2 = compute_single(&metric_events, evlist, &st, name2);
out:
- /* ... clenup. */
+ /* ... cleanup. */
metricgroup__rblist_exit(&metric_events);
runtime_stat__exit(&st);
evlist__free_stats(evlist);
exit 2
fi
+# check what we need to test windows binaries
+add_pe=1
+run_pe=1
+if ! perf version --build-options | grep -q 'libbfd: .* on '; then
+ echo "WARNING: perf not built with libbfd. PE binaries will not be tested."
+ add_pe=0
+ run_pe=0
+fi
+if ! which wine > /dev/null; then
+ echo "WARNING: wine not found. PE binaries will not be run."
+ run_pe=0
+fi
+
+# set up wine
+if [ ${run_pe} -eq 1 ]; then
+ wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX)
+ export WINEPREFIX=${wineprefix}
+ # clear display variables to prevent wine from popping up dialogs
+ unset DISPLAY
+ unset WAYLAND_DISPLAY
+fi
+
ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX)
ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX)
+ex_pe=$(dirname $0)/../pe-file.exe
echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c -
echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c -
-echo "test binaries: ${ex_sha1} ${ex_md5}"
+echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}"
check()
{
- id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
-
+ case $1 in
+ *.exe)
+ # We don't have a tool that can pull a nicely formatted build-id out of
+ # a PE file, but we can extract the whole section with objcopy and
+ # format it ourselves. The .buildid section is a Debug Directory
+ # containing a CodeView entry:
+ # https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only
+ # https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md
+ # The build-id starts at byte 33 and must be rearranged into a GUID.
+ id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \
+ cut -c 33-48 | hexdump -ve '/1 "%02x"' | \
+ sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'`
+ ;;
+ *)
+ id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'`
+ ;;
+ esac
echo "build id: ${id}"
link=${build_id_dir}/.build-id/${id:0:2}/${id:2}
exit 1
fi
- ${perf} buildid-cache -l | grep $id
+ ${perf} buildid-cache -l | grep ${id}
if [ $? -ne 0 ]; then
echo "failed: ${id} is not reported by \"perf buildid-cache -l\""
exit 1
{
data=$(mktemp /tmp/perf.data.XXX)
build_id_dir=$(mktemp -d /tmp/perf.debug.XXX)
+ log=$(mktemp /tmp/perf.log.XXX)
perf="perf --buildid-dir ${build_id_dir}"
- ${perf} record --buildid-all -o ${data} ${1}
+ echo "running: perf record $@"
+ ${perf} record --buildid-all -o ${data} $@ &> ${log}
if [ $? -ne 0 ]; then
- echo "failed: record ${1}"
+ echo "failed: record $@"
+ echo "see log: ${log}"
exit 1
fi
- check ${1}
+ check ${@: -1}
+ rm -f ${log}
rm -rf ${build_id_dir}
rm -rf ${data}
}
# add binaries manual via perf buildid-cache -a
test_add ${ex_sha1}
test_add ${ex_md5}
+if [ ${add_pe} -eq 1 ]; then
+ test_add ${ex_pe}
+fi
# add binaries via perf record post processing
test_record ${ex_sha1}
test_record ${ex_md5}
+if [ ${run_pe} -eq 1 ]; then
+ test_record wine ${ex_pe}
+fi
# cleanup
rm ${ex_sha1} ${ex_md5}
+if [ ${run_pe} -eq 1 ]; then
+ rm -r ${wineprefix}
+fi
exit ${err}
fi
}
+daemon_exit()
+{
+ local config=$1
+
+ local line=`perf daemon --config ${config} -x: | head -1`
+ local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
+
+ # Reset trap handler.
+ trap - SIGINT SIGTERM
+
+ # stop daemon
+ perf daemon stop --config ${config}
+
+ # ... and wait for the pid to go away
+ tail --pid=${pid} -f /dev/null
+}
+
daemon_start()
{
local config=$1
perf daemon start --config ${config}
+ # Clean up daemon if interrupted.
+ trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM
+
# wait for the session to ping
local state="FAIL"
+ local retries=0
while [ "${state}" != "OK" ]; do
state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'`
sleep 0.05
+ retries=$((${retries} +1))
+ if [ ${retries} -ge 600 ]; then
+ echo "FAILED: Timeout waiting for daemon to ping"
+ daemon_exit ${config}
+ exit 1
+ fi
done
}
-daemon_exit()
-{
- local base=$1
- local config=$2
-
- local line=`perf daemon --config ${config} -x: | head -1`
- local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
-
- # stop daemon
- perf daemon stop --config ${config}
-
- # ... and wait for the pid to go away
- tail --pid=${pid} -f /dev/null
-}
-
test_list()
{
echo "test daemon list"
${base}/session-time/ack "0"
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
rm -rf ${base}
rm -f ${config}
done
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
rm -rf ${base}
rm -f ${config}
fi
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
# check that sessions are gone
if [ -d "/proc/${pid_size}" ]; then
perf daemon signal --config ${config}
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
# count is 2 perf.data for signals and 1 for perf record finished
count=`ls ${base}/session-test/ | grep perf.data | wc -l`
fi
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
rm -rf ${base}
rm -f ${config}
fi
# stop daemon
- daemon_exit ${base} ${config}
+ daemon_exit ${config}
rm -rf ${base}
rm -f ${config}
--- /dev/null
+#!/bin/sh
+# perf stat csv summary test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+#
+# 1.001364330 9224197 cycles 8012885033 100.00
+# summary 9224197 cycles 8012885033 100.00
+#
+perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \
+grep -e summary | \
+while read summary num event run pct
+do
+ if [ $summary != "summary" ]; then
+ exit 1
+ fi
+done
+
+#
+# 1.001360298 9148534 cycles 8012853854 100.00
+#9148534 cycles 8012853854 100.00
+#
+perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \
+grep -e summary | \
+while read num event run pct
+do
+ exit 1
+done
+
+exit 0
--- /dev/null
+#!/bin/sh
+# perf stat --bpf-counters test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# check whether $2 is within +/- 10% of $1
+compare_number()
+{
+ first_num=$1
+ second_num=$2
+
+ # upper bound is first_num * 110%
+ upper=$(( $first_num + $first_num / 10 ))
+ # lower bound is first_num * 90%
+ lower=$(( $first_num - $first_num / 10 ))
+
+ if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
+ echo "The difference between $first_num and $second_num are greater than 10%."
+ exit 1
+ fi
+}
+
+# skip if --bpf-counters is not supported
+perf stat --bpf-counters true > /dev/null 2>&1 || exit 2
+
+base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
+
+compare_number $base_cycles $bpf_cycles
+exit 0
* CPU 1 is on core_id 1 and physical_package_id 3
*
* Core_id and physical_package_id are platform and architecture
- * dependend and might have higher numbers than the CPU id.
+ * dependent and might have higher numbers than the CPU id.
* This actually depends on the configuration.
*
* In this case process_cpu_topology() prints error message:
/*
* POSIX 1003.1g - ancillary data object information
- * Ancillary data consits of a sequence of pairs of
+ * Ancillary data consists of a sequence of pairs of
* (cmsghdr, cmsg_data[])
*/
return true;
}
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static void annotate_browser__show_full_location(struct ui_browser *browser)
+{
+ struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+ struct disasm_line *cursor = disasm_line(ab->selection);
+ struct annotation_line *al = &cursor->al;
+
+ if (al->offset != -1)
+ ui_helpline__puts("Only available for source code lines.");
+ else if (al->fileloc == NULL)
+ ui_helpline__puts("No source file location.");
+ else {
+ char help_line[SYM_TITLE_MAX_SIZE];
+ sprintf (help_line, "Source file location: %s", al->fileloc);
+ ui_helpline__puts(help_line);
+ }
+}
+
static void ui_browser__init_asm_mode(struct ui_browser *browser)
{
struct annotation *notes = browser__annotation(browser);
browser->nr_entries = notes->nr_asm_entries;
}
-#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
-
static int sym_title(struct symbol *sym, struct map *map, char *title,
size_t sz, int percent_type)
{
}
/*
- * This can be called from external jumps, i.e. jumps from one functon
+ * This can be called from external jumps, i.e. jumps from one function
* to another, like from the kernel's entry_SYSCALL_64 function to the
* swapgs_restore_regs_and_return_to_usermode() function.
*
"c Show min/max cycle\n"
"/ Search string\n"
"k Toggle line numbers\n"
+ "l Show full source file location\n"
"P Print to [symbol_name].annotation file.\n"
"r Run available scripts\n"
"p Toggle percent type [local/global]\n"
case 'k':
notes->options->show_linenr = !notes->options->show_linenr;
continue;
+ case 'l':
+ annotate_browser__show_full_location (&browser->b);
+ continue;
case 'H':
nd = browser->curr_hot;
break;
browser->rows -= browser->extra_title_lines;
/*
* Verify if we were at the last line and that line isn't
- * visibe because we now show the header line(s).
+ * visible because we now show the header line(s).
*/
index_row = browser->index - browser->top_idx;
if (index_row >= browser->rows)
s64 offset;
char *line;
int line_nr;
+ char *fileloc;
};
static void annotation_line__init(struct annotation_line *al,
al->offset = args->offset;
al->line = strdup(args->line);
al->line_nr = args->line_nr;
+ al->fileloc = args->fileloc;
al->data_nr = nr;
}
*/
static int symbol__parse_objdump_line(struct symbol *sym,
struct annotate_args *args,
- char *parsed_line, int *line_nr)
+ char *parsed_line, int *line_nr, char **fileloc)
{
struct map *map = args->ms.map;
struct annotation *notes = symbol__annotation(sym);
/* /filename:linenr ? Save line number and ignore. */
if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
*line_nr = atoi(parsed_line + match[1].rm_so);
+ *fileloc = strdup(parsed_line);
return 0;
}
args->offset = offset;
args->line = parsed_line;
args->line_nr = *line_nr;
+ args->fileloc = *fileloc;
args->ms.sym = sym;
dl = disasm_line__new(args);
args->offset = -1;
args->line = strdup(srcline);
args->line_nr = 0;
+ args->fileloc = NULL;
args->ms.sym = sym;
dl = disasm_line__new(args);
if (dl) {
args->offset = pc;
args->line = buf + prev_buf_size;
args->line_nr = 0;
+ args->fileloc = NULL;
args->ms.sym = sym;
dl = disasm_line__new(args);
if (dl)
args->offset = -1;
args->line = strdup("to be implemented");
args->line_nr = 0;
+ args->fileloc = NULL;
dl = disasm_line__new(args);
if (dl)
annotation_line__add(&dl->al, ¬es->src->source);
bool delete_extract = false;
bool decomp = false;
int lineno = 0;
+ char *fileloc = NULL;
int nline;
char *line;
size_t line_len;
* See disasm_line__new() and struct disasm_line::line_nr.
*/
if (symbol__parse_objdump_line(sym, args, expanded_line,
- &lineno) < 0)
+ &lineno, &fileloc) < 0)
break;
nline++;
}
opt->use_offset = perf_config_bool("use_offset", value);
} else if (!strcmp(var, "annotate.disassembler_style")) {
opt->disassembler_style = value;
+ } else if (!strcmp(var, "annotate.demangle")) {
+ symbol_conf.demangle = perf_config_bool("demangle", value);
+ } else if (!strcmp(var, "annotate.demangle_kernel")) {
+ symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value);
} else {
pr_debug("%s variable unknown, ignoring...", var);
}
print_lines,
full_path,
show_linenr,
+ show_fileloc,
show_nr_jumps,
show_minmax_cycle,
show_asm_raw,
s64 offset;
char *line;
int line_nr;
+ char *fileloc;
int jump_sources;
float ipc;
u64 cycles;
* After probing, let's consider prologue, which
* adds program fetcher to BPF programs.
*
- * hook_load_preprocessorr() hooks pre-processor
+ * hook_load_preprocessor() hooks pre-processor
* to bpf_program, let it generate prologue
* dynamically during loading.
*/
#include <assert.h>
#include <limits.h>
#include <unistd.h>
+#include <sys/file.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <linux/err.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <api/fs/fs.h>
#include "bpf_counter.h"
#include "counts.h"
#include "debug.h"
#include "evsel.h"
+#include "evlist.h"
#include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
#include "bpf_skel/bpf_prog_profiler.skel.h"
+#include "bpf_skel/bperf_u.h"
+#include "bpf_skel/bperf_leader.skel.h"
+#include "bpf_skel/bperf_follower.skel.h"
+
+/*
+ * bperf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map. The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the
+ * leader prog, and the diff_map. Each perf-stat session holds a reference
+ * to the bpf_link to make sure the leader prog is attached to sched_switch
+ * tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+ __u32 link_id;
+ __u32 diff_map_id;
+};
+
+#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map"
+#define ATTR_MAP_SIZE 16
static inline void *u64_to_ptr(__u64 ptr)
{
.install_pe = bpf_program_profiler__install_pe,
};
+static __u32 bpf_link_get_id(int fd)
+{
+ struct bpf_link_info link_info = {0};
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.id;
+}
+
+static __u32 bpf_link_get_prog_id(int fd)
+{
+ struct bpf_link_info link_info = {0};
+ __u32 link_info_len = sizeof(link_info);
+
+ bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+ return link_info.prog_id;
+}
+
+static __u32 bpf_map_get_id(int fd)
+{
+ struct bpf_map_info map_info = {0};
+ __u32 map_info_len = sizeof(map_info);
+
+ bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len);
+ return map_info.id;
+}
+
+static int bperf_lock_attr_map(struct target *target)
+{
+ char path[PATH_MAX];
+ int map_fd, err;
+
+ if (target->attr_map) {
+ scnprintf(path, PATH_MAX, "%s", target->attr_map);
+ } else {
+ scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(),
+ DEFAULT_ATTR_MAP_PATH);
+ }
+
+ if (access(path, F_OK)) {
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(struct perf_event_attr),
+ sizeof(struct perf_event_attr_map_entry),
+ ATTR_MAP_SIZE, 0);
+ if (map_fd < 0)
+ return -1;
+
+ err = bpf_obj_pin(map_fd, path);
+ if (err) {
+ /* someone pinned the map in parallel? */
+ close(map_fd);
+ map_fd = bpf_obj_get(path);
+ if (map_fd < 0)
+ return -1;
+ }
+ } else {
+ map_fd = bpf_obj_get(path);
+ if (map_fd < 0)
+ return -1;
+ }
+
+ err = flock(map_fd, LOCK_EX);
+ if (err) {
+ close(map_fd);
+ return -1;
+ }
+ return map_fd;
+}
+
+/* trigger the leader program on a cpu */
+static int bperf_trigger_reading(int prog_fd, int cpu)
+{
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = NULL,
+ .ctx_size_in = 0,
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ .cpu = cpu,
+ .retval = 0,
+ );
+
+ return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
+static int bperf_check_target(struct evsel *evsel,
+ struct target *target,
+ enum bperf_filter_type *filter_type,
+ __u32 *filter_entry_cnt)
+{
+ if (evsel->leader->core.nr_members > 1) {
+ pr_err("bpf managed perf events do not yet support groups.\n");
+ return -1;
+ }
+
+ /* determine filter type based on target */
+ if (target->system_wide) {
+ *filter_type = BPERF_FILTER_GLOBAL;
+ *filter_entry_cnt = 1;
+ } else if (target->cpu_list) {
+ *filter_type = BPERF_FILTER_CPU;
+ *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
+ } else if (target->tid) {
+ *filter_type = BPERF_FILTER_PID;
+ *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+ } else if (target->pid || evsel->evlist->workload.pid != -1) {
+ *filter_type = BPERF_FILTER_TGID;
+ *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
+ } else {
+ pr_err("bpf managed perf events do not yet support these targets.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct perf_cpu_map *all_cpu_map;
+
+static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
+ struct perf_event_attr_map_entry *entry)
+{
+ struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
+ int link_fd, diff_map_fd, err;
+ struct bpf_link *link = NULL;
+
+ if (!skel) {
+ pr_err("Failed to open leader skeleton\n");
+ return -1;
+ }
+
+ bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
+ err = bperf_leader_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load leader skeleton\n");
+ goto out;
+ }
+
+ err = -1;
+ link = bpf_program__attach(skel->progs.on_switch);
+ if (!link) {
+ pr_err("Failed to attach leader program\n");
+ goto out;
+ }
+
+ link_fd = bpf_link__fd(link);
+ diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
+ entry->link_id = bpf_link_get_id(link_fd);
+ entry->diff_map_id = bpf_map_get_id(diff_map_fd);
+ err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+ assert(err == 0);
+
+ evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+ assert(evsel->bperf_leader_link_fd >= 0);
+
+ /*
+ * save leader_skel for install_pe, which is called within
+ * following evsel__open_per_cpu call
+ */
+ evsel->leader_skel = skel;
+ evsel__open_per_cpu(evsel, all_cpu_map, -1);
+
+out:
+ bperf_leader_bpf__destroy(skel);
+ bpf_link__destroy(link);
+ return err;
+}
+
+static int bperf__load(struct evsel *evsel, struct target *target)
+{
+ struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+ int attr_map_fd, diff_map_fd = -1, err;
+ enum bperf_filter_type filter_type;
+ __u32 filter_entry_cnt, i;
+
+ if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
+ return -1;
+
+ if (!all_cpu_map) {
+ all_cpu_map = perf_cpu_map__new(NULL);
+ if (!all_cpu_map)
+ return -1;
+ }
+
+ evsel->bperf_leader_prog_fd = -1;
+ evsel->bperf_leader_link_fd = -1;
+
+ /*
+ * Step 1: hold a fd on the leader program and the bpf_link, if
+ * the program is not already gone, reload the program.
+ * Use flock() to ensure exclusive access to the perf_event_attr
+ * map.
+ */
+ attr_map_fd = bperf_lock_attr_map(target);
+ if (attr_map_fd < 0) {
+ pr_err("Failed to lock perf_event_attr map\n");
+ return -1;
+ }
+
+ err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
+ if (err) {
+ err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY);
+ if (err)
+ goto out;
+ }
+
+ evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
+ if (evsel->bperf_leader_link_fd < 0 &&
+ bperf_reload_leader_program(evsel, attr_map_fd, &entry))
+ goto out;
+
+ /*
+ * The bpf_link holds reference to the leader program, and the
+ * leader program holds reference to the maps. Therefore, if
+ * link_id is valid, diff_map_id should also be valid.
+ */
+ evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
+ bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
+ assert(evsel->bperf_leader_prog_fd >= 0);
+
+ diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
+ assert(diff_map_fd >= 0);
+
+ /*
+ * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
+ * whether the kernel support it
+ */
+ err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0);
+ if (err) {
+ pr_err("The kernel does not support test_run for raw_tp BPF programs.\n"
+ "Therefore, --use-bpf might show inaccurate readings\n");
+ goto out;
+ }
+
+ /* Step 2: load the follower skeleton */
+ evsel->follower_skel = bperf_follower_bpf__open();
+ if (!evsel->follower_skel) {
+ pr_err("Failed to open follower skeleton\n");
+ goto out;
+ }
+
+ /* attach fexit program to the leader program */
+ bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX,
+ evsel->bperf_leader_prog_fd, "on_switch");
+
+ /* connect to leader diff_reading map */
+ bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd);
+
+ /* set up reading map */
+ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
+ filter_entry_cnt);
+ /* set up follower filter based on target */
+ bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
+ filter_entry_cnt);
+ err = bperf_follower_bpf__load(evsel->follower_skel);
+ if (err) {
+ pr_err("Failed to load follower skeleton\n");
+ bperf_follower_bpf__destroy(evsel->follower_skel);
+ evsel->follower_skel = NULL;
+ goto out;
+ }
+
+ for (i = 0; i < filter_entry_cnt; i++) {
+ int filter_map_fd;
+ __u32 key;
+
+ if (filter_type == BPERF_FILTER_PID ||
+ filter_type == BPERF_FILTER_TGID)
+ key = evsel->core.threads->map[i].pid;
+ else if (filter_type == BPERF_FILTER_CPU)
+ key = evsel->core.cpus->map[i];
+ else
+ break;
+
+ filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
+ bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+ }
+
+ evsel->follower_skel->bss->type = filter_type;
+
+ err = bperf_follower_bpf__attach(evsel->follower_skel);
+
+out:
+ if (err && evsel->bperf_leader_link_fd >= 0)
+ close(evsel->bperf_leader_link_fd);
+ if (err && evsel->bperf_leader_prog_fd >= 0)
+ close(evsel->bperf_leader_prog_fd);
+ if (diff_map_fd >= 0)
+ close(diff_map_fd);
+
+ flock(attr_map_fd, LOCK_UN);
+ close(attr_map_fd);
+
+ return err;
+}
+
+static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+{
+ struct bperf_leader_bpf *skel = evsel->leader_skel;
+
+ return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
+ &cpu, &fd, BPF_ANY);
+}
+
+/*
+ * trigger the leader prog on each cpu, so the accum_reading map could get
+ * the latest readings.
+ */
+static int bperf_sync_counters(struct evsel *evsel)
+{
+ int num_cpu, i, cpu;
+
+ num_cpu = all_cpu_map->nr;
+ for (i = 0; i < num_cpu; i++) {
+ cpu = all_cpu_map->map[i];
+ bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
+ }
+ return 0;
+}
+
+static int bperf__enable(struct evsel *evsel)
+{
+ evsel->follower_skel->bss->enabled = 1;
+ return 0;
+}
+
+static int bperf__read(struct evsel *evsel)
+{
+ struct bperf_follower_bpf *skel = evsel->follower_skel;
+ __u32 num_cpu_bpf = cpu__max_cpu();
+ struct bpf_perf_event_value values[num_cpu_bpf];
+ int reading_map_fd, err = 0;
+ __u32 i, j, num_cpu;
+
+ bperf_sync_counters(evsel);
+ reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
+
+ for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+ __u32 cpu;
+
+ err = bpf_map_lookup_elem(reading_map_fd, &i, values);
+ if (err)
+ goto out;
+ switch (evsel->follower_skel->bss->type) {
+ case BPERF_FILTER_GLOBAL:
+ assert(i == 0);
+
+ num_cpu = all_cpu_map->nr;
+ for (j = 0; j < num_cpu; j++) {
+ cpu = all_cpu_map->map[j];
+ perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
+ perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
+ perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
+ }
+ break;
+ case BPERF_FILTER_CPU:
+ cpu = evsel->core.cpus->map[i];
+ perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
+ perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
+ perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
+ break;
+ case BPERF_FILTER_PID:
+ case BPERF_FILTER_TGID:
+ perf_counts(evsel->counts, 0, i)->val = 0;
+ perf_counts(evsel->counts, 0, i)->ena = 0;
+ perf_counts(evsel->counts, 0, i)->run = 0;
+
+ for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
+ perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
+ perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
+ perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static int bperf__destroy(struct evsel *evsel)
+{
+ bperf_follower_bpf__destroy(evsel->follower_skel);
+ close(evsel->bperf_leader_prog_fd);
+ close(evsel->bperf_leader_link_fd);
+ return 0;
+}
+
+/*
+ * bperf: share hardware PMCs with BPF
+ *
+ * perf uses performance monitoring counters (PMC) to monitor system
+ * performance. The PMCs are limited hardware resources. For example,
+ * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
+ *
+ * Modern data center systems use these PMCs in many different ways:
+ * system level monitoring, (maybe nested) container level monitoring, per
+ * process monitoring, profiling (in sample mode), etc. In some cases,
+ * there are more active perf_events than available hardware PMCs. To allow
+ * all perf_events to have a chance to run, it is necessary to do expensive
+ * time multiplexing of events.
+ *
+ * On the other hand, many monitoring tools count the common metrics
+ * (cycles, instructions). It is a waste to have multiple tools create
+ * multiple perf_events of "cycles" and occupy multiple PMCs.
+ *
+ * bperf tries to reduce such wastes by allowing multiple perf_events of
+ * "cycles" or "instructions" (at different scopes) to share PMUs. Instead
+ * of having each perf-stat session to read its own perf_events, bperf uses
+ * BPF programs to read the perf_events and aggregate readings to BPF maps.
+ * Then, the perf-stat session(s) reads the values from these BPF maps.
+ *
+ * ||
+ * shared progs and maps <- || -> per session progs and maps
+ * ||
+ * --------------- ||
+ * | perf_events | ||
+ * --------------- fexit || -----------------
+ * | --------||----> | follower prog |
+ * --------------- / || --- -----------------
+ * cs -> | leader prog |/ ||/ | |
+ * --> --------------- /|| -------------- ------------------
+ * / | | / || | filter map | | accum_readings |
+ * / ------------ ------------ || -------------- ------------------
+ * | | prev map | | diff map | || |
+ * | ------------ ------------ || |
+ * \ || |
+ * = \ ==================================================== | ============
+ * \ / user space
+ * \ /
+ * \ /
+ * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM
+ * \ /
+ * \ /
+ * \------ perf-stat ----------------------/
+ *
+ * The figure above shows the architecture of bperf. Note that the figure
+ * is divided into 3 regions: shared progs and maps (top left), per session
+ * progs and maps (top right), and user space (bottom).
+ *
+ * The leader prog is triggered on each context switch (cs). The leader
+ * prog reads perf_events and stores the difference (current_reading -
+ * previous_reading) to the diff map. For the same metric, e.g. "cycles",
+ * multiple perf-stat sessions share the same leader prog.
+ *
+ * Each perf-stat session creates a follower prog as fexit program to the
+ * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38)
+ * follower progs to the same leader prog. The follower prog checks current
+ * task and processor ID to decide whether to add the value from the diff
+ * map to its accumulated reading map (accum_readings).
+ *
+ * Finally, perf-stat user space reads the value from accum_reading map.
+ *
+ * Besides context switch, it is also necessary to trigger the leader prog
+ * before perf-stat reads the value. Otherwise, the accum_reading map may
+ * not have the latest reading from the perf_events. This is achieved by
+ * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU.
+ *
+ * Comment before the definition of struct perf_event_attr_map_entry
+ * describes how different sessions of perf-stat share information about
+ * the leader prog.
+ */
+
+struct bpf_counter_ops bperf_ops = {
+ .load = bperf__load,
+ .enable = bperf__enable,
+ .read = bperf__read,
+ .install_pe = bperf__install_pe,
+ .destroy = bperf__destroy,
+};
+
+static inline bool bpf_counter_skip(struct evsel *evsel)
+{
+ return list_empty(&evsel->bpf_counter_list) &&
+ evsel->follower_skel == NULL;
+}
+
int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
{
- if (list_empty(&evsel->bpf_counter_list))
+ if (bpf_counter_skip(evsel))
return 0;
return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
{
- if (target__has_bpf(target))
+ if (target->bpf_str)
evsel->bpf_counter_ops = &bpf_program_profiler_ops;
+ else if (target->use_bpf)
+ evsel->bpf_counter_ops = &bperf_ops;
if (evsel->bpf_counter_ops)
return evsel->bpf_counter_ops->load(evsel, target);
int bpf_counter__enable(struct evsel *evsel)
{
- if (list_empty(&evsel->bpf_counter_list))
+ if (bpf_counter_skip(evsel))
return 0;
return evsel->bpf_counter_ops->enable(evsel);
}
int bpf_counter__read(struct evsel *evsel)
{
- if (list_empty(&evsel->bpf_counter_list))
+ if (bpf_counter_skip(evsel))
return -EAGAIN;
return evsel->bpf_counter_ops->read(evsel);
}
void bpf_counter__destroy(struct evsel *evsel)
{
- if (list_empty(&evsel->bpf_counter_list))
+ if (bpf_counter_skip(evsel))
return;
evsel->bpf_counter_ops->destroy(evsel);
evsel->bpf_counter_ops = NULL;
#else /* HAVE_BPF_SKEL */
-#include<linux/err.h>
+#include <linux/err.h>
static inline int bpf_counter__load(struct evsel *evsel __maybe_unused,
struct target *target __maybe_unused)
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_H
+#define __BPERF_STAT_H
+
+typedef struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+ __uint(max_entries, 1);
+} reading_map;
+
+#endif /* __BPERF_STAT_H */
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf.h"
+#include "bperf_u.h"
+
+reading_map diff_readings SEC(".maps");
+reading_map accum_readings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} filter SEC(".maps");
+
+enum bperf_filter_type type = 0;
+int enabled = 0;
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value *diff_val, *accum_val;
+ __u32 filter_key, zero = 0;
+ __u32 *accum_key;
+
+ if (!enabled)
+ return 0;
+
+ switch (type) {
+ case BPERF_FILTER_GLOBAL:
+ accum_key = &zero;
+ goto do_add;
+ case BPERF_FILTER_CPU:
+ filter_key = bpf_get_smp_processor_id();
+ break;
+ case BPERF_FILTER_PID:
+ filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
+ break;
+ case BPERF_FILTER_TGID:
+ filter_key = bpf_get_current_pid_tgid() >> 32;
+ break;
+ default:
+ return 0;
+ }
+
+ accum_key = bpf_map_lookup_elem(&filter, &filter_key);
+ if (!accum_key)
+ return 0;
+
+do_add:
+ diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+ if (!diff_val)
+ return 0;
+
+ accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+ if (!accum_val)
+ return 0;
+
+ accum_val->counter += diff_val->counter;
+ accum_val->enabled += diff_val->enabled;
+ accum_val->running += diff_val->running;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bperf.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(int));
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+reading_map prev_readings SEC(".maps");
+reading_map diff_readings SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch)
+{
+ struct bpf_perf_event_value val, *prev_val, *diff_val;
+ __u32 key = bpf_get_smp_processor_id();
+ __u32 zero = 0;
+ long err;
+
+ prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+ if (!prev_val)
+ return 0;
+
+ diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
+ if (!diff_val)
+ return 0;
+
+ err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
+ if (err)
+ return 0;
+
+ diff_val->counter = val.counter - prev_val->counter;
+ diff_val->enabled = val.enabled - prev_val->enabled;
+ diff_val->running = val.running - prev_val->running;
+ *prev_val = val;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2021 Facebook
+
+#ifndef __BPERF_STAT_U_H
+#define __BPERF_STAT_U_H
+
+enum bperf_filter_type {
+ BPERF_FILTER_GLOBAL = 1,
+ BPERF_FILTER_CPU,
+ BPERF_FILTER_PID,
+ BPERF_FILTER_TGID,
+};
+
+#endif /* __BPERF_STAT_U_H */
static inline void
fexit_update_maps(struct bpf_perf_event_value *after)
{
- struct bpf_perf_event_value *before, diff, *accum;
+ struct bpf_perf_event_value *before, diff;
__u32 zero = 0;
before = bpf_map_lookup_elem(&fentry_readings, &zero);
{
struct bpf_perf_event_value reading;
__u32 cpu = bpf_get_smp_processor_id();
- __u32 one = 1, zero = 0;
int err;
/* read all events before updating the maps, to reduce error */
* @children: tree of call paths of functions called
*
* In combination with the call_return structure, the call_path structure
- * defines a context-sensitve call-graph.
+ * defines a context-sensitive call-graph.
*/
struct call_path {
struct call_path *parent;
if (!node)
return -1;
- /* lookup in childrens */
+ /* lookup in children */
while (*p) {
enum match_result ret;
if (!strcmp(var, "stat.big-num"))
perf_stat__set_big_num(perf_config_bool(var, value));
+ if (!strcmp(var, "stat.no-csv-summary"))
+ perf_stat__set_no_csv_summary(perf_config_bool(var, value));
+
/* Add other config variables here. */
return 0;
}
/* perf_config_set can contain both user and system config items.
* So we should know where each value is from.
* The classification would be needed when a particular config file
- * is overwrited by setting feature i.e. set_config().
+ * is overwritten by setting feature i.e. set_config().
*/
if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
section->from_system_config = true;
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
*/
+#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/zalloc.h>
* This is the first timestamp we've seen since the beginning of traces
* or a discontinuity. Since timestamps packets are generated *after*
* range packets have been generated, we need to estimate the time at
- * which instructions started by substracting the number of instructions
+ * which instructions started by subtracting the number of instructions
* executed to the timestamp.
*/
packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{
- pid_t tid;
+ pid_t tid = -1;
+ static u64 pid_fmt;
+ int ret;
- /* Ignore PE_CONTEXT packets that don't have a valid contextID */
- if (!elem->context.ctxt_id_valid)
+ /*
+ * As all the ETMs run at the same exception level, the system should
+ * have the same PID format crossing CPUs. So cache the PID format
+ * and reuse it for sequential decoding.
+ */
+ if (!pid_fmt) {
+ ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt);
+ if (ret)
+ return OCSD_RESP_FATAL_SYS_ERR;
+ }
+
+ /*
+ * Process the PE_CONTEXT packets if we have a valid contextID or VMID.
+ * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2
+ * as VMID, Bit ETM_OPT_CTXTID2 is set in this case.
+ */
+ switch (pid_fmt) {
+ case BIT(ETM_OPT_CTXTID):
+ if (elem->context.ctxt_id_valid)
+ tid = elem->context.context_id;
+ break;
+ case BIT(ETM_OPT_CTXTID2):
+ if (elem->context.vmid_valid)
+ tid = elem->context.vmid;
+ break;
+ default:
+ break;
+ }
+
+ if (tid == -1)
return OCSD_RESP_CONT;
- tid = elem->context.context_id;
if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
return OCSD_RESP_FATAL_SYS_ERR;
*/
#include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
return 0;
}
+/*
+ * The returned PID format is presented by two bits:
+ *
+ * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced;
+ * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced.
+ *
+ * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
+ * are enabled at the same time when the session runs on an EL2 kernel.
+ * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
+ * recorded in the trace data, the tool will selectively use
+ * CONTEXTIDR_EL2 as PID.
+ */
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt)
+{
+ struct int_node *inode;
+ u64 *metadata, val;
+
+ inode = intlist__find(traceid_list, trace_chan_id);
+ if (!inode)
+ return -EINVAL;
+
+ metadata = inode->priv;
+
+ if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
+ val = metadata[CS_ETM_ETMCR];
+ /* CONTEXTIDR is traced */
+ if (val & BIT(ETM_OPT_CTXTID))
+ *pid_fmt = BIT(ETM_OPT_CTXTID);
+ } else {
+ val = metadata[CS_ETMV4_TRCCONFIGR];
+ /* CONTEXTIDR_EL2 is traced */
+ if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
+ *pid_fmt = BIT(ETM_OPT_CTXTID2);
+ /* CONTEXTIDR_EL1 is traced */
+ else if (val & BIT(ETM4_CFG_BIT_CTXTID))
+ *pid_fmt = BIT(ETM_OPT_CTXTID);
+ }
+
+ return 0;
+}
+
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
u8 trace_chan_id)
{
/*
- * Wnen a timestamp packet is encountered the backend code
+ * When a timestamp packet is encountered the backend code
* is stopped so that the front end has time to process packets
* that were accumulated in the traceID queue. Since there can
* be more than one channel per cs_etm_queue, we need to specify
* | 1 1 0 1 1 1 1 1 | imm8 |
* +-----------------+--------+
*
- * According to the specifiction, it only defines SVC for T32
+ * According to the specification, it only defines SVC for T32
* with 16 bits instruction and has no definition for 32bits;
* so below only read 2 bytes as instruction size for T32.
*/
/*
* If the previous packet is an exception return packet
- * and the return address just follows SVC instuction,
+ * and the return address just follows SVC instruction,
* it needs to calibrate the previous packet sample flags
* as PERF_IP_FLAG_SYSCALLRET.
*/
* contain exception type related info so we cannot decide
* the exception type purely based on exception return packet.
* If we record the exception number from exception packet and
- * reuse it for excpetion return packet, this is not reliable
+ * reuse it for exception return packet, this is not reliable
* due the trace can be discontinuity or the interrupt can
* be nested, thus the recorded exception number cannot be
* used for exception return packet for these two cases.
}
static const char * const cs_etm_global_header_fmts[] = {
- [CS_HEADER_VERSION_0] = " Header version %llx\n",
+ [CS_HEADER_VERSION] = " Header version %llx\n",
[CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n",
[CS_ETM_SNAPSHOT] = " Snapshot %llx\n",
};
static const char * const cs_etm_priv_fmts[] = {
[CS_ETM_MAGIC] = " Magic number %llx\n",
[CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
[CS_ETM_ETMCR] = " ETMCR %llx\n",
[CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n",
[CS_ETM_ETMCCER] = " ETMCCER %llx\n",
static const char * const cs_etmv4_priv_fmts[] = {
[CS_ETM_MAGIC] = " Magic number %llx\n",
[CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n",
[CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n",
[CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
[CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n",
[CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
};
-static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+static const char * const param_unk_fmt =
+ " Unknown parameter [%d] %llx\n";
+static const char * const magic_unk_fmt =
+ " Magic number Unknown %llx\n";
+
+static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset)
{
- int i, j, cpu = 0;
+ int i = *offset, j, nr_params = 0, fmt_offset;
+ __u64 magic;
- for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
- fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+ /* check magic value */
+ magic = val[i + CS_ETM_MAGIC];
+ if ((magic != __perf_cs_etmv3_magic) &&
+ (magic != __perf_cs_etmv4_magic)) {
+ /* failure - note bad magic value */
+ fprintf(stdout, magic_unk_fmt, magic);
+ return -EINVAL;
+ }
+
+ /* print common header block */
+ fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]);
+ fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]);
+
+ if (magic == __perf_cs_etmv3_magic) {
+ nr_params = CS_ETM_NR_TRC_PARAMS_V0;
+ fmt_offset = CS_ETM_ETMCR;
+ /* after common block, offset format index past NR_PARAMS */
+ for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+ fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+ } else if (magic == __perf_cs_etmv4_magic) {
+ nr_params = CS_ETMV4_NR_TRC_PARAMS_V0;
+ fmt_offset = CS_ETMV4_TRCCONFIGR;
+ /* after common block, offset format index past NR_PARAMS */
+ for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+ fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+ }
+ *offset = i;
+ return 0;
+}
+
+static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
+{
+ int i = *offset, j, total_params = 0;
+ __u64 magic;
+
+ magic = val[i + CS_ETM_MAGIC];
+ /* total params to print is NR_PARAMS + common block size for v1 */
+ total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
- for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
- if (val[i] == __perf_cs_etmv3_magic)
- for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
+ if (magic == __perf_cs_etmv3_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETM_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
- else if (val[i] == __perf_cs_etmv4_magic)
- for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
+ }
+ } else if (magic == __perf_cs_etmv4_magic) {
+ for (j = 0; j < total_params; j++, i++) {
+ /* if newer record - could be excess params */
+ if (j >= CS_ETMV4_PRIV_MAX)
+ fprintf(stdout, param_unk_fmt, j, val[i]);
+ else
fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
- else
- /* failure.. return */
+ }
+ } else {
+ /* failure - note bad magic value and error out */
+ fprintf(stdout, magic_unk_fmt, magic);
+ return -EINVAL;
+ }
+ *offset = i;
+ return 0;
+}
+
+static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+{
+ int i, cpu = 0, version, err;
+
+ /* bail out early on bad header version */
+ version = val[0];
+ if (version > CS_HEADER_CURRENT_VERSION) {
+ /* failure.. return */
+ fprintf(stdout, " Unknown Header Version = %x, ", version);
+ fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
+ return;
+ }
+
+ for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+ fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+ for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
+ if (version == 0)
+ err = cs_etm__print_cpu_metadata_v0(val, &i);
+ else if (version == 1)
+ err = cs_etm__print_cpu_metadata_v1(val, &i);
+ if (err)
return;
}
}
+/*
+ * Read a single cpu parameter block from the auxtrace_info priv block.
+ *
+ * For version 1 there is a per cpu nr_params entry. If we are handling
+ * version 1 file, then there may be less, the same, or more params
+ * indicated by this value than the compile time number we understand.
+ *
+ * For a version 0 info block, there are a fixed number, and we need to
+ * fill out the nr_param value in the metadata we create.
+ */
+static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
+ int out_blk_size, int nr_params_v0)
+{
+ u64 *metadata = NULL;
+ int hdr_version;
+ int nr_in_params, nr_out_params, nr_cmn_params;
+ int i, k;
+
+ metadata = zalloc(sizeof(*metadata) * out_blk_size);
+ if (!metadata)
+ return NULL;
+
+ /* read block current index & version */
+ i = *buff_in_offset;
+ hdr_version = buff_in[CS_HEADER_VERSION];
+
+ if (!hdr_version) {
+ /* read version 0 info block into a version 1 metadata block */
+ nr_in_params = nr_params_v0;
+ metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
+ metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
+ metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
+ /* remaining block params at offset +1 from source */
+ for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
+ metadata[k + 1] = buff_in[i + k];
+ /* version 0 has 2 common params */
+ nr_cmn_params = 2;
+ } else {
+ /* read version 1 info block - input and output nr_params may differ */
+ /* version 1 has 3 common params */
+ nr_cmn_params = 3;
+ nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
+
+ /* if input has more params than output - skip excess */
+ nr_out_params = nr_in_params + nr_cmn_params;
+ if (nr_out_params > out_blk_size)
+ nr_out_params = out_blk_size;
+
+ for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
+ metadata[k] = buff_in[i + k];
+
+ /* record the actual nr params we copied */
+ metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
+ }
+
+ /* adjust in offset by number of in params used */
+ i += nr_in_params + nr_cmn_params;
+ *buff_in_offset = i;
+ return metadata;
+}
+
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
int info_header_size;
int total_size = auxtrace_info->header.size;
int priv_size = 0;
- int num_cpu;
- int err = 0, idx = -1;
- int i, j, k;
+ int num_cpu, trcidr_idx;
+ int err = 0;
+ int i, j;
u64 *ptr, *hdr = NULL;
u64 **metadata = NULL;
+ u64 hdr_version;
/*
* sizeof(auxtrace_info_event::type) +
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
- /* Look for version '0' of the header */
- if (ptr[0] != 0)
+ /* Look for version of the header */
+ hdr_version = ptr[0];
+ if (hdr_version > CS_HEADER_CURRENT_VERSION) {
+ /* print routine will print an error on bad version */
+ if (dump_trace)
+ cs_etm__print_auxtrace_info(auxtrace_info->priv, 0);
return -EINVAL;
+ }
- hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
+ hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX);
if (!hdr)
return -ENOMEM;
/* Extract header information - see cs-etm.h for format */
- for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+ for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
hdr[i] = ptr[i];
num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
*/
for (j = 0; j < num_cpu; j++) {
if (ptr[i] == __perf_cs_etmv3_magic) {
- metadata[j] = zalloc(sizeof(*metadata[j]) *
- CS_ETM_PRIV_MAX);
- if (!metadata[j]) {
- err = -ENOMEM;
- goto err_free_metadata;
- }
- for (k = 0; k < CS_ETM_PRIV_MAX; k++)
- metadata[j][k] = ptr[i + k];
+ metadata[j] =
+ cs_etm__create_meta_blk(ptr, &i,
+ CS_ETM_PRIV_MAX,
+ CS_ETM_NR_TRC_PARAMS_V0);
/* The traceID is our handle */
- idx = metadata[j][CS_ETM_ETMTRACEIDR];
- i += CS_ETM_PRIV_MAX;
+ trcidr_idx = CS_ETM_ETMTRACEIDR;
+
} else if (ptr[i] == __perf_cs_etmv4_magic) {
- metadata[j] = zalloc(sizeof(*metadata[j]) *
- CS_ETMV4_PRIV_MAX);
- if (!metadata[j]) {
- err = -ENOMEM;
- goto err_free_metadata;
- }
- for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
- metadata[j][k] = ptr[i + k];
+ metadata[j] =
+ cs_etm__create_meta_blk(ptr, &i,
+ CS_ETMV4_PRIV_MAX,
+ CS_ETMV4_NR_TRC_PARAMS_V0);
/* The traceID is our handle */
- idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
- i += CS_ETMV4_PRIV_MAX;
+ trcidr_idx = CS_ETMV4_TRCTRACEIDR;
+ }
+
+ if (!metadata[j]) {
+ err = -ENOMEM;
+ goto err_free_metadata;
}
/* Get an RB node for this CPU */
- inode = intlist__findnew(traceid_list, idx);
+ inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]);
/* Something went wrong, no need to continue */
if (!inode) {
}
/*
- * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
+ * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
* CS_ETMV4_PRIV_MAX mark how many double words are in the
* global metadata, and each cpu's metadata respectively.
* The following tests if the correct number of double words was
intlist__delete(traceid_list);
err_free_hdr:
zfree(&hdr);
-
+ /*
+ * At this point, as a minimum we have valid header. Dump the rest of
+ * the info section - the print routines will error out on structural
+ * issues.
+ */
+ if (dump_trace)
+ cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
return err;
}
struct perf_session;
-/* Versionning header in case things need tro change in the future. That way
+/*
+ * Versioning header in case things need to change in the future. That way
* decoding of old snapshot is still possible.
*/
enum {
/* Starting with 0x0 */
- CS_HEADER_VERSION_0,
+ CS_HEADER_VERSION,
/* PMU->type (32 bit), total # of CPUs (32 bit) */
CS_PMU_TYPE_CPUS,
CS_ETM_SNAPSHOT,
- CS_HEADER_VERSION_0_MAX,
+ CS_HEADER_VERSION_MAX,
};
+/*
+ * Update the version for new format.
+ *
+ * New version 1 format adds a param count to the per cpu metadata.
+ * This allows easy adding of new metadata parameters.
+ * Requires that new params always added after current ones.
+ * Also allows client reader to handle file versions that are different by
+ * checking the number of params in the file vs the number expected.
+ */
+#define CS_HEADER_CURRENT_VERSION 1
+
/* Beginning of header common to both ETMv3 and V4 */
enum {
CS_ETM_MAGIC,
CS_ETM_CPU,
+ /* Number of trace config params in following ETM specific block */
+ CS_ETM_NR_TRC_PARAMS,
+ CS_ETM_COMMON_BLK_MAX_V1,
};
/* ETMv3/PTM metadata */
enum {
/* Dynamic, configurable parameters */
- CS_ETM_ETMCR = CS_ETM_CPU + 1,
+ CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1,
CS_ETM_ETMTRACEIDR,
/* RO, taken from sysFS */
CS_ETM_ETMCCER,
CS_ETM_PRIV_MAX,
};
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1)
+
/* ETMv4 metadata */
enum {
/* Dynamic, configurable parameters */
- CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+ CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1,
CS_ETMV4_TRCTRACEIDR,
/* RO, taken from sysFS */
CS_ETMV4_TRCIDR0,
CS_ETMV4_PRIV_MAX,
};
+/* define fixed version 0 length - allow new format reader to read old files. */
+#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1)
+
/*
* ETMv3 exception encoding number:
- * See Embedded Trace Macrocell spcification (ARM IHI 0014Q)
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
* table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
*/
enum {
#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb)
-#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64))
#define __perf_cs_etmv3_magic 0x3030303030303030ULL
#define __perf_cs_etmv4_magic 0x4040404040404040ULL
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
pid_t tid, u8 trace_chan_id);
bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
/*
* Add '_' prefix to potential keywork. According to
* Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com),
- * futher CTF spec updating may require us to use '$'.
+ * further CTF spec updating may require us to use '$'.
*/
if (dup < 0)
len = strlen(name) + sizeof("_");
* Demangle Java function signature (openJDK, not GCJ)
* input:
* str: string to parse. String is not modified
- * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling
+ * flags: combination of JAVA_DEMANGLE_* flags to modify demangling
* return:
* if input can be demangled, then a newly allocated string is returned.
* if input cannot be demangled, then NULL is returned
if (!str)
return NULL;
- /* find start of retunr type */
+ /* find start of return type */
p = strrchr(str, ')');
if (!p)
return NULL;
/* dso__for_each_symbol - iterate over the symbols of given type
*
- * @dso: the 'struct dso *' in which symbols itereated
+ * @dso: the 'struct dso *' in which symbols are iterated
* @pos: the 'struct symbol *' to use as a loop cursor
* @n: the 'struct rb_node *' to use as a temporary storage
*/
return NULL;
} while (laddr == addr);
l++;
- /* Going foward to find the statement line */
+ /* Going forward to find the statement line */
do {
line = dwarf_onesrcline(lines, l++);
if (!line || dwarf_lineaddr(line, &laddr) != 0 ||
* die_get_linkage_name - Get the linkage name of the object
* @dw_die: A DIE of the object
*
- * Get the linkage name attiribute of given @dw_die.
+ * Get the linkage name attribute of given @dw_die.
* For C++ binary, the linkage name will be the mangled symbol.
*/
const char *die_get_linkage_name(Dwarf_Die *dw_die)
* @data: user data
*
* Walk on the instances of give @in_die. @in_die must be an inlined function
- * declartion. This returns the return value of @callback if it returns
+ * declaration. This returns the return value of @callback if it returns
* non-zero value, or -ENOENT if there is no instance.
*/
int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
const char **fname, int *lineno);
-/* Walk on funcitons at given address */
+/* Walk on functions at given address */
int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
int (*callback)(Dwarf_Die *, void *), void *data);
#include "../arch/s390/include/dwarf-regs-table.h"
#include "../arch/sparc/include/dwarf-regs-table.h"
#include "../arch/xtensa/include/dwarf-regs-table.h"
+#include "../arch/mips/include/dwarf-regs-table.h"
#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
return __get_dwarf_regstr(sparc_regstr_tbl, n);
case EM_XTENSA:
return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+ case EM_MIPS:
+ return __get_dwarf_regstr(mips_regstr_tbl, n);
default:
pr_err("ELF MACHINE %x is not supported.\n", machine);
}
u8 cpumode;
u16 misc;
u16 ins_lat;
+ u16 p_stage_cyc;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
+const char *arch_perf_header_entry(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
#endif /* __PERF_RECORD_H */
* all struct perf_record_lost_samples.lost fields reported.
*
* The total_period is needed because by default auto-freq is used, so
- * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
+ * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct
* perf_record_sample.period and stash the result in total_period.
*/
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/prctl.h>
#include <linux/bitops.h>
#include <linux/hash.h>
}
}
- /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
+ /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
if ((sample_type & PERF_SAMPLE_READ) &&
!(read_format & PERF_FORMAT_ID)) {
return false;
close(go_pipe[1]);
fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+ /*
+ * Change the name of this process not to confuse --exclude-perf users
+ * that sees 'perf' in the window up to the execvp() and thinks that
+ * perf samples are not being excluded.
+ */
+ prctl(PR_SET_NAME, "perf-exec");
+
/*
* Tell the parent we're ready to go
*/
#define COP(x) (1 << x)
/*
- * cache operartion stat
+ * cache operation stat
* L1I : Read and prefetch only
* ITLB and BPU : Read-only
*/
/*
* Undo swap of u64, then swap on individual u32s,
* get the size of the raw area and undo all of the
- * swap. The pevent interface handles endianity by
+ * swap. The pevent interface handles endianness by
* itself.
*/
if (swapped) {
struct bpf_counter_ops;
struct target;
struct hashmap;
+struct bperf_leader_bpf;
+struct bperf_follower_bpf;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
* See also evsel__has_callchain().
*/
__u64 synth_sample_type;
- struct list_head bpf_counter_list;
+
+ /*
+ * bpf_counter_ops serves two use cases:
+ * 1. perf-stat -b counting events used byBPF programs
+ * 2. perf-stat --use-bpf use BPF programs to aggregate counts
+ */
struct bpf_counter_ops *bpf_counter_ops;
+
+ /* for perf-stat -b */
+ struct list_head bpf_counter_list;
+
+ /* for perf-stat --use-bpf */
+ int bperf_leader_prog_fd;
+ int bperf_leader_link_fd;
+ union {
+ struct bperf_leader_bpf *leader_skel;
+ struct bperf_follower_bpf *follower_skel;
+ };
};
struct perf_missing_features {
#define PARSE_CTX_H 1
// There are fixes that need to land upstream before we can use libbpf's headers,
-// for now use our copy uncoditionally, since the data structures at this point
+// for now use our copy unconditionally, since the data structures at this point
// are exactly the same, no problem.
//#ifdef HAVE_LIBBPF_SUPPORT
//#include <bpf/hashmap.h>
return 0;
}
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
int do_write(struct feat_fd *ff, const void *buf, size_t size)
{
if (!ff->buf)
return __do_write_buf(ff, buf, size);
}
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
{
u64 *p = (u64 *) set;
return 0;
}
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
int write_padded(struct feat_fd *ff, const void *bf,
size_t count, size_t count_aligned)
{
#define string_size(str) \
(PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
static int do_write_string(struct feat_fd *ff, const char *str)
{
u32 len, olen;
return NULL;
}
-/* Return: 0 if succeded, -ERR if failed. */
+/* Return: 0 if succeeded, -ERR if failed. */
static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
{
unsigned long *set;
int err = -1;
if (ff->ph->needs_swap) {
- pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n");
+ pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n");
return 0;
}
int err = -1;
if (ff->ph->needs_swap) {
- pr_warning("interpreting btf from systems with endianity is not yet supported\n");
+ pr_warning("interpreting btf from systems with endianness is not yet supported\n");
return 0;
}
};
/*
- * In the legacy pipe format, there is an implicit assumption that endiannesss
+ * In the legacy pipe format, there is an implicit assumption that endianness
* between host recording the samples, and host parsing the samples is the
* same. This is not always the case given that the pipe output may always be
* redirected into a file and analyzed on a different machine with possibly a
- * different endianness and perf_event ABI revsions in the perf tool itself.
+ * different endianness and perf_event ABI revisions in the perf tool itself.
*/
static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
{
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+ hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
}
static void he_stat__add_period(struct he_stat *he_stat, u64 period,
- u64 weight, u64 ins_lat)
+ u64 weight, u64 ins_lat, u64 p_stage_cyc)
{
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events += 1;
he_stat->ins_lat += ins_lat;
+ he_stat->p_stage_cyc += p_stage_cyc;
}
static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->nr_events += src->nr_events;
dest->weight += src->weight;
dest->ins_lat += src->ins_lat;
+ dest->p_stage_cyc += src->p_stage_cyc;
}
static void he_stat__decay(struct he_stat *he_stat)
u64 period = entry->stat.period;
u64 weight = entry->stat.weight;
u64 ins_lat = entry->stat.ins_lat;
+ u64 p_stage_cyc = entry->stat.p_stage_cyc;
bool leftmost = true;
p = &hists->entries_in->rb_root.rb_node;
if (!cmp) {
if (sample_self) {
- he_stat__add_period(&he->stat, period, weight, ins_lat);
+ he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
hist_entry__add_callchain_period(he, period);
}
if (symbol_conf.cumulate_callchain)
- he_stat__add_period(he->stat_acc, period, weight, ins_lat);
+ he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
/*
* This mem info was allocated from sample__resolve_mem
.period = sample->period,
.weight = sample->weight,
.ins_lat = sample->ins_lat,
+ .p_stage_cyc = sample->p_stage_cyc,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
HISTC_MEM_BLOCKED,
HISTC_LOCAL_INS_LAT,
HISTC_GLOBAL_INS_LAT,
+ HISTC_P_STAGE_CYC,
HISTC_NR_COLS, /* Last entry */
};
/*
* Since this thread will not be kept in any rbtree not in a
* list, initialize its list node so that at thread__put() the
- * current thread lifetime assuption is kept and we don't segfault
+ * current thread lifetime assumption is kept and we don't segfault
* at list_del_init().
*/
INIT_LIST_HEAD(&pt->unknown_thread->node);
*
* It does so by calculating the costs of the path ending in characters
* i (in string1) and j (in string2), respectively, given that the last
- * operation is a substition, a swap, a deletion, or an insertion.
+ * operation is a substitution, a swap, a deletion, or an insertion.
*
* This implementation allows the costs to be weighted:
*
* generic one.
*
* The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
- * name and the defination of this function is included directly from
+ * name and the definition of this function is included directly from
* 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
* is defined no matter what arch the host is.
*
* generic one.
*
* The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
- * name and the defination of this function is included directly from
+ * name and the definition of this function is included directly from
* 'arch/x86/util/unwind-libunwind.c', to make sure that this function
* is defined no matter what arch the host is.
*
/*
* This is an optional work. Even it fail we can continue our
- * work. Needn't to check error return.
+ * work. Needn't check error return.
*/
llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
maps__insert(&machine->kmaps, map);
- /* Put the map here because maps__insert alread got it */
+ /* Put the map here because maps__insert already got it */
map__put(map);
out:
/* put the dso here, corresponding to machine__findnew_module_dso */
* maps because that is what the kernel just did.
*
* But when synthesizing, this should not be done. If we do, we end up
- * with overlapping maps as we process the sythesized MMAP2 events that
+ * with overlapping maps as we process the synthesized MMAP2 events that
* get delivered shortly thereafter.
*
* Use the FORK event misc flags in an internal way to signal this
static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
{
if (!regexec(regex, sym->name, 0, NULL, 0))
- return 1;
- return 0;
+ return true;
+ return false;
}
static void ip__resolve_ams(struct thread *thread,
/*
* Check if there are identical LBRs between two samples.
- * Identicall LBRs must have same from, to and flags values. Also,
+ * Identical LBRs must have same from, to and flags values. Also,
* they have to be saved in the same LBR registers (same physical
* index).
*
}
/*
- * Recolve LBR callstack chain sample
+ * Resolve LBR callstack chain sample
* Return:
* 1 on success get LBR callchain information
* 0 no available LBR callchain information, should try fp
/* map__for_each_symbol - iterate over the symbols in the given map
*
- * @map: the 'struct map *' in which symbols itereated
+ * @map: the 'struct map *' in which symbols are iterated
* @pos: the 'struct symbol *' to use as a loop cursor
* @n: the 'struct rb_node *' to use as a temporary storage
* Note: caller must ensure map->dso is not NULL (map is loaded).
/* map__for_each_symbol_with_name - iterate over the symbols in the given map
* that have the given name
*
- * @map: the 'struct map *' in which symbols itereated
+ * @map: the 'struct map *' in which symbols are iterated
* @sym_name: the symbol name
* @pos: the 'struct symbol *' to use as a loop cursor
*/
u32 rmt_dram; /* count of loads miss to remote DRAM */
u32 blk_data; /* count of loads blocked by data */
u32 blk_addr; /* count of loads blocked by address conflict */
- u32 nomap; /* count of load/stores with no phys adrs */
+ u32 nomap; /* count of load/stores with no phys addrs */
u32 noparse; /* count of unparsable data sources */
};
* @pctx: the parse context for the metric expression.
* @metric_no_merge: don't attempt to share events for the metric with other
* metrics.
- * @has_constraint: is there a contraint on the group of events? In which case
+ * @has_constraint: is there a constraint on the group of events? In which case
* the events won't be grouped.
* @metric_events: out argument, null terminated array of evsel's associated
* with the metric.
struct evlist;
struct evsel;
-struct evlist;
struct option;
struct rblist;
struct pmu_events_map;
struct parse_events_term *term, *temp;
/*
- * Currectly, all possible user config term
+ * Currently, all possible user config term
* belong to bpf object. parse_events__is_hardcoded_term()
- * happends to be a good flag.
+ * happens to be a good flag.
*
* See parse_events_config_bpf() and
* config_term_tracepoint().
/*
* Caller doesn't know anything about obj_head_config,
- * so combine them together again before returnning.
+ * so combine them together again before returning.
*/
if (head_config)
list_splice_tail(&obj_head_config, head_config);
}
/*
- * Check term availbility after basic checking so
+ * Check term availability after basic checking so
* PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
*
- * If check availbility at the entry of this function,
+ * If check availability at the entry of this function,
* user will see "'<sysfs term>' is not usable in 'perf stat'"
* if an invalid config term is provided for legacy events
* (for example, instructions/badterm/...), which is confusing.
/*
* Sets value based on the format definition (format parameter)
- * and unformated value (value parameter).
+ * and unformatted value (value parameter).
*/
static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
bool zero)
}
/*
- * if no unit or scale foundin aliases, then
+ * if no unit or scale found in aliases, then
* set defaults as for evsel
* unit cannot left to NULL
*/
return err;
}
-/* Concatinate two arrays */
+/* Concatenate two arrays */
static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
{
void *ret;
if (*ntevs + ntevs2 > probe_conf.max_probes)
ret = -E2BIG;
else {
- /* Concatinate the array of probe_trace_event */
+ /* Concatenate the array of probe_trace_event */
new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs),
*tevs2, ntevs2 * sizeof(**tevs2));
if (!new_tevs)
/*
* Convert a location into trace_arg.
* If tvar == NULL, this just checks variable can be converted.
- * If fentry == true and vr_die is a parameter, do huristic search
+ * If fentry == true and vr_die is a parameter, do heuristic search
* for the location fuzzed by function entry mcount.
*/
static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
" nor array.\n", varname);
return -EINVAL;
}
- /* While prcessing unnamed field, we don't care about this */
+ /* While processing unnamed field, we don't care about this */
if (field->ref && dwarf_diename(vr_die)) {
pr_err("Semantic error: %s must be referred by '.'\n",
field->name);
(lf->lno_s > lineno || lf->lno_e < lineno))
return 0;
- /* Make sure this line can be reversable */
+ /* Make sure this line can be reversible */
if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0
&& (lineno != __lineno || strcmp(fname, __fname)))
return 0;
* the data portion is mmap()'ed.
*
* To sort the queues in chronological order, all queue access is controlled
- * by the auxtrace_heap. This is basicly a stack, each stack element has two
+ * by the auxtrace_heap. This is basically a stack, each stack element has two
* entries, the queue number and a time stamp. However the stack is sorted by
* the time stamps. The highest time stamp is at the bottom the lowest
* (nearest) time stamp is at the top. That sort order is maintained at all
* stamp of the last processed entry of the auxtrace_buffer replaces the
* current auxtrace_heap top.
*
- * 3. Auxtrace_queues might run of out data and are feeded by the
+ * 3. Auxtrace_queues might run of out data and are fed by the
* PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
*
* Event Generation
- * Each sampling-data entry in the auxilary trace data generates a perf sample.
+ * Each sampling-data entry in the auxiliary trace data generates a perf sample.
* This sample is filled
* with data from the auxtrace such as PID/TID, instruction address, CPU state,
* etc. This sample is processed with perf_session__deliver_synth_event() to
* pointer to the queue, the second parameter is the time stamp. This
* is the time stamp:
* - of the event that triggered this processing.
- * - or the time stamp when the last proccesing of this queue stopped.
+ * - or the time stamp when the last processing of this queue stopped.
* In this case it stopped at a 4KB page boundary and record the
* position on where to continue processing on the next invocation
* (see buffer->use_data and buffer->use_size).
goto out;
}
- pos += dsdes; /* Skip diagnositic entry */
+ pos += dsdes; /* Skip diagnostic entry */
/* Check for trailer entry */
if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
* Attempt to use the call path root from the call return
* processor, if the call return processor is in use. Otherwise,
* we allocate a new call path root. This prevents exporting
- * duplicate call path ids when both are in use simultaniously.
+ * duplicate call path ids when both are in use simultaneously.
*/
if (tables->dbe.crp)
tables->dbe.cpr = tables->dbe.crp->cpr;
* in "to" register.
* For example, there is a call stack
* "A"->"B"->"C"->"D".
- * The LBR registers will recorde like
+ * The LBR registers will be recorded like
* "C"->"D", "B"->"C", "A"->"B".
* So only the first "to" register and all "from"
* registers are needed to construct the whole stack.
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
printf("... weight: %" PRIu64 "", sample->weight);
- if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
printf(",0x%"PRIx16"", sample->ins_lat);
+ printf(",0x%"PRIx16"", sample->p_stage_cyc);
+ }
printf("\n");
}
return tool->event_update(tool, event, &session->evlist);
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
- * Depreceated, but we need to handle it for sake
+ * Deprecated, but we need to handle it for sake
* of old data files create in pipe mode.
*/
return 0;
#include <traceevent/event-parse.h>
#include "mem-events.h"
#include "annotate.h"
+#include "event.h"
#include "time-utils.h"
#include "cgroup.h"
#include "machine.h"
const char *parent_pattern = default_parent_pattern;
const char *default_sort_order = "comm,dso,symbol";
const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
const char default_tracepoint_sort_order[] = "trace";
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
+const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
+const char *arch_specific_sort_keys[] = {"p_stage_cyc"};
/*
* Replaces all occurrences of a char used with the:
.se_width_idx = HISTC_GLOBAL_INS_LAT,
};
+static int64_t
+sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->stat.p_stage_cyc - right->stat.p_stage_cyc;
+}
+
+static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc);
+}
+
+struct sort_entry sort_p_stage_cyc = {
+ .se_header = "Pipeline Stage Cycle",
+ .se_cmp = sort__global_p_stage_cyc_cmp,
+ .se_snprintf = hist_entry__p_stage_cyc_snprintf,
+ .se_width_idx = HISTC_P_STAGE_CYC,
+};
+
struct sort_entry sort_mem_daddr_sym = {
.se_header = "Data Symbol",
.se_cmp = sort__daddr_cmp,
int taken;
};
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+ return 0;
+}
+
+const char * __weak arch_perf_header_entry(const char *se_header)
+{
+ return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+ sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header);
+}
+
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
+ DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
};
#undef DIM
struct evlist *evlist,
int level)
{
- unsigned int i;
+ unsigned int i, j;
+
+ /*
+ * Check to see if there are any arch specific
+ * sort dimensions not applicable for the current
+ * architecture. If so, Skip that sort key since
+ * we don't want to display it in the output fields.
+ */
+ for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+ if (!strcmp(arch_specific_sort_keys[j], tok) &&
+ !arch_support_sort_key(tok)) {
+ return 0;
+ }
+ }
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = &common_sort_dimensions[i];
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
+ for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+ if (!strcmp(dynamic_headers[j], sd->name))
+ sort_dimension_add_dynamic_header(sd);
+ }
+
if (sd->entry == &sort_parent) {
int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
if (ret) {
u64 period_guest_us;
u64 weight;
u64 ins_lat;
+ u64 p_stage_cyc;
u32 nr_events;
};
SORT_CODE_PAGE_SIZE,
SORT_LOCAL_INS_LAT,
SORT_GLOBAL_INS_LAT,
+ SORT_PIPELINE_STAGE_CYC,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
if (counter->cgrp)
os.nfields++;
}
+
+ if (!config->no_csv_summary && config->csv_output &&
+ config->summary && !config->interval) {
+ fprintf(config->output, "%16s%s", "summary", config->csv_sep);
+ }
+
if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
if (config->metric_only) {
pm(config, &os, NULL, "", "", 0);
#include "expr.h"
#include "metricgroup.h"
#include "cgroup.h"
+#include "units.h"
#include <linux/zalloc.h>
/*
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) {
- char unit = 'M';
- char unit_buf[10];
+ char unit = ' ';
+ char unit_buf[10] = "/sec";
total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd);
-
if (total)
- ratio = 1000.0 * avg / total;
- if (ratio < 0.001) {
- ratio *= 1000;
- unit = 'K';
- }
- snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+ ratio = convert_unit_double(1000000000.0 * avg / total, &unit);
+
+ if (unit != ' ')
+ snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(config, cpu, out, st, &rsd);
return pct;
}
-bool __perf_evsel_stat__is(struct evsel *evsel,
- enum perf_stat_evsel_id id)
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id)
{
struct perf_stat_evsel *ps = evsel->stats;
bool all_user;
bool percore_show_thread;
bool summary;
+ bool no_csv_summary;
bool metric_no_group;
bool metric_no_merge;
bool stop_read_counter;
};
void perf_stat__set_big_num(int set);
+void perf_stat__set_no_csv_summary(int set);
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
u64 ena;
};
-bool __perf_evsel_stat__is(struct evsel *evsel,
- enum perf_stat_evsel_id id);
+bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
#define perf_stat_evsel__is(evsel, id) \
- __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+ __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;
* build complex strings/buffers whose final size isn't easily known.
*
* It is NOT legal to copy the ->buf pointer away.
- * `strbuf_detach' is the operation that detachs a buffer from its shell
+ * `strbuf_detach' is the operation that detaches a buffer from its shell
* while keeping the shell valid wrt its invariants.
*
* 2. the ->buf member is a byte array that has at least ->len + 1 bytes
/* A node of string filter */
struct strfilter_node {
- struct strfilter_node *l; /* Tree left branche (for &,|) */
- struct strfilter_node *r; /* Tree right branche (for !,&,|) */
+ struct strfilter_node *l; /* Tree left branch (for &,|) */
+ struct strfilter_node *r; /* Tree right branch (for !,&,|) */
const char *p; /* Operator or rule */
};
curr_dso->symtab_type = dso->symtab_type;
maps__insert(kmaps, curr_map);
/*
- * Add it before we drop the referece to curr_map, i.e. while
+ * Add it before we drop the reference to curr_map, i.e. while
* we still are sure to have a reference to this DSO via
* *curr_map->dso.
*/
for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
- fprintf(fp, "%s\n", pos->sym.name);
+ ret += fprintf(fp, "%s\n", pos->sym.name);
}
return ret;
*max = 0;
for (i = 0; i < map->nr; i++) {
- /* bit possition of the cpu is + 1 */
+ /* bit position of the cpu is + 1 */
int bit = map->map[i] + 1;
if (bit > *max)
* mask = size of 'struct perf_record_record_cpu_map' +
* maximum cpu bit converted to size of longs
*
- * and finaly + the size of 'struct perf_record_cpu_map_data'.
+ * and finally + the size of 'struct perf_record_cpu_map_data'.
*/
size_cpus = cpus_size(map);
size_mask = mask_size(map, max);
#include <asm/syscalls.c>
const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
static const char **syscalltbl_native = syscalltbl_arm64;
+#elif defined(__mips__)
+#include <asm/syscalls_n64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_mips_n64;
#endif
struct syscall {
bool uses_mmap;
bool default_per_cpu;
bool per_thread;
+ bool use_bpf;
+ const char *attr_map;
};
enum target_errno {
static inline bool target__has_bpf(struct target *target)
{
- return target->bpf_str;
+ return target->bpf_str || target->use_bpf;
}
static inline bool target__none(struct target *target)
struct ip_callchain;
struct symbol;
struct dso;
-struct comm;
struct perf_sample;
struct addr_location;
struct call_path;
return (unsigned long) -1;
}
-unsigned long convert_unit(unsigned long value, char *unit)
+double convert_unit_double(double value, char *unit)
{
*unit = ' ';
- if (value > 1000) {
- value /= 1000;
+ if (value > 1000.0) {
+ value /= 1000.0;
*unit = 'K';
}
- if (value > 1000) {
- value /= 1000;
+ if (value > 1000.0) {
+ value /= 1000.0;
*unit = 'M';
}
- if (value > 1000) {
- value /= 1000;
+ if (value > 1000.0) {
+ value /= 1000.0;
*unit = 'G';
}
return value;
}
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+ double v = convert_unit_double((double)value, unit);
+
+ return (unsigned long)v;
+}
+
int unit_number__scnprintf(char *buf, size_t size, u64 n)
{
char unit[4] = "BKMG";
unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+double convert_unit_double(double value, char *unit);
unsigned long convert_unit(unsigned long value, char *unit);
int unit_number__scnprintf(char *buf, size_t size, u64 n);
#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
#define DW_EH_PE_aligned 0x50 /* aligned pointer */
-/* Flags intentionaly not handled, since they're not needed:
+/* Flags intentionally not handled, since they're not needed:
* #define DW_EH_PE_indirect 0x80
* #define DW_EH_PE_uleb128 0x01
* #define DW_EH_PE_udata2 0x02