perf tools: Support data block and addr block
author Kan Liang <kan.liang@linux.intel.com>
Tue, 2 Feb 2021 20:09:07 +0000 (12:09 -0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 8 Feb 2021 19:25:00 +0000 (16:25 -0300)
Two new data source fields, which indicate the reasons a load
instruction was blocked, are introduced on the Intel Sapphire Rapids
server. The fields can be used for memory profiling.

Add a new sort function, SORT_MEM_BLOCKED, for the two fields.

For previous platforms, or when the block reason is unknown, print "N/A"
for the block reason.

Add blocked as a default mem sort key for perf report and perf mem
report.

Committer testing:

So on machines without this capability we get "N/A" filling the new "Blocked"
column:

  $ perf mem record ls
  arch     certs  CREDITS  Documentation  include  ipc     Kconfig  lib       MAINTAINERS  mm   samples  security  usr    block
  COPYING  crypto  drivers  fs             init     Kbuild  kernel   LICENSES  Makefile     net  README   scripts   sound  tools
  virt
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.008 MB perf.data (17 samples) ]
  $
  $ perf mem report --stdio
  # To display the perf.data header info, please use --header/--header-only options.
  #
  # Total Lost Samples: 0
  #
  # Samples: 6  of event 'cpu/mem-loads,ldlat=30/Pu'
  # Total weight : 1381
  # Sort order   : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked
  #
  # Overhead  Samples  Local Weight  Memory access         Symbol                   Shared Object  Data Symbol             Data Object   Snoop  TLB access    Locked  Blocked
  # ........  .......  ............  ....................  .......................  .............  ......................  ............  .....  ............  ......  .......
  #
      32.87%        1  454           Local RAM or RAM hit  [.] _dl_relocate_object  ld-2.31.so     [.] 0x00007fe91cef3078  libc-2.31.so  Hit    L1 or L2 hit  No       N/A
      25.56%        1  353           LFB or LFB hit        [.] strcmp               ld-2.31.so     [.] 0x00005586973855ca  ls            None   L1 or L2 hit  No       N/A
      22.59%        1  312           LFB or LFB hit        [.] _dl_cache_libcmp     ld-2.31.so     [.] 0x00007fe91d0e3b18  ld.so.cache   None   L1 or L2 hit  No       N/A
       8.47%        1  117           LFB or LFB hit        [.] _dl_relocate_object  ld-2.31.so     [.] 0x00007fe91ceee570  libc-2.31.so  None   L1 or L2 hit  No       N/A
       6.88%        1  95            LFB or LFB hit        [.] _dl_relocate_object  ld-2.31.so     [.] 0x00007fe91ceed490  libc-2.31.so  None   L1 or L2 hit  No       N/A
       3.62%        1  50            LFB or LFB hit        [.] _dl_cache_libcmp     ld-2.31.so     [.] 0x00007fe91d0ebe60  ld.so.cache   None   L1 or L2 hit  No       N/A

  # Samples: 11  of event 'cpu/mem-stores/Pu'
  # Total weight : 11
  # Sort order   : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked
  #
  # Overhead  Samples  Local Weight  Memory access  Symbol                   Shared Object  Data Symbol             Data Object  Snoop  TLB access  Locked  Blocked
  # ........  .......  ............  .............  .......................  .............  ......................  ...........  .....  ..........  ......  .......
  #
       9.09%        1  0             L1 hit         [.] __strcoll_l          libc-2.31.so   [.] 0x00007fffe5648fc8  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] _dl_lookup_symbol_x  ld-2.31.so     [.] 0x00007fffe56490b8  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] _dl_name_match_p     ld-2.31.so     [.] 0x00007fffe56487d8  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] _dl_start            ld-2.31.so     [.] start_time+0x0      ld-2.31.so   N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] _dl_sysdep_start     ld-2.31.so     [.] 0x00007fffe56494b8  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] do_lookup_x          ld-2.31.so     [.] 0x00007fffe5648ff8  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] do_lookup_x          ld-2.31.so     [.] 0x00007fffe5649064  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 hit         [.] do_lookup_x          ld-2.31.so     [.] 0x00007fffe5649130  [stack]      N/A    N/A         N/A      N/A
       9.09%        1  0             L1 miss        [.] _dl_start            ld-2.31.so     [.] _rtld_global+0xaf8  ld-2.31.so   N/A    N/A         N/A      N/A
       9.09%        1  0             L1 miss        [.] _dl_start            ld-2.31.so     [.] _rtld_global+0xc28  ld-2.31.so   N/A    N/A         N/A      N/A
       9.09%        1  0             L1 miss        [.] _dl_start            ld-2.31.so     [.] 0x00007fffe56495b8  [stack]      N/A    N/A         N/A      N/A

  # (Tip: Show user configuration overrides: perf config --user --list)
  $

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/1612296553-21962-4-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-report.txt
tools/perf/builtin-mem.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h
tools/perf/util/sort.c
tools/perf/util/sort.h

index e440458..b9686a1 100644 (file)
@@ -140,7 +140,7 @@ OPTIONS
 
        If the --mem-mode option is used, the following sort keys are also available
        (incompatible with --branch-stack):
-       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline, blocked.
 
        - symbol_daddr: name of data symbol being executed on at the time of sample
        - dso_daddr: name of library or module containing the data being executed
@@ -152,9 +152,10 @@ OPTIONS
        - dcacheline: the cacheline the data address is on at the time of the sample
        - phys_daddr: physical address of data being executed on at the time of sample
        - data_page_size: the data page size of data being executed on at the time of sample
+       - blocked: reason of blocked load access for the data at the time of the sample
 
        And the default sort keys are changed to local_weight, mem, sym, dso,
-       symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+       symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, see '--mem-mode'.
 
        If the data file has tracepoint event(s), following (dynamic) sort keys
        are also available:
index f3aac85..cdd2b9f 100644 (file)
@@ -309,7 +309,7 @@ static char *get_sort_order(struct perf_mem *mem)
                             "dso_daddr,tlb,locked");
        } else if (has_extra_options) {
                strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr,"
-                            "dso_daddr,snoop,tlb,locked");
+                            "dso_daddr,snoop,tlb,locked,blocked");
        } else
                return NULL;
 
index 6d50379..4038b08 100644 (file)
@@ -208,6 +208,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
        hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
        hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+       hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
        if (symbol_conf.nanosecs)
                hists__new_col_len(hists, HISTC_TIME, 16);
        else
index 3611085..3788391 100644 (file)
@@ -72,6 +72,7 @@ enum hist_column {
        HISTC_SYM_SIZE,
        HISTC_DSO_SIZE,
        HISTC_SYMBOL_IPC,
+       HISTC_MEM_BLOCKED,
        HISTC_NR_COLS, /* Last entry */
 };
 
index 3edfb88..890f638 100644 (file)
@@ -337,6 +337,29 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
        return l;
 }
 
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* write the block reason(s) of mem_info into out; returns the sum of the scnprintf() results */
+{
+       size_t l = 0;
+       u64 mask = PERF_MEM_BLK_NA; /* kept as-is when mem_info is NULL */
+
+       sz -= 1; /* -1 for null termination; NOTE(review): sz is size_t, so sz == 0 would wrap here */
+       out[0] = '\0';
+
+       if (mem_info)
+               mask = mem_info->data_src.mem_blk;
+
+       if (!mask || (mask & PERF_MEM_BLK_NA)) { /* no bit set, or the NA bit set: emit " N/A" only */
+               l += scnprintf(out + l, sz - l, " N/A");
+               return l;
+       }
+       if (mask & PERF_MEM_BLK_DATA) /* the two checks are independent, so both strings may be appended */
+               l += scnprintf(out + l, sz - l, " Data");
+       if (mask & PERF_MEM_BLK_ADDR)
+               l += scnprintf(out + l, sz - l, " Addr");
+
+       return l;
+}
+
 int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 {
        int i = 0;
@@ -348,6 +371,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in
        i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
        i += scnprintf(out + i, sz - i, "|LCK ");
        i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+       i += scnprintf(out + i, sz - i, "|BLK ");
+       i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info);
 
        return i;
 }
index 045a507..5ddf447 100644 (file)
@@ -49,6 +49,7 @@ int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 
 int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
 
index c00934c..e29a24b 100644 (file)
@@ -36,7 +36,7 @@ const char    default_parent_pattern[] = "^sys_|^do_page_fault";
 const char     *parent_pattern = default_parent_pattern;
 const char     *default_sort_order = "comm,dso,symbol";
 const char     default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
-const char     default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+const char     default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked";
 const char     default_top_sort_order[] = "dso,symbol";
 const char     default_diff_sort_order[] = "dso,symbol";
 const char     default_tracepoint_sort_order[] = "trace";
@@ -1421,6 +1421,41 @@ struct sort_entry sort_mem_dcacheline = {
        .se_width_idx   = HISTC_MEM_DCACHELINE,
 };
 
+static int64_t
+sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right) /* compare two hist entries by the mem_blk field of their data source */
+{
+       union perf_mem_data_src data_src_l;
+       union perf_mem_data_src data_src_r;
+
+       if (left->mem_info)
+               data_src_l = left->mem_info->data_src;
+       else
+               data_src_l.mem_blk = PERF_MEM_BLK_NA; /* no mem_info: compare as N/A; only mem_blk is read below */
+
+       if (right->mem_info)
+               data_src_r = right->mem_info->data_src;
+       else
+               data_src_r.mem_blk = PERF_MEM_BLK_NA; /* same fallback for the right-hand entry */
+
+       return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk); /* right minus left; zero when both fields are equal */
+}
+
+static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf,
+                                       size_t size, unsigned int width) /* format he's block reason into bf via repsep_snprintf() */
+{
+       char out[16]; /* scratch buffer filled by perf_mem__blk_scnprintf() */
+
+       perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info);
+       return repsep_snprintf(bf, size, "%.*s", width, out); /* width is passed as the "%.*s" precision */
+}
+
+struct sort_entry sort_mem_blocked = { /* sort entry for the "Blocked" column */
+       .se_header      = "Blocked",
+       .se_cmp         = sort__blocked_cmp, /* compares the mem_blk fields */
+       .se_snprintf    = hist_entry__blocked_snprintf, /* prints " N/A", " Data" and/or " Addr" */
+       .se_width_idx   = HISTC_MEM_BLOCKED,
+};
+
 static int64_t
 sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1796,6 +1831,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
        DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
        DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
        DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
+       DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked),
 };
 
 #undef DIM
index cab4172..984e545 100644 (file)
@@ -258,6 +258,7 @@ enum sort_type {
        SORT_MEM_IADDR_SYMBOL,
        SORT_MEM_PHYS_DADDR,
        SORT_MEM_DATA_PAGE_SIZE,
+       SORT_MEM_BLOCKED,
 };
 
 /*