This patch enables perf report to sort by processor socket:
  $ perf report --stdio --sort socket,comm,dso,symbol
  # To display the perf.data header info, please use --header/--header-only options.
  #
  # Total Lost Samples: 0
  #
  # Samples: 686  of event 'cycles'
  # Event count (approx.): 349215462
  #
  # Overhead SOCKET Command Shared Object    Symbol
  # ........ ...... ....... ................ ............................
  #
    97.05%    000   test    test             [.] plusB_c
     0.98%    000   test    test             [.] plusA_c
     0.93%    001   perf    [kernel.vmlinux] [k] smp_call_function_single
     0.19%    001   perf    [kernel.vmlinux] [k] page_fault
     0.19%    001   swapper [kernel.vmlinux] [k] pm_qos_request
     0.16%    000   test    [kernel.vmlinux] [k] add_mm_counter_fast
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1441377946-44429-2-git-send-email-kan.liang@intel.com
[ Fix col calc, un-allcapsify col header & read the topology when not using perf.data ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 --sort=::
        Sort histogram entries by given key(s) - multiple keys can be specified
        in CSV format.  Following sort keys are available:
-       pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
+       pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
 
        Each key has following meaning:
 
        - parent: name of function matched to the parent regex filter. Unmatched
        entries are displayed as "[other]".
        - cpu: cpu number the task ran at the time of sample
+       - socket: processor socket number the task ran at the time of sample
        - srcline: filename and line number executed at the time of sample.  The
        DWARF debugging info must be provided.
        - srcfile: file name of the source file of the same. Requires dwarf
 
 
        machine__synthesize_threads(&top->session->machines.host, &opts->target,
                                    top->evlist->threads, false, opts->proc_map_timeout);
+
+       if (sort__has_socket) {
+               ret = perf_env__read_cpu_topology_map(&perf_env);
+               if (ret < 0)
+                       goto out_err_cpu_topo;
+       }
+
        ret = perf_top__start_counters(top);
        if (ret)
                goto out_delete;
        top->session = NULL;
 
        return ret;
+
+out_err_cpu_topo: {
+       char errbuf[BUFSIZ];
+       const char *err = strerror_r(-ret, errbuf, sizeof(errbuf));
+
+       ui__error("Could not read the CPU topology map: %s\n", err);
+       goto out_delete;
+}
 }
 
 static int
 
        }
 
        hists__new_col_len(hists, HISTC_CPU, 3);
+       hists__new_col_len(hists, HISTC_SOCKET, 6);
        hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
        hists__new_col_len(hists, HISTC_MEM_TLB, 22);
        hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
 
        HISTC_COMM,
        HISTC_PARENT,
        HISTC_CPU,
+       HISTC_SOCKET,
        HISTC_SRCLINE,
        HISTC_SRCFILE,
        HISTC_MISPREDICT,
 
 int            sort__has_parent = 0;
 int            sort__has_sym = 0;
 int            sort__has_dso = 0;
+int            sort__has_socket = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
 
 
        .se_width_idx   = HISTC_CPU,
 };
 
+/* --sort socket */
+
+/*
+ * Comparison callback for the "socket" sort key: returns the difference
+ * right->socket - left->socket, so the result is zero when both entries
+ * carry the same socket value.
+ */
+static int64_t
+sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return right->socket - left->socket;
+}
+
+/*
+ * Print callback for the "socket" sort key: formats he->socket into bf
+ * through repsep_snprintf(bf, size, ...) and returns that call's result.
+ */
+static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
+                                   size_t size, unsigned int width)
+{
+       /*
+        * "%*.*d" takes the field width from width and the precision from
+        * width - 3. Assuming repsep_snprintf follows printf formatting, the
+        * precision is the minimum digit count, so short socket numbers are
+        * zero-padded (e.g. "000" when width is 6).
+        * NOTE(review): width is unsigned, so width - 3 wraps when width < 3;
+        * confirm callers never pass a smaller width.
+        */
+       return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
+}
+
+/*
+ * Sort entry describing the "socket" sort key: column header "Socket",
+ * sort__socket_cmp() as the comparison callback,
+ * hist_entry__socket_snprintf() as the print callback, and HISTC_SOCKET
+ * as the index used for this column's width.
+ */
+struct sort_entry sort_socket = {
+       .se_header      = "Socket",
+       .se_cmp         = sort__socket_cmp,
+       .se_snprintf    = hist_entry__socket_snprintf,
+       .se_width_idx   = HISTC_SOCKET,
+};
+
 /* sort keys for branch stacks */
 
 static int64_t
        DIM(SORT_SYM, "symbol", sort_sym),
        DIM(SORT_PARENT, "parent", sort_parent),
        DIM(SORT_CPU, "cpu", sort_cpu),
+       DIM(SORT_SOCKET, "socket", sort_socket),
        DIM(SORT_SRCLINE, "srcline", sort_srcline),
        DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
        DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
 
                } else if (sd->entry == &sort_dso) {
                        sort__has_dso = 1;
+               } else if (sd->entry == &sort_socket) {
+                       sort__has_socket = 1;
                }
 
                return __sort_dimension__add(sd);
 
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
+extern int sort__has_socket;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
        SORT_SYM,
        SORT_PARENT,
        SORT_CPU,
+       SORT_SOCKET,
        SORT_SRCLINE,
        SORT_SRCFILE,
        SORT_LOCAL_WEIGHT,