perf stat: Use affinity for closing file descriptors
author Andi Kleen <ak@linux.intel.com>
Thu, 21 Nov 2019 00:15:17 +0000 (16:15 -0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 29 Nov 2019 15:20:45 +0000 (12:20 -0300)
Closing a perf fd can also trigger an IPI to the target CPU.

Use the same affinity technique that we already use for reading/enabling
events when closing them, to optimize the CPU transitions.

Before, on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
   32.56    3.085463          50     61483           close

  After:

   10.54    0.735704          11     61485           close

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-8-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/evlist.c

index dae6e84..2e8d38a 100644 (file)
@@ -18,6 +18,7 @@
 #include "debug.h"
 #include "units.h"
 #include <internal/lib.h> // page_size
+#include "affinity.h"
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
@@ -1169,9 +1170,35 @@ void perf_evlist__set_selected(struct evlist *evlist,
 void evlist__close(struct evlist *evlist)
 {
        struct evsel *evsel;
+       struct affinity affinity;
+       int cpu, i;
 
-       evlist__for_each_entry_reverse(evlist, evsel)
-               evsel__close(evsel);
+       /*
+        * With perf record core.cpus is usually NULL.
+        * Use the old method to handle this for now.
+        */
+       if (!evlist->core.cpus) {
+               evlist__for_each_entry_reverse(evlist, evsel)
+                       evsel__close(evsel);
+               return;
+       }
+
+       if (affinity__setup(&affinity) < 0)
+               return;
+       evlist__for_each_cpu(evlist, i, cpu) {
+               affinity__set(&affinity, cpu);
+
+               evlist__for_each_entry_reverse(evlist, evsel) {
+                       if (evsel__cpu_iter_skip(evsel, cpu))
+                           continue;
+                       perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
+               }
+       }
+       affinity__cleanup(&affinity);
+       evlist__for_each_entry_reverse(evlist, evsel) {
+               perf_evsel__free_fd(&evsel->core);
+               perf_evsel__free_id(&evsel->core);
+       }
 }
 
 static int perf_evlist__create_syswide_maps(struct evlist *evlist)