perf bench: Add benchmark for evlist open/close operations
author Riccardo Mancini <rickyman7@gmail.com>
Mon, 9 Aug 2021 20:11:02 +0000 (22:11 +0200)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 10 Aug 2021 14:32:37 +0000 (11:32 -0300)
This new benchmark measures the total time taken to open, mmap,
enable, disable, munmap and close an evlist (the time taken by new,
create_maps, config and delete is not counted).

The evlist can be configured as in perf-record using the
-a,-C,-e,-u,--per-thread,-t,-p options.

The events can be duplicated in the evlist to quickly test performance
with many events using the -n option.

Furthermore, the number of iterations used to calculate the
statistics is also customizable, using the -i option.

Examples:
- Open one dummy event system-wide:

  $ sudo ./perf bench internals evlist-open-close
    Number of cpus:       4
    Number of threads:    1
    Number of events:     1 (4 fds)
    Number of iterations: 100
    Average open-close took: 613.870 usec (+- 32.852 usec)

- Open the group '{cs,cycles}' on CPU 0

  $ sudo ./perf bench internals evlist-open-close -e '{cs,cycles}' -C 0
    Number of cpus:       1
    Number of threads:    1
    Number of events:     2 (2 fds)
    Number of iterations: 100
    Average open-close took: 8503.220 usec (+- 252.652 usec)

- Open 10 'cycles' events for user 0, calculate average over 100 runs

  $ sudo ./perf bench internals evlist-open-close -e cycles -n 10 -u 0 -i 100
    Number of cpus:       4
    Number of threads:    328
    Number of events:     10 (13120 fds)
    Number of iterations: 100
    Average open-close took: 180043.140 usec (+- 2295.889 usec)

Committer notes:

Replaced a deprecated bzero() call with zeroing via a designated initializer.

Added some missing evlist allocation checks, one noted by Riccardo on
the mailing list.

Minor cosmetic changes (sent in private).

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210809201101.277594-1-rickyman7@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/bench/Build
tools/perf/bench/bench.h
tools/perf/bench/evlist-open-close.c [new file with mode: 0644]
tools/perf/builtin-bench.c

index e43f469..61d45fc 100644 (file)
@@ -13,6 +13,7 @@ perf-y += synthesize.o
 perf-y += kallsyms-parse.o
 perf-y += find-bit-bench.o
 perf-y += inject-buildid.o
+perf-y += evlist-open-close.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
index eac36af..b3480bc 100644 (file)
@@ -48,6 +48,7 @@ int bench_epoll_ctl(int argc, const char **argv);
 int bench_synthesize(int argc, const char **argv);
 int bench_kallsyms_parse(int argc, const char **argv);
 int bench_inject_build_id(int argc, const char **argv);
+int bench_evlist_open_close(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
 #define BENCH_FORMAT_DEFAULT           0
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
new file mode 100644 (file)
index 0000000..674cb14
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/stat.h"
+#include "../util/evlist.h"
+#include "../util/evsel.h"
+#include "../util/strbuf.h"
+#include "../util/record.h"
+#include "../util/parse-events.h"
+#include "internal/threadmap.h"
+#include "internal/cpumap.h"
+#include <linux/perf_event.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <linux/string.h>
+#include <subcmd/parse-options.h>
+
+/* Value assigned to opts.mmap_flush below. */
+#define MMAP_FLUSH_DEFAULT 1
+
+/* Number of timed open/close cycles; set with -i/--iterations. */
+static int iterations = 100;
+/* How many times the event string is repeated; set with -n/--nr-events. */
+static int nr_events = 1;
+/* Event selector to open; set with -e/--event. */
+static const char *event_string = "dummy";
+
+/*
+ * Record options used for every evlist in this benchmark: handed to
+ * evlist__config(), with .target also passed to evlist__create_maps() and
+ * filled in from the -a/-C/-p/-t/-u/--per-thread command-line options.
+ * NOTE(review): the UINT_MAX/ULLONG_MAX initializers look like "not set by
+ * the user" sentinels mirroring perf record -- confirm.
+ */
+static struct record_opts opts = {
+       .sample_time         = true,
+       .mmap_pages          = UINT_MAX,
+       .user_freq           = UINT_MAX,
+       .user_interval       = ULLONG_MAX,
+       .freq                = 4000,
+       .target              = {
+               .uses_mmap   = true,
+               .default_per_cpu = true,
+       },
+       .mmap_flush          = MMAP_FLUSH_DEFAULT,
+       .nr_threads_synthesize = 1,
+       .ctl_fd              = -1,
+       .ctl_fd_ack          = -1,
+};
+
+/*
+ * Command-line options of the benchmark. -e, -n and -i write the file-scope
+ * event_string, nr_events and iterations; the remaining options write
+ * directly into opts.target.
+ */
+static const struct option options[] = {
+       OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"),
+       OPT_INTEGER('n', "nr-events", &nr_events,
+                    "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"),
+       OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"),
+       OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"),
+       OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"),
+       OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"),
+       OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"),
+       OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"),
+       OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"),
+       OPT_END()
+};
+
+/* Usage lines given to parse_options() and usage_with_options(). */
+static const char *const bench_usage[] = {
+       "perf bench internals evlist-open-close <options>",
+       NULL
+};
+
+/*
+ * Sum, over every evsel in @evlist, the size of its thread map multiplied by
+ * the size of its cpu map. The caller in this file reports the result as the
+ * number of fds.
+ */
+static int evlist__count_evsel_fds(struct evlist *evlist)
+{
+       struct evsel *pos;
+       int total = 0;
+
+       evlist__for_each_entry(evlist, pos) {
+               int nr_threads = pos->core.threads->nr;
+               int nr_cpus = pos->core.cpus->nr;
+
+               total += nr_threads * nr_cpus;
+       }
+
+       return total;
+}
+
+/*
+ * Build a fresh evlist for one benchmark pass: parse @evstr into events,
+ * create the thread/cpu maps from the file-scope opts.target and apply the
+ * record options with evlist__config().
+ *
+ * Returns the new evlist, which the caller releases with evlist__delete(),
+ * or NULL after printing an error message.
+ */
+static struct evlist *bench__create_evlist(char *evstr)
+{
+       struct parse_events_error err = { .idx = 0, };
+       struct evlist *evlist = evlist__new();
+
+       if (!evlist) {
+               pr_err("Not enough memory to create evlist\n");
+               return NULL;
+       }
+
+       /* Any non-zero result is a parse failure; details are in err. */
+       if (parse_events(evlist, evstr, &err)) {
+               parse_events_print_error(&err, evstr);
+               pr_err("Run 'perf list' for a list of valid events\n");
+               goto out_delete_evlist;
+       }
+
+       if (evlist__create_maps(evlist, &opts.target) < 0) {
+               pr_err("Not enough memory to create thread/cpu maps\n");
+               goto out_delete_evlist;
+       }
+
+       evlist__config(evlist, &opts, NULL);
+
+       return evlist;
+
+out_delete_evlist:
+       evlist__delete(evlist);
+       return NULL;
+}
+
+/*
+ * The timed section of the benchmark: open, mmap, enable, disable, munmap
+ * and close @evlist, in that order.
+ *
+ * Returns 0 on success, or the negative value returned by evlist__open() or
+ * evlist__mmap() after printing the errno string. Nothing is undone on the
+ * failure paths; the caller in this file deletes the evlist in that case.
+ */
+static int bench__do_evlist_open_close(struct evlist *evlist)
+{
+       char errbuf[STRERR_BUFSIZE];
+       int ret;
+
+       ret = evlist__open(evlist);
+       if (ret < 0) {
+               pr_err("evlist__open: %s\n", str_error_r(errno, errbuf, sizeof(errbuf)));
+               goto out;
+       }
+
+       ret = evlist__mmap(evlist, opts.mmap_pages);
+       if (ret < 0) {
+               pr_err("evlist__mmap: %s\n", str_error_r(errno, errbuf, sizeof(errbuf)));
+               goto out;
+       }
+
+       evlist__enable(evlist);
+       evlist__disable(evlist);
+       evlist__munmap(evlist);
+       evlist__close(evlist);
+       ret = 0;
+out:
+       return ret;
+}
+
+/*
+ * Benchmark driver: time `iterations` open/close cycles of an evlist built
+ * from @evstr, then print the average and standard deviation in usec.
+ *
+ * A fresh evlist is created before and deleted after every timed cycle, so
+ * only bench__do_evlist_open_close() runs between the two gettimeofday()
+ * calls.
+ *
+ * Returns 0 on success, -ENOMEM if an evlist cannot be created, or the error
+ * returned by bench__do_evlist_open_close().
+ */
+static int bench_evlist_open_close__run(char *evstr)
+{
+       // used to print statistics only
+       struct evlist *evlist = bench__create_evlist(evstr);
+       double time_average, time_stddev;
+       struct timeval start, end, diff;
+       struct stats time_stats;
+       u64 runtime_us;
+       int i, err;
+
+       if (!evlist)
+               return -ENOMEM;
+
+       init_stats(&time_stats);
+
+       printf("  Number of cpus:\t%d\n", evlist->core.cpus->nr);
+       printf("  Number of threads:\t%d\n", evlist->core.threads->nr);
+       printf("  Number of events:\t%d (%d fds)\n",
+               evlist->core.nr_entries, evlist__count_evsel_fds(evlist));
+       printf("  Number of iterations:\t%d\n", iterations);
+
+       evlist__delete(evlist);
+
+       for (i = 0; i < iterations; i++) {
+               pr_debug("Started iteration %d\n", i);
+               evlist = bench__create_evlist(evstr);
+               if (!evlist)
+                       return -ENOMEM;
+
+               gettimeofday(&start, NULL);
+               err = bench__do_evlist_open_close(evlist);
+               if (err) {
+                       evlist__delete(evlist);
+                       return err;
+               }
+
+               gettimeofday(&end, NULL);
+               timersub(&end, &start, &diff);
+               /* Widen before multiplying so a narrow tv_sec cannot overflow. */
+               runtime_us = (u64)diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+               update_stats(&time_stats, runtime_us);
+
+               evlist__delete(evlist);
+               /* u64 is not 'long' everywhere; print it as unsigned long long. */
+               pr_debug("Iteration %d took:\t%lluus\n", i, (unsigned long long)runtime_us);
+       }
+
+       time_average = avg_stats(&time_stats);
+       time_stddev = stddev_stats(&time_stats);
+       printf("  Average open-close took: %.3f usec (+- %.3f usec)\n", time_average, time_stddev);
+
+       return 0;
+}
+
+/*
+ * Build the string "evstr,evstr,...,evstr" containing @n copies of @evstr.
+ *
+ * @evstr: event selector to repeat.
+ * @n:     number of copies, must be at least 1.
+ *
+ * Returns a heap-allocated string the caller must free(), or NULL after
+ * printing an error (invalid @n, size overflow or a strbuf failure).
+ */
+static char *bench__repeat_event_string(const char *evstr, int n)
+{
+       char sbuf[STRERR_BUFSIZE];
+       struct strbuf buf;
+       size_t str_size = strlen(evstr);
+       int i, err;
+
+       if (n < 1) {
+               pr_err("Invalid number of events: %d (must be at least 1)\n", n);
+               return NULL;
+       }
+
+       /* Every copy needs str_size bytes plus one separator/terminator. */
+       if (str_size >= (size_t)INT_MAX / n) {
+               pr_err("Event string too long to be repeated %d times\n", n);
+               return NULL;
+       }
+
+       /*
+        * NOTE(review): the perf strbuf helpers appear to return negative
+        * errno values; abs() keeps the printed message right either way.
+        */
+       err = strbuf_init(&buf, (str_size + 1) * n);
+       if (err) {
+               pr_err("strbuf_init: %s\n", str_error_r(abs(err), sbuf, sizeof(sbuf)));
+               goto out_error;
+       }
+
+       for (i = 0; i < n; i++) {
+               err = strbuf_add(&buf, evstr, str_size);
+               if (err) {
+                       pr_err("strbuf_add: %s\n", str_error_r(abs(err), sbuf, sizeof(sbuf)));
+                       goto out_error;
+               }
+
+               /* Comma between copies, explicit terminator after the last. */
+               err = strbuf_addch(&buf, i == n-1 ? '\0' : ',');
+               if (err) {
+                       pr_err("strbuf_addch: %s\n", str_error_r(abs(err), sbuf, sizeof(sbuf)));
+                       goto out_error;
+               }
+       }
+
+       return strbuf_detach(&buf, NULL);
+
+out_error:
+       strbuf_release(&buf);
+       return NULL;
+}
+
+
+/*
+ * Entry point of 'perf bench internals evlist-open-close'.
+ *
+ * Parses the command line, validates the target, repeats the event string
+ * nr_events times and runs the benchmark on the result.
+ *
+ * Returns 0 on success or a non-zero error value on failure; exits with
+ * the usage message if unexpected positional arguments are left over.
+ */
+int bench_evlist_open_close(int argc, const char **argv)
+{
+       char *evstr, errbuf[BUFSIZ];
+       int err;
+
+       argc = parse_options(argc, argv, options, bench_usage, 0);
+       if (argc) {
+               usage_with_options(bench_usage, options);
+               exit(EXIT_FAILURE);
+       }
+
+       err = target__validate(&opts.target);
+       if (err) {
+               target__strerror(&opts.target, err, errbuf, sizeof(errbuf));
+               pr_err("%s\n", errbuf);
+               goto out;
+       }
+
+       err = target__parse_uid(&opts.target);
+       if (err) {
+               target__strerror(&opts.target, err, errbuf, sizeof(errbuf));
+               /* Terminate the line, as the target__validate() path does. */
+               pr_err("%s\n", errbuf);
+               goto out;
+       }
+
+       /* Enable ignoring missing threads when -u/-p option is defined. */
+       opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid;
+
+       evstr = bench__repeat_event_string(event_string, nr_events);
+       if (!evstr) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       err = bench_evlist_open_close__run(evstr);
+
+       free(evstr);
+out:
+       return err;
+}
index 62a7b74..d089516 100644 (file)
@@ -88,6 +88,7 @@ static struct bench internals_benchmarks[] = {
        { "synthesize", "Benchmark perf event synthesis",       bench_synthesize        },
        { "kallsyms-parse", "Benchmark kallsyms parsing",       bench_kallsyms_parse    },
        { "inject-build-id", "Benchmark build-id injection",    bench_inject_build_id   },
+       { "evlist-open-close", "Benchmark evlist open and close",       bench_evlist_open_close },
        { NULL,         NULL,                                   NULL                    }
 };