1 // SPDX-License-Identifier: GPL-2.0
11 #include <linux/kernel.h>
12 #include <linux/time64.h>
13 #include <linux/list.h>
14 #include <linux/err.h>
15 #include <internal/lib.h>
16 #include <subcmd/parse-options.h>
19 #include "util/data.h"
20 #include "util/stat.h"
21 #include "util/debug.h"
22 #include "util/event.h"
23 #include "util/symbol.h"
24 #include "util/session.h"
25 #include "util/build-id.h"
26 #include "util/synthetic-events.h"
/* Value stored in the maj field of every synthesized MMAP2 event. */
28 #define MMAP_DEV_MAJOR 8
/* The DSO table is sized, and filled up to, nr_mmaps times this ratio. */
29 #define DSO_MMAP_RATIO 4
/* Benchmark knobs; each one can be overridden through the options[] table. */
31 static unsigned int iterations = 100;
32 static unsigned int nr_mmaps = 100;
33 static unsigned int nr_samples = 100; /* samples per mmap */
/*
 * Sample layout shared by the synthesized events; both are assigned in
 * do_inject_loops(). bench_id_hdr_size is the number of bytes added to the
 * size of the FORK and MMAP2 events.
 */
35 static u64 bench_sample_type;
36 static u16 bench_id_hdr_size;
46 struct list_head list;
/* Table of candidate DSOs; allocated with calloc() in collect_dso(). */
52 static struct bench_dso *dsos;
/* Entry point of the inject command; called from the child in setup_injection(). */
54 extern int cmd_inject(int argc, const char *argv[]);
/*
 * Command-line options of this benchmark:
 *   -i / --iterations  stores into iterations
 *   -m / --nr-mmaps    stores into nr_mmaps
 *   -n / --nr-samples  stores into nr_samples
 *   -v / --verbose     increments verbose
 */
56 static const struct option options[] = {
57 OPT_UINTEGER('i', "iterations", &iterations,
58 "Number of iterations used to compute average (default: 100)"),
59 OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps,
60 "Number of mmap events for each iteration (default: 100)"),
61 OPT_UINTEGER('n', "nr-samples", &nr_samples,
62 "Number of sample events per mmap event (default: 100)"),
63 OPT_INCR('v', "verbose", &verbose,
64 "be more verbose (show iteration count, DSO name, etc)"),
/* Usage text handed to parse_options() and usage_with_options(). */
68 static const char *const bench_usage[] = {
69 "perf bench internals inject-build-id <options>",
74 * Helper for collect_dso that adds the given file as a dso to dso_list
75 * if it contains a build-id. Stops after collecting 4 times more than
76 * we need (for MMAP2 events).
/*
 * nftw() callback (registered by collect_dso()) that works on the next free
 * slot of the dsos table, i.e. dsos[nr_dsos].
 *
 * The entry type is tested against FTW_D and FTW_SL, and the path is probed
 * with filename__read_build_id(). The name stored in the slot is the string
 * returned by realpath(fpath, NULL), which realpath() allocates itself.
 *
 * NOTE(review): the return statements and the nr_dsos update are not visible
 * in this excerpt. The final comparison presumably ends the walk once
 * DSO_MMAP_RATIO * nr_mmaps entries exist - confirm against the full file.
 */
78 static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
79 int typeflag, struct FTW *ftwbuf __maybe_unused)
81 struct bench_dso *dso = &dsos[nr_dsos];
84 if (typeflag == FTW_D || typeflag == FTW_SL)
87 if (filename__read_build_id(fpath, &bid) < 0)
/* realpath() with a NULL buffer returns a newly allocated string, or NULL on error */
90 dso->name = realpath(fpath, NULL);
91 if (dso->name == NULL)
95 pr_debug2(" Adding DSO: %s\n", fpath);
97 /* stop if we collected enough DSOs */
98 if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps)
/*
 * Allocates the dsos table with room for nr_mmaps * DSO_MMAP_RATIO entries
 * (zero-filled by calloc) and populates it by walking "/usr/lib/" with
 * nftw(), using add_dso() as the per-entry callback.
 *
 * FTW_PHYS tells nftw() not to follow symbolic links; 10 is the maximum
 * number of file descriptors nftw() may hold open during the walk.
 */
104 static void collect_dso(void)
106 dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos));
108 printf(" Memory allocation failed\n");
112 if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0)
115 pr_debug(" Collected %d DSOs\n", nr_dsos);
/*
 * Visits each of the nr_dsos collected entries of the dsos table.
 *
 * NOTE(review): the per-entry statements are outside this excerpt; presumably
 * this releases the names allocated by realpath() in add_dso() - confirm.
 */
118 static void release_dso(void)
122 for (i = 0; i < nr_dsos; i++) {
123 struct bench_dso *dso = &dsos[i];
130 /* Fake address used by mmap and sample events */
/*
 * The address is derived only from the DSO's ino field: a base of 0x400000
 * plus 8192 bytes per ino value, so consecutive ino values map to addresses
 * 8192 bytes apart.
 */
131 static u64 dso_map_addr(struct bench_dso *dso)
133 return 0x400000ULL + dso->ino * 8192ULL;
/*
 * Writes one PERF_RECORD_HEADER_ATTR event to the write end of the input
 * pipe (data->input_pipe[1]).
 *
 * The event spans sizeof(event.attr) plus one extra u64; that whole range is
 * zeroed before the fields below are filled in. The attribute describes a
 * software task-clock event with exclude_kernel and sample_id_all set, and
 * takes its sample_type from bench_sample_type.
 *
 * Returns whatever writen() returns.
 */
136 static ssize_t synthesize_attr(struct bench_data *data)
138 union perf_event event;
/* NOTE(review): the trailing u64 is presumably room for one event id, left as zero - confirm */
140 memset(&event, 0, sizeof(event.attr) + sizeof(u64));
142 event.header.type = PERF_RECORD_HEADER_ATTR;
143 event.header.size = sizeof(event.attr) + sizeof(u64);
145 event.attr.attr.type = PERF_TYPE_SOFTWARE;
146 event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK;
147 event.attr.attr.exclude_kernel = 1;
148 event.attr.attr.sample_id_all = 1;
149 event.attr.attr.sample_type = bench_sample_type;
151 return writen(data->input_pipe[1], &event, event.header.size);
/*
 * Writes one PERF_RECORD_FORK event (misc = PERF_RECORD_MISC_FORK_EXEC) to
 * the write end of the input pipe.
 *
 * The event size is sizeof(event.fork) plus bench_id_hdr_size bytes, all of
 * which are zeroed first. Both pid and tid are set to data->pid.
 *
 * Returns whatever writen() returns.
 */
154 static ssize_t synthesize_fork(struct bench_data *data)
156 union perf_event event;
158 memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size);
160 event.header.type = PERF_RECORD_FORK;
161 event.header.misc = PERF_RECORD_MISC_FORK_EXEC;
162 event.header.size = sizeof(event.fork) + bench_id_hdr_size;
166 event.fork.pid = data->pid;
167 event.fork.tid = data->pid;
169 return writen(data->input_pipe[1], &event, event.header.size);
/*
 * Writes one PERF_RECORD_MMAP2 event for the given DSO to the write end of
 * the input pipe, followed by bench_id_hdr_size bytes that carry the
 * timestamp.
 *
 * The total length is the offset of the filename field, plus the DSO name
 * (including its NUL) rounded up to a multiple of 8, plus bench_id_hdr_size.
 * The mapping is reported as 4096 executable bytes at dso_map_addr(dso),
 * with pid and tid both set to data->pid.
 *
 * Two layouts are handled:
 *  - len > sizeof(event.mmap2): the event is written without its trailing
 *    bench_id_hdr_size bytes, then the start of the same buffer is reused as
 *    scratch space for the trailing bytes, which are written separately.
 *  - otherwise: the timestamp is placed inside the event buffer and the
 *    whole event goes out in a single write.
 * In both cases the timestamp occupies the second-to-last u64 slot.
 */
172 static ssize_t synthesize_mmap(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
174 union perf_event event;
175 size_t len = offsetof(struct perf_record_mmap2, filename);
/* the event buffer viewed as an array of u64 slots */
176 u64 *id_hdr_ptr = (void *)&event;
179 len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size;
/* zero no more than the mmap2 record itself */
181 memset(&event, 0, min(len, sizeof(event.mmap2)));
183 event.header.type = PERF_RECORD_MMAP2;
184 event.header.misc = PERF_RECORD_MISC_USER;
185 event.header.size = len;
187 event.mmap2.pid = data->pid;
188 event.mmap2.tid = data->pid;
189 event.mmap2.maj = MMAP_DEV_MAJOR;
190 event.mmap2.ino = dso->ino;
/*
 * NOTE(review): unbounded copy; relies on dso->name (from realpath() in
 * add_dso()) fitting in event.mmap2.filename - confirm the bound.
 */
192 strcpy(event.mmap2.filename, dso->name);
194 event.mmap2.start = dso_map_addr(dso);
195 event.mmap2.len = 4096;
196 event.mmap2.prot = PROT_EXEC;
198 if (len > sizeof(event.mmap2)) {
199 /* write mmap2 event first */
200 if (writen(data->input_pipe[1], &event, len - bench_id_hdr_size) < 0)
202 /* zero-fill sample id header */
203 memset(id_hdr_ptr, 0, bench_id_hdr_size);
204 /* put timestamp in the right position */
205 ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2;
206 id_hdr_ptr[ts_idx] = timestamp;
207 if (writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size) < 0)
/* single-write case: index the second-to-last u64 of the whole event */
213 ts_idx = (len / sizeof(u64)) - 2;
214 id_hdr_ptr[ts_idx] = timestamp;
215 return writen(data->input_pipe[1], &event, len);
/*
 * Writes one PERF_RECORD_SAMPLE event (misc = PERF_RECORD_MISC_USER) to the
 * write end of the input pipe.
 *
 * The sample's ip is the DSO's fake map address from dso_map_addr(). The
 * event size comes from perf_event__sample_event_size() and the payload is
 * filled by perf_event__synthesize_sample(), both driven by
 * bench_sample_type.
 *
 * NOTE(review): the remaining perf_sample initializers (including how the
 * timestamp argument is used) are outside this excerpt.
 *
 * Returns whatever writen() returns.
 */
218 static ssize_t synthesize_sample(struct bench_data *data, struct bench_dso *dso, u64 timestamp)
220 union perf_event event;
221 struct perf_sample sample = {
224 .ip = dso_map_addr(dso),
228 event.header.type = PERF_RECORD_SAMPLE;
229 event.header.misc = PERF_RECORD_MISC_USER;
230 event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0);
232 perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample);
234 return writen(data->input_pipe[1], &event, event.header.size);
/*
 * Writes a header-only PERF_RECORD_FINISHED_ROUND event (size is just
 * sizeof(header)) to the write end of the input pipe.
 *
 * Returns whatever writen() returns.
 */
237 static ssize_t synthesize_flush(struct bench_data *data)
239 struct perf_event_header header = {
240 .size = sizeof(header),
241 .type = PERF_RECORD_FINISHED_ROUND,
244 return writen(data->input_pipe[1], &header, header.size);
/*
 * Thread start routine (started with pthread_create() in setup_injection());
 * arg is the struct bench_data of the current run.
 *
 * Switches the read end of the output pipe to non-blocking mode, reads from
 * it into a local buffer, and closes that read end before finishing.
 *
 * NOTE(review): the loop structure and the handling of the read() result are
 * only partly visible here; errno values EINTR and EAGAIN are singled out
 * from other errors - confirm the exact behavior in the full file.
 */
247 static void *data_reader(void *arg)
249 struct bench_data *data = arg;
/* add O_NONBLOCK on top of the descriptor's current flags */
254 flag = fcntl(data->output_pipe[0], F_GETFL);
255 fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK);
257 /* read out data from child */
259 n = read(data->output_pipe[0], buf, sizeof(buf));
265 if (errno != EINTR && errno != EAGAIN)
271 close(data->output_pipe[0]);
/*
 * Prepares one benchmark run: creates the pipes, sets up the child that runs
 * cmd_inject(), and starts the reader thread in the parent.
 *
 * Three pipes are created: ready_pipe (child-to-parent readiness signal),
 * data->input_pipe (parent writes events, child reads them as stdin) and
 * data->output_pipe (child's stdout, drained by data_reader()).
 *
 * Child branch (data->pid == 0): closes the pipe ends it does not use,
 * redirects stdin/stdout to the pipes and stderr to /dev/null, builds the
 * argument vector starting with "inject" "-b", closes its ready_pipe[1] as
 * the readiness signal, and calls cmd_inject().
 *
 * Parent branch: starts data_reader() in a thread, closes the pipe ends it
 * does not use, and blocks in read() on ready_pipe[0] until the child has
 * signalled.
 *
 * NOTE(review): the call that assigns data->pid and the condition guarding
 * "--buildid-all" are outside this excerpt; presumably fork() and
 * build_id_all respectively - confirm.
 */
275 static int setup_injection(struct bench_data *data, bool build_id_all)
281 if (pipe(ready_pipe) < 0)
284 if (pipe(data->input_pipe) < 0)
287 if (pipe(data->output_pipe) < 0)
294 if (data->pid == 0) {
295 const char **inject_argv;
/* drop the ends that belong to the parent */
298 close(data->input_pipe[1]);
299 close(data->output_pipe[0]);
300 close(ready_pipe[0]);
/* events arrive on stdin, results leave on stdout */
302 dup2(data->input_pipe[0], STDIN_FILENO);
303 close(data->input_pipe[0]);
304 dup2(data->output_pipe[1], STDOUT_FILENO);
305 close(data->output_pipe[1]);
307 dev_null_fd = open("/dev/null", O_WRONLY);
311 dup2(dev_null_fd, STDERR_FILENO);
/* one extra zeroed slot keeps the vector NULL-terminated */
316 inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv));
317 if (inject_argv == NULL)
320 inject_argv[0] = strdup("inject");
321 inject_argv[1] = strdup("-b");
323 inject_argv[2] = strdup("--buildid-all");
325 /* signal that we're ready to go */
326 close(ready_pipe[1]);
328 cmd_inject(inject_argc, inject_argv);
/* NOTE(review): the pthread_create() result is not checked on this line */
333 pthread_create(&data->th, NULL, data_reader, data);
/* drop the ends that belong to the child */
335 close(ready_pipe[1]);
336 close(data->input_pipe[0]);
337 close(data->output_pipe[1]);
339 /* wait for child ready */
/* the parent's own write end is already closed, so this read() ends when the child closes its end */
340 if (read(ready_pipe[0], &buf, 1) < 0)
342 close(ready_pipe[0]);
/*
 * Feeds one complete synthetic event stream to the child and collects its
 * resource usage.
 *
 * Stream order: pipe header, attr event, fork event, then nr_mmaps rounds.
 * Each round picks a DSO and a base timestamp with rand(), emits one MMAP2
 * event and nr_samples sample events whose timestamps advance by 1000 per
 * sample, and emits a FINISHED_ROUND event after every 10th round.
 *
 * Closing the write end of the input pipe afterwards gives the child
 * end-of-file on its input. The child is then reaped with wait4(), and its
 * ru_maxrss is stored in *max_rss.
 */
347 static int inject_build_id(struct bench_data *data, u64 *max_rss)
351 struct rusage rusage;
353 /* this makes the child run */
354 if (perf_header__write_pipe(data->input_pipe[1]) < 0)
357 if (synthesize_attr(data) < 0)
360 if (synthesize_fork(data) < 0)
363 for (i = 0; i < nr_mmaps; i++) {
/*
 * NOTE(review): the modulus is nr_dsos - 1, so the last collected DSO is
 * never selected, and the divisor is zero when exactly one DSO was
 * collected - confirm whether nr_dsos >= 2 is guaranteed by the caller.
 */
364 int idx = rand() % (nr_dsos - 1);
365 struct bench_dso *dso = &dsos[idx];
366 u64 timestamp = rand() % 1000000;
368 pr_debug2(" [%d] injecting: %s\n", i+1, dso->name);
369 if (synthesize_mmap(data, dso, timestamp) < 0)
372 for (k = 0; k < nr_samples; k++) {
373 if (synthesize_sample(data, dso, timestamp + k * 1000) < 0)
377 if ((i + 1) % 10 == 0) {
378 if (synthesize_flush(data) < 0)
383 /* this makes the child finish */
384 close(data->input_pipe[1]);
/* NOTE(review): the wait4() result is not checked before rusage is read */
386 wait4(data->pid, &status, 0, &rusage);
387 *max_rss = rusage.ru_maxrss;
389 pr_debug(" Child %d exited with %d\n", data->pid, status);
/*
 * Runs the injection benchmark `iterations` times for one mode
 * (build_id_all selects the "-all" variant) and prints the statistics.
 *
 * Each iteration calls setup_injection(), times inject_build_id() with
 * gettimeofday(), feeds the elapsed microseconds and the reported max RSS
 * into the running statistics, and joins the reader thread.
 *
 * Three lines are printed at the end: average run time in msec, average
 * time per event in usec, and average memory usage in KB, each with its
 * standard deviation.
 *
 * NOTE(review): what happens after the two failure messages is outside this
 * excerpt.
 */
394 static void do_inject_loop(struct bench_data *data, bool build_id_all)
397 struct stats time_stats, mem_stats;
398 double time_average, time_stddev;
399 double mem_average, mem_stddev;
401 init_stats(&time_stats);
402 init_stats(&mem_stats);
404 pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : "");
406 for (i = 0; i < iterations; i++) {
407 struct timeval start, end, diff;
408 u64 runtime_us, max_rss;
410 pr_debug(" Iteration #%d\n", i+1);
412 if (setup_injection(data, build_id_all) < 0) {
413 printf(" Build-id injection setup failed\n");
/* only the inject_build_id() call is inside the timed region */
417 gettimeofday(&start, NULL);
418 if (inject_build_id(data, &max_rss) < 0) {
419 printf(" Build-id injection failed\n");
423 gettimeofday(&end, NULL);
424 timersub(&end, &start, &diff);
425 runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
426 update_stats(&time_stats, runtime_us);
427 update_stats(&mem_stats, max_rss);
429 pthread_join(data->th, NULL);
/* the statistics are kept in usec; convert to msec for the first report line */
432 time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
433 time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
434 printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
435 build_id_all ? "-all" : "", time_average, time_stddev);
437 /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
438 time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
439 time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2));
440 printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
441 time_average, time_stddev);
443 mem_average = avg_stats(&mem_stats);
444 mem_stddev = stddev_stats(&mem_stats);
445 printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
446 mem_average, mem_stddev);
/*
 * Configures the shared sample layout and runs the benchmark twice: first
 * with build_id_all false, then with it true.
 *
 * The sample type is IDENTIFIER | IP | TID | TIME, and bench_id_hdr_size is
 * fixed at 32 bytes.
 *
 * NOTE(review): the DSO collection call, the condition guarding the
 * "Cannot collect DSOs" message, and the return values are outside this
 * excerpt.
 */
449 static int do_inject_loops(struct bench_data *data)
455 bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP;
456 bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
/* NOTE(review): hard-coded size; confirm it matches the id layout implied by bench_sample_type */
457 bench_id_hdr_size = 32;
461 printf(" Cannot collect DSOs for injection\n");
465 do_inject_loop(data, false);
466 do_inject_loop(data, true);
/*
 * Entry point of the inject-build-id benchmark.
 *
 * Parses the command line against options[] (with bench_usage as the usage
 * text), then runs do_inject_loops() on a stack-allocated struct bench_data
 * and returns its result.
 *
 * NOTE(review): the condition that leads to usage_with_options() is outside
 * this excerpt.
 */
472 int bench_inject_build_id(int argc, const char **argv)
474 struct bench_data data;
476 argc = parse_options(argc, argv, options, bench_usage, 0);
478 usage_with_options(bench_usage, options);
482 return do_inject_loops(&data);