1 /* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
3 static const char *__doc__ =
4 " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>

#include <arpa/inet.h>
#include <linux/if_link.h>
22 #define MAX_CPUS 64 /* WARNING - sync with _kern.c */
24 /* How many xdp_progs are defined in _kern.c */
28 #include "bpf/libbpf.h"
32 static int ifindex = -1;
33 static char ifname_buf[IF_NAMESIZE];
37 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
38 static int cpu_map_fd;
39 static int rx_cnt_map_fd;
40 static int redirect_err_cnt_map_fd;
41 static int cpumap_enqueue_cnt_map_fd;
42 static int cpumap_kthread_cnt_map_fd;
43 static int cpus_available_map_fd;
44 static int cpus_count_map_fd;
45 static int cpus_iterator_map_fd;
46 static int exception_cnt_map_fd;
/* Exit return codes */
#define EXIT_OK			0
#define EXIT_FAIL		1
#define EXIT_FAIL_OPTION	2
#define EXIT_FAIL_XDP		3
#define EXIT_FAIL_BPF		4
#define EXIT_FAIL_MEM		5
/* Long option table; usage() iterates until .name == 0, so the
 * all-zero sentinel entry at the end is mandatory.
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"dev",		required_argument,	NULL, 'd' },
	{"skb-mode",	no_argument,		NULL, 'S' },
	{"sec",		required_argument,	NULL, 's' },
	{"progname",	required_argument,	NULL, 'p' },
	{"qsize",	required_argument,	NULL, 'q' },
	{"cpu",		required_argument,	NULL, 'c' },
	{"stress-mode", no_argument,		NULL, 'x' },
	{"no-separators", no_argument,		NULL, 'z' },
	{"force",	no_argument,		NULL, 'F' },
	{0, 0, NULL, 0 }
};
70 static void int_exit(int sig)
72 __u32 curr_prog_id = 0;
75 if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
76 printf("bpf_get_link_xdp_id failed\n");
79 if (prog_id == curr_prog_id) {
81 "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
83 bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
84 } else if (!curr_prog_id) {
85 printf("couldn't find a prog id on a given iface\n");
87 printf("program on interface changed, not removing\n");
93 static void print_avail_progs(struct bpf_object *obj)
95 struct bpf_program *pos;
97 bpf_object__for_each_program(pos, obj) {
98 if (bpf_program__is_xdp(pos))
99 printf(" %s\n", bpf_program__title(pos, false));
103 static void usage(char *argv[], struct bpf_object *obj)
107 printf("\nDOCUMENTATION:\n%s\n", __doc__);
109 printf(" Usage: %s (options-see-below)\n", argv[0]);
110 printf(" Listing options:\n");
111 for (i = 0; long_options[i].name != 0; i++) {
112 printf(" --%-12s", long_options[i].name);
113 if (long_options[i].flag != NULL)
114 printf(" flag (internal value:%d)",
115 *long_options[i].flag);
117 printf(" short-option: -%c",
118 long_options[i].val);
121 printf("\n Programs to be used for --progname:\n");
122 print_avail_progs(obj);
/* gettime returns the current time of day in nanoseconds.
 * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
 *       clock_gettime (ns) => 9ns  (CLOCK_MONOTONIC_COARSE)
 */
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
static __u64 gettime(void)
{
	struct timespec t;
	int res;

	res = clock_gettime(CLOCK_MONOTONIC, &t);
	if (res < 0) {
		/* Message fixed: this uses clock_gettime, not gettimeofday */
		fprintf(stderr, "Error with clock_gettime! (%i)\n", res);
		exit(1); /* EXIT_FAIL */
	}
	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}
144 /* Common stats data record shared with _kern.c */
152 struct datarec total;
155 struct stats_record {
156 struct record rx_cnt;
157 struct record redir_err;
158 struct record kthread;
159 struct record exception;
160 struct record enq[MAX_CPUS];
163 static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
165 /* For percpu maps, userspace gets a value per possible CPU */
166 unsigned int nr_cpus = bpf_num_possible_cpus();
167 struct datarec values[nr_cpus];
168 __u64 sum_processed = 0;
169 __u64 sum_dropped = 0;
173 if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
175 "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
178 /* Get time as close as possible to reading map contents */
179 rec->timestamp = gettime();
181 /* Record and sum values from each CPU */
182 for (i = 0; i < nr_cpus; i++) {
183 rec->cpu[i].processed = values[i].processed;
184 sum_processed += values[i].processed;
185 rec->cpu[i].dropped = values[i].dropped;
186 sum_dropped += values[i].dropped;
187 rec->cpu[i].issue = values[i].issue;
188 sum_issue += values[i].issue;
190 rec->total.processed = sum_processed;
191 rec->total.dropped = sum_dropped;
192 rec->total.issue = sum_issue;
196 static struct datarec *alloc_record_per_cpu(void)
198 unsigned int nr_cpus = bpf_num_possible_cpus();
199 struct datarec *array;
202 size = sizeof(struct datarec) * nr_cpus;
203 array = malloc(size);
204 memset(array, 0, size);
206 fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
212 static struct stats_record *alloc_stats_record(void)
214 struct stats_record *rec;
217 rec = malloc(sizeof(*rec));
218 memset(rec, 0, sizeof(*rec));
220 fprintf(stderr, "Mem alloc error\n");
223 rec->rx_cnt.cpu = alloc_record_per_cpu();
224 rec->redir_err.cpu = alloc_record_per_cpu();
225 rec->kthread.cpu = alloc_record_per_cpu();
226 rec->exception.cpu = alloc_record_per_cpu();
227 for (i = 0; i < MAX_CPUS; i++)
228 rec->enq[i].cpu = alloc_record_per_cpu();
233 static void free_stats_record(struct stats_record *r)
237 for (i = 0; i < MAX_CPUS; i++)
239 free(r->exception.cpu);
240 free(r->kthread.cpu);
241 free(r->redir_err.cpu);
246 static double calc_period(struct record *r, struct record *p)
251 period = r->timestamp - p->timestamp;
253 period_ = ((double) period / NANOSEC_PER_SEC);
258 static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
264 packets = r->processed - p->processed;
265 pps = packets / period_;
270 static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
276 packets = r->dropped - p->dropped;
277 pps = packets / period_;
282 static __u64 calc_errs_pps(struct datarec *r,
283 struct datarec *p, double period_)
289 packets = r->issue - p->issue;
290 pps = packets / period_;
295 static void stats_print(struct stats_record *stats_rec,
296 struct stats_record *stats_prev,
299 unsigned int nr_cpus = bpf_num_possible_cpus();
300 double pps = 0, drop = 0, err = 0;
301 struct record *rec, *prev;
307 printf("Running XDP/eBPF prog_name:%s\n", prog_name);
308 printf("%-15s %-7s %-14s %-11s %-9s\n",
309 "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
313 char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
314 char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
317 rec = &stats_rec->rx_cnt;
318 prev = &stats_prev->rx_cnt;
319 t = calc_period(rec, prev);
320 for (i = 0; i < nr_cpus; i++) {
321 struct datarec *r = &rec->cpu[i];
322 struct datarec *p = &prev->cpu[i];
324 pps = calc_pps(r, p, t);
325 drop = calc_drop_pps(r, p, t);
326 err = calc_errs_pps(r, p, t);
328 errstr = "cpu-dest/err";
330 printf(fmt_rx, "XDP-RX",
331 i, pps, drop, err, errstr);
333 pps = calc_pps(&rec->total, &prev->total, t);
334 drop = calc_drop_pps(&rec->total, &prev->total, t);
335 err = calc_errs_pps(&rec->total, &prev->total, t);
336 printf(fm2_rx, "XDP-RX", "total", pps, drop);
339 /* cpumap enqueue stats */
340 for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
341 char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
342 char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
345 rec = &stats_rec->enq[to_cpu];
346 prev = &stats_prev->enq[to_cpu];
347 t = calc_period(rec, prev);
348 for (i = 0; i < nr_cpus; i++) {
349 struct datarec *r = &rec->cpu[i];
350 struct datarec *p = &prev->cpu[i];
352 pps = calc_pps(r, p, t);
353 drop = calc_drop_pps(r, p, t);
354 err = calc_errs_pps(r, p, t);
356 errstr = "bulk-average";
357 err = pps / err; /* calc average bulk size */
360 printf(fmt, "cpumap-enqueue",
361 i, to_cpu, pps, drop, err, errstr);
363 pps = calc_pps(&rec->total, &prev->total, t);
365 drop = calc_drop_pps(&rec->total, &prev->total, t);
366 err = calc_errs_pps(&rec->total, &prev->total, t);
368 errstr = "bulk-average";
369 err = pps / err; /* calc average bulk size */
371 printf(fm2, "cpumap-enqueue",
372 "sum", to_cpu, pps, drop, err, errstr);
376 /* cpumap kthread stats */
378 char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
379 char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
382 rec = &stats_rec->kthread;
383 prev = &stats_prev->kthread;
384 t = calc_period(rec, prev);
385 for (i = 0; i < nr_cpus; i++) {
386 struct datarec *r = &rec->cpu[i];
387 struct datarec *p = &prev->cpu[i];
389 pps = calc_pps(r, p, t);
390 drop = calc_drop_pps(r, p, t);
391 err = calc_errs_pps(r, p, t);
395 printf(fmt_k, "cpumap_kthread",
396 i, pps, drop, err, e_str);
398 pps = calc_pps(&rec->total, &prev->total, t);
399 drop = calc_drop_pps(&rec->total, &prev->total, t);
400 err = calc_errs_pps(&rec->total, &prev->total, t);
403 printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
406 /* XDP redirect err tracepoints (very unlikely) */
408 char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
409 char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
411 rec = &stats_rec->redir_err;
412 prev = &stats_prev->redir_err;
413 t = calc_period(rec, prev);
414 for (i = 0; i < nr_cpus; i++) {
415 struct datarec *r = &rec->cpu[i];
416 struct datarec *p = &prev->cpu[i];
418 pps = calc_pps(r, p, t);
419 drop = calc_drop_pps(r, p, t);
421 printf(fmt_err, "redirect_err", i, pps, drop);
423 pps = calc_pps(&rec->total, &prev->total, t);
424 drop = calc_drop_pps(&rec->total, &prev->total, t);
425 printf(fm2_err, "redirect_err", "total", pps, drop);
428 /* XDP general exception tracepoints */
430 char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
431 char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
433 rec = &stats_rec->exception;
434 prev = &stats_prev->exception;
435 t = calc_period(rec, prev);
436 for (i = 0; i < nr_cpus; i++) {
437 struct datarec *r = &rec->cpu[i];
438 struct datarec *p = &prev->cpu[i];
440 pps = calc_pps(r, p, t);
441 drop = calc_drop_pps(r, p, t);
443 printf(fmt_err, "xdp_exception", i, pps, drop);
445 pps = calc_pps(&rec->total, &prev->total, t);
446 drop = calc_drop_pps(&rec->total, &prev->total, t);
447 printf(fm2_err, "xdp_exception", "total", pps, drop);
454 static void stats_collect(struct stats_record *rec)
459 map_collect_percpu(fd, 0, &rec->rx_cnt);
461 fd = redirect_err_cnt_map_fd;
462 map_collect_percpu(fd, 1, &rec->redir_err);
464 fd = cpumap_enqueue_cnt_map_fd;
465 for (i = 0; i < MAX_CPUS; i++)
466 map_collect_percpu(fd, i, &rec->enq[i]);
468 fd = cpumap_kthread_cnt_map_fd;
469 map_collect_percpu(fd, 0, &rec->kthread);
471 fd = exception_cnt_map_fd;
472 map_collect_percpu(fd, 0, &rec->exception);
/* Pointer swap trick: flip current/previous sample buffers */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *tmp;

	tmp = *a;
	*a = *b;
	*b = tmp;
}
486 static int create_cpu_entry(__u32 cpu, __u32 queue_size,
487 __u32 avail_idx, bool new)
489 __u32 curr_cpus_count = 0;
493 /* Add a CPU entry to cpumap, as this allocate a cpu entry in
494 * the kernel for the cpu.
496 ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
498 fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
502 /* Inform bpf_prog's that a new CPU is available to select
503 * from via some control maps.
505 ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
507 fprintf(stderr, "Add to avail CPUs failed\n");
511 /* When not replacing/updating existing entry, bump the count */
512 ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
514 fprintf(stderr, "Failed reading curr cpus_count\n");
519 ret = bpf_map_update_elem(cpus_count_map_fd, &key,
520 &curr_cpus_count, 0);
522 fprintf(stderr, "Failed write curr cpus_count\n");
526 /* map_fd[7] = cpus_iterator */
527 printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
528 new ? "Add-new":"Replace", cpu, avail_idx,
529 queue_size, curr_cpus_count);
534 /* CPUs are zero-indexed. Thus, add a special sentinel default value
535 * in map cpus_available to mark CPU index'es not configured
537 static void mark_cpus_unavailable(void)
539 __u32 invalid_cpu = MAX_CPUS;
542 for (i = 0; i < MAX_CPUS; i++) {
543 ret = bpf_map_update_elem(cpus_available_map_fd, &i,
546 fprintf(stderr, "Failed marking CPU unavailable\n");
552 /* Stress cpumap management code by concurrently changing underlying cpumap */
553 static void stress_cpumap(void)
555 /* Changing qsize will cause kernel to free and alloc a new
556 * bpf_cpu_map_entry, with an associated/complicated tear-down
559 create_cpu_entry(1, 1024, 0, false);
560 create_cpu_entry(1, 8, 0, false);
561 create_cpu_entry(1, 16000, 0, false);
/* Main stats loop: sample all maps every @interval seconds and print
 * deltas against the previous sample.  Loops forever; terminated by
 * the signal handler (the trailing frees are unreachable but kept
 * for symmetry/documentation).
 */
static void stats_poll(int interval, bool use_separators, char *prog_name,
		       bool stress_mode)
{
	struct stats_record *record, *prev;

	record = alloc_stats_record();
	prev   = alloc_stats_record();
	stats_collect(record);

	/* Trick to pretty printf with thousands separators use %' */
	if (use_separators)
		setlocale(LC_NUMERIC, "en_US");

	while (1) {
		swap(&prev, &record);
		stats_collect(record);
		stats_print(record, prev, prog_name);
		sleep(interval);
		if (stress_mode)
			stress_cpumap();
	}

	free_stats_record(record);
	free_stats_record(prev);
}
590 static int init_map_fds(struct bpf_object *obj)
592 cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
593 rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
594 redirect_err_cnt_map_fd =
595 bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
596 cpumap_enqueue_cnt_map_fd =
597 bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
598 cpumap_kthread_cnt_map_fd =
599 bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
600 cpus_available_map_fd =
601 bpf_object__find_map_fd_by_name(obj, "cpus_available");
602 cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
603 cpus_iterator_map_fd =
604 bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
605 exception_cnt_map_fd =
606 bpf_object__find_map_fd_by_name(obj, "exception_cnt");
608 if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
609 redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
610 cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
611 cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
612 exception_cnt_map_fd < 0)
618 int main(int argc, char **argv)
620 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
621 char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
622 struct bpf_prog_load_attr prog_load_attr = {
623 .prog_type = BPF_PROG_TYPE_UNSPEC,
625 struct bpf_prog_info info = {};
626 __u32 info_len = sizeof(info);
627 bool use_separators = true;
628 bool stress_mode = false;
629 struct bpf_program *prog;
630 struct bpf_object *obj;
640 /* Notice: choosing he queue size is very important with the
641 * ixgbe driver, because it's driver page recycling trick is
642 * dependend on pages being returned quickly. The number of
643 * out-standing packets in the system must be less-than 2x
648 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
649 prog_load_attr.file = filename;
651 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
652 perror("setrlimit(RLIMIT_MEMLOCK)");
656 if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
660 fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
664 if (init_map_fds(obj) < 0) {
665 fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
668 mark_cpus_unavailable();
670 /* Parse commands line args */
671 while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF",
672 long_options, &longindex)) != -1) {
675 if (strlen(optarg) >= IF_NAMESIZE) {
676 fprintf(stderr, "ERR: --dev name too long\n");
679 ifname = (char *)&ifname_buf;
680 strncpy(ifname, optarg, IF_NAMESIZE);
681 ifindex = if_nametoindex(ifname);
684 "ERR: --dev name unknown err(%d):%s\n",
685 errno, strerror(errno));
690 interval = atoi(optarg);
693 xdp_flags |= XDP_FLAGS_SKB_MODE;
699 use_separators = false;
702 /* Selecting eBPF prog to load */
706 /* Add multiple CPUs */
707 add_cpu = strtoul(optarg, NULL, 0);
708 if (add_cpu >= MAX_CPUS) {
710 "--cpu nr too large for cpumap err(%d):%s\n",
711 errno, strerror(errno));
714 create_cpu_entry(add_cpu, qsize, added_cpus, true);
718 qsize = atoi(optarg);
721 xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
727 return EXIT_FAIL_OPTION;
730 /* Required option */
732 fprintf(stderr, "ERR: required option --dev missing\n");
734 return EXIT_FAIL_OPTION;
736 /* Required option */
738 fprintf(stderr, "ERR: required option --cpu missing\n");
739 fprintf(stderr, " Specify multiple --cpu option to add more\n");
741 return EXIT_FAIL_OPTION;
744 /* Remove XDP program when program is interrupted or killed */
745 signal(SIGINT, int_exit);
746 signal(SIGTERM, int_exit);
748 prog = bpf_object__find_program_by_title(obj, prog_name);
750 fprintf(stderr, "bpf_object__find_program_by_title failed\n");
754 prog_fd = bpf_program__fd(prog);
756 fprintf(stderr, "bpf_program__fd failed\n");
760 if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
761 fprintf(stderr, "link set xdp fd failed\n");
762 return EXIT_FAIL_XDP;
765 err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
767 printf("can't get prog info - %s\n", strerror(errno));
772 stats_poll(interval, use_separators, prog_name, stress_mode);