/*
 * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result. These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 */
13 #include <linux/bpf.h>
14 #include <linux/filter.h>
15 #include <linux/unistd.h>
16 #include <netinet/in.h>
17 #include <netinet/tcp.h>
21 #include <sys/epoll.h>
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/resource.h>
27 #include "../kselftest.h"
35 uint16_t send_port_min;
/*
 * Length passed to bind()/sendto() for every address in this file: the size
 * of the sockaddr_storage that the address helpers allocate.
 */
static size_t sockaddr_size(void)
{
	return sizeof(struct sockaddr_storage);
}
/*
 * Allocate a zeroed sockaddr_storage and fill it in as the wildcard ("any")
 * address of the given family with the given port (host byte order).
 *
 * Supports AF_INET and AF_INET6; any other family terminates the program via
 * error().  The caller owns the returned buffer and must free() it.
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() zeroes the storage; fail loudly instead of writing to NULL */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Like new_any_sockaddr(), but with the address set to the loopback address
 * of the family (INADDR_LOOPBACK or in6addr_loopback).
 *
 * Unsupported families terminate the program via error().  The caller owns
 * the returned buffer and must free() it.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr *addr = new_any_sockaddr(family, port);
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* Family and port are already set; only the address is overwritten. */
	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_addr = in6addr_loopback;
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return addr;
}
92 static void attach_ebpf(int fd, uint16_t mod)
94 static char bpf_log_buf[65536];
95 static const char bpf_license[] = "GPL";
98 const struct bpf_insn prog[] = {
99 /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
100 { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
101 /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
102 { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
103 /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
104 { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
105 /* BPF_EXIT_INSN() */
106 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
110 memset(&attr, 0, sizeof(attr));
111 attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
112 attr.insn_cnt = ARRAY_SIZE(prog);
113 attr.insns = (unsigned long) &prog;
114 attr.license = (unsigned long) &bpf_license;
115 attr.log_buf = (unsigned long) &bpf_log_buf;
116 attr.log_size = sizeof(bpf_log_buf);
118 attr.kern_version = 0;
120 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
122 error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
124 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
126 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
131 static void attach_cbpf(int fd, uint16_t mod)
133 struct sock_filter code[] = {
134 /* A = (uint32_t)skb[0] */
135 { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
137 { BPF_ALU | BPF_MOD, 0, 0, mod },
139 { BPF_RET | BPF_A, 0, 0, 0 },
141 struct sock_fprog p = {
142 .len = ARRAY_SIZE(code),
146 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
147 error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
150 static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
151 void (*attach_bpf)(int, uint16_t))
153 struct sockaddr * const addr =
154 new_any_sockaddr(p.recv_family, p.recv_port);
157 for (i = 0; i < p.recv_socks; ++i) {
158 fd[i] = socket(p.recv_family, p.protocol, 0);
160 error(1, errno, "failed to create recv %d", i);
163 if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
165 error(1, errno, "failed to set SO_REUSEPORT on %d", i);
168 attach_bpf(fd[i], mod);
170 if (bind(fd[i], addr, sockaddr_size()))
171 error(1, errno, "failed to bind recv socket %d", i);
173 if (p.protocol == SOCK_STREAM) {
175 if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
178 "failed to set TCP_FASTOPEN on %d", i);
179 if (listen(fd[i], p.recv_socks * 10))
180 error(1, errno, "failed to listen on socket");
186 static void send_from(struct test_params p, uint16_t sport, char *buf,
189 struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
190 struct sockaddr * const daddr =
191 new_loopback_sockaddr(p.send_family, p.recv_port);
192 const int fd = socket(p.send_family, p.protocol, 0), one = 1;
195 error(1, errno, "failed to create send socket");
197 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
198 error(1, errno, "failed to set reuseaddr");
200 if (bind(fd, saddr, sockaddr_size()))
201 error(1, errno, "failed to bind send socket");
203 if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
204 error(1, errno, "failed to send message");
211 static void test_recv_order(const struct test_params p, int fd[], int mod)
213 char recv_buf[8], send_buf[8];
215 struct iovec recv_io = { recv_buf, 8 };
216 struct epoll_event ev;
217 int epfd, conn, i, sport, expected;
218 uint32_t data, ndata;
220 epfd = epoll_create(1);
222 error(1, errno, "failed to create epoll");
223 for (i = 0; i < p.recv_socks; ++i) {
226 if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
227 error(1, errno, "failed to register sock %d epoll", i);
230 memset(&msg, 0, sizeof(msg));
231 msg.msg_iov = &recv_io;
234 for (data = 0; data < p.recv_socks * 2; ++data) {
235 sport = p.send_port_min + data;
237 memcpy(send_buf, &ndata, sizeof(ndata));
238 send_from(p, sport, send_buf, sizeof(ndata));
240 i = epoll_wait(epfd, &ev, 1, -1);
242 error(1, errno, "epoll wait failed");
244 if (p.protocol == SOCK_STREAM) {
245 conn = accept(ev.data.fd, NULL, NULL);
247 error(1, errno, "error accepting");
248 i = recvmsg(conn, &msg, 0);
251 i = recvmsg(ev.data.fd, &msg, 0);
254 error(1, errno, "recvmsg error");
255 if (i != sizeof(ndata))
256 error(1, 0, "expected size %zd got %d",
259 for (i = 0; i < p.recv_socks; ++i)
260 if (ev.data.fd == fd[i])
262 memcpy(&ndata, recv_buf, sizeof(ndata));
263 fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
265 expected = (sport % mod);
267 error(1, 0, "expected socket %d", expected);
271 static void test_reuseport_ebpf(struct test_params p)
273 int i, fd[p.recv_socks];
275 fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
276 build_recv_group(p, fd, p.recv_socks, attach_ebpf);
277 test_recv_order(p, fd, p.recv_socks);
279 p.send_port_min += p.recv_socks * 2;
280 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
281 attach_ebpf(fd[0], p.recv_socks / 2);
282 test_recv_order(p, fd, p.recv_socks / 2);
284 for (i = 0; i < p.recv_socks; ++i)
288 static void test_reuseport_cbpf(struct test_params p)
290 int i, fd[p.recv_socks];
292 fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
293 build_recv_group(p, fd, p.recv_socks, attach_cbpf);
294 test_recv_order(p, fd, p.recv_socks);
296 p.send_port_min += p.recv_socks * 2;
297 fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
298 attach_cbpf(fd[0], p.recv_socks / 2);
299 test_recv_order(p, fd, p.recv_socks / 2);
301 for (i = 0; i < p.recv_socks; ++i)
305 static void test_extra_filter(const struct test_params p)
307 struct sockaddr * const addr =
308 new_any_sockaddr(p.recv_family, p.recv_port);
311 fprintf(stderr, "Testing too many filters...\n");
312 fd1 = socket(p.recv_family, p.protocol, 0);
314 error(1, errno, "failed to create socket 1");
315 fd2 = socket(p.recv_family, p.protocol, 0);
317 error(1, errno, "failed to create socket 2");
320 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
321 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
322 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
323 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
325 attach_ebpf(fd1, 10);
326 attach_ebpf(fd2, 10);
328 if (bind(fd1, addr, sockaddr_size()))
329 error(1, errno, "failed to bind recv socket 1");
331 if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
332 error(1, errno, "bind socket 2 should fail with EADDRINUSE");
337 static void test_filter_no_reuseport(const struct test_params p)
339 struct sockaddr * const addr =
340 new_any_sockaddr(p.recv_family, p.recv_port);
341 const char bpf_license[] = "GPL";
342 struct bpf_insn ecode[] = {
343 { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
344 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
346 struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
347 union bpf_attr eprog;
348 struct sock_fprog cprog;
351 fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
353 memset(&eprog, 0, sizeof(eprog));
354 eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
355 eprog.insn_cnt = ARRAY_SIZE(ecode);
356 eprog.insns = (unsigned long) &ecode;
357 eprog.license = (unsigned long) &bpf_license;
358 eprog.kern_version = 0;
360 memset(&cprog, 0, sizeof(cprog));
361 cprog.len = ARRAY_SIZE(ccode);
362 cprog.filter = ccode;
365 bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
367 error(1, errno, "ebpf error");
368 fd = socket(p.recv_family, p.protocol, 0);
370 error(1, errno, "failed to create socket 1");
372 if (bind(fd, addr, sockaddr_size()))
373 error(1, errno, "failed to bind recv socket 1");
376 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
377 sizeof(bpf_fd)) || errno != EINVAL)
378 error(1, errno, "setsockopt should have returned EINVAL");
381 if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
382 sizeof(cprog)) || errno != EINVAL)
383 error(1, errno, "setsockopt should have returned EINVAL");
388 static void test_filter_without_bind(void)
390 int fd1, fd2, opt = 1;
392 fprintf(stderr, "Testing filter add without bind...\n");
393 fd1 = socket(AF_INET, SOCK_DGRAM, 0);
395 error(1, errno, "failed to create socket 1");
396 fd2 = socket(AF_INET, SOCK_DGRAM, 0);
398 error(1, errno, "failed to create socket 2");
399 if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
400 error(1, errno, "failed to set SO_REUSEPORT on socket 1");
401 if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
402 error(1, errno, "failed to set SO_REUSEPORT on socket 2");
404 attach_ebpf(fd1, 10);
405 attach_cbpf(fd2, 10);
/*
 * Make sure TCP Fast Open is enabled for both client and server use.
 *
 * Reads /proc/sys/net/ipv4/tcp_fastopen and, if either of the two low bits
 * is clear, writes the value back with both bits set.  Any failure to open,
 * read or write the sysctl terminates the program via error().
 */
void enable_fastopen(void)
{
	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
	int rw_mask = 3;  /* bit 1: client side; bit-2 server side */
	int val, size;
	char buf[16];
	ssize_t nread;

	if (fd < 0)
		error(1, errno, "Unable to open tcp_fastopen sysctl");
	/* Leave room for the terminator: read() does not NUL-terminate. */
	nread = read(fd, buf, sizeof(buf) - 1);
	if (nread <= 0)
		error(1, errno, "Unable to read tcp_fastopen sysctl");
	buf[nread] = '\0';
	val = atoi(buf);
	close(fd);

	if ((val & rw_mask) != rw_mask) {
		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
		if (fd < 0)
			error(1, errno,
			      "Unable to open tcp_fastopen sysctl for writing");
		val |= rw_mask;
		size = snprintf(buf, sizeof(buf), "%d", val);
		if (write(fd, buf, size) <= 0)
			error(1, errno, "Unable to write tcp_fastopen sysctl");
		close(fd);
	}
}
/* RLIMIT_MEMLOCK as it was at startup; written by main_ctor(). */
static struct rlimit rlim_old;

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and, unless the
 * soft limit is already unlimited, raise both limits by 1 MiB.
 */
static __attribute__((constructor)) void main_ctor(void)
{
	getrlimit(RLIMIT_MEMLOCK, &rlim_old);

	if (rlim_old.rlim_cur != RLIM_INFINITY) {
		struct rlimit rlim_new;

		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
		/*
		 * An unlimited hard limit must stay unlimited: adding to
		 * RLIM_INFINITY would wrap around to a tiny value.
		 */
		if (rlim_old.rlim_max == RLIM_INFINITY)
			rlim_new.rlim_max = RLIM_INFINITY;
		else
			rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
	}
}
453 static __attribute__((destructor)) void main_dtor(void)
455 setrlimit(RLIMIT_MEMLOCK, &rlim_old);
460 fprintf(stderr, "---- IPv4 UDP ----\n");
461 /* NOTE: UDP socket lookups traverse a different code path when there
462 * are > 10 sockets in a group. Run the bpf test through both paths.
464 test_reuseport_ebpf((struct test_params) {
465 .recv_family = AF_INET,
466 .send_family = AF_INET,
467 .protocol = SOCK_DGRAM,
470 .send_port_min = 9000});
471 test_reuseport_ebpf((struct test_params) {
472 .recv_family = AF_INET,
473 .send_family = AF_INET,
474 .protocol = SOCK_DGRAM,
477 .send_port_min = 9000});
478 test_reuseport_cbpf((struct test_params) {
479 .recv_family = AF_INET,
480 .send_family = AF_INET,
481 .protocol = SOCK_DGRAM,
484 .send_port_min = 9020});
485 test_reuseport_cbpf((struct test_params) {
486 .recv_family = AF_INET,
487 .send_family = AF_INET,
488 .protocol = SOCK_DGRAM,
491 .send_port_min = 9020});
492 test_extra_filter((struct test_params) {
493 .recv_family = AF_INET,
494 .protocol = SOCK_DGRAM,
496 test_filter_no_reuseport((struct test_params) {
497 .recv_family = AF_INET,
498 .protocol = SOCK_DGRAM,
501 fprintf(stderr, "---- IPv6 UDP ----\n");
502 test_reuseport_ebpf((struct test_params) {
503 .recv_family = AF_INET6,
504 .send_family = AF_INET6,
505 .protocol = SOCK_DGRAM,
508 .send_port_min = 9040});
509 test_reuseport_ebpf((struct test_params) {
510 .recv_family = AF_INET6,
511 .send_family = AF_INET6,
512 .protocol = SOCK_DGRAM,
515 .send_port_min = 9040});
516 test_reuseport_cbpf((struct test_params) {
517 .recv_family = AF_INET6,
518 .send_family = AF_INET6,
519 .protocol = SOCK_DGRAM,
522 .send_port_min = 9060});
523 test_reuseport_cbpf((struct test_params) {
524 .recv_family = AF_INET6,
525 .send_family = AF_INET6,
526 .protocol = SOCK_DGRAM,
529 .send_port_min = 9060});
530 test_extra_filter((struct test_params) {
531 .recv_family = AF_INET6,
532 .protocol = SOCK_DGRAM,
534 test_filter_no_reuseport((struct test_params) {
535 .recv_family = AF_INET6,
536 .protocol = SOCK_DGRAM,
539 fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
540 test_reuseport_ebpf((struct test_params) {
541 .recv_family = AF_INET6,
542 .send_family = AF_INET,
543 .protocol = SOCK_DGRAM,
546 .send_port_min = 9080});
547 test_reuseport_ebpf((struct test_params) {
548 .recv_family = AF_INET6,
549 .send_family = AF_INET,
550 .protocol = SOCK_DGRAM,
553 .send_port_min = 9080});
554 test_reuseport_cbpf((struct test_params) {
555 .recv_family = AF_INET6,
556 .send_family = AF_INET,
557 .protocol = SOCK_DGRAM,
560 .send_port_min = 9100});
561 test_reuseport_cbpf((struct test_params) {
562 .recv_family = AF_INET6,
563 .send_family = AF_INET,
564 .protocol = SOCK_DGRAM,
567 .send_port_min = 9100});
569 /* TCP fastopen is required for the TCP tests */
571 fprintf(stderr, "---- IPv4 TCP ----\n");
572 test_reuseport_ebpf((struct test_params) {
573 .recv_family = AF_INET,
574 .send_family = AF_INET,
575 .protocol = SOCK_STREAM,
578 .send_port_min = 9120});
579 test_reuseport_cbpf((struct test_params) {
580 .recv_family = AF_INET,
581 .send_family = AF_INET,
582 .protocol = SOCK_STREAM,
585 .send_port_min = 9160});
586 test_extra_filter((struct test_params) {
587 .recv_family = AF_INET,
588 .protocol = SOCK_STREAM,
590 test_filter_no_reuseport((struct test_params) {
591 .recv_family = AF_INET,
592 .protocol = SOCK_STREAM,
595 fprintf(stderr, "---- IPv6 TCP ----\n");
596 test_reuseport_ebpf((struct test_params) {
597 .recv_family = AF_INET6,
598 .send_family = AF_INET6,
599 .protocol = SOCK_STREAM,
602 .send_port_min = 9200});
603 test_reuseport_cbpf((struct test_params) {
604 .recv_family = AF_INET6,
605 .send_family = AF_INET6,
606 .protocol = SOCK_STREAM,
609 .send_port_min = 9240});
610 test_extra_filter((struct test_params) {
611 .recv_family = AF_INET6,
612 .protocol = SOCK_STREAM,
614 test_filter_no_reuseport((struct test_params) {
615 .recv_family = AF_INET6,
616 .protocol = SOCK_STREAM,
619 fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
620 test_reuseport_ebpf((struct test_params) {
621 .recv_family = AF_INET6,
622 .send_family = AF_INET,
623 .protocol = SOCK_STREAM,
626 .send_port_min = 9320});
627 test_reuseport_cbpf((struct test_params) {
628 .recv_family = AF_INET6,
629 .send_family = AF_INET,
630 .protocol = SOCK_STREAM,
633 .send_port_min = 9360});
635 test_filter_without_bind();
637 fprintf(stderr, "SUCCESS\n");