1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
6 * 1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 * 3. BPF reuseport helper - bpf_sk_select_reuseport
11 #include <linux/compiler.h>
15 #include <netinet/in.h>
19 #include <sys/select.h>
23 #include <bpf/libbpf.h>
26 #include "test_progs.h"
27 #include "test_sockmap_listen.skel.h"
/*
 * Tunables shared by the helpers in this file:
 *   IO_TIMEOUT_SEC - timeout, in seconds, handed to accept_timeout() and
 *                    recv_timeout()
 *   MAX_STRERR_LEN - size of the buffer FAIL_LIBBPF() passes to
 *                    libbpf_strerror()
 *   MAX_TEST_NAME  - size of the buffer subtest names are formatted into
 */
29 #define IO_TIMEOUT_SEC 30
30 #define MAX_STRERR_LEN 256
31 #define MAX_TEST_NAME 80
/*
 * Failure reporting helpers.
 *
 * _FAIL() forwards to error_at_line() with a status of 0, passing __func__
 * and __LINE__ as the location. FAIL() supplies an errnum of 0,
 * FAIL_ERRNO() supplies the current errno, and FAIL_LIBBPF() renders a
 * libbpf error code with libbpf_strerror() into a MAX_STRERR_LEN-sized
 * buffer before reporting it through FAIL().
 */
33 #define _FAIL(errnum, fmt...) \
35 error_at_line(0, (errnum), __func__, __LINE__, fmt); \
38 #define FAIL(fmt...) _FAIL(0, fmt)
39 #define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
40 #define FAIL_LIBBPF(err, msg) \
42 char __buf[MAX_STRERR_LEN]; \
43 libbpf_strerror((err), __buf, sizeof(__buf)); \
44 FAIL("%s: %s", (msg), __buf); \
47 /* Wrappers that fail the test on error and report it. */
49 #define xaccept_nonblock(fd, addr, len) \
52 accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
54 FAIL_ERRNO("accept"); \
58 #define xbind(fd, addr, len) \
60 int __ret = bind((fd), (addr), (len)); \
68 int __ret = close((fd)); \
70 FAIL_ERRNO("close"); \
74 #define xconnect(fd, addr, len) \
76 int __ret = connect((fd), (addr), (len)); \
78 FAIL_ERRNO("connect"); \
82 #define xgetsockname(fd, addr, len) \
84 int __ret = getsockname((fd), (addr), (len)); \
86 FAIL_ERRNO("getsockname"); \
90 #define xgetsockopt(fd, level, name, val, len) \
92 int __ret = getsockopt((fd), (level), (name), (val), (len)); \
94 FAIL_ERRNO("getsockopt(" #name ")"); \
98 #define xlisten(fd, backlog) \
100 int __ret = listen((fd), (backlog)); \
102 FAIL_ERRNO("listen"); \
106 #define xsetsockopt(fd, level, name, val, len) \
108 int __ret = setsockopt((fd), (level), (name), (val), (len)); \
110 FAIL_ERRNO("setsockopt(" #name ")"); \
114 #define xsend(fd, buf, len, flags) \
116 ssize_t __ret = send((fd), (buf), (len), (flags)); \
118 FAIL_ERRNO("send"); \
122 #define xrecv_nonblock(fd, buf, len, flags) \
124 ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
127 FAIL_ERRNO("recv"); \
131 #define xsocket(family, sotype, flags) \
133 int __ret = socket(family, sotype, flags); \
135 FAIL_ERRNO("socket"); \
139 #define xbpf_map_delete_elem(fd, key) \
141 int __ret = bpf_map_delete_elem((fd), (key)); \
143 FAIL_ERRNO("map_delete"); \
147 #define xbpf_map_lookup_elem(fd, key, val) \
149 int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
151 FAIL_ERRNO("map_lookup"); \
155 #define xbpf_map_update_elem(fd, key, val, flags) \
157 int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
159 FAIL_ERRNO("map_update"); \
163 #define xbpf_prog_attach(prog, target, type, flags) \
166 bpf_prog_attach((prog), (target), (type), (flags)); \
168 FAIL_ERRNO("prog_attach(" #type ")"); \
172 #define xbpf_prog_detach2(prog, target, type) \
174 int __ret = bpf_prog_detach2((prog), (target), (type)); \
176 FAIL_ERRNO("prog_detach2(" #type ")"); \
180 #define xpthread_create(thread, attr, func, arg) \
182 int __ret = pthread_create((thread), (attr), (func), (arg)); \
185 FAIL_ERRNO("pthread_create"); \
189 #define xpthread_join(thread, retval) \
191 int __ret = pthread_join((thread), (retval)); \
194 FAIL_ERRNO("pthread_join"); \
/*
 * Wait with select() for read readiness, giving up after @timeout_sec
 * seconds.
 *
 * The final return yields 0 when select() reported exactly one ready
 * descriptor and -1 otherwise.
 *
 * NOTE(review): the set-up of rfds is not visible here; presumably it
 * contains only @fd - confirm against the full source.
 */
198 static int poll_read(int fd, unsigned int timeout_sec)
200 struct timeval timeout = { .tv_sec = timeout_sec };
207 r = select(fd + 1, &rfds, NULL, NULL, &timeout);
211 return r == 1 ? 0 : -1;
/*
 * accept() on @fd after first waiting on it with poll_read() for up to
 * @timeout_sec seconds.
 *
 * Returns whatever accept() returns. The statement executed when
 * poll_read() reports failure is not visible here; presumably it returns
 * -1 without calling accept() - confirm.
 */
214 static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
215 unsigned int timeout_sec)
217 if (poll_read(fd, timeout_sec))
220 return accept(fd, addr, len);
/*
 * recv() on @fd after first waiting on it with poll_read() for up to
 * @timeout_sec seconds.
 *
 * Returns whatever recv() returns. The statement executed when
 * poll_read() reports failure is not visible here; presumably it returns
 * -1 without calling recv() - confirm.
 */
223 static int recv_timeout(int fd, void *buf, size_t len, int flags,
224 unsigned int timeout_sec)
226 if (poll_read(fd, timeout_sec))
229 return recv(fd, buf, len, flags);
/*
 * Fill @ss with an IPv4 loopback address and store the size of
 * struct sockaddr_in in @len.
 *
 * The whole storage is zeroed first, so any field not assigned afterwards
 * stays zero. INADDR_LOOPBACK is converted to network byte order with
 * htonl().
 */
232 static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
234 struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
236 addr4->sin_family = AF_INET;
238 addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
239 *len = sizeof(*addr4);
/*
 * Fill @ss with the IPv6 loopback address (in6addr_loopback), port 0, and
 * store the size of struct sockaddr_in6 in @len. The whole storage is
 * zeroed before the fields are assigned.
 */
242 static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
244 struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
246 addr6->sin6_family = AF_INET6;
247 addr6->sin6_port = 0;
248 addr6->sin6_addr = in6addr_loopback;
249 *len = sizeof(*addr6);
/*
 * Family-generic front end: delegates to init_addr_loopback4() or
 * init_addr_loopback6(), and reports a test failure through FAIL() for an
 * unsupported @family.
 *
 * NOTE(review): the branch selection on @family is not visible here;
 * presumably AF_INET and AF_INET6 respectively - confirm.
 */
252 static void init_addr_loopback(int family, struct sockaddr_storage *ss,
257 init_addr_loopback4(ss, len);
260 init_addr_loopback6(ss, len);
263 FAIL("unsupported address family %d", family);
/*
 * Convenience cast from struct sockaddr_storage * to struct sockaddr *,
 * the pointer type the socket calls in this file are given.
 */
267 static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
269 return (struct sockaddr *)ss;
/*
 * Set SO_REUSEPORT on socket @s and attach the BPF program @progfd to it
 * with SO_ATTACH_REUSEPORT_EBPF. Both options go through xsetsockopt(),
 * which reports a failure itself via FAIL_ERRNO().
 *
 * NOTE(review): the handling of err and the return value are not visible
 * here.
 */
272 static int enable_reuseport(int s, int progfd)
276 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
279 err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
/*
 * Create a socket of type @sotype in @family and bind it to the loopback
 * address produced by init_addr_loopback().
 *
 * enable_reuseport() is invoked with @progfd before the bind, and
 * xlisten() is called with a backlog of SOMAXCONN.
 *
 * NOTE(review): the condition guarding enable_reuseport(), the body of the
 * SOCK_DGRAM branch and the return statement are not visible here.
 * Presumably reuseport is only enabled for a valid @progfd, datagram
 * sockets skip listen(), and the socket fd is returned (callers use the
 * result as one) - confirm against the full source.
 */
287 static int socket_loopback_reuseport(int family, int sotype, int progfd)
289 struct sockaddr_storage addr;
293 init_addr_loopback(family, &addr, &len);
295 s = xsocket(family, sotype, 0);
300 enable_reuseport(s, progfd);
302 err = xbind(s, sockaddr(&addr), len);
306 if (sotype & SOCK_DGRAM)
309 err = xlisten(s, SOMAXCONN);
/*
 * Shorthand for socket_loopback_reuseport() with a progfd of -1
 * (presumably meaning "no reuseport program" - confirm).
 */
319 static int socket_loopback(int family, int sotype)
321 return socket_loopback_reuseport(family, sotype, -1);
/*
 * Attempt two inserts that must be rejected: bpf_map_update_elem() is
 * expected to fail with EINVAL the first time and with EBADF the second.
 * A success, or any other errno, is reported through FAIL_ERRNO().
 *
 * NOTE(review): the key/value used for each attempt are assigned in lines
 * not visible here.
 */
324 static void test_insert_invalid(int family, int sotype, int mapfd)
331 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
332 if (!err || errno != EINVAL)
333 FAIL_ERRNO("map_update: expected EINVAL");
336 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
337 if (!err || errno != EBADF)
338 FAIL_ERRNO("map_update: expected EBADF");
/*
 * Create a socket with xsocket() and try to insert into @mapfd; the
 * update is expected to fail with EOPNOTSUPP, anything else is reported
 * through FAIL_ERRNO().
 *
 * NOTE(review): no bind/listen call is visible between xsocket() and the
 * update, matching the "opened" in the name; the value inserted is
 * presumably the socket fd - confirm.
 */
341 static void test_insert_opened(int family, int sotype, int mapfd)
347 s = xsocket(family, sotype, 0);
353 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 if (!err || errno != EOPNOTSUPP)
355 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
/*
 * Create a socket, bind it to the loopback address with xbind(), and try
 * to insert into @mapfd; the update is expected to fail with EOPNOTSUPP,
 * anything else is reported through FAIL_ERRNO().
 *
 * NOTE(review): the value inserted is presumably the bound socket's fd -
 * confirm.
 */
360 static void test_insert_bound(int family, int sotype, int mapfd)
362 struct sockaddr_storage addr;
368 init_addr_loopback(family, &addr, &len);
370 s = xsocket(family, sotype, 0);
374 err = xbind(s, sockaddr(&addr), len);
380 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
381 if (!err || errno != EOPNOTSUPP)
382 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
/*
 * Obtain a socket from socket_loopback() and insert into @mapfd with
 * BPF_NOEXIST. xbpf_map_update_elem() reports a failed update itself.
 */
387 static void test_insert(int family, int sotype, int mapfd)
393 s = socket_loopback(family, sotype);
399 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
/*
 * Obtain a socket from socket_loopback(), insert into @mapfd with
 * BPF_NOEXIST, then delete the same key again. Both map operations use the
 * x*() wrappers, which report failures themselves.
 */
403 static void test_delete_after_insert(int family, int sotype, int mapfd)
409 s = socket_loopback(family, sotype);
415 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
416 xbpf_map_delete_elem(mapfd, &key);
/*
 * Insert a socket from socket_loopback() into @mapfd, then check that a
 * subsequent bpf_map_delete_elem() on the same key fails with either
 * EINVAL or ENOENT. A successful delete, or any other errno, is reported
 * through FAIL_ERRNO().
 *
 * NOTE(review): per the function name the socket is presumably closed
 * between the insert and the delete; that step is not visible here.
 */
420 static void test_delete_after_close(int family, int sotype, int mapfd)
426 s = socket_loopback(family, sotype);
432 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
437 err = bpf_map_delete_elem(mapfd, &key);
438 if (!err || (errno != EINVAL && errno != ENOENT))
439 /* SOCKMAP and SOCKHASH return different error codes */
440 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
/*
 * Insert a socket from socket_loopback() into @mapfd, read the socket's
 * SO_COOKIE with getsockopt(), then look the key up again and require the
 * value returned by the lookup to equal that cookie. A mismatch is
 * reported through FAIL() with both values printed in hex.
 */
443 static void test_lookup_after_insert(int family, int sotype, int mapfd)
450 s = socket_loopback(family, sotype);
456 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
458 len = sizeof(cookie);
459 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
461 xbpf_map_lookup_elem(mapfd, &key, &value);
463 if (value != cookie) {
464 FAIL("map_lookup: have %#llx, want %#llx",
465 (unsigned long long)value, (unsigned long long)cookie);
/*
 * Insert a socket from socket_loopback() into @mapfd, delete the key, and
 * check that a following lookup fails with ENOENT. A successful lookup, or
 * any other errno, is reported through FAIL_ERRNO().
 */
471 static void test_lookup_after_delete(int family, int sotype, int mapfd)
477 s = socket_loopback(family, sotype);
483 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
484 xbpf_map_delete_elem(mapfd, &key);
487 err = bpf_map_lookup_elem(mapfd, &key, &value);
488 if (!err || errno != ENOENT)
489 FAIL_ERRNO("map_lookup: expected ENOENT");
/*
 * Check lookups on a map whose value size is sizeof(value32).
 *
 * The @mapfd argument is overwritten with a freshly created
 * BPF_MAP_TYPE_SOCKMAP (key size sizeof(key), value size sizeof(value32),
 * one entry). After inserting into that map, a lookup is expected to fail
 * with ENOSPC; a successful lookup, or any other errno, is reported
 * through FAIL_ERRNO().
 */
494 static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
499 s = socket_loopback(family, sotype);
503 mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
504 sizeof(value32), 1, 0);
506 FAIL_ERRNO("map_create");
512 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
515 err = bpf_map_lookup_elem(mapfd, &key, &value32);
516 if (!err || errno != ENOSPC)
517 FAIL_ERRNO("map_lookup: expected ENOSPC");
/*
 * Obtain two sockets from socket_loopback(), insert into @mapfd with
 * BPF_NOEXIST, then update with BPF_EXIST.
 *
 * NOTE(review): key/value assignments are not visible here; presumably the
 * first update stores s1 and the second replaces it with s2 under the same
 * key - confirm.
 */
524 static void test_update_existing(int family, int sotype, int mapfd)
530 s1 = socket_loopback(family, sotype);
534 s2 = socket_loopback(family, sotype);
540 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
543 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
549 /* Exercise the code path where we destroy child sockets that never
550 * got accept()'ed, aka orphans, when parent socket gets closed.
552 static void test_destroy_orphan_child(int family, int sotype, int mapfd)
554 struct sockaddr_storage addr;
560 s = socket_loopback(family, sotype);
565 err = xgetsockname(s, sockaddr(&addr), &len);
571 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
573 c = xsocket(family, sotype, 0);
577 xconnect(c, sockaddr(&addr), len);
583 /* Perform a passive open after removing listening socket from SOCKMAP
584 * to ensure that callbacks get restored properly.
586 static void test_clone_after_delete(int family, int sotype, int mapfd)
588 struct sockaddr_storage addr;
594 s = socket_loopback(family, sotype);
599 err = xgetsockname(s, sockaddr(&addr), &len);
605 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
606 xbpf_map_delete_elem(mapfd, &key);
608 c = xsocket(family, sotype, 0);
612 xconnect(c, sockaddr(&addr), len);
618 /* Check that child socket that got created while parent was in a
619 * SOCKMAP, but got accept()'ed only after the parent has been removed
620 * from SOCKMAP, gets cloned without parent psock state or callbacks.
622 static void test_accept_after_delete(int family, int sotype, int mapfd)
624 struct sockaddr_storage addr;
630 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
635 err = xgetsockname(s, sockaddr(&addr), &len);
640 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
644 c = xsocket(family, sotype, 0);
648 /* Create child while parent is in sockmap */
649 err = xconnect(c, sockaddr(&addr), len);
653 /* Remove parent from sockmap */
654 err = xbpf_map_delete_elem(mapfd, &zero);
658 p = xaccept_nonblock(s, NULL, NULL);
662 /* Check that child sk_user_data is not set */
664 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
673 /* Check that child socket that got created and accepted while parent
674 * was in a SOCKMAP is cloned without parent psock state or callbacks.
676 static void test_accept_before_delete(int family, int sotype, int mapfd)
678 struct sockaddr_storage addr;
679 const u32 zero = 0, one = 1;
684 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
689 err = xgetsockname(s, sockaddr(&addr), &len);
694 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
698 c = xsocket(family, sotype, 0);
702 /* Create & accept child while parent is in sockmap */
703 err = xconnect(c, sockaddr(&addr), len);
707 p = xaccept_nonblock(s, NULL, NULL);
711 /* Check that child sk_user_data is not set */
713 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
/*
 * State shared between a test and its helper thread. nr_iter bounds the
 * helper's loop; the helper sets ->done with WRITE_ONCE() when it finishes
 * and is_thread_done() polls it with READ_ONCE(). The remaining members
 * are declared in lines not visible here.
 */
722 struct connect_accept_ctx {
725 unsigned int nr_iter;
/* Report whether the helper thread has flagged completion in ctx->done. */
728 static bool is_thread_done(struct connect_accept_ctx *ctx)
730 return READ_ONCE(ctx->done);
/*
 * Thread body: run ctx->nr_iter connect/accept cycles against socket s.
 *
 * The address, domain (SO_DOMAIN) and type (SO_TYPE) of s are queried
 * first. Each iteration creates a client socket of that family and type,
 * connects it to the queried address and accepts the connection on s with
 * xaccept_nonblock(). ctx->done is set to 1 with WRITE_ONCE() at the end.
 *
 * NOTE(review): where s comes from is not visible here; presumably it is
 * a member of the context passed in @arg - confirm.
 */
733 static void *connect_accept_thread(void *arg)
735 struct connect_accept_ctx *ctx = arg;
736 struct sockaddr_storage addr;
737 int family, socktype;
744 err = xgetsockname(s, sockaddr(&addr), &len);
748 len = sizeof(family);
749 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
753 len = sizeof(socktype);
754 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
758 for (i = 0; i < ctx->nr_iter; i++) {
761 c = xsocket(family, socktype, 0);
765 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
771 p = xaccept_nonblock(s, NULL, NULL);
781 WRITE_ONCE(ctx->done, 1);
/*
 * Race map insert/delete against incoming connections.
 *
 * A non-blocking loopback listener is created and connect_accept_thread()
 * is started. Until that thread flags completion, this function keeps
 * inserting and deleting the entry at key zero in @mapfd, then joins the
 * thread.
 *
 * NOTE(review): the initialisation of ctx beyond { 0 } and of value is
 * not visible here; presumably the listener is what gets inserted.
 */
785 static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
787 struct connect_accept_ctx ctx = { 0 };
788 struct sockaddr_storage addr;
795 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
800 err = xgetsockname(s, sockaddr(&addr), &len);
807 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
812 while (!is_thread_done(&ctx)) {
813 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
817 err = xbpf_map_delete_elem(mapfd, &zero);
822 xpthread_join(t, NULL);
/*
 * Thread body: for ctx->nr_iter iterations, call connect() on s with an
 * AF_UNSPEC address, then set ctx->done to 1 with WRITE_ONCE().
 *
 * NOTE(review): the rest of the loop body is not visible here; judging by
 * the function name a listen() call presumably precedes the connect() in
 * each iteration - confirm.
 */
827 static void *listen_thread(void *arg)
829 struct sockaddr unspec = { AF_UNSPEC };
830 struct connect_accept_ctx *ctx = arg;
835 for (i = 0; i < ctx->nr_iter; i++) {
839 err = xconnect(s, &unspec, sizeof(unspec));
844 WRITE_ONCE(ctx->done, 1);
/*
 * Race map insert/delete against listen_thread().
 *
 * A socket is created with SO_REUSEADDR set and listen_thread() is
 * started. Until the thread flags completion, this function alternates
 * between inserting and deleting key zero in @mapfd. An insert may fail
 * with EOPNOTSUPP and a delete with EINVAL or ENOENT; any other error is
 * reported through FAIL_ERRNO(). The thread is joined at the end.
 */
848 static void test_race_insert_listen(int family, int socktype, int mapfd)
850 struct connect_accept_ctx ctx = { 0 };
857 s = xsocket(family, socktype, 0);
861 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
868 err = pthread_create(&t, NULL, listen_thread, &ctx);
873 while (!is_thread_done(&ctx)) {
874 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
875 /* Expecting EOPNOTSUPP before listen() */
876 if (err && errno != EOPNOTSUPP) {
877 FAIL_ERRNO("map_update");
881 err = bpf_map_delete_elem(mapfd, &zero);
882 /* Expecting no entry after unhash on connect(AF_UNSPEC) */
883 if (err && errno != EINVAL && errno != ENOENT) {
884 FAIL_ERRNO("map_delete");
889 xpthread_join(t, NULL);
/*
 * Reset verdict counters by writing zero into @mapfd twice with BPF_ANY.
 *
 * NOTE(review): the key assignments are not visible here; presumably each
 * update targets a different verdict slot - confirm.
 */
894 static void zero_verdict_count(int mapfd)
896 unsigned int zero = 0;
900 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
902 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
910 static const char *redir_mode_str(enum redir_mode mode)
/*
 * Drive one redirect scenario between connected sockets.
 *
 * Resets the verdict counters, opens a non-blocking loopback listener and
 * establishes two connections through it (c0/p0 and c1/p1), then stores
 * two entries in @sock_mapfd. One byte is written on c1 for REDIR_INGRESS
 * or on p1 otherwise, after which the counter looked up in @verd_mapfd is
 * expected to be 1 ("want pass count 1"). Problems with the final read are
 * reported with the "read" / "incomplete read" messages.
 *
 * NOTE(review): which sockets are stored in the map and which socket the
 * final read uses are decided in lines not visible here.
 */
922 static void redir_to_connected(int family, int sotype, int sock_mapfd,
923 int verd_mapfd, enum redir_mode mode)
925 const char *log_prefix = redir_mode_str(mode);
926 struct sockaddr_storage addr;
927 int s, c0, c1, p0, p1;
935 zero_verdict_count(verd_mapfd);
937 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
942 err = xgetsockname(s, sockaddr(&addr), &len);
946 c0 = xsocket(family, sotype, 0);
949 err = xconnect(c0, sockaddr(&addr), len);
953 p0 = xaccept_nonblock(s, NULL, NULL);
957 c1 = xsocket(family, sotype, 0);
960 err = xconnect(c1, sockaddr(&addr), len);
964 p1 = xaccept_nonblock(s, NULL, NULL);
970 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
976 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
980 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
982 FAIL_ERRNO("%s: write", log_prefix);
984 FAIL("%s: incomplete write", log_prefix);
989 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
993 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
997 FAIL_ERRNO("%s: read", log_prefix);
999 FAIL("%s: incomplete read", log_prefix);
/*
 * Attach the skeleton's stream parser and stream verdict programs to
 * @inner_map, run redir_to_connected() against it with the skeleton's
 * verdict_map, then detach both programs again (verdict first, then
 * parser).
 *
 * NOTE(review): the redirect mode passed to redir_to_connected() is on a
 * line not visible here.
 */
1013 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
1014 struct bpf_map *inner_map, int family,
1017 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1018 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1019 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1020 int sock_map = bpf_map__fd(inner_map);
1023 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1026 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1030 redir_to_connected(family, sotype, sock_map, verdict_map,
1033 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1035 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
/*
 * Attach the skeleton's msg verdict program to @inner_map as
 * BPF_SK_MSG_VERDICT, run redir_to_connected() in REDIR_EGRESS mode with
 * the skeleton's verdict_map, then detach the program again.
 */
1038 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
1039 struct bpf_map *inner_map, int family,
1042 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1043 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1044 int sock_map = bpf_map__fd(inner_map);
1047 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1051 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1053 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
/*
 * Drive one redirect scenario in which a drop verdict is expected.
 *
 * Resets the verdict counters, opens a non-blocking loopback listener,
 * establishes one connection through it (c/p) and stores two entries in
 * @sock_mapfd. One byte is written on c for REDIR_INGRESS or on p
 * otherwise; a write failing with EACCES is tolerated. The counter looked
 * up in @verd_mapfd is then expected to be 1 ("want drop count 1").
 *
 * NOTE(review): which sockets are stored in the map is decided in lines
 * not visible here; going by the function name one of them is presumably
 * the listener - confirm.
 */
1056 static void redir_to_listening(int family, int sotype, int sock_mapfd,
1057 int verd_mapfd, enum redir_mode mode)
1059 const char *log_prefix = redir_mode_str(mode);
1060 struct sockaddr_storage addr;
1061 int s, c, p, err, n;
1067 zero_verdict_count(verd_mapfd);
1069 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
1074 err = xgetsockname(s, sockaddr(&addr), &len);
1078 c = xsocket(family, sotype, 0);
1081 err = xconnect(c, sockaddr(&addr), len);
1085 p = xaccept_nonblock(s, NULL, NULL);
1091 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1097 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1101 n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
1102 if (n < 0 && errno != EACCES)
1103 FAIL_ERRNO("%s: write", log_prefix);
1105 FAIL("%s: incomplete write", log_prefix);
1110 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
1114 FAIL("%s: want drop count 1, have %d", log_prefix, drop);
/*
 * Attach the skeleton's stream parser and stream verdict programs to
 * @inner_map, run redir_to_listening() against it with the skeleton's
 * verdict_map, then detach both programs again (verdict first, then
 * parser).
 *
 * NOTE(review): the redirect mode passed to redir_to_listening() is on a
 * line not visible here.
 */
1124 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
1125 struct bpf_map *inner_map, int family,
1128 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1129 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1130 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1131 int sock_map = bpf_map__fd(inner_map);
1134 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1137 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1141 redir_to_listening(family, sotype, sock_map, verdict_map,
1144 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1146 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
/*
 * Attach the skeleton's msg verdict program to @inner_map as
 * BPF_SK_MSG_VERDICT, run redir_to_listening() in REDIR_EGRESS mode with
 * the skeleton's verdict_map, then detach the program again.
 */
1149 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
1150 struct bpf_map *inner_map, int family,
1153 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1154 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1155 int sock_map = bpf_map__fd(inner_map);
1158 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1162 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1164 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
/*
 * Reuseport selection scenario in which a pass verdict is expected.
 *
 * Resets the verdict counters, creates a non-blocking socket through
 * socket_loopback_reuseport(), stores an entry in @sock_map and connects
 * a client c to the socket's address. For SOCK_STREAM the connection is
 * accepted with xaccept_nonblock(); the send on c / receive on s pair is
 * presumably the non-stream path (the branch structure is not fully
 * visible here - confirm). The counter looked up in @verd_map is expected
 * to be 1 ("want pass count 1").
 */
1167 static void test_reuseport_select_listening(int family, int sotype,
1168 int sock_map, int verd_map,
1171 struct sockaddr_storage addr;
1178 zero_verdict_count(verd_map);
1180 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
1186 err = xgetsockname(s, sockaddr(&addr), &len);
1192 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1196 c = xsocket(family, sotype, 0);
1199 err = xconnect(c, sockaddr(&addr), len);
1203 if (sotype == SOCK_STREAM) {
1206 p = xaccept_nonblock(s, NULL, NULL);
1214 n = xsend(c, &b, sizeof(b), 0);
1218 n = xrecv_nonblock(s, &b, sizeof(b), 0);
1224 err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1228 FAIL("want pass count 1, have %d", pass);
/*
 * Reuseport selection scenario in which a drop verdict is expected.
 *
 * A reuseport socket s is created and an entry is first stored in
 * @sock_map with BPF_NOEXIST. A client c0 connects to s; its peer p0 is
 * either accepted from s (SOCK_STREAM) or a new socket connected back to
 * c0's address. The map entry is then replaced with BPF_EXIST. A second
 * client c1 targets s with a plain connect(); for SOCK_DGRAM a buffer is
 * additionally sent and a reply awaited with recv_timeout(). The attempt
 * must fail with ECONNREFUSED, and the counter looked up in @verd_map is
 * expected to be 1 ("want drop count 1").
 *
 * NOTE(review): the values stored in the map are assigned in lines not
 * visible here; per the inline comments the second one is a connected
 * socket.
 */
1236 static void test_reuseport_select_connected(int family, int sotype,
1237 int sock_map, int verd_map,
1240 struct sockaddr_storage addr;
1241 int s, c0, c1, p0, err;
1247 zero_verdict_count(verd_map);
1249 s = socket_loopback_reuseport(family, sotype, reuseport_prog);
1253 /* Populate sock_map[0] to avoid ENOENT on first connection */
1256 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1261 err = xgetsockname(s, sockaddr(&addr), &len);
1265 c0 = xsocket(family, sotype, 0);
1269 err = xconnect(c0, sockaddr(&addr), len);
1273 if (sotype == SOCK_STREAM) {
1274 p0 = xaccept_nonblock(s, NULL, NULL);
1278 p0 = xsocket(family, sotype, 0);
1283 err = xgetsockname(c0, sockaddr(&addr), &len);
1287 err = xconnect(p0, sockaddr(&addr), len);
1292 /* Update sock_map[0] to redirect to a connected socket */
1295 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
1299 c1 = xsocket(family, sotype, 0);
1304 err = xgetsockname(s, sockaddr(&addr), &len);
1309 err = connect(c1, sockaddr(&addr), len);
1310 if (sotype == SOCK_DGRAM) {
1314 n = xsend(c1, &b, sizeof(b), 0);
1318 n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1321 if (!err || errno != ECONNREFUSED)
1322 FAIL_ERRNO("connect: expected ECONNREFUSED");
1325 err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1329 FAIL("want drop count 1, have %d", drop);
1341 /* Check that redirecting across reuseport groups is not allowed. */
/*
 * Two reuseport sockets s1 and s2 are created and two entries are stored
 * in @sock_map. A client c then targets s2's address with a plain
 * connect(); for SOCK_DGRAM a buffer is additionally sent and a reply
 * awaited with recv_timeout(). The attempt must fail with ECONNREFUSED,
 * and the counter looked up in @verd_map is expected to be 1
 * ("want drop count 1").
 */
1342 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
1343 int verd_map, int reuseport_prog)
1345 struct sockaddr_storage addr;
1352 zero_verdict_count(verd_map);
1354 /* Create two listeners, each in its own reuseport group */
1355 s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1359 s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
1365 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1371 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1373 /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
1375 err = xgetsockname(s2, sockaddr(&addr), &len);
1379 c = xsocket(family, sotype, 0);
1383 err = connect(c, sockaddr(&addr), len);
1384 if (sotype == SOCK_DGRAM) {
1388 n = xsend(c, &b, sizeof(b), 0);
1392 n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
1395 if (!err || errno != ECONNREFUSED) {
1396 FAIL_ERRNO("connect: expected ECONNREFUSED");
1400 /* Expect drop, can't redirect outside of reuseport group */
1402 err = xbpf_map_lookup_elem(verd_map, &key, &drop);
1406 FAIL("want drop count 1, have %d", drop);
/*
 * Table-entry helper: expands to the function pointer, its stringified
 * name, and any extra initializers supplied by the caller (the test
 * tables in this file pass a socket type there).
 */
1416 #define TEST(fn, ...) \
1418 fn, #fn, __VA_ARGS__ \
/*
 * Empty @map between subtests by deleting every key from 0 up to the
 * map's max_entries. A delete failing with EINVAL or ENOENT is tolerated;
 * any other error is reported through FAIL_ERRNO().
 */
1421 static void test_ops_cleanup(const struct bpf_map *map)
1423 const struct bpf_map_def *def;
1427 def = bpf_map__def(map);
1428 mapfd = bpf_map__fd(map);
1430 for (key = 0; key < def->max_entries; key++) {
1431 err = bpf_map_delete_elem(mapfd, &key);
1432 if (err && errno != EINVAL && errno != ENOENT)
1433 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1437 static const char *family_str(sa_family_t family)
/*
 * Map the type recorded in @map's definition to a string, with dedicated
 * cases for BPF_MAP_TYPE_SOCKMAP and BPF_MAP_TYPE_SOCKHASH. The result is
 * used as the leading component of subtest names.
 *
 * NOTE(review): the strings returned for each case are on lines not
 * visible here.
 */
1449 static const char *map_type_str(const struct bpf_map *map)
1451 const struct bpf_map_def *def;
1453 def = bpf_map__def(map);
1457 switch (def->type) {
1458 case BPF_MAP_TYPE_SOCKMAP:
1460 case BPF_MAP_TYPE_SOCKHASH:
1467 static const char *sotype_str(int sotype)
/*
 * Run the table of map-operation tests against @map for one
 * family/socket-type combination.
 *
 * For every table entry a subtest name "<map> <family> <sotype> <test>"
 * is formatted. Entries that name a socket type different from @sotype,
 * and subtests for which test__start_subtest() returns false, are not
 * run. After each test that runs, test_ops_cleanup() empties the map.
 */
1479 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1480 int family, int sotype)
1482 const struct op_test {
1483 void (*fn)(int family, int sotype, int mapfd);
1488 TEST(test_insert_invalid),
1489 TEST(test_insert_opened),
1490 TEST(test_insert_bound, SOCK_STREAM),
1493 TEST(test_delete_after_insert),
1494 TEST(test_delete_after_close),
1496 TEST(test_lookup_after_insert),
1497 TEST(test_lookup_after_delete),
1498 TEST(test_lookup_32_bit_value),
1500 TEST(test_update_existing),
1501 /* races with insert/delete */
1502 TEST(test_destroy_orphan_child, SOCK_STREAM),
1503 TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1504 TEST(test_race_insert_listen, SOCK_STREAM),
1506 TEST(test_clone_after_delete, SOCK_STREAM),
1507 TEST(test_accept_after_delete, SOCK_STREAM),
1508 TEST(test_accept_before_delete, SOCK_STREAM),
1510 const char *family_name, *map_name, *sotype_name;
1511 const struct op_test *t;
1512 char s[MAX_TEST_NAME];
1515 family_name = family_str(family);
1516 map_name = map_type_str(map);
1517 sotype_name = sotype_str(sotype);
1518 map_fd = bpf_map__fd(map);
1520 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1521 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1522 sotype_name, t->name);
1524 if (t->sotype != 0 && t->sotype != sotype)
1527 if (!test__start_subtest(s))
1530 t->fn(family, sotype, map_fd);
1531 test_ops_cleanup(map);
/*
 * Run the table of skb/msg redirect tests against @map for one
 * family/socket-type combination. A subtest name is formatted for every
 * entry, and the entry's function is called with the skeleton, the map,
 * @family and @sotype unless test__start_subtest() returns false.
 */
1535 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1536 int family, int sotype)
1538 const struct redir_test {
1539 void (*fn)(struct test_sockmap_listen *skel,
1540 struct bpf_map *map, int family, int sotype);
1543 TEST(test_skb_redir_to_connected),
1544 TEST(test_skb_redir_to_listening),
1545 TEST(test_msg_redir_to_connected),
1546 TEST(test_msg_redir_to_listening),
1548 const char *family_name, *map_name;
1549 const struct redir_test *t;
1550 char s[MAX_TEST_NAME];
1552 family_name = family_str(family);
1553 map_name = map_type_str(map);
1555 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1556 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1559 if (!test__start_subtest(s))
1562 t->fn(skel, map, family, sotype);
/*
 * Run the table of reuseport tests against @map for one
 * family/socket-type combination.
 *
 * Each test receives the fd of @map, the fd of the skeleton's verdict_map
 * and the fd of the skeleton's prog_reuseport program. A subtest name
 * "<map> <family> <sotype> <test>" is formatted for every entry. Entries
 * that name a socket type different from @sotype, and subtests for which
 * test__start_subtest() returns false, are not run.
 */
1566 static void test_reuseport(struct test_sockmap_listen *skel,
1567 struct bpf_map *map, int family, int sotype)
1569 const struct reuseport_test {
1570 void (*fn)(int family, int sotype, int socket_map,
1571 int verdict_map, int reuseport_prog);
1575 TEST(test_reuseport_select_listening),
1576 TEST(test_reuseport_select_connected),
1577 TEST(test_reuseport_mixed_groups),
1579 int socket_map, verdict_map, reuseport_prog;
1580 const char *family_name, *map_name, *sotype_name;
1581 const struct reuseport_test *t;
1582 char s[MAX_TEST_NAME];
1584 family_name = family_str(family);
1585 map_name = map_type_str(map);
1586 sotype_name = sotype_str(sotype);
1588 socket_map = bpf_map__fd(map);
1589 verdict_map = bpf_map__fd(skel->maps.verdict_map);
1590 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1592 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1593 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1594 sotype_name, t->name);
1596 if (t->sotype != 0 && t->sotype != sotype)
1599 if (!test__start_subtest(s))
1602 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
/*
 * Drive one redirect scenario between connected sockets built without
 * accept().
 *
 * Resets the verdict counters and builds two pairs of non-blocking
 * sockets, p0/c0 and p1/c1: each c connects to its p's address and each p
 * connects back to its c's address. Two entries are stored in
 * @sock_mapfd, one byte is written on c1, and the counter looked up in
 * @verd_mapfd is expected to be 1 ("want pass count 1"). One byte is then
 * read from p0 for REDIR_INGRESS or from c0 otherwise; an EAGAIN result
 * is retried while the retries counter lasts.
 *
 * NOTE(review): which sockets are stored in the map is decided in lines
 * not visible here.
 */
1606 static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
1607 int verd_mapfd, enum redir_mode mode)
1609 const char *log_prefix = redir_mode_str(mode);
1610 struct sockaddr_storage addr;
1620 zero_verdict_count(verd_mapfd);
1622 p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
1626 err = xgetsockname(p0, sockaddr(&addr), &len);
1630 c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
1633 err = xconnect(c0, sockaddr(&addr), len);
1636 err = xgetsockname(c0, sockaddr(&addr), &len);
1639 err = xconnect(p0, sockaddr(&addr), len);
1643 p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
1646 err = xgetsockname(p1, sockaddr(&addr), &len);
1650 c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
1653 err = xconnect(c1, sockaddr(&addr), len);
1656 err = xgetsockname(c1, sockaddr(&addr), &len);
1659 err = xconnect(p1, sockaddr(&addr), len);
1665 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1671 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
1675 n = write(c1, "a", 1);
1677 FAIL_ERRNO("%s: write", log_prefix);
1679 FAIL("%s: incomplete write", log_prefix);
1684 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1688 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1691 n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
1693 if (errno == EAGAIN && retries--)
1695 FAIL_ERRNO("%s: read", log_prefix);
1698 FAIL("%s: incomplete read", log_prefix);
/*
 * Attach the skeleton's prog_skb_verdict to @inner_map as
 * BPF_SK_SKB_VERDICT and run udp_redir_to_connected() twice on SOCK_DGRAM
 * sockets: once with skel->bss->test_ingress set to false and once with
 * it set to true. The program is detached afterwards.
 *
 * NOTE(review): the redirect mode passed on each call is on lines not
 * visible here.
 */
1710 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1711 struct bpf_map *inner_map, int family)
1713 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1714 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1715 int sock_map = bpf_map__fd(inner_map);
1718 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1722 skel->bss->test_ingress = false;
1723 udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1725 skel->bss->test_ingress = true;
1726 udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1729 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
/*
 * Register the UDP redirect subtest, named "<map> <family> <this
 * function's name>", and run udp_skb_redir_to_connected() for it. The
 * check on test__start_subtest() guards that call.
 */
1732 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1735 const char *family_name, *map_name;
1736 char s[MAX_TEST_NAME];
1738 family_name = family_str(family);
1739 map_name = map_type_str(map);
1740 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1741 if (!test__start_subtest(s))
1743 udp_skb_redir_to_connected(skel, map, family);
/*
 * Run every test group against @map for one address family: map
 * operations and reuseport tests for both SOCK_STREAM and SOCK_DGRAM,
 * redirect tests for SOCK_STREAM, and the UDP redirect test.
 */
1746 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
1749 test_ops(skel, map, family, SOCK_STREAM);
1750 test_ops(skel, map, family, SOCK_DGRAM);
1751 test_redir(skel, map, family, SOCK_STREAM);
1752 test_reuseport(skel, map, family, SOCK_STREAM);
1753 test_reuseport(skel, map, family, SOCK_DGRAM);
1754 test_udp_redir(skel, map, family);
/*
 * Test entry point. Opens and loads the BPF skeleton (reporting
 * "skeleton open/load failed" through FAIL() on error), then runs the
 * full suite for AF_INET and AF_INET6 against the skeleton's sock_map
 * with skel->bss->test_sockmap set to true, and again against sock_hash
 * with it set to false. The skeleton is destroyed at the end.
 */
1757 void test_sockmap_listen(void)
1759 struct test_sockmap_listen *skel;
1761 skel = test_sockmap_listen__open_and_load();
1763 FAIL("skeleton open/load failed");
1767 skel->bss->test_sockmap = true;
1768 run_tests(skel, skel->maps.sock_map, AF_INET);
1769 run_tests(skel, skel->maps.sock_map, AF_INET6);
1771 skel->bss->test_sockmap = false;
1772 run_tests(skel, skel->maps.sock_hash, AF_INET);
1773 run_tests(skel, skel->maps.sock_hash, AF_INET6);
1775 test_sockmap_listen__destroy(skel);