1 // SPDX-License-Identifier: GPL-2.0
11 #include <linux/netlink.h>
12 #include <linux/rtnetlink.h>
13 #include <netinet/if_ether.h>
14 #include <netinet/ip.h>
15 #include <netinet/ip6.h>
16 #include <netinet/udp.h>
21 #include <sys/ioctl.h>
22 #include <sys/socket.h>
25 #include <sys/types.h>
/* Upper bound on datagram size used when sizing buffers and test cases. */
29 #define ETH_MAX_MTU 0xFFFFU
/* UDP segment-size option; used in this file both as a setsockopt() name
 * and as a control-message type.
 */
33 #define UDP_SEGMENT 103
/* Fallback definition when the included headers do not provide it. */
36 #ifndef UDP_MAX_SEGMENTS
37 #define UDP_MAX_SEGMENTS (1 << 6UL)
/* MTU value that the tests configure on the device or on the route. */
40 #define CONST_MTU_TEST 1500
/* Combined size of the IP header (v4 or v6) and the UDP header. */
42 #define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
43 #define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
/* Payload bytes left in a CONST_MTU_TEST-sized packet after the headers. */
45 #define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
46 #define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
/* Number of whole MSS-sized segments contained in ETH_MAX_MTU bytes. */
48 #define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
49 #define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
/* Test configuration. cfg_do_connected, cfg_do_connectionless,
 * cfg_do_msgmore, cfg_do_setsockopt and cfg_specific_test_id are assigned
 * in parse_opts().
 */
51 static bool cfg_do_ipv4;
52 static bool cfg_do_ipv6;
53 static bool cfg_do_connected;
54 static bool cfg_do_connectionless;
55 static bool cfg_do_msgmore;
56 static bool cfg_do_setsockopt;
/* -1 means "run every test case"; otherwise run_all() runs only this index */
57 static int cfg_specific_test_id = -1;
/* Interface whose MTU set_device_mtu() reads and changes */
59 static const char cfg_ifname[] = "lo";
/* UDP port placed in the test socket addresses (converted with htons()) */
60 static unsigned short cfg_port = 9000;
/* Scratch buffer; recv_one() receives datagrams into it */
62 static char buf[ETH_MAX_MTU];
/* Parameters of one test case: what to send and what the receiver must see. */
65 int tlen; /* send() buffer size, may exceed mss */
66 bool tfail; /* send() call is expected to fail */
67 int gso_len; /* mss after applying gso */
68 int r_num_mss; /* recv(): number of calls of full mss */
69 int r_len_last; /* recv(): size of last non-mss dgram, if any */
/* Test destination addresses: the IPv6 loopback address, and the IPv4
 * loopback address plus two (i.e. 127.0.0.3).
 */
72 const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
73 const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
/* IPv4 test table. run_all() walks it until it reaches an entry whose tlen
 * is zero. Sizes are expressed relative to CONST_MSS_V4 and ETH_MAX_MTU.
 */
75 struct testcase testcases_v4[] = {
77 /* no GSO: send a single byte */
82 /* no GSO: send a single MSS */
87 /* no GSO: send a single MSS + 1B: fail */
88 .tlen = CONST_MSS_V4 + 1,
92 /* send a single MSS: will fail with GSO, because the segment
93 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
96 .gso_len = CONST_MSS_V4,
101 /* send a single MSS + 1B */
102 .tlen = CONST_MSS_V4 + 1,
103 .gso_len = CONST_MSS_V4,
108 /* send exactly 2 MSS */
109 .tlen = CONST_MSS_V4 * 2,
110 .gso_len = CONST_MSS_V4,
114 /* send 2 MSS + 1B */
115 .tlen = (CONST_MSS_V4 * 2) + 1,
116 .gso_len = CONST_MSS_V4,
/* largest whole multiple of the MSS that does not exceed ETH_MAX_MTU */
122 .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
123 .gso_len = CONST_MSS_V4,
124 .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
/* ETH_MAX_MTU minus headers: full segments plus a shorter trailing one */
129 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
130 .gso_len = CONST_MSS_V4,
131 .r_num_mss = CONST_MAX_SEGS_V4,
132 .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
133 (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
136 /* send MAX + 1: fail */
137 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
138 .gso_len = CONST_MSS_V4,
142 /* send a single 1B MSS: will fail, see single MSS above */
149 /* send 2 1B segments */
155 /* send 2B + 2B + 1B segments */
162 /* send max number of min sized segments */
163 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
165 .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
168 /* send max number + 1 of min sized segments: fail */
169 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
/* Size limit used by the IPv6 test cases: ETH_MAX_MTU plus the size of the
 * IPv6 header.
 */
179 #define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
/* IPv6 test table. run_all() walks it until it reaches an entry whose tlen
 * is zero. Sizes are expressed relative to CONST_MSS_V6 and IP6_MAX_MTU.
 */
182 struct testcase testcases_v6[] = {
184 /* no GSO: send a single byte */
189 /* no GSO: send a single MSS */
190 .tlen = CONST_MSS_V6,
194 /* no GSO: send a single MSS + 1B: fail */
195 .tlen = CONST_MSS_V6 + 1,
199 /* send a single MSS: will fail with GSO, because the segment
200 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
202 .tlen = CONST_MSS_V6,
203 .gso_len = CONST_MSS_V6,
208 /* send a single MSS + 1B */
209 .tlen = CONST_MSS_V6 + 1,
210 .gso_len = CONST_MSS_V6,
215 /* send exactly 2 MSS */
216 .tlen = CONST_MSS_V6 * 2,
217 .gso_len = CONST_MSS_V6,
221 /* send 2 MSS + 1B */
222 .tlen = (CONST_MSS_V6 * 2) + 1,
223 .gso_len = CONST_MSS_V6,
/* largest whole multiple of the MSS that does not exceed IP6_MAX_MTU */
229 .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
230 .gso_len = CONST_MSS_V6,
231 .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
/* IP6_MAX_MTU minus headers: full segments plus a shorter trailing one */
236 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
237 .gso_len = CONST_MSS_V6,
238 .r_num_mss = CONST_MAX_SEGS_V6,
239 .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
240 (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
243 /* send MAX + 1: fail */
244 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
245 .gso_len = CONST_MSS_V6,
249 /* send a single 1B MSS: will fail, see single MSS above */
256 /* send 2 1B segments */
262 /* send 2B + 2B + 1B segments */
269 /* send max number of min sized segments */
270 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
272 .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
275 /* send max number + 1 of min sized segments: fail */
276 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
/* Query the MTU of interface @ifname by issuing the SIOCGIFMTU ioctl on @fd.
 * Terminates the program through error() if the ioctl fails.
 */
285 static unsigned int get_device_mtu(int fd, const char *ifname)
289 memset(&ifr, 0, sizeof(ifr));
/* NOTE(review): unbounded copy; assumes ifname fits in ifr.ifr_name */
291 strcpy(ifr.ifr_name, ifname);
293 if (ioctl(fd, SIOCGIFMTU, &ifr))
294 error(1, errno, "ioctl get mtu");
/* Issue the SIOCSIFMTU ioctl on @fd for interface @ifname.
 * Terminates the program through error() if the ioctl fails.
 */
299 static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
303 memset(&ifr, 0, sizeof(ifr));
/* NOTE(review): unbounded copy; assumes ifname fits in ifr.ifr_name */
306 strcpy(ifr.ifr_name, ifname);
308 if (ioctl(fd, SIOCSIFMTU, &ifr))
309 error(1, errno, "ioctl set mtu");
/* Change the MTU of cfg_ifname to @mtu, logging the value read before and
 * after the change to stderr. @fd is the socket the ioctls are issued on.
 */
312 static void set_device_mtu(int fd, int mtu)
316 val = get_device_mtu(fd, cfg_ifname);
317 fprintf(stderr, "device mtu (orig): %u\n", val);
319 __set_device_mtu(fd, cfg_ifname, mtu);
/* read the MTU back to see what the device now reports */
320 val = get_device_mtu(fd, cfg_ifname);
/* NOTE(review): the message prints val (the value read back) rather than
 * the requested mtu -- confirm that is intended
 */
322 error(1, 0, "unable to set device mtu to %u\n", val);
324 fprintf(stderr, "device mtu (test): %u\n", val);
/* Set the path-MTU-discovery socket option on @fd to the "DO" mode:
 * IP_MTU_DISCOVER = IP_PMTUDISC_DO, or IPV6_MTU_DISCOVER = IPV6_PMTUDISC_DO
 * (presumably chosen by @is_ipv4). Terminates through error() on failure.
 */
327 static void set_pmtu_discover(int fd, bool is_ipv4)
329 int level, name, val;
333 name = IP_MTU_DISCOVER;
334 val = IP_PMTUDISC_DO;
337 name = IPV6_MTU_DISCOVER;
338 val = IPV6_PMTUDISC_DO;
341 if (setsockopt(fd, level, name, &val, sizeof(val)))
342 error(1, errno, "setsockopt path mtu");
/* Read the path MTU known to socket @fd with getsockopt(): IP_MTU at level
 * SOL_IP, or IPV6_MTU at level SOL_IPV6 (presumably chosen by @is_ipv4).
 * Logs the value to stderr; terminates through error() on failure.
 */
345 static unsigned int get_path_mtu(int fd, bool is_ipv4)
351 vallen = sizeof(mtu);
353 ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
355 ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
358 error(1, errno, "getsockopt mtu");
361 fprintf(stderr, "path mtu (read): %u\n", mtu);
365 /* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
/* Hand-builds a single RTM_NEWROUTE rtnetlink request for the test address
 * (addr4 or addr6, per @is_ipv4) carrying an MTU metric of @mtu, and sends
 * it over a NETLINK_ROUTE socket. Terminates through error() on failure.
 */
366 static void set_route_mtu(int mtu, bool is_ipv4)
368 struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
/* Room for the netlink header, the rtmsg and the three attributes built
 * below; the destination slot is sized for the larger IPv6 address.
 */
372 char data[NLMSG_ALIGN(sizeof(*nh)) +
373 NLMSG_ALIGN(sizeof(*rt)) +
374 NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
375 NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
376 NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
377 int fd, ret, alen, off = 0;
379 alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
381 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
383 error(1, errno, "socket netlink");
385 memset(data, 0, sizeof(data));
388 nh->nlmsg_type = RTM_NEWROUTE;
389 nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
390 off += NLMSG_ALIGN(sizeof(*nh));
/* route message header, placed right after the netlink header */
392 rt = (void *)(data + off);
393 rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
394 rt->rtm_table = RT_TABLE_MAIN;
/* prefix length in bits = whole address, i.e. a host route */
395 rt->rtm_dst_len = alen << 3;
396 rt->rtm_protocol = RTPROT_BOOT;
397 rt->rtm_scope = RT_SCOPE_UNIVERSE;
398 rt->rtm_type = RTN_UNICAST;
399 off += NLMSG_ALIGN(sizeof(*rt));
/* attribute 1: destination address */
401 rta = (void *)(data + off);
402 rta->rta_type = RTA_DST;
403 rta->rta_len = RTA_LENGTH(alen);
405 memcpy(RTA_DATA(rta), &addr4, alen);
407 memcpy(RTA_DATA(rta), &addr6, alen);
408 off += NLMSG_ALIGN(rta->rta_len);
/* attribute 2: output interface.
 * NOTE(review): the index is hard-coded to 1, assumed to be "lo" per the
 * trailing comment -- confirm on the target system
 */
410 rta = (void *)(data + off);
411 rta->rta_type = RTA_OIF;
412 rta->rta_len = RTA_LENGTH(sizeof(int));
413 *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
414 off += NLMSG_ALIGN(rta->rta_len);
416 /* MTU is a subtype in a metrics type */
417 rta = (void *)(data + off);
418 rta->rta_type = RTA_METRICS;
419 rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
420 off += NLMSG_ALIGN(rta->rta_len);
422 /* now fill MTU subtype. Note that it fits within above rta_len */
423 rta = (void *)(((char *) rta) + RTA_LENGTH(0));
424 rta->rta_type = RTAX_MTU;
425 rta->rta_len = RTA_LENGTH(sizeof(int));
426 *((int *)(RTA_DATA(rta))) = mtu;
/* off now equals the number of message bytes built above */
430 ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
/* NOTE(review): ret and off are int but are printed with %u */
432 error(1, errno, "send netlink: %uB != %uB\n", ret, off);
435 error(1, errno, "close netlink");
437 fprintf(stderr, "route mtu (test): %u\n", mtu);
/* Send @msg on @fd with sendmsg() using @flags and validate the outcome.
 * The errno values EMSGSIZE, ENOMEM and EINVAL are singled out (presumably
 * reported to the caller as an ordinary send failure rather than aborting);
 * other failures, a byte count different from the iovec length, or
 * unexpected msg_flags terminate the program through error().
 */
440 static bool __send_one(int fd, struct msghdr *msg, int flags)
444 ret = sendmsg(fd, msg, flags);
446 (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
449 error(1, errno, "sendmsg");
/* only the first iovec is considered.
 * NOTE(review): compares ret with a size_t and prints it with %lu; %zu
 * would be the portable conversion -- confirm
 */
450 if (ret != msg->msg_iov->iov_len)
451 error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
453 error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
/* Send one test payload of @len bytes on @fd to @addr (@alen is the address
 * length). When @gso_len is non-zero and the segment size is not being
 * configured via setsockopt, it is passed per call in a UDP_SEGMENT control
 * message. Returns the result of the final __send_one() call.
 */
458 static bool send_one(int fd, int len, int gso_len,
459 struct sockaddr *addr, socklen_t alen)
/* control buffer sized for a single 16-bit cmsg payload */
461 char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
462 struct msghdr msg = {0};
463 struct iovec iov = {0};
473 msg.msg_namelen = alen;
475 if (gso_len && !cfg_do_setsockopt) {
476 msg.msg_control = control;
477 msg.msg_controllen = sizeof(control);
479 cm = CMSG_FIRSTHDR(&msg);
480 cm->cmsg_level = SOL_UDP;
481 cm->cmsg_type = UDP_SEGMENT;
482 cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
483 *((uint16_t *) CMSG_DATA(cm)) = gso_len;
486 /* If MSG_MORE, send 1 byte followed by remainder */
487 if (cfg_do_msgmore && len > 1) {
/* the leading 1-byte send must succeed; a failure aborts the test */
489 if (!__send_one(fd, &msg, MSG_MORE))
490 error(1, 0, "send 1B failed");
493 iov.iov_len = len - 1;
496 return __send_one(fd, &msg, 0);
/* Receive one datagram from @fd into the global buf, passing @flags to
 * recv(). EAGAIN on a MSG_DONTWAIT call is handled separately from other
 * errors, which terminate the program through error().
 */
499 static int recv_one(int fd, int flags)
503 ret = recv(fd, buf, sizeof(buf), flags);
504 if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
507 error(1, errno, "recv");
/* Run a single test case: log it, send test->tlen bytes on @fdt with GSO
 * size test->gso_len, check that the send result agrees with test->tfail,
 * then read from @fdr and verify the expected datagrams arrive. Any
 * mismatch terminates the program through error().
 */
512 static void run_one(struct testcase *test, int fdt, int fdr,
513 struct sockaddr *addr, socklen_t alen)
515 int i, ret, val, mss;
518 fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
519 addr->sa_family == AF_INET ? 4 : 6,
520 test->tlen, test->gso_len,
521 test->tfail ? "(fail)" : "");
/* in setsockopt mode the segment size is configured on the socket itself
 * (val presumably holds test->gso_len -- TODO confirm)
 */
524 if (cfg_do_setsockopt) {
525 if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
526 error(1, errno, "setsockopt udp segment");
529 sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
/* the send outcome must agree with the expectation in test->tfail */
530 if (sent && test->tfail)
531 error(1, 0, "send succeeded while expecting failure");
532 if (!sent && !test->tfail)
533 error(1, 0, "send failed while expecting success");
540 mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
543 /* Recv all full MSS datagrams */
544 for (i = 0; i < test->r_num_mss; i++) {
545 ret = recv_one(fdr, 0);
547 error(1, 0, "recv.%d: %d != %d", i, ret, mss);
550 /* Recv the non-full last datagram, if tlen was not a multiple of mss */
551 if (test->r_len_last) {
552 ret = recv_one(fdr, 0);
553 if (ret != test->r_len_last)
554 error(1, 0, "recv.%d: %d != %d (last)",
555 i, ret, test->r_len_last);
558 /* Verify received all data */
/* non-blocking read: anything still queued is an unexpected datagram */
559 ret = recv_one(fdr, MSG_DONTWAIT);
561 error(1, 0, "recv: unexpected datagram");
/* Run the test table matching the address family of @addr (testcases_v4 for
 * AF_INET, testcases_v6 otherwise) on sender @fdt and receiver @fdr.
 * Iteration stops at the first entry whose tlen is zero.
 */
564 static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
566 struct testcase *tests, *test;
568 tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
570 for (test = tests; test->tlen; test++) {
571 /* if a specific test is given, then skip all others */
572 if (cfg_specific_test_id == -1 ||
573 cfg_specific_test_id == test - tests)
574 run_one(test, fdt, fdr, addr, alen);
/* Set up a receiver socket bound to @addr and a sender socket of the same
 * family, then run the test table in connectionless and/or connected mode
 * as selected by cfg_do_connectionless and cfg_do_connected. Any failing
 * step terminates the program through error().
 */
578 static void run_test(struct sockaddr *addr, socklen_t alen)
/* 100 ms receive timeout */
580 struct timeval tv = { .tv_usec = 100 * 1000 };
583 fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
585 error(1, errno, "socket r");
587 if (bind(fdr, addr, alen))
588 error(1, errno, "bind");
590 /* Have tests fail quickly instead of hang */
591 if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
592 error(1, errno, "setsockopt rcv timeout");
594 fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
596 error(1, errno, "socket t");
598 /* Do not fragment these datagrams: only succeed if GSO works */
599 set_pmtu_discover(fdt, addr->sa_family == AF_INET);
/* connectionless pass: the device MTU itself is set to CONST_MTU_TEST */
601 if (cfg_do_connectionless) {
602 set_device_mtu(fdt, CONST_MTU_TEST);
603 run_all(fdt, fdr, addr, alen);
/* connected pass: the device MTU is set 100 above CONST_MTU_TEST while the
 * route carries CONST_MTU_TEST; the path MTU read back after connect()
 * must equal CONST_MTU_TEST
 */
606 if (cfg_do_connected) {
607 set_device_mtu(fdt, CONST_MTU_TEST + 100);
608 set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
610 if (connect(fdt, addr, alen))
611 error(1, errno, "connect");
613 val = get_path_mtu(fdt, addr->sa_family == AF_INET);
614 if (val != CONST_MTU_TEST)
615 error(1, 0, "bad path mtu %u\n", val);
617 run_all(fdt, fdr, addr, 0 /* use connected addr */);
621 error(1, errno, "close t");
623 error(1, errno, "close r");
/* Build the IPv4 test address (addr4, port cfg_port) and pass it to
 * run_test().
 */
626 static void run_test_v4(void)
628 struct sockaddr_in addr = {0};
630 addr.sin_family = AF_INET;
631 addr.sin_port = htons(cfg_port);
632 addr.sin_addr = addr4;
634 run_test((void *)&addr, sizeof(addr));
/* Build the IPv6 test address (addr6, port cfg_port) and pass it to
 * run_test().
 */
637 static void run_test_v6(void)
639 struct sockaddr_in6 addr = {0};
641 addr.sin6_family = AF_INET6;
642 addr.sin6_port = htons(cfg_port);
643 addr.sin6_addr = addr6;
645 run_test((void *)&addr, sizeof(addr));
/* Parse the command line with getopt() (option string "46cCmst:", where
 * only 't' takes an argument) and store the results in the cfg_* globals.
 * Unrecognised input terminates the program through error().
 */
648 static void parse_opts(int argc, char **argv)
652 while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
661 cfg_do_connected = true;
664 cfg_do_connectionless = true;
667 cfg_do_msgmore = true;
670 cfg_do_setsockopt = true;
/* NOTE(review): the strtoul() result is stored without checking for
 * conversion errors or range
 */
673 cfg_specific_test_id = strtoul(optarg, NULL, 0);
676 error(1, 0, "%s: parse error", argv[0]);
/* Program entry point: parses the command-line options and, once the
 * preceding steps have completed, reports "OK" on stderr.
 */
681 int main(int argc, char **argv)
683 parse_opts(argc, argv);
690 fprintf(stderr, "OK\n");