selftests: udp gso benchmark
author Willem de Bruijn <willemb@google.com>
Thu, 26 Apr 2018 17:42:25 +0000 (13:42 -0400)
committer David S. Miller <davem@davemloft.net>
Thu, 26 Apr 2018 19:09:52 +0000 (15:09 -0400)
Send udp data between a source and sink, optionally with udp gso.
The two processes are expected to be run on separate hosts.

A script is included that runs them together over loopback in a
single namespace for functionality testing.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/udpgso_bench.sh [new file with mode: 0755]
tools/testing/selftests/net/udpgso_bench_rx.c [new file with mode: 0644]
tools/testing/selftests/net/udpgso_bench_tx.c [new file with mode: 0644]

index 67a2624..f0e6c35 100644 (file)
@@ -9,3 +9,5 @@ reuseport_dualstack
 reuseaddr_conflict
 tcp_mmap
 udpgso
+udpgso_bench_rx
+udpgso_bench_tx
index db33033..df9102e 100644 (file)
@@ -6,12 +6,13 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_FILES += tcp_mmap
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
-TEST_GEN_PROGS += udpgso
+TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755 (executable)
index 0000000..792fa4d
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+       local -r jobs="$(jobs -p)"
+
+       if [[ "${jobs}" != "" ]]; then
+               kill -1 ${jobs} 2>/dev/null
+       fi
+}
+trap wake_children EXIT
+
+run_one() {
+       local -r args=$@
+
+       ./udpgso_bench_rx &
+       ./udpgso_bench_rx -t &
+
+       ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+       local -r args=$@
+
+       ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+       local -r args=$@
+
+       echo "udp"
+       run_in_netns ${args}
+
+       echo "udp gso"
+       run_in_netns ${args} -S
+
+       echo "udp gso zerocopy"
+       run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+       local -r args=$@
+
+       echo "tcp"
+       run_in_netns ${args} -t
+
+       echo "tcp zerocopy"
+       run_in_netns ${args} -t -z
+}
+
+run_all() {
+       local -r core_args="-l 4"
+       local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+       local -r ipv6_args="${core_args} -6 -D ::1"
+
+       echo "ipv4"
+       run_tcp "${ipv4_args}"
+       run_udp "${ipv4_args}"
+
+       echo "ipv6"
+       run_tcp "${ipv4_args}"
+       run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+       run_all
+elif [[ $1 == "__subprocess" ]]; then
+       shift
+       run_one $@
+else
+       run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644 (file)
index 0000000..727cf67
--- /dev/null
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* Runtime configuration, set by parse_opts() */
+static int  cfg_port           = 8000; /* port to bind; do_socket() applies htons() */
+static bool cfg_tcp;                   /* -t: receive over tcp instead of udp */
+static bool cfg_verify;                /* -v: verify the contents of udp payloads */
+
+/*
+ * Set by sigint_handler(). Written from a signal handler and read from
+ * the main loops, so it has to be a volatile sig_atomic_t.
+ */
+static volatile sig_atomic_t interrupted;
+
+/* Receive calls and bytes counted since the last report in do_recv() */
+static unsigned long packets, bytes;
+
+/* Signal handler: note that SIGINT arrived; any other signal is ignored */
+static void sigint_handler(int signum)
+{
+       if (signum != SIGINT)
+               return;
+
+       interrupted = true;
+}
+
+/* Return the current wall clock time in milliseconds */
+static unsigned long gettimeofday_ms(void)
+{
+       struct timeval now;
+
+       gettimeofday(&now, NULL);
+
+       /* whole seconds scaled up, plus the sub-second part scaled down */
+       return (now.tv_sec * 1000) + (now.tv_usec / 1000);
+}
+
+/*
+ * Wait until fd is readable. Polls in 10 ms slices so that a pending
+ * interrupt request is noticed: returns as soon as data is available, or
+ * after a slice expires (or is cut short by a signal) while interrupted
+ * is set. Exits on poll failure or on any event other than POLLIN.
+ */
+static void do_poll(int fd)
+{
+       struct pollfd pfd;
+       int ret;
+
+       pfd.events = POLLIN;
+       pfd.revents = 0;
+       pfd.fd = fd;
+
+       do {
+               ret = poll(&pfd, 1, 10);
+               if (ret == -1) {
+                       if (errno != EINTR)
+                               error(1, errno, "poll");
+
+                       /*
+                        * A signal such as SIGINT cut the wait short.
+                        * Treat it as a timeout and let the loop
+                        * condition decide whether to stop.
+                        */
+                       ret = 0;
+               }
+               if (ret == 0)
+                       continue;
+               /* poll() itself succeeded here, so errno carries no information */
+               if (pfd.revents != POLLIN)
+                       error(1, 0, "poll: 0x%x expected 0x%x\n",
+                                       pfd.revents, POLLIN);
+       } while (!ret && !interrupted);
+}
+
+/*
+ * Create an ipv6 socket bound to the wildcard address on cfg_port, with
+ * a 2 MB receive buffer request and SO_REUSEPORT set.
+ *
+ * @do_tcp: open a stream socket, listen, wait for one connection and
+ *          return the accepted socket (the listener is closed).
+ *          Otherwise return the bound datagram socket.
+ *
+ * Exits on any failure.
+ */
+static int do_socket(bool do_tcp)
+{
+       struct sockaddr_in6 addr = {0};
+       int fd, val;
+
+       /* the socket type follows the argument, like the listen path below */
+       fd = socket(PF_INET6, do_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket");
+
+       val = 1 << 21;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+               error(1, errno, "setsockopt rcvbuf");
+       val = 1;
+       if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+               error(1, errno, "setsockopt reuseport");
+
+       addr.sin6_family =      PF_INET6;
+       addr.sin6_port =        htons(cfg_port);
+       addr.sin6_addr =        in6addr_any;
+       if (bind(fd, (void *) &addr, sizeof(addr)))
+               error(1, errno, "bind");
+
+       if (do_tcp) {
+               int accept_fd = fd;
+
+               if (listen(accept_fd, 1))
+                       error(1, errno, "listen");
+
+               do_poll(accept_fd);
+
+               fd = accept(accept_fd, NULL, NULL);
+               if (fd == -1)
+                       error(1, errno, "accept");
+               if (close(accept_fd))
+                       error(1, errno, "close accept fd");
+       }
+
+       return fd;
+}
+
+/*
+ * Drain the tcp receive queue without copying data out, counting each
+ * successful recv call and the bytes it discarded. Returns once the queue
+ * is empty; exits with status 0 when the peer has closed the connection.
+ */
+static void do_flush_tcp(int fd)
+{
+       int n;
+
+       for (;;) {
+               /* MSG_TRUNC flushes up to len bytes */
+               n = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+               if (n == -1) {
+                       if (errno == EAGAIN)
+                               return;
+                       error(1, errno, "flush");
+               }
+
+               /* client detached */
+               if (n == 0)
+                       exit(0);
+
+               packets++;
+               bytes += n;
+       }
+}
+
+/* Map any byte outside 'a'..'z' to '.' so that it is safe to print */
+static char sanitized_char(char val)
+{
+       if (val < 'a' || val > 'z')
+               return '.';
+
+       return val;
+}
+
+/*
+ * Check that data holds len bytes of the repeating 'a'..'z' sequence,
+ * starting at an arbitrary letter. Exits with a diagnostic on the first
+ * byte that breaks the sequence.
+ */
+static void do_verify_udp(const char *data, int len)
+{
+       char expect = data[0];
+       int pos;
+
+       /* verify contents */
+       if (expect < 'a' || expect > 'z')
+               error(1, 0, "data initial byte out of range");
+
+       for (pos = 1; pos < len; pos++) {
+               /* advance the expected letter, wrapping from 'z' to 'a' */
+               expect = (expect == 'z') ? 'a' : expect + 1;
+               if (data[pos] == expect)
+                       continue;
+
+               error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+                     pos, len,
+                     sanitized_char(data[pos]), data[pos],
+                     sanitized_char(expect), expect);
+       }
+}
+
+/*
+ * Flush outstanding datagrams, at most 256 per call, counting each one
+ * and its full length. In verify mode, check the payload pattern of the
+ * bytes that fit in the receive buffer.
+ */
+static void do_flush_udp(int fd)
+{
+       static char rbuf[ETH_DATA_LEN];
+       int ret, len, budget = 256;
+
+       /* without verification nothing is copied out: a zero length buffer */
+       len = cfg_verify ? sizeof(rbuf) : 0;
+       while (budget--) {
+               /* MSG_TRUNC will make return value full datagram length */
+               ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+               if (ret == -1 && errno == EAGAIN)
+                       return;
+               if (ret == -1)
+                       error(1, errno, "recv");
+               if (len) {
+                       /* recv() succeeded, so errno carries no information */
+                       if (ret == 0)
+                               error(1, 0, "recv: 0 byte datagram\n");
+
+                       /*
+                        * ret is the full datagram length and may exceed
+                        * the buffer: only the first len bytes were
+                        * stored, so never verify beyond those.
+                        */
+                       do_verify_udp(rbuf, ret < len ? ret : len);
+               }
+
+               packets++;
+               bytes += ret;
+       }
+}
+
+/* Print the command line synopsis and exit with status 1 */
+static void usage(const char *filepath)
+{
+       error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* variables.
+ *
+ *   -p port   port to bind (stored in host byte order)
+ *   -t        receive over tcp instead of udp
+ *   -v        verify udp payload contents
+ *
+ * Exits on unknown options, stray arguments, or when -t and -v are
+ * combined.
+ */
+static void parse_opts(int argc, char **argv)
+{
+       int c;
+
+       /* "p:" -- the port option takes an argument */
+       while ((c = getopt(argc, argv, "p:tv")) != -1) {
+               switch (c) {
+               case 'p':
+                       /* keep host byte order: do_socket() applies htons() */
+                       cfg_port = strtoul(optarg, NULL, 0);
+                       break;
+               case 't':
+                       cfg_tcp = true;
+                       break;
+               case 'v':
+                       cfg_verify = true;
+                       break;
+               default:
+                       usage(argv[0]);
+               }
+       }
+
+       if (optind != argc)
+               usage(argv[0]);
+
+       if (cfg_tcp && cfg_verify)
+               error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+/*
+ * Receive loop: open the socket, then alternate between waiting for data
+ * and draining it until interrupted. When more than a second has passed
+ * since the last report, print the counters gathered in that interval
+ * (if any data arrived) and reset them.
+ */
+static void do_recv(void)
+{
+       unsigned long now, next_report;
+       int fd;
+
+       fd = do_socket(cfg_tcp);
+       next_report = gettimeofday_ms() + 1000;
+
+       do {
+               do_poll(fd);
+
+               if (!cfg_tcp)
+                       do_flush_udp(fd);
+               else
+                       do_flush_tcp(fd);
+
+               now = gettimeofday_ms();
+               if (now <= next_report)
+                       continue;
+
+               if (packets)
+                       fprintf(stderr,
+                               "%s rx: %6lu MB/s %8lu calls/s\n",
+                               cfg_tcp ? "tcp" : "udp",
+                               bytes >> 20, packets);
+               bytes = packets = 0;
+               next_report = now + 1000;
+       } while (!interrupted);
+
+       if (close(fd))
+               error(1, errno, "close");
+}
+
+/* Parse options, install the SIGINT handler, then run the receive loop */
+int main(int argc, char **argv)
+{
+       parse_opts(argc, argv);
+       signal(SIGINT, sigint_handler);
+       do_recv();
+
+       return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644 (file)
index 0000000..e821564
--- /dev/null
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Fallback definitions, used only when the included headers lack them */
+
+/* Size of each payload buffer and upper bound for packet length checks */
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+/* cmsg type set by send_udp_segment_cmsg() to pass the segment size */
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT            103
+#endif
+
+/* Socket option enabled in main() when zerocopy (-z) is requested */
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY    60
+#endif
+
+/* Send flag passed by the send_* functions when zerocopy is requested */
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY   0x4000000
+#endif
+
+/* Number of identical payload buffers in buf[] */
+#define NUM_PKT                100
+
+/* Runtime configuration, set by parse_opts() */
+static bool    cfg_cache_trash;                /* -c: cycle through all buffers in buf[] */
+static int     cfg_cpu         = -1;           /* -C: cpu to pin to; -1 leaves affinity alone */
+static bool    cfg_connected   = true;         /* cleared by -u: send on an unconnected socket */
+static int     cfg_family      = PF_UNSPEC;    /* -4 / -6: address family, mandatory */
+static uint16_t        cfg_mss;                /* payload bytes per ETH_DATA_LEN sized packet */
+static int     cfg_payload_len = (1472 * 42);  /* -s: bytes handed to each send iteration */
+static int     cfg_port        = 8000;         /* -p: destination port, host byte order */
+static int     cfg_runtime_ms  = -1;           /* -l: runtime; -1 runs until interrupted */
+static bool    cfg_segment;                    /* -S: use udp segmentation offload */
+static bool    cfg_sendmmsg;                   /* -m: use sendmmsg */
+static bool    cfg_tcp;                        /* -t: send over tcp instead of udp */
+static bool    cfg_zerocopy;                   /* -z: send with MSG_ZEROCOPY */
+
+/* Destination address and its length for the selected family */
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+/*
+ * Set by sigint_handler(). Written from a signal handler and read from
+ * the main loop, so it has to be a volatile sig_atomic_t.
+ */
+static volatile sig_atomic_t interrupted;
+
+/* Payload buffers, all filled with the same pattern by main() */
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+/* Signal handler: note that SIGINT arrived; any other signal is ignored */
+static void sigint_handler(int signum)
+{
+       if (signum != SIGINT)
+               return;
+
+       interrupted = true;
+}
+
+/* Return the current wall clock time in milliseconds */
+static unsigned long gettimeofday_ms(void)
+{
+       struct timeval now;
+
+       gettimeofday(&now, NULL);
+
+       /* whole seconds scaled up, plus the sub-second part scaled down */
+       return (now.tv_sec * 1000) + (now.tv_usec / 1000);
+}
+
+/*
+ * Restrict the calling thread to run on the given cpu only.
+ * Exits on failure; otherwise always returns 0.
+ */
+static int set_cpu(int cpu)
+{
+       cpu_set_t mask;
+
+       CPU_ZERO(&mask);
+       CPU_SET(cpu, &mask);
+       /* pass errno along so that the cause of the failure is reported */
+       if (sched_setaffinity(0, sizeof(mask), &mask))
+               error(1, errno, "setaffinity %d", cpu);
+
+       return 0;
+}
+
+/*
+ * Fill sockaddr with the address parsed from str_addr and with cfg_port.
+ *
+ * @domain:   PF_INET or PF_INET6; anything else is fatal
+ * @str_addr: textual address in the notation of that family
+ * @sockaddr: storage large enough for the matching sockaddr type
+ *
+ * Exits when the domain is unknown or the address does not parse.
+ */
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+       if (domain == PF_INET) {
+               struct sockaddr_in *sin = sockaddr;
+
+               sin->sin_family = AF_INET;
+               sin->sin_port = htons(cfg_port);
+               if (inet_pton(AF_INET, str_addr, &sin->sin_addr) != 1)
+                       error(1, 0, "ipv4 parse error: %s", str_addr);
+       } else if (domain == PF_INET6) {
+               struct sockaddr_in6 *sin6 = sockaddr;
+
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_port = htons(cfg_port);
+               if (inet_pton(AF_INET6, str_addr, &sin6->sin6_addr) != 1)
+                       error(1, 0, "ipv6 parse error: %s", str_addr);
+       } else {
+               error(1, 0, "illegal domain");
+       }
+}
+
+/*
+ * Drain the socket error queue. The message header has no data or control
+ * buffers, so each queued entry is discarded; every read is expected to
+ * report exactly MSG_ERRQUEUE | MSG_CTRUNC. Returns once the queue is
+ * empty; exits on any other error or unexpected flags.
+ */
+static void flush_zerocopy(int fd)
+{
+       struct msghdr msg = {0};        /* flush */
+
+       for (;;) {
+               if (recvmsg(fd, &msg, MSG_ERRQUEUE) == -1) {
+                       if (errno == EAGAIN)
+                               return;
+                       error(1, errno, "errqueue");
+               }
+
+               if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+                       error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+
+               /* reset before the header is reused for the next read */
+               msg.msg_flags = 0;
+       }
+}
+
+/*
+ * Write cfg_payload_len bytes from data to the tcp socket, repeating the
+ * send call until everything was accepted. Exits on error.
+ *
+ * Returns the number of send calls that were needed.
+ */
+static int send_tcp(int fd, char *data)
+{
+       const int flags = cfg_zerocopy ? MSG_ZEROCOPY : 0;
+       int sent, off, calls = 0;
+
+       for (off = 0; off < cfg_payload_len; off += sent) {
+               sent = send(fd, data + off, cfg_payload_len - off, flags);
+               if (sent == -1)
+                       error(1, errno, "write");
+
+               calls++;
+       }
+
+       return calls;
+}
+
+/*
+ * Send cfg_payload_len bytes as a series of datagrams of at most cfg_mss
+ * bytes, one sendto call per datagram. Every datagram is taken from the
+ * start of data. The destination is passed explicitly only when the
+ * socket is unconnected. Exits on error or short write.
+ *
+ * Returns the number of datagrams sent.
+ */
+static int send_udp(int fd, char *data)
+{
+       int ret, total_len, len, count = 0;
+
+       total_len = cfg_payload_len;
+
+       while (total_len) {
+               len = total_len < cfg_mss ? total_len : cfg_mss;
+
+               ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+                            cfg_connected ? NULL : (void *)&cfg_dst_addr,
+                            cfg_connected ? 0 : cfg_alen);
+               if (ret == -1)
+                       error(1, errno, "write");
+               /* the call succeeded: errno is stale, and both values are ints */
+               if (ret != len)
+                       error(1, 0, "write: %dB != %dB\n", ret, len);
+
+               total_len -= len;
+               count++;
+       }
+
+       return count;
+}
+
+/*
+ * Send cfg_payload_len bytes from data as datagrams of at most cfg_mss
+ * bytes, all submitted with a single sendmmsg call. The destination is
+ * passed explicitly only when the socket is unconnected. Exits on error.
+ *
+ * Returns the number of messages reported sent by sendmmsg.
+ */
+static int send_udp_sendmmsg(int fd, char *data)
+{
+       /*
+        * Size the arrays for the worst case: the largest packet split
+        * into the smallest (ipv6) segments. ETH_MAX_MTU / ETH_DATA_LEN
+        * is too few, as a segment carries less than ETH_DATA_LEN bytes.
+        */
+       enum {
+               min_mss = ETH_DATA_LEN - sizeof(struct ip6_hdr) -
+                         sizeof(struct udphdr),
+               max_nr_msg = (ETH_MAX_MTU + min_mss - 1) / min_mss,
+       };
+       struct mmsghdr mmsgs[max_nr_msg];
+       struct iovec iov[max_nr_msg];
+       unsigned int off = 0, left;
+       int i = 0, ret;
+
+       memset(mmsgs, 0, sizeof(mmsgs));
+
+       left = cfg_payload_len;
+       while (left) {
+               if (i == max_nr_msg)
+                       error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+               iov[i].iov_base = data + off;
+               iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+               mmsgs[i].msg_hdr.msg_iov = iov + i;
+               mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+               /* an unconnected socket needs a destination per message */
+               if (!cfg_connected) {
+                       mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr;
+                       mmsgs[i].msg_hdr.msg_namelen = cfg_alen;
+               }
+
+               off += iov[i].iov_len;
+               left -= iov[i].iov_len;
+               i++;
+       }
+
+       ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+       if (ret == -1)
+               error(1, errno, "sendmmsg");
+
+       return ret;
+}
+
+/*
+ * Fill cm as a SOL_UDP / UDP_SEGMENT control message that carries
+ * cfg_mss as its 16-bit payload.
+ */
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+       cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+       cm->cmsg_type = UDP_SEGMENT;
+       cm->cmsg_level = SOL_UDP;
+
+       /* copy the value into the control message data area */
+       memcpy(CMSG_DATA(cm), &cfg_mss, sizeof(cfg_mss));
+}
+
+/*
+ * Send cfg_payload_len bytes from data with one sendmsg call, attaching
+ * a UDP_SEGMENT control message that carries cfg_mss as segment size.
+ * Exits on error or short write.
+ *
+ * Returns 1: the number of send calls made.
+ */
+static int send_udp_segment(int fd, char *data)
+{
+       char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+       struct msghdr msg = {0};
+       struct iovec iov = {0};
+       int ret;
+
+       iov.iov_base = data;
+       iov.iov_len = cfg_payload_len;
+
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+
+       msg.msg_control = control;
+       msg.msg_controllen = sizeof(control);
+       send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+       msg.msg_name = (void *)&cfg_dst_addr;
+       msg.msg_namelen = cfg_alen;
+
+       ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+       if (ret == -1)
+               error(1, errno, "sendmsg");
+       /* ret is not negative here; print int and size_t with matching specifiers */
+       if ((size_t)ret != iov.iov_len)
+               error(1, 0, "sendmsg: %d != %zu\n", ret, iov.iov_len);
+
+       return 1;
+}
+
+/* Print the command line synopsis and exit with status 1 */
+static void usage(const char *filepath)
+{
+       error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+                   filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* variables, then derive cfg_mss
+ * and validate the payload length against the largest packet.
+ *
+ * The destination (-D) is resolved only after all options were read, so
+ * that it does not depend on the position of -4/-6 and -p.
+ *
+ * Exits on unknown options, stray arguments, a missing address family or
+ * destination, conflicting modes, or an oversized payload.
+ */
+static void parse_opts(int argc, char **argv)
+{
+       const char *dst_str = NULL;
+       int max_len, hdrlen;
+       int c;
+
+       while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+               switch (c) {
+               case '4':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET;
+                       cfg_alen = sizeof(struct sockaddr_in);
+                       break;
+               case '6':
+                       if (cfg_family != PF_UNSPEC)
+                               error(1, 0, "Pass one of -4 or -6");
+                       cfg_family = PF_INET6;
+                       cfg_alen = sizeof(struct sockaddr_in6);
+                       break;
+               case 'c':
+                       cfg_cache_trash = true;
+                       break;
+               case 'C':
+                       cfg_cpu = strtol(optarg, NULL, 0);
+                       break;
+               case 'D':
+                       /* parsed below: needs the final family and port */
+                       dst_str = optarg;
+                       break;
+               case 'l':
+                       cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+                       break;
+               case 'm':
+                       cfg_sendmmsg = true;
+                       break;
+               case 'p':
+                       cfg_port = strtoul(optarg, NULL, 0);
+                       break;
+               case 's':
+                       cfg_payload_len = strtoul(optarg, NULL, 0);
+                       break;
+               case 'S':
+                       cfg_segment = true;
+                       break;
+               case 't':
+                       cfg_tcp = true;
+                       break;
+               case 'u':
+                       cfg_connected = false;
+                       break;
+               case 'z':
+                       cfg_zerocopy = true;
+                       break;
+               default:
+                       usage(argv[0]);
+               }
+       }
+
+       if (optind != argc)
+               usage(argv[0]);
+
+       if (cfg_family == PF_UNSPEC)
+               error(1, 0, "must pass one of -4 or -6");
+       if (!dst_str)
+               error(1, 0, "must pass a destination address with -D");
+       if (cfg_tcp && !cfg_connected)
+               error(1, 0, "connectionless tcp makes no sense");
+       if (cfg_segment && cfg_sendmmsg)
+               error(1, 0, "cannot combine segment offload and sendmmsg");
+
+       setup_sockaddr(cfg_family, dst_str, &cfg_dst_addr);
+
+       if (cfg_family == PF_INET)
+               hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+       else
+               hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+       /* payload that fits one ETH_DATA_LEN packet, and one maximum size packet */
+       cfg_mss = ETH_DATA_LEN - hdrlen;
+       max_len = ETH_MAX_MTU - hdrlen;
+
+       if (cfg_payload_len > max_len)
+               error(1, 0, "payload length %u exceeds max %u",
+                     cfg_payload_len, max_len);
+}
+
+/*
+ * Set the path mtu discovery mode of the socket to the "DO" setting of
+ * its address family (IP_PMTUDISC_DO or IPV6_PMTUDISC_DO).
+ * Exits on failure.
+ */
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+       const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
+       const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
+       int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;
+
+       if (setsockopt(fd, level, name, &val, sizeof(val)))
+               error(1, errno, "setsockopt path mtu");
+}
+
+/*
+ * Benchmark sender. After option parsing: optionally pin to a cpu, fill
+ * all payload buffers with the repeating 'a'..'z' pattern, open (and by
+ * default connect) the socket, then send in a loop using the selected
+ * method until interrupted or until the runtime expires. Statistics are
+ * printed about once per second.
+ */
+int main(int argc, char **argv)
+{
+       unsigned long num_msgs, num_sends;
+       unsigned long tnow, treport, tstop;
+       int fd, i, val;
+
+       parse_opts(argc, argv);
+
+       /* -1 means no cpu was requested; cpu 0 is a valid choice */
+       if (cfg_cpu >= 0)
+               set_cpu(cfg_cpu);
+
+       for (i = 0; i < (int)sizeof(buf[0]); i++)
+               buf[0][i] = 'a' + (i % 26);
+       for (i = 1; i < NUM_PKT; i++)
+               memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+       signal(SIGINT, sigint_handler);
+
+       fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket");
+
+       if (cfg_zerocopy) {
+               val = 1;
+               if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+                       error(1, errno, "setsockopt zerocopy");
+       }
+
+       if (cfg_connected &&
+           connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+               error(1, errno, "connect");
+
+       if (cfg_segment)
+               set_pmtu_discover(fd, cfg_family == PF_INET);
+
+       num_msgs = num_sends = 0;
+       tnow = gettimeofday_ms();
+       tstop = tnow + cfg_runtime_ms;
+       treport = tnow + 1000;
+
+       i = 0;
+       do {
+               if (cfg_tcp)
+                       num_sends += send_tcp(fd, buf[i]);
+               else if (cfg_segment)
+                       num_sends += send_udp_segment(fd, buf[i]);
+               else if (cfg_sendmmsg)
+                       num_sends += send_udp_sendmmsg(fd, buf[i]);
+               else
+                       num_sends += send_udp(fd, buf[i]);
+               num_msgs++;
+
+               /* reap zerocopy completion notifications every 16 messages */
+               if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+                       flush_zerocopy(fd);
+
+               tnow = gettimeofday_ms();
+               if (tnow > treport) {
+                       fprintf(stderr,
+                               "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+                               cfg_tcp ? "tcp" : "udp",
+                               (num_msgs * cfg_payload_len) >> 20,
+                               num_sends, num_msgs);
+                       num_msgs = num_sends = 0;
+                       treport = tnow + 1000;
+               }
+
+               /*
+                * cold cache when writing buffer: move to the next
+                * buffer, wrapping around after the last one
+                */
+               if (cfg_cache_trash && ++i == NUM_PKT)
+                       i = 0;
+
+       } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+       if (close(fd))
+               error(1, errno, "close");
+
+       return 0;
+}