nvme-tcp: optimize network stack by setting msg flags according to batch size
author     Sagi Grimberg <sagi@grimberg.me>
           Fri, 19 Jun 2020 00:30:24 +0000 (17:30 -0700)
committer  Christoph Hellwig <hch@lst.de>
           Wed, 8 Jul 2020 14:16:18 +0000 (16:16 +0200)
If we have a long list of requests to send, signal the network stack
that more is coming (MSG_MORE). If nothing else is queued, signal the
end of record (MSG_EOR) so the stack pushes the data out immediately.
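
In the send paths this reduces to a per-fragment flag choice. A minimal
sketch of that decision (pick_msg_flags and both of its arguments are
hypothetical names for illustration, not code from this patch):

  static inline int pick_msg_flags(bool last_fragment, bool queue_has_more)
  {
          int flags = MSG_DONTWAIT;

          if (last_fragment && !queue_has_more)
                  flags |= MSG_EOR;       /* record complete: push it out */
          else
                  flags |= MSG_MORE;      /* more coming: let TCP coalesce */

          return flags;
  }

The patch below additionally ORs in MSG_SENDPAGE_NOTLAST alongside
MSG_MORE on the sendpage-backed data path.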

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/tcp.c

index 2d3962c..b2e73e1 100644
@@ -79,6 +79,7 @@ struct nvme_tcp_queue {
        struct mutex            send_mutex;
        struct llist_head       req_list;
        struct list_head        send_list;
+       bool                    more_requests;
 
        /* recv state */
        void                    *pdu;
@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
         */
        if (queue->io_cpu == smp_processor_id() &&
            sync && empty && mutex_trylock(&queue->send_mutex)) {
+               queue->more_requests = !last;
                nvme_tcp_try_send(queue);
+               queue->more_requests = false;
                mutex_unlock(&queue->send_mutex);
        } else if (last) {
                queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
@@ -877,6 +880,12 @@ done:
        read_unlock(&sk->sk_callback_lock);
 }
 
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+       return !list_empty(&queue->send_list) ||
+               !llist_empty(&queue->req_list) || queue->more_requests;
+}
+
 static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 {
        queue->request = NULL;
@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
                bool last = nvme_tcp_pdu_last_send(req, len);
                int ret, flags = MSG_DONTWAIT;
 
-               if (last && !queue->data_digest)
+               if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
                        flags |= MSG_EOR;
                else
                        flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
        int flags = MSG_DONTWAIT;
        int ret;
 
-       if (inline_data)
+       if (inline_data || nvme_tcp_queue_more(queue))
                flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
        else
                flags |= MSG_EOR;
@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 {
        struct nvme_tcp_queue *queue = req->queue;
        int ret;
-       struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+       struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
        struct kvec iov = {
                .iov_base = &req->ddgst + req->offset,
                .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
        };
 
+       if (nvme_tcp_queue_more(queue))
+               msg.msg_flags |= MSG_MORE;
+       else
+               msg.msg_flags |= MSG_EOR;
+
        ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
        if (unlikely(ret <= 0))
                return ret;
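
For reference, the same batching idea expressed against the userspace
socket API (a sketch under assumptions: Linux, an already connected TCP
socket, send_batch is a hypothetical helper, and short writes are not
retried):

  #include <stddef.h>
  #include <sys/socket.h>
  #include <sys/types.h>
  #include <sys/uio.h>

  /* Send n buffers as one batch: every buffer but the last is flagged
   * MSG_MORE so TCP may coalesce them into fewer segments; the last one
   * carries MSG_EOR, which keeps later sends from being merged into it. */
  static ssize_t send_batch(int fd, struct iovec *iov, size_t n)
  {
          ssize_t total = 0;

          for (size_t i = 0; i < n; i++) {
                  struct msghdr msg = { .msg_iov = &iov[i], .msg_iovlen = 1 };
                  int flags = (i + 1 < n) ? MSG_MORE : MSG_EOR;
                  ssize_t ret = sendmsg(fd, &msg, flags);

                  if (ret < 0)
                          return ret;     /* error; partial progress dropped */
                  total += ret;
          }

          return total;
  }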