nvme-tcp: avoid scheduling io_work if we are already polling
author Sagi Grimberg <sagi@grimberg.me>
Fri, 1 May 2020 21:25:44 +0000 (14:25 -0700)
committer Jens Axboe <axboe@kernel.dk>
Sat, 9 May 2020 22:18:36 +0000 (16:18 -0600)
When the user runs polled I/O, we shouldn't have to trigger
the workqueue to handle the receive work upon the .data_ready
upcall: the polling thread reaps completions directly via
nvme_tcp_try_recv(). Skipping the wakeup avoids a redundant
context switch when the application is already polling for
completions.

Proposed-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/nvme/host/tcp.c

index 4862fa9..b28f91d 100644
@@ -60,6 +60,7 @@ struct nvme_tcp_request {
 enum nvme_tcp_queue_flags {
        NVME_TCP_Q_ALLOCATED    = 0,
        NVME_TCP_Q_LIVE         = 1,
+       NVME_TCP_Q_POLLING      = 2,
 };
 
 enum nvme_tcp_recv_state {
@@ -796,7 +797,8 @@ static void nvme_tcp_data_ready(struct sock *sk)
 
        read_lock_bh(&sk->sk_callback_lock);
        queue = sk->sk_user_data;
-       if (likely(queue && queue->rd_enabled))
+       if (likely(queue && queue->rd_enabled) &&
+           !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
                queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
        read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -2302,9 +2304,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
        if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
                return 0;
 
+       set_bit(NVME_TCP_Q_POLLING, &queue->flags);
        if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
                sk_busy_loop(sk, true);
        nvme_tcp_try_recv(queue);
+       clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
        return queue->nr_cqe;
 }
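
For illustration, a minimal user-space sketch of the wakeup-suppression
pattern this patch applies: the "data ready" notifier normally kicks a
worker, but skips the kick while a poller has announced itself via a
flag, since the poller will reap the data itself. All names here
(fake_queue, data_ready, poll_once) are hypothetical, and C11 atomics
stand in for the kernel's set_bit()/test_bit(); this is a sketch of the
idea, not the driver code.

/*
 * Sketch: suppress worker wakeups while a poller is active.
 * Hypothetical names; not the nvme-tcp implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_queue {
	atomic_bool polling;   /* plays the role of NVME_TCP_Q_POLLING */
	int pending;           /* completions waiting to be reaped */
	int worker_kicks;      /* times the io worker was scheduled */
};

/* Stand-in for the .data_ready upcall. */
static void data_ready(struct fake_queue *q)
{
	q->pending++;
	/* Only kick the worker if no one is actively polling. */
	if (!atomic_load(&q->polling))
		q->worker_kicks++;   /* stand-in for queue_work_on() */
}

/* Stand-in for nvme_tcp_poll(): reap completions directly. */
static int poll_once(struct fake_queue *q)
{
	int reaped;

	atomic_store(&q->polling, true);
	reaped = q->pending;         /* stand-in for nvme_tcp_try_recv() */
	q->pending = 0;
	atomic_store(&q->polling, false);
	return reaped;
}

int main(void)
{
	struct fake_queue q = { .pending = 0, .worker_kicks = 0 };

	atomic_init(&q.polling, false);

	data_ready(&q);                  /* no poller: worker is kicked */
	atomic_store(&q.polling, true);  /* app enters its poll loop */
	data_ready(&q);                  /* poller active: kick suppressed */
	atomic_store(&q.polling, false);

	printf("worker kicks: %d, reaped by poll: %d\n",
	       q.worker_kicks, poll_once(&q));
	return 0;
}

Note the design choice visible in the actual patch: data_ready() reads
the flag under sk->sk_callback_lock, and NVME_TCP_Q_POLLING is set
before sk_busy_loop() and cleared only after nvme_tcp_try_recv(), so
any data that arrives while the wakeup is suppressed is reaped by the
poller's own receive call and no completion is stranded.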