io_uring: sendzc with fixed buffers
author: Pavel Begunkov <asml.silence@gmail.com>
Tue, 12 Jul 2022 20:52:46 +0000 (21:52 +0100)
committer: Jens Axboe <axboe@kernel.dk>
Mon, 25 Jul 2022 00:41:07 +0000 (18:41 -0600)
Allow zerocopy sends to use fixed buffers. There is an optimisation for
this case: the network layer doesn't need to reference the pages (see
SKBFL_MANAGED_FRAG_REFS), so io_uring has to ensure the validity of fixed
buffers until the notifier is released.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e1d8bd1b5934e541d90c1824eb4020ae3f5f43f3.1657643355.git.asml.silence@gmail.com
[axboe: fold in 32-bit pointer cast warning fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/uapi/linux/io_uring.h
io_uring/net.c

index 0736e27..f1a9ff9 100644 (file)
@@ -272,9 +272,13 @@ enum io_uring_op {
  * IORING_RECV_MULTISHOT       Multishot recv. Sets IORING_CQE_F_MORE if
  *                             the handler will continue to report
  *                             CQEs on behalf of the same SQE.
+ *
+ * IORING_RECVSEND_FIXED_BUF   Use registered buffers, the index is stored in
+ *                             the buf_index field.
  */
 #define IORING_RECVSEND_POLL_FIRST     (1U << 0)
-#define IORING_RECV_MULTISHOT  (1U << 1)
+#define IORING_RECV_MULTISHOT          (1U << 1)
+#define IORING_RECVSEND_FIXED_BUF      (1U << 2)
 
 /*
  * accept flags stored in sqe->ioprio
index 61414d8..ab443c5 100644 (file)
@@ -15,6 +15,7 @@
 #include "alloc_cache.h"
 #include "net.h"
 #include "notif.h"
+#include "rsrc.h"
 
 #if defined(CONFIG_NET)
 struct io_shutdown {
@@ -849,13 +850,23 @@ out_free:
 int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_sendzc *zc = io_kiocb_to_cmd(req);
+       struct io_ring_ctx *ctx = req->ctx;
 
        if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
                return -EINVAL;
 
        zc->flags = READ_ONCE(sqe->ioprio);
-       if (zc->flags & ~IORING_RECVSEND_POLL_FIRST)
+       if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF))
                return -EINVAL;
+       if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
+               unsigned idx = READ_ONCE(sqe->buf_index);
+
+               if (unlikely(idx >= ctx->nr_user_bufs))
+                       return -EFAULT;
+               idx = array_index_nospec(idx, ctx->nr_user_bufs);
+               req->imu = READ_ONCE(ctx->user_bufs[idx]);
+               io_req_set_rsrc_node(req, ctx, 0);
+       }
 
        zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
        zc->len = READ_ONCE(sqe->len);
@@ -909,10 +920,18 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
        msg.msg_controllen = 0;
        msg.msg_namelen = 0;
 
-       ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter);
-       if (unlikely(ret))
-               return ret;
-       mm_account_pinned_pages(&notif->uarg.mmp, zc->len);
+       if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
+               ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
+                                       (u64)(uintptr_t)zc->buf, zc->len);
+               if (unlikely(ret))
+                               return ret;
+       } else {
+               ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
+                                         &msg.msg_iter);
+               if (unlikely(ret))
+                       return ret;
+               mm_account_pinned_pages(&notif->uarg.mmp, zc->len);
+       }
 
        if (zc->addr) {
                ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address);