ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG
authorMing Lei <ming.lei@redhat.com>
Tue, 20 May 2025 04:54:33 +0000 (12:54 +0800)
committerJens Axboe <axboe@kernel.dk>
Tue, 20 May 2025 16:24:45 +0000 (10:24 -0600)
Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically
to local io_uring context with provided buffer index.

Add UAPI structure `struct ublk_auto_buf_reg` for holding user parameter
to register request buffer automatically, one 'flags' field is defined, and
there is still 32bit available for future extension, such as, adding one
io_ring FD field for registering buffer to external io_uring.

`struct ublk_auto_buf_reg` is populated from ublk uring_cmd's sqe->addr,
and all existing ublk commands are data-less, so it is just fine to reuse
sqe->addr for this purpose.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250520045455.515691-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/ublk_drv.c
include/uapi/linux/ublk_cmd.h

index 3e56a9d..1aabc65 100644 (file)
@@ -66,7 +66,8 @@
                | UBLK_F_USER_COPY \
                | UBLK_F_ZONED \
                | UBLK_F_USER_RECOVERY_FAIL_IO \
-               | UBLK_F_UPDATE_SIZE)
+               | UBLK_F_UPDATE_SIZE \
+               | UBLK_F_AUTO_BUF_REG)
 
 #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
                | UBLK_F_USER_RECOVERY_REISSUE \
@@ -80,6 +81,9 @@
 
 struct ublk_rq_data {
        refcount_t ref;
+
+       /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */
+       u16 buf_index;
 };
 
 struct ublk_uring_cmd_pdu {
@@ -101,6 +105,9 @@ struct ublk_uring_cmd_pdu {
         * setup in ublk uring_cmd handler
         */
        struct ublk_queue *ubq;
+
+       struct ublk_auto_buf_reg buf;
+
        u16 tag;
 };
 
@@ -630,7 +637,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
 
 static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
 {
-       return false;
+       return ubq->flags & UBLK_F_AUTO_BUF_REG;
 }
 
 static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
@@ -1178,17 +1185,20 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
                              unsigned int issue_flags)
 {
+       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
        struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
        int ret;
 
-       ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
-                                     issue_flags);
+       ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
+                                     pdu->buf.index, issue_flags);
        if (ret) {
                blk_mq_end_request(req, BLK_STS_IOERR);
                return false;
        }
        /* one extra reference is dropped by ublk_io_release */
        refcount_set(&data->ref, 2);
+       /* store buffer index in request payload */
+       data->buf_index = pdu->buf.index;
        io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
        return true;
 }
@@ -1952,6 +1962,18 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
        io_uring_cmd_mark_cancelable(cmd, issue_flags);
 }
 
+static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
+{
+       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+       pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
+
+       if (pdu->buf.reserved0 || pdu->buf.reserved1)
+               return -EINVAL;
+
+       return 0;
+}
+
 static void ublk_io_release(void *priv)
 {
        struct request *rq = priv;
@@ -2034,6 +2056,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
                goto out;
        }
 
+       if (ublk_support_auto_buf_reg(ubq)) {
+               ret = ublk_set_auto_buf_reg(cmd);
+               if (ret)
+                       return ret;
+       }
+
        ublk_fill_io_cmd(io, cmd, buf_addr);
        ublk_mark_io_ready(ub, ubq);
 out:
@@ -2065,11 +2093,20 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
        }
 
        if (ublk_support_auto_buf_reg(ubq)) {
+               int ret;
+
                if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
-                       WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
+                       struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+                       WARN_ON_ONCE(io_buffer_unregister_bvec(cmd,
+                                               data->buf_index,
                                                issue_flags));
                        io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
                }
+
+               ret = ublk_set_auto_buf_reg(cmd);
+               if (ret)
+                       return ret;
        }
 
        ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
@@ -2791,8 +2828,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
                 * For USER_COPY, we depends on userspace to fill request
                 * buffer by pwrite() to ublk char device, which can't be
                 * used for unprivileged device
+                *
+                * Same with zero copy or auto buffer register.
                 */
-               if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+               if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+                                       UBLK_F_AUTO_BUF_REG))
                        return -EINVAL;
        }
 
@@ -2850,7 +2890,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
                UBLK_F_URING_CMD_COMP_IN_TASK;
 
        /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
-       if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+       if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+                               UBLK_F_AUTO_BUF_REG))
                ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
 
        /*
@@ -3377,6 +3418,7 @@ static int __init ublk_init(void)
 
        BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
                        UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+       BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8);
 
        init_waitqueue_head(&ublk_idr_wq);
 
index be5c6c6..f6f516b 100644 (file)
  */
 #define UBLK_F_UPDATE_SIZE              (1ULL << 10)
 
+/*
+ * request buffer is registered automatically to uring_cmd's io_uring
+ * context before delivering this io command to ublk server, meantime
+ * it is un-registered automatically when completing this io command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table
+ *
+ * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
+ *   `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
+ *   the definition of ublk_sqe_addr_to_auto_buf_reg()
+ *
+ * - pass buffer index from `ublk_auto_buf_reg.index`
+ *
+ * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ */
+#define UBLK_F_AUTO_BUF_REG    (1ULL << 11)
+
 /* device state */
 #define UBLK_S_DEV_DEAD        0
 #define UBLK_S_DEV_LIVE        1
@@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
        return iod->op_flags >> 8;
 }
 
+struct ublk_auto_buf_reg {
+       /* index for registering the delivered request buffer */
+       __u16  index;
+       __u16   reserved0;
+
+       /*
+        * io_ring FD can be passed via the reserve field in future for
+        * supporting to register io buffer to external io_uring
+        */
+       __u32  reserved1;
+};
+
+/*
+ * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
+ * uring_cmd's sqe->addr:
+ *
+ *     - bit0 ~ bit15: buffer index
+ *     - bit24 ~ bit31: reserved0
+ *     - bit32 ~ bit63: reserved1
+ */
+static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
+               __u64 sqe_addr)
+{
+       struct ublk_auto_buf_reg reg = {
+               .index = sqe_addr & 0xffff,
+               .reserved0 = (sqe_addr >> 16) & 0xffff,
+               .reserved1 = sqe_addr >> 32,
+       };
+
+       return reg;
+}
+
+static inline __u64
+ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
+{
+       __u64 addr = buf->index | (__u64)buf->reserved0 << 16 |
+               (__u64)buf->reserved1 << 32;
+
+       return addr;
+}
+
 /* issued to ublk driver via /dev/ublkcN */
 struct ublksrv_io_cmd {
        __u16   q_id;