io_uring: add timeout support for io_uring_enter()

author Hao Xu <haoxu@linux.alibaba.com>

Tue, 3 Nov 2020 02:54:37 +0000 (10:54 +0800)

committer Jens Axboe <axboe@kernel.dk>

Wed, 9 Dec 2020 19:03:59 +0000 (12:03 -0700)
author Hao Xu <haoxu@linux.alibaba.com>
Tue, 3 Nov 2020 02:54:37 +0000 (10:54 +0800)
committer Jens Axboe <axboe@kernel.dk>
Wed, 9 Dec 2020 19:03:59 +0000 (12:03 -0700)
diff --git a/fs/io_uring.c b/fs/io_uring.c

index 11ce97d..ee25c70 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7118,7 +7118,8 @@ static int io_run_task_work_sig(void)
   * application must reap them itself, as they reside on the shared cq ring.
   */
  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
-                         const sigset_t __user *sig, size_t sigsz)
+                         const sigset_t __user *sig, size_t sigsz,
+                         struct __kernel_timespec __user *uts)
  {
         struct io_wait_queue iowq = {
                 .wq = {
@@ -7130,6 +7131,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                 .to_wait        = min_events,
         };
         struct io_rings *rings = ctx->rings;
+       struct timespec64 ts;
+       signed long timeout = 0;
         int ret = 0;
  
         do {
@@ -7152,6 +7155,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                         return ret;
         }
  
+       if (uts) {
+               if (get_timespec64(&ts, uts))
+                       return -EFAULT;
+               timeout = timespec64_to_jiffies(&ts);
+       }
+
         iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
         trace_io_uring_cqring_wait(ctx, min_events);
         do {
@@ -7165,7 +7174,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                         break;
                 if (io_should_wake(&iowq, false))
                         break;
-               schedule();
+               if (uts) {
+                       timeout = schedule_timeout(timeout);
+                       if (timeout == 0) {
+                               ret = -ETIME;
+                               break;
+                       }
+               } else {
+                       schedule();
+               }
         } while (1);
         finish_wait(&ctx->wait, &iowq.wq);
  
@@ -9167,9 +9184,39 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
         finish_wait(&ctx->sqo_sq_wait, &wait);
  }
  
+static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
+                         struct __kernel_timespec __user **ts,
+                         const sigset_t __user **sig)
+{
+       struct io_uring_getevents_arg arg;
+
+       /*
+        * Legacy calling convention: without IORING_ENTER_EXT_ARG, argp is the
+        * sigset_t pointer itself, *argsz is left as passed, and *ts is NULL.
+        */
+       if (!(flags & IORING_ENTER_EXT_ARG)) {
+               *sig = (const sigset_t __user *) argp;
+               *ts = NULL;
+               return 0;
+       }
+
+       /*
+        * EXT_ARG is set - *argsz must equal sizeof(arg) before we copy in the
+        * sigmask/timespec pointers. NOTE(review): arg.pad is never checked.
+        */
+       if (*argsz != sizeof(arg))
+               return -EINVAL;
+       if (copy_from_user(&arg, argp, sizeof(arg)))
+               return -EFAULT;
+       *sig = u64_to_user_ptr(arg.sigmask);
+       *argsz = arg.sigmask_sz;
+       *ts = u64_to_user_ptr(arg.ts);
+       return 0;
+}
+
  SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
-               u32, min_complete, u32, flags, const sigset_t __user *, sig,
-               size_t, sigsz)
+               u32, min_complete, u32, flags, const void __user *, argp,
+               size_t, argsz)
  {
         struct io_ring_ctx *ctx;
         long ret = -EBADF;
@@ -9179,7 +9226,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
         io_run_task_work();
  
         if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
-                       IORING_ENTER_SQ_WAIT))
+                       IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))
                 return -EINVAL;
  
         f = fdget(fd);
@@ -9225,6 +9272,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                         goto out;
         }
         if (flags & IORING_ENTER_GETEVENTS) {
+               const sigset_t __user *sig;
+               struct __kernel_timespec __user *ts;
+
+               ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+               if (unlikely(ret))
+                       goto out;
+
                 min_complete = min(min_complete, ctx->cq_entries);
  
                 /*
@@ -9237,7 +9291,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                     !(ctx->flags & IORING_SETUP_SQPOLL)) {
                         ret = io_iopoll_check(ctx, min_complete);
                 } else {
-                       ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+                       ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts);
                 }
         }
  
@@ -9600,7 +9654,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
         p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
                         IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
                         IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-                       IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED;
+                       IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
+                       IORING_FEAT_EXT_ARG;
  
         if (copy_to_user(params, p, sizeof(*p))) {
                 ret = -EFAULT;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h

index 37bea07..8576e8b 100644 (file)
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
                                 struct io_uring_params __user *p);
  asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
                                 u32 min_complete, u32 flags,
-                               const sigset_t __user *sig, size_t sigsz);
+                               const void __user *argp, size_t argsz);
  asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
                                 void __user *arg, unsigned int nr_args);
  
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h

index 557e7ea..6bb8229 100644 (file)
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -231,6 +231,7 @@ struct io_cqring_offsets {
  #define IORING_ENTER_GETEVENTS (1U << 0)
  #define IORING_ENTER_SQ_WAKEUP (1U << 1)
  #define IORING_ENTER_SQ_WAIT   (1U << 2)
+#define IORING_ENTER_EXT_ARG   (1U << 3)
  
  /*
   * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -259,6 +260,7 @@ struct io_uring_params {
  #define IORING_FEAT_FAST_POLL          (1U << 5)
  #define IORING_FEAT_POLL_32BITS        (1U << 6)
  #define IORING_FEAT_SQPOLL_NONFIXED    (1U << 7)
+#define IORING_FEAT_EXT_ARG            (1U << 8)
  
  /*
   * io_uring_register(2) opcodes and arguments
@@ -335,4 +337,11 @@ enum {
         IORING_RESTRICTION_LAST
  };
  
+struct io_uring_getevents_arg {
+       __u64   sigmask;        /* user pointer to the sigset_t */
+       __u32   sigmask_sz;     /* size of the sigset_t, replaces argsz */
+       __u32   pad;            /* not read by io_get_ext_arg() */
+       __u64   ts;             /* user pointer to a __kernel_timespec */
+};
+
  #endif
author	Hao Xu <haoxu@linux.alibaba.com>
	Tue, 3 Nov 2020 02:54:37 +0000 (10:54 +0800)
committer	Jens Axboe <axboe@kernel.dk>
	Wed, 9 Dec 2020 19:03:59 +0000 (12:03 -0700)
fs/io_uring.c		patch | blob | history
include/linux/syscalls.h		patch | blob | history
include/uapi/linux/io_uring.h		patch | blob | history