eventfd: convert to f_op->read_iter()
author Jens Axboe <axboe@kernel.dk>
Fri, 1 May 2020 19:11:09 +0000 (13:11 -0600)
committer Al Viro <viro@zeniv.linux.org.uk>
Thu, 7 May 2020 02:33:43 +0000 (22:33 -0400)
eventfd is using ->read() as its file_operations read handler, but
this prevents passing in information about whether a given IO operation
is blocking or not. We can only use the file flags for that. To support
async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
support. Convert eventfd to using ->read_iter().

With ->read_iter(), we can support IOCB_NOWAIT. Ensure the fd setup
is done such that we set file->f_mode with FMODE_NOWAIT.

[missing include added]

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/eventfd.c

index 78e41c7..df466ef 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/idr.h>
+#include <linux/uio.h>
 
 DEFINE_PER_CPU(int, eventfd_wake_count);
 
@@ -216,32 +217,32 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-                           loff_t *ppos)
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
 {
+       struct file *file = iocb->ki_filp;
        struct eventfd_ctx *ctx = file->private_data;
-       ssize_t res;
        __u64 ucnt = 0;
        DECLARE_WAITQUEUE(wait, current);
 
-       if (count < sizeof(ucnt))
+       if (iov_iter_count(to) < sizeof(ucnt))
                return -EINVAL;
-
        spin_lock_irq(&ctx->wqh.lock);
-       res = -EAGAIN;
-       if (ctx->count > 0)
-               res = sizeof(ucnt);
-       else if (!(file->f_flags & O_NONBLOCK)) {
+       if (!ctx->count) {
+               if ((file->f_flags & O_NONBLOCK) ||
+                   (iocb->ki_flags & IOCB_NOWAIT)) {
+                       spin_unlock_irq(&ctx->wqh.lock);
+                       return -EAGAIN;
+               }
                __add_wait_queue(&ctx->wqh, &wait);
                for (;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
-                       if (ctx->count > 0) {
-                               res = sizeof(ucnt);
+                       if (ctx->count)
                                break;
-                       }
                        if (signal_pending(current)) {
-                               res = -ERESTARTSYS;
-                               break;
+                               __remove_wait_queue(&ctx->wqh, &wait);
+                               __set_current_state(TASK_RUNNING);
+                               spin_unlock_irq(&ctx->wqh.lock);
+                               return -ERESTARTSYS;
                        }
                        spin_unlock_irq(&ctx->wqh.lock);
                        schedule();
@@ -250,17 +251,14 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
                __remove_wait_queue(&ctx->wqh, &wait);
                __set_current_state(TASK_RUNNING);
        }
-       if (likely(res > 0)) {
-               eventfd_ctx_do_read(ctx, &ucnt);
-               if (waitqueue_active(&ctx->wqh))
-                       wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
-       }
+       eventfd_ctx_do_read(ctx, &ucnt);
+       if (waitqueue_active(&ctx->wqh))
+               wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
        spin_unlock_irq(&ctx->wqh.lock);
-
-       if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
+       if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
                return -EFAULT;
 
-       return res;
+       return sizeof(ucnt);
 }
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
@@ -329,7 +327,7 @@ static const struct file_operations eventfd_fops = {
 #endif
        .release        = eventfd_release,
        .poll           = eventfd_poll,
-       .read           = eventfd_read,
+       .read_iter      = eventfd_read,
        .write          = eventfd_write,
        .llseek         = noop_llseek,
 };
@@ -406,6 +404,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
 static int do_eventfd(unsigned int count, int flags)
 {
        struct eventfd_ctx *ctx;
+       struct file *file;
        int fd;
 
        /* Check the EFD_* constants for consistency.  */
@@ -425,11 +424,24 @@ static int do_eventfd(unsigned int count, int flags)
        ctx->flags = flags;
        ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
 
-       fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-                             O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
+       flags &= EFD_SHARED_FCNTL_FLAGS;
+       flags |= O_RDWR;
+       fd = get_unused_fd_flags(flags);
        if (fd < 0)
-               eventfd_free_ctx(ctx);
+               goto err;
+
+       file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               fd = PTR_ERR(file);
+               goto err;
+       }
 
+       file->f_mode |= FMODE_NOWAIT;
+       fd_install(fd, file);
+       return fd;
+err:
+       eventfd_free_ctx(ctx);
        return fd;
 }