Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 30d9594..16fb743 100644
@@ -667,6 +667,29 @@ struct io_unlink {
        struct filename                 *filename;
 };
 
+struct io_mkdir {
+       struct file                     *file;
+       int                             dfd;
+       umode_t                         mode;
+       struct filename                 *filename;
+};
+
+struct io_symlink {
+       struct file                     *file;
+       int                             new_dfd;
+       struct filename                 *oldpath;
+       struct filename                 *newpath;
+};
+
+struct io_hardlink {
+       struct file                     *file;
+       int                             old_dfd;
+       int                             new_dfd;
+       struct filename                 *oldpath;
+       struct filename                 *newpath;
+       int                             flags;
+};
+
 struct io_completion {
        struct file                     *file;
        u32                             cflags;
@@ -826,6 +849,9 @@ struct io_kiocb {
                struct io_shutdown      shutdown;
                struct io_rename        rename;
                struct io_unlink        unlink;
+               struct io_mkdir         mkdir;
+               struct io_symlink       symlink;
+               struct io_hardlink      hardlink;
                /* use only after cleaning per-op data, see io_clean_op() */
                struct io_completion    compl;
        };
@@ -1038,6 +1064,9 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_RENAMEAT] = {},
        [IORING_OP_UNLINKAT] = {},
+       [IORING_OP_MKDIRAT] = {},
+       [IORING_OP_SYMLINKAT] = {},
+       [IORING_OP_LINKAT] = {},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -1453,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
        struct io_timeout_data *io = req->async_data;
 
        if (hrtimer_try_to_cancel(&io->timer) != -1) {
+               if (status)
+                       req_set_fail(req);
                atomic_set(&req->ctx->cq_timeouts,
                        atomic_read(&req->ctx->cq_timeouts) + 1);
                list_del_init(&req->timeout.list);
@@ -1590,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 
 static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
 {
+       /* see waitqueue_active() comment */
+       smp_mb();
+
        if (ctx->flags & IORING_SETUP_SQPOLL) {
-               if (wq_has_sleeper(&ctx->cq_wait))
+               if (waitqueue_active(&ctx->cq_wait))
                        wake_up_all(&ctx->cq_wait);
        }
        if (io_should_trigger_evfd(ctx))
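
The smp_mb() added here is the waker half of the pattern documented above
waitqueue_active() in include/linux/wait.h: the waker must order its writes
(the freshly posted CQEs) before the lockless sleeper check, pairing with the
barrier the waiter has between queueing itself and re-testing the condition.
Schematically, with cq_ready() as a hypothetical stand-in for the waiter's
CQE check:

/*  waiter                                  waker (this function)
 *
 *  prepare_to_wait(&ctx->cq_wait, ...);    ...post CQEs...
 *  ...barrier via set_current_state()...   smp_mb();
 *  if (!cq_ready())                        if (waitqueue_active(&ctx->cq_wait))
 *          schedule();                             wake_up_all(&ctx->cq_wait);
 */
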
@@ -2850,7 +2884,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                    !kiocb->ki_filp->f_op->iopoll)
                        return -EOPNOTSUPP;
 
-               kiocb->ki_flags |= IOCB_HIPRI;
+               kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->iopoll_completed = 0;
        } else {
@@ -3451,6 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                if (req->flags & REQ_F_NOWAIT)
                        goto done;
                /* some cases will consume bytes even on error returns */
+               iov_iter_reexpand(iter, iter->count + iter->truncated);
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = 0;
        } else if (ret == -EIOCBQUEUED) {
@@ -3590,6 +3625,7 @@ done:
        } else {
 copy_iov:
                /* some cases will consume bytes even on error returns */
+               iov_iter_reexpand(iter, iter->count + iter->truncated);
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
                return ret ?: -EAGAIN;
@@ -3701,6 +3737,149 @@ static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
        return 0;
 }
 
+static int io_mkdirat_prep(struct io_kiocb *req,
+                           const struct io_uring_sqe *sqe)
+{
+       struct io_mkdir *mkd = &req->mkdir;
+       const char __user *fname;
+
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index ||
+           sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       mkd->dfd = READ_ONCE(sqe->fd);
+       mkd->mode = READ_ONCE(sqe->len);
+
+       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       mkd->filename = getname(fname);
+       if (IS_ERR(mkd->filename))
+               return PTR_ERR(mkd->filename);
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_mkdir *mkd = &req->mkdir;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode);
+
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret < 0)
+               req_set_fail(req);
+       io_req_complete(req, ret);
+       return 0;
+}
+
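
Userspace usage sketch (not part of the patch): liburing 2.1+ ships
io_uring_prep_mkdirat(), which fills the SQE exactly as io_mkdirat_prep()
above expects (fd -> dfd, len -> mode, addr -> pathname). The directory name
and mode below are illustrative.

#include <fcntl.h>		/* AT_FDCWD */
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_mkdirat(sqe, AT_FDCWD, "newdir", 0755);

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	/* cqe->res is 0 on success or a negative errno: the value
	 * io_mkdirat() above hands to io_req_complete() */
	printf("mkdirat: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}
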
+static int io_symlinkat_prep(struct io_kiocb *req,
+                             const struct io_uring_sqe *sqe)
+{
+       struct io_symlink *sl = &req->symlink;
+       const char __user *oldpath, *newpath;
+
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index ||
+           sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       sl->new_dfd = READ_ONCE(sqe->fd);
+       oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+
+       sl->oldpath = getname(oldpath);
+       if (IS_ERR(sl->oldpath))
+               return PTR_ERR(sl->oldpath);
+
+       sl->newpath = getname(newpath);
+       if (IS_ERR(sl->newpath)) {
+               putname(sl->oldpath);
+               return PTR_ERR(sl->newpath);
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_symlink *sl = &req->symlink;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath);
+
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret < 0)
+               req_set_fail(req);
+       io_req_complete(req, ret);
+       return 0;
+}
+
+static int io_linkat_prep(struct io_kiocb *req,
+                          const struct io_uring_sqe *sqe)
+{
+       struct io_hardlink *lnk = &req->hardlink;
+       const char __user *oldf, *newf;
+
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+       if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
+               return -EBADF;
+
+       lnk->old_dfd = READ_ONCE(sqe->fd);
+       lnk->new_dfd = READ_ONCE(sqe->len);
+       oldf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+       lnk->flags = READ_ONCE(sqe->hardlink_flags);
+
+       lnk->oldpath = getname(oldf);
+       if (IS_ERR(lnk->oldpath))
+               return PTR_ERR(lnk->oldpath);
+
+       lnk->newpath = getname(newf);
+       if (IS_ERR(lnk->newpath)) {
+               putname(lnk->oldpath);
+               return PTR_ERR(lnk->newpath);
+       }
+
+       req->flags |= REQ_F_NEED_CLEANUP;
+       return 0;
+}
+
+static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_hardlink *lnk = &req->hardlink;
+       int ret;
+
+       if (issue_flags & IO_URING_F_NONBLOCK)
+               return -EAGAIN;
+
+       ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd,
+                               lnk->newpath, lnk->flags);
+
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       if (ret < 0)
+               req_set_fail(req);
+       io_req_complete(req, ret);
+       return 0;
+}
+
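
The other two opcodes follow the same shape. A minimal sketch assuming the
matching liburing 2.1+ helpers: io_uring_prep_symlinkat() mirrors
do_symlinkat()'s target-first argument order, io_uring_prep_linkat() mirrors
linkat(2). Note that sqe->hardlink_flags shares the SQE flags union with
sqe->rw_flags, which is why io_linkat_prep() above must not require rw_flags
to be zero. IOSQE_IO_LINK is used only to order the second request after the
first; all path names are illustrative.

#include <fcntl.h>		/* AT_FDCWD */
#include <liburing.h>

static int make_links(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret = 0;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_symlinkat(sqe, "target", AT_FDCWD, "sym");
	sqe->flags |= IOSQE_IO_LINK;	/* run the linkat only afterwards */

	/* hard-links "sym" itself: no AT_SYMLINK_FOLLOW in flags */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_linkat(sqe, AT_FDCWD, "sym", AT_FDCWD, "hard", 0);

	io_uring_submit(ring);
	for (i = 0; i < 2; i++) {
		io_uring_wait_cqe(ring, &cqe);
		if (cqe->res < 0)
			ret = cqe->res;	/* negative errno, or -ECANCELED
					 * if the linked symlinkat failed */
		io_uring_cqe_seen(ring, cqe);
	}
	return ret;
}
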
 static int io_shutdown_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
@@ -6200,6 +6379,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return io_renameat_prep(req, sqe);
        case IORING_OP_UNLINKAT:
                return io_unlinkat_prep(req, sqe);
+       case IORING_OP_MKDIRAT:
+               return io_mkdirat_prep(req, sqe);
+       case IORING_OP_SYMLINKAT:
+               return io_symlinkat_prep(req, sqe);
+       case IORING_OP_LINKAT:
+               return io_linkat_prep(req, sqe);
        }
 
        printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -6368,6 +6553,17 @@ static void io_clean_op(struct io_kiocb *req)
                case IORING_OP_UNLINKAT:
                        putname(req->unlink.filename);
                        break;
+               case IORING_OP_MKDIRAT:
+                       putname(req->mkdir.filename);
+                       break;
+               case IORING_OP_SYMLINKAT:
+                       putname(req->symlink.oldpath);
+                       putname(req->symlink.newpath);
+                       break;
+               case IORING_OP_LINKAT:
+                       putname(req->hardlink.oldpath);
+                       putname(req->hardlink.newpath);
+                       break;
                }
        }
        if ((req->flags & REQ_F_POLLED) && req->apoll) {
@@ -6496,6 +6692,15 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
        case IORING_OP_UNLINKAT:
                ret = io_unlinkat(req, issue_flags);
                break;
+       case IORING_OP_MKDIRAT:
+               ret = io_mkdirat(req, issue_flags);
+               break;
+       case IORING_OP_SYMLINKAT:
+               ret = io_symlinkat(req, issue_flags);
+               break;
+       case IORING_OP_LINKAT:
+               ret = io_linkat(req, issue_flags);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -10350,7 +10555,14 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
        if (ctx->flags & IORING_SETUP_SQPOLL) {
                sqd = ctx->sq_data;
                if (sqd) {
+                       /*
+                        * Observe the correct sqd->lock -> ctx->uring_lock
+                        * ordering. Fine to drop uring_lock here, we hold
+                        * a ref to the ctx.
+                        */
+                       mutex_unlock(&ctx->uring_lock);
                        mutex_lock(&sqd->lock);
+                       mutex_lock(&ctx->uring_lock);
                        tctx = sqd->thread->io_uring;
                }
        } else {
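
The unlock/lock/lock sequence above is the standard way to take a lock that
is ordered before one already held: drop the later lock, then acquire both in
the documented order. A generic sketch with hypothetical pthread mutexes A
(ordered first) and B (ordered second):

/* lock order rule: A before B; B is held and A is now needed too */
pthread_mutex_unlock(&B);	/* drop the later lock */
pthread_mutex_lock(&A);		/* take the earlier one first */
pthread_mutex_lock(&B);		/* retake B, now in the right order */
/* whatever B protects must be pinned while B is dropped -- here the
 * held ctx reference keeps the io_ring_ctx alive, per the comment */
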
@@ -10653,7 +10865,7 @@ static int __init io_uring_init(void)
        BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
 
        BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
-       BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
+       BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
 
        req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
                                SLAB_ACCOUNT);
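
Since IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT and IORING_OP_LINKAT only exist
from this release on, portable userspace should feature-test at runtime. A
sketch using liburing's probe interface (liburing 2.x):

#include <stdbool.h>
#include <liburing.h>

static bool have_mkdirat(void)
{
	struct io_uring_probe *probe = io_uring_get_probe();
	bool ok;

	if (!probe)
		return false;	/* kernel predates IORING_REGISTER_PROBE */
	ok = io_uring_opcode_supported(probe, IORING_OP_MKDIRAT);
	io_uring_free_probe(probe);
	return ok;
}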