iov[0].iov_len = kbuf->len;
return 0;
}
- if (!req->rw.len)
- return 0;
- else if (req->rw.len > 1)
+ if (req->rw.len != 1)
return -EINVAL;
#ifdef CONFIG_COMPAT
struct io_uring_task *tctx = req->task->io_uring;
unsigned long flags;
+ put_files_struct(req->work.identity->files);
+ put_nsproxy(req->work.identity->nsproxy);
spin_lock_irqsave(&ctx->inflight_lock, flags);
list_del(&req->inflight_entry);
- if (atomic_read(&tctx->in_idle))
- wake_up(&tctx->wait);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
req->flags &= ~REQ_F_INFLIGHT;
- put_files_struct(req->work.identity->files);
- put_nsproxy(req->work.identity->nsproxy);
req->work.flags &= ~IO_WQ_WORK_FILES;
+ if (atomic_read(&tctx->in_idle))
+ wake_up(&tctx->wait);
}
static void __io_clean_op(struct io_kiocb *req)
}
if (ret) {
+ struct io_ring_ctx *lock_ctx = NULL;
+
+ if (req->ctx->flags & IORING_SETUP_IOPOLL)
+ lock_ctx = req->ctx;
+
/*
- * io_iopoll_complete() does not hold completion_lock to complete
- * polled io, so here for polled io, just mark it done and still let
- * io_iopoll_complete() complete it.
+ * io_iopoll_complete() does not hold completion_lock to
+ * complete polled io, so here for polled io, we can not call
+ * io_req_complete() directly, otherwise there maybe concurrent
+ * access to cqring, defer_list, etc, which is not safe. Given
+ * that io_iopoll_complete() is always called under uring_lock,
+ * so here for polled io, we also get uring_lock to complete
+ * it.
*/
- if (req->ctx->flags & IORING_SETUP_IOPOLL) {
- struct kiocb *kiocb = &req->rw.kiocb;
+ if (lock_ctx)
+ mutex_lock(&lock_ctx->uring_lock);
- kiocb_done(kiocb, ret, NULL);
- } else {
- req_set_fail_links(req);
- io_req_complete(req, ret);
- }
+ req_set_fail_links(req);
+ io_req_complete(req, ret);
+
+ if (lock_ctx)
+ mutex_unlock(&lock_ctx->uring_lock);
}
return io_steal_work(req);
io_ring_ctx_free(ctx);
}
+static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ return req->ctx == data;
+}
+
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
mutex_lock(&ctx->uring_lock);
io_poll_remove_all(ctx, NULL, NULL);
if (ctx->io_wq)
- io_wq_cancel_all(ctx->io_wq);
+ io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
/* if we failed setting up the ctx, we might not have any rings */
io_iopoll_try_reap_events(ctx);
ret |= io_poll_remove_all(ctx, task, NULL);
ret |= io_kill_timeouts(ctx, task, NULL);
+ ret |= io_run_task_work();
if (!ret)
break;
- io_run_task_work();
cond_resched();
}
}
static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
{
struct io_uring_task *tctx = current->io_uring;
+ int ret;
if (unlikely(!tctx)) {
- int ret;
-
ret = io_uring_alloc_task_context(current);
if (unlikely(ret))
return ret;
if (!old) {
get_file(file);
- xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
+ ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
+ file, GFP_KERNEL));
+ if (ret) {
+ fput(file);
+ return ret;
+ }
}
tctx->last = file;
}
if (inflight != tctx_inflight(tctx))
continue;
schedule();
+ finish_wait(&tctx->wait, &wait);
} while (1);
- finish_wait(&tctx->wait, &wait);
atomic_dec(&tctx->in_idle);
}
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
- io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
- if (!list_empty_careful(&ctx->cq_overflow_list))
+ if (!list_empty_careful(&ctx->cq_overflow_list)) {
+ bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
+
+ io_ring_submit_lock(ctx, needs_lock);
io_cqring_overflow_flush(ctx, false, NULL, NULL);
- io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
+ io_ring_submit_unlock(ctx, needs_lock);
+ }
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT)
return 0;
}
+static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+{
+ int ret, fd;
+
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ ret = io_uring_add_task_file(ctx, file);
+ if (ret) {
+ put_unused_fd(fd);
+ return ret;
+ }
+ fd_install(fd, file);
+ return fd;
+}
+
/*
* Allocate an anonymous fd, this is what constitutes the application
* visible backing of an io_uring instance. The application mmaps this
* fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
* we have to tie this fd to a socket for file garbage collection purposes.
*/
-static int io_uring_get_fd(struct io_ring_ctx *ctx)
+static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
{
struct file *file;
+#if defined(CONFIG_UNIX)
int ret;
- int fd;
-#if defined(CONFIG_UNIX)
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
&ctx->ring_sock);
if (ret)
- return ret;
+ return ERR_PTR(ret);
#endif
- ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (ret < 0)
- goto err;
- fd = ret;
-
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- ret = PTR_ERR(file);
- goto err;
- }
-
#if defined(CONFIG_UNIX)
- ctx->ring_sock->file = file;
-#endif
- ret = io_uring_add_task_file(ctx, file);
- if (ret) {
- fput(file);
- put_unused_fd(fd);
- goto err;
+ if (IS_ERR(file)) {
+ sock_release(ctx->ring_sock);
+ ctx->ring_sock = NULL;
+ } else {
+ ctx->ring_sock->file = file;
}
- fd_install(fd, file);
- return fd;
-err:
-#if defined(CONFIG_UNIX)
- sock_release(ctx->ring_sock);
- ctx->ring_sock = NULL;
#endif
- return ret;
+ return file;
}
static int io_uring_create(unsigned entries, struct io_uring_params *p,
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
+ struct file *file;
bool limit_mem;
int ret;
goto err;
}
+ file = io_uring_get_file(ctx);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err;
+ }
+
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
*/
- ret = io_uring_get_fd(ctx);
- if (ret < 0)
- goto err;
+ ret = io_uring_install_fd(ctx, file);
+ if (ret < 0) {
+ /* fput will clean it up */
+ fput(file);
+ return ret;
+ }
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;