spin_unlock_irq(&ctx->inflight_lock);
req->work.flags |= IO_WQ_WORK_FILES;
}
+ if (!(req->work.flags & IO_WQ_WORK_MM) &&
+ (def->work_flags & IO_WQ_WORK_MM)) {
+ if (id->mm != current->mm)
+ return false;
+ mmgrab(id->mm);
+ req->work.flags |= IO_WQ_WORK_MM;
+ }
return true;
}
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
- /* ->mm can never change on us */
- if (!(req->work.flags & IO_WQ_WORK_MM) &&
- (def->work_flags & IO_WQ_WORK_MM)) {
- mmgrab(id->mm);
- req->work.flags |= IO_WQ_WORK_MM;
- }
-
/* if we fail grabbing identity, we must COW, regrab, and retry */
if (io_grab_identity(req))
return;
}
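
Taken together, the two hunks above move the mm grab out of io_prep_async_work() and into io_grab_identity(), where a stale ->mm now fails the grab instead of being pinned blindly; the removed "->mm can never change on us" assumption does not survive identity sharing. As the retained comment says, on failure the caller must COW the identity and retry, roughly along these lines (a simplified sketch, not the verbatim kernel code; io_identity_cow() is the helper on this branch that clones the identity and re-snapshots it from current):

    /* Simplified sketch of the grab/COW/regrab dance referenced by
     * the "must COW, regrab, and retry" comment above. */
    static void io_prep_async_work_sketch(struct io_kiocb *req)
    {
            if (io_grab_identity(req))
                    return;         /* cached identity still matches */
            if (!io_identity_cow(req))
                    return;         /* allocation failed; request will error */
            /* the COWed identity was re-snapshotted from current, so
             * the second grab is expected to succeed */
            WARN_ON_ONCE(!io_grab_identity(req));
    }
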
if (ret) {
+ struct io_ring_ctx *lock_ctx = NULL;
+
+ if (req->ctx->flags & IORING_SETUP_IOPOLL)
+ lock_ctx = req->ctx;
+
/*
- * io_iopoll_complete() does not hold completion_lock to complete
- * polled io, so here for polled io, just mark it done and still let
- * io_iopoll_complete() complete it.
+ * io_iopoll_complete() does not hold completion_lock to
+ * complete polled io, so for polled io we cannot call
+ * io_req_complete() directly; otherwise there may be concurrent
+ * access to the cqring, defer_list, etc., which is not safe.
+ * Since io_iopoll_complete() is always called under uring_lock,
+ * take uring_lock here as well to complete polled io.
*/
- if (req->ctx->flags & IORING_SETUP_IOPOLL) {
- struct kiocb *kiocb = &req->rw.kiocb;
-
- kiocb_done(kiocb, ret, NULL);
- } else {
- req_set_fail_links(req);
- io_req_complete(req, ret);
- }
+ if (lock_ctx)
+ mutex_lock(&lock_ctx->uring_lock);
+
+ req_set_fail_links(req);
+ io_req_complete(req, ret);
+
+ if (lock_ctx)
+ mutex_unlock(&lock_ctx->uring_lock);
}
return io_steal_work(req);
complete(&data->done);
}
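
For context on why uring_lock is the right lock here: IOPOLL completions are only ever reaped and posted with uring_lock held, so the failure path above serializes with the reaper simply by taking the same mutex. A trimmed illustration of the reaper side (not verbatim kernel code):

    /* Trimmed sketch: the reaper posts CQEs for polled requests
     * without completion_lock, relying on uring_lock for exclusion. */
    static void iopoll_reap_sketch(struct io_ring_ctx *ctx)
    {
            unsigned int nr_events = 0;

            mutex_lock(&ctx->uring_lock);
            io_do_iopoll(ctx, &nr_events, 0);   /* completes polled io */
            mutex_unlock(&ctx->uring_lock);
    }
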
+static void io_sqe_files_set_node(struct fixed_file_data *file_data,
+ struct fixed_file_ref_node *ref_node)
+{
+ spin_lock_bh(&file_data->lock);
+ file_data->node = ref_node;
+ list_add_tail(&ref_node->node, &file_data->ref_list);
+ spin_unlock_bh(&file_data->lock);
+ percpu_ref_get(&file_data->refs);
+}
+
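
Besides deduplicating the two call sites below, the helper fixes a subtle ordering difference between them: io_sqe_files_register() used to assign file_data->node before taking the lock, while the files-update path assigned it inside the critical section. With the helper, the node pointer and its ref_list entry are always published together under file_data->lock, and the percpu ref is taken only after both are visible.
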
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
struct fixed_file_data *data = ctx->file_data;
return PTR_ERR(ref_node);
}
- file_data->node = ref_node;
- spin_lock_bh(&file_data->lock);
- list_add_tail(&ref_node->node, &file_data->ref_list);
- spin_unlock_bh(&file_data->lock);
- percpu_ref_get(&file_data->refs);
+ io_sqe_files_set_node(file_data, ref_node);
return ret;
out_fput:
for (i = 0; i < ctx->nr_user_files; i++) {
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
- spin_lock_bh(&data->lock);
- list_add_tail(&ref_node->node, &data->ref_list);
- data->node = ref_node;
- spin_unlock_bh(&data->lock);
- percpu_ref_get(&ctx->file_data->refs);
+ io_sqe_files_set_node(data, ref_node);
} else
destroy_fixed_file_ref_node(ref_node);
ret |= io_poll_remove_all(ctx, task, NULL);
ret |= io_kill_timeouts(ctx, task, NULL);
+ ret |= io_run_task_work();
if (!ret)
break;
- io_run_task_work();
cond_resched();
}
}
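
This change also matters for termination: previously the loop broke as soon as poll removal and timeout kill reported no progress, without running task_work at all on that final pass, and io_run_task_work()'s return value was ignored. Now task_work runs every iteration and counts as forward progress, so the loop keeps draining until cancelations and task_work are both quiescent.
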
static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
{
struct io_uring_task *tctx = current->io_uring;
+ int ret;
if (unlikely(!tctx)) {
- int ret;
-
ret = io_uring_alloc_task_context(current);
if (unlikely(ret))
return ret;
if (!old) {
get_file(file);
- xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
+ ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
+ file, GFP_KERNEL));
+ if (ret) {
+ fput(file);
+ return ret;
+ }
}
tctx->last = file;
}
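
The point of the xa_err() unwrap: xa_store() returns the previous entry on success or an xa_err()-encoded pointer on failure, so the errno has to be extracted explicitly; and the get_file() reference taken for the table must be dropped again on failure or the file leaks. The take-ref/insert/undo shape in isolation (illustrative helper name, not a kernel function):

    /* Illustrative: a reference handed to a container must be given
     * back if insertion fails. */
    static int remember_file(struct xarray *xa, struct file *file)
    {
            int ret;

            get_file(file); /* ref to be owned by the xarray entry */
            ret = xa_err(xa_store(xa, (unsigned long)file, file, GFP_KERNEL));
            if (ret)
                    fput(file);     /* store failed: nobody owns the ref */
            return ret;
    }
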
if (inflight != tctx_inflight(tctx))
continue;
schedule();
+ finish_wait(&tctx->wait, &wait);
} while (1);
- finish_wait(&tctx->wait, &wait);
atomic_dec(&tctx->in_idle);
}
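
After the move, each sleep is fully bracketed inside the iteration: the task queues itself, re-checks the sampled count, sleeps only if nothing changed, and dequeues itself before the next pass re-reads the count. Schematically (count_inflight() and cancel_requests() are stand-ins for tctx_inflight() and the cancel calls, not kernel functions):

    unsigned long count_inflight(struct io_uring_task *tctx);  /* stand-in */
    void cancel_requests(void);                                /* stand-in */

    static void cancel_wait_sketch(struct io_uring_task *tctx)
    {
            DEFINE_WAIT(wait);

            do {
                    unsigned long inflight = count_inflight(tctx);

                    if (!inflight)
                            break;
                    cancel_requests();

                    prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
                    /* a completion raced in between the sample and
                     * queueing: retry instead of sleeping on stale data */
                    if (inflight != count_inflight(tctx))
                            continue;
                    schedule();
                    finish_wait(&tctx->wait, &wait); /* dequeue before re-sampling */
            } while (1);
    }
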
return 0;
}
+static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+{
+ int ret, fd;
+
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ ret = io_uring_add_task_file(ctx, file);
+ if (ret) {
+ put_unused_fd(fd);
+ return ret;
+ }
+ fd_install(fd, file);
+ return fd;
+}
+
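
The split keeps the one hard ordering requirement in a single place: fd_install() must come last, because the moment the fd is live in the process's file table another thread can close() it, so neither the task-file registration nor any later setup may run after it.
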
/*
* Allocate an anonymous fd, this is what constitutes the application
* visible backing of an io_uring instance. The application mmaps this
* fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
* we have to tie this fd to a socket for file garbage collection purposes.
*/
-static int io_uring_get_fd(struct io_ring_ctx *ctx)
+static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
{
struct file *file;
+#if defined(CONFIG_UNIX)
int ret;
- int fd;
-#if defined(CONFIG_UNIX)
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
&ctx->ring_sock);
if (ret)
- return ret;
+ return ERR_PTR(ret);
#endif
- ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
- if (ret < 0)
- goto err;
- fd = ret;
-
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- ret = PTR_ERR(file);
- goto err;
- }
-
#if defined(CONFIG_UNIX)
- ctx->ring_sock->file = file;
-#endif
- ret = io_uring_add_task_file(ctx, file);
- if (ret) {
- fput(file);
- put_unused_fd(fd);
- goto err;
+ if (IS_ERR(file)) {
+ sock_release(ctx->ring_sock);
+ ctx->ring_sock = NULL;
+ } else {
+ ctx->ring_sock->file = file;
}
- fd_install(fd, file);
- return fd;
-err:
-#if defined(CONFIG_UNIX)
- sock_release(ctx->ring_sock);
- ctx->ring_sock = NULL;
#endif
- return ret;
+ return file;
}
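
Note the asymmetric cleanup this leaves behind: before anon_inode_getfile() succeeds, ctx->ring_sock is a bare socket and must be released by hand; once the file exists, ring_sock->file ties the socket's lifetime to the file, and the normal fput() path tears down both.
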
static int io_uring_create(unsigned entries, struct io_uring_params *p,
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
+ struct file *file;
bool limit_mem;
int ret;
goto err;
}
+ file = io_uring_get_file(ctx);
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
+ goto err;
+ }
+
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
*/
- ret = io_uring_get_fd(ctx);
- if (ret < 0)
- goto err;
+ ret = io_uring_install_fd(ctx, file);
+ if (ret < 0) {
+ /* fput will clean it up */
+ fput(file);
+ return ret;
+ }
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
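
The error path here is deliberately a plain return rather than the old goto err: once io_uring_get_file() has succeeded, the ctx's teardown is driven by the file (hence the "fput will clean it up" comment), and falling through to err would unwind the ctx a second time.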