unsigned int eventfd_async: 1;
unsigned int restricted: 1;
unsigned int sqo_dead: 1;
+ unsigned int sqo_exec: 1;
/*
* Ring buffer of indices into array of io_uring_sqe, which is
unsigned cached_cq_overflow;
unsigned long sq_check_overflow;
+ /* hashed buffered write serialization */
+ struct io_wq_hash *hash_map;
+
struct list_head defer_list;
struct list_head timeout_list;
struct list_head cq_overflow_list;
/* exit task_work */
struct callback_head *exit_task_work;
+ struct wait_queue_head hash_wait;
+
/* Keep this last, we don't need it for the fast path */
struct work_struct exit_work;
};
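For reference, a sketch of the io_wq_hash object that the new ->hash_map field (and io_wq_put_hash() further down) point at. The refs and wait members follow from the hunks in this patch; the map bitmap and the kfree-on-last-reference behaviour are assumptions taken from the matching io-wq.h side of the series and are not shown in this excerpt.

struct io_wq_hash {
	refcount_t refs;		/* shared by the ctx and the io-wq it feeds */
	unsigned long map;		/* assumed: one bit per hashed bucket in flight */
	struct wait_queue_head wait;	/* workers sleep here until a bucket frees up */
};

static inline void io_wq_put_hash(struct io_wq_hash *hash)
{
	if (refcount_dec_and_test(&hash->refs))
		kfree(hash);
}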
}
}
-static bool io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
-{
- if (!percpu_ref_tryget(ref)) {
- /* already at zero, wait for ->release() */
- if (!try_wait_for_completion(compl))
- synchronize_rcu();
- return false;
- }
-
- percpu_ref_resurrect(ref);
- reinit_completion(compl);
- percpu_ref_put(ref);
- return true;
-}
-
static bool io_match_task(struct io_kiocb *head,
struct task_struct *task,
struct files_struct *files)
/* ctx stays valid until unlock, even if we drop all our ctx->refs */
mutex_lock(&ctx->uring_lock);
- if (!ctx->sqo_dead && !(current->flags & PF_EXITING))
+ if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve)
__io_queue_sqe(req);
else
__io_req_task_cancel(req, -EFAULT);
return false;
if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
return false;
+ /*
+ * If ref is dying, we might be running poll reap from the exit work.
+ * Don't attempt to reissue from that path, just let it fail with
+ * -EAGAIN.
+ */
+ if (percpu_ref_is_dying(&req->ctx->refs))
+ return false;
lockdep_assert_held(&req->ctx->uring_lock);
complete_all(&sqd->completion);
mutex_lock(&sqd->lock);
sqd->thread = NULL;
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+ ctx->sqo_exec = 1;
+ io_ring_set_wakeup_flag(ctx);
+ }
mutex_unlock(&sqd->lock);
complete(&sqd->exited);
flush_delayed_work(&ctx->rsrc_put_work);
ret = wait_for_completion_interruptible(&data->done);
- if (!ret || !io_refs_resurrect(&data->refs, &data->done))
+ if (!ret)
break;
+ percpu_ref_resurrect(&data->refs);
io_sqe_rsrc_set_node(ctx, data, backup_node);
backup_node = NULL;
+ reinit_completion(&data->done);
mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
mutex_lock(&ctx->uring_lock);
struct io_sq_data *sqd = ctx->sq_data;
if (sqd) {
+ complete(&sqd->startup);
if (sqd->thread) {
wait_for_completion(&ctx->sq_thread_comp);
io_sq_thread_park(sqd);
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx)
{
+ struct io_wq_hash *hash;
struct io_wq_data data;
unsigned int concurrency;
+ hash = ctx->hash_map;
+ if (!hash) {
+ hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+ if (!hash)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&hash->refs, 1);
+ init_waitqueue_head(&hash->wait);
+ ctx->hash_map = hash;
+ }
+
+ data.hash = hash;
data.free_work = io_free_work;
data.do_work = io_wq_submit_work;
tsk->io_uring = NULL;
}
+static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
+{
+ int ret;
+
+ clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ reinit_completion(&sqd->completion);
+ ctx->sqo_dead = ctx->sqo_exec = 0;
+ sqd->task_pid = current->pid;
+ current->flags |= PF_IO_WORKER;
+ ret = io_wq_fork_thread(io_sq_thread, sqd);
+ current->flags &= ~PF_IO_WORKER;
+ if (ret < 0) {
+ sqd->thread = NULL;
+ return ret;
+ }
+ wait_for_completion(&sqd->completion);
+ return io_uring_alloc_task_context(sqd->thread, ctx);
+}
+
static int io_sq_offload_create(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
{
struct io_sq_data *sqd = ctx->sq_data;
- if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
+ if (ctx->flags & IORING_SETUP_SQPOLL)
complete(&sqd->startup);
}
static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct io_submit_state *submit_state = &ctx->submit_state;
+ struct io_comp_state *cs = &ctx->submit_state.comp;
mutex_lock(&ctx->uring_lock);
- if (submit_state->free_reqs)
+ if (submit_state->free_reqs) {
kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
submit_state->reqs);
-
- io_req_cache_free(&submit_state->comp.free_list, NULL);
+ submit_state->free_reqs = 0;
+ }
spin_lock_irq(&ctx->completion_lock);
- io_req_cache_free(&submit_state->comp.locked_free_list, NULL);
+ list_splice_init(&cs->locked_free_list, &cs->free_list);
+ cs->locked_free_nr = 0;
spin_unlock_irq(&ctx->completion_lock);
+ io_req_cache_free(&cs->free_list, NULL);
+
mutex_unlock(&ctx->uring_lock);
}
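For context, a sketch of the io_req_cache_free() helper invoked above; the walk over req->compl.list and the task filter are assumptions matching the surrounding 5.12-era request-cache code, not part of this hunk.

static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
{
	struct io_kiocb *req, *nxt;

	list_for_each_entry_safe(req, nxt, list, compl.list) {
		if (tsk && req->task != tsk)
			continue;
		list_del(&req->compl.list);
		kmem_cache_free(req_cachep, req);
	}
}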
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
io_req_caches_free(ctx, NULL);
+ if (ctx->hash_map)
+ io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_hash);
kfree(ctx);
}
if (files) {
io_uring_remove_task_files(tctx);
if (tctx->io_wq) {
- io_wq_destroy(tctx->io_wq);
+ io_wq_put(tctx->io_wq);
tctx->io_wq = NULL;
}
}
if (ctx->flags & IORING_SETUP_SQPOLL) {
io_cqring_overflow_flush(ctx, false, NULL, NULL);
+ if (unlikely(ctx->sqo_exec)) {
+ ret = io_sq_thread_fork(ctx->sq_data, ctx);
+ if (ret)
+ goto out;
+ ctx->sqo_exec = 0;
+ }
ret = -EOWNERDEAD;
if (unlikely(ctx->sqo_dead))
goto out;
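From userspace the re-fork above is transparent: once io_ring_set_wakeup_flag() has set IORING_SQ_NEED_WAKEUP (see the sq_thread exit hunk earlier), the submitter's next io_uring_enter() call, typically issued with IORING_ENTER_SQ_WAKEUP once it sees that flag, lands in this branch and runs io_sq_thread_fork(). A minimal sketch of that userspace side, where sq_flags is a hypothetical pointer into the mmapped SQ ring flags word:

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static void sqpoll_kick(int ring_fd, const unsigned *sq_flags, unsigned to_submit)
{
	/* NEED_WAKEUP is set by the kernel when the SQPOLL thread is idle or gone */
	if (*(volatile const unsigned *)sq_flags & IORING_SQ_NEED_WAKEUP)
		syscall(__NR_io_uring_enter, ring_fd, to_submit, 0,
			IORING_ENTER_SQ_WAKEUP, NULL, 0);
}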
*/
has_lock = mutex_trylock(&ctx->uring_lock);
- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
+ if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
sq = ctx->sq_data;
+ if (!sq->thread)
+ sq = NULL;
+ }
seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
mutex_lock(&ctx->uring_lock);
- if (ret && io_refs_resurrect(&ctx->refs, &ctx->ref_comp))
- return ret;
+ if (ret) {
+ percpu_ref_resurrect(&ctx->refs);
+ goto out_quiesce;
+ }
}
if (ctx->restricted) {
if (io_register_op_must_quiesce(opcode)) {
/* bring the ctx back to life */
percpu_ref_reinit(&ctx->refs);
+out_quiesce:
reinit_completion(&ctx->ref_comp);
}
return ret;