From: Linus Torvalds Date: Sat, 11 Sep 2021 17:28:14 +0000 (-0700) Subject: Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block X-Git-Tag: microblaze-v5.16~16 X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=commitdiff_plain;h=c605c39677b9842b0566013e0cf30bc13e90bdbc;hp=c0f7e49fc480a97770e448e0c0493e7ba46a9852 Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block Pull io_uring fixes from Jens Axboe: - Fix an off-by-one in a BUILD_BUG_ON() check. Not a real issue right now as we have plenty of flags left, but could become one. (Hao) - Fix lockdep issue introduced in this merge window (me) - Fix a few issues with the worker creation (me, Pavel, Qiang) - Fix regression with wq_has_sleeper() for IOPOLL (Pavel) - Timeout link error propagation fix (Pavel) * tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block: io_uring: fix off-by-one in BUILD_BUG_ON check of __REQ_F_LAST_BIT io_uring: fail links of cancelled timeouts io-wq: fix memory leak in create_io_worker() io-wq: fix silly logic error in io_task_work_match() io_uring: drop ctx->uring_lock before acquiring sqd->lock io_uring: fix missing mb() before waitqueue_active io-wq: fix cancellation on create-worker failure --- diff --git a/fs/io-wq.c b/fs/io-wq.c index d80e4a735677..6c55362c1f99 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -709,6 +709,7 @@ static void create_worker_cont(struct callback_head *cb) } raw_spin_unlock(&wqe->lock); io_worker_ref_put(wqe->wq); + kfree(worker); return; } @@ -725,6 +726,7 @@ static void io_workqueue_create(struct work_struct *work) if (!io_queue_worker_create(worker, acct, create_worker_cont)) { clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); + kfree(worker); } } @@ -759,6 +761,7 @@ fail: if (!IS_ERR(tsk)) { io_init_new_worker(wqe, worker, tsk); } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + kfree(worker); goto fail; } else { INIT_WORK(&worker->work, io_workqueue_create); @@ -832,6 +835,11 @@ append: wq_list_add_after(&work->list, &tail->list, &acct->work_list); } +static bool io_wq_work_match_item(struct io_wq_work *work, void *data) +{ + return work == data; +} + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); @@ -844,7 +852,6 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) */ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || (work->flags & IO_WQ_WORK_CANCEL)) { -run_cancel: io_run_cancel(work, wqe); return; } @@ -864,15 +871,22 @@ run_cancel: bool did_create; did_create = io_wqe_create_worker(wqe, acct); - if (unlikely(!did_create)) { - raw_spin_lock(&wqe->lock); - /* fatal condition, failed to create the first worker */ - if (!acct->nr_workers) { - raw_spin_unlock(&wqe->lock); - goto run_cancel; - } - raw_spin_unlock(&wqe->lock); + if (likely(did_create)) + return; + + raw_spin_lock(&wqe->lock); + /* fatal condition, failed to create the first worker */ + if (!acct->nr_workers) { + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_item, + .data = work, + .cancel_all = false, + }; + + if (io_acct_cancel_pending_work(wqe, acct, &match)) + raw_spin_lock(&wqe->lock); } + raw_spin_unlock(&wqe->lock); } } @@ -1122,7 +1136,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data) { struct io_worker *worker; - if (cb->func != create_worker_cb || cb->func != create_worker_cont) + if (cb->func != create_worker_cb && cb->func != create_worker_cont) return false; worker = container_of(cb, struct io_worker, create_work); return worker->wqe->wq == data; @@ -1143,9 +1157,14 @@ static void io_wq_exit_workers(struct io_wq *wq) while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { struct io_worker *worker; + struct io_wqe_acct *acct; worker = container_of(cb, struct io_worker, create_work); - atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); + acct = io_wqe_get_acct(worker); + atomic_dec(&acct->nr_running); + raw_spin_lock(&worker->wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&worker->wqe->lock); io_worker_ref_put(wq); clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); diff --git a/fs/io_uring.c b/fs/io_uring.c index 855ea544807f..16fb7436043c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1482,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) struct io_timeout_data *io = req->async_data; if (hrtimer_try_to_cancel(&io->timer) != -1) { + if (status) + req_set_fail(req); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); @@ -1619,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { - if (wq_has_sleeper(&ctx->cq_wait)) + if (waitqueue_active(&ctx->cq_wait)) wake_up_all(&ctx->cq_wait); } if (io_should_trigger_evfd(ctx)) @@ -10550,7 +10555,14 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold + * a ref to the ctx. + */ + mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); + mutex_lock(&ctx->uring_lock); tctx = sqd->thread->io_uring; } } else { @@ -10853,7 +10865,7 @@ static int __init io_uring_init(void) BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); - BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int)); + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);