X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=fs%2Fio_uring.c;h=361befaae28f4be281519819157e422eda14185e;hb=3e6a0d3c7571ce3ed0d25c5c32543a54a7ebcd75;hp=1dd30a15ea6aa984747a843a623559c400af2a9b;hpb=1575f21a09206e914b81dace0add693346d97594;p=linux-2.6-microblaze.git diff --git a/fs/io_uring.c b/fs/io_uring.c index 1dd30a15ea6a..361befaae28f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -338,7 +338,6 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; - unsigned int sqo_dead: 1; unsigned int sqo_exec: 1; /* @@ -380,11 +379,6 @@ struct io_ring_ctx { struct io_rings *rings; - /* - * For SQPOLL usage - */ - struct task_struct *sqo_task; - /* Only used for accounting purposes */ struct mm_struct *mm_account; @@ -688,7 +682,6 @@ enum { REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_NO_FILE_TABLE_BIT, - REQ_F_WORK_INITIALIZED_BIT, REQ_F_LTIMEOUT_ACTIVE_BIT, REQ_F_COMPLETE_INLINE_BIT, @@ -730,8 +723,6 @@ enum { REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), /* doesn't need file table for this request */ REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), - /* io_wq_work is initialized */ - REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), /* linked timeout is active, i.e. prepared by link's head */ REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), /* completion is deferred through io_comp_state */ @@ -1094,24 +1085,6 @@ static inline void req_set_fail_links(struct io_kiocb *req) req->flags |= REQ_F_FAIL_LINK; } -static inline void __io_req_init_async(struct io_kiocb *req) -{ - memset(&req->work, 0, sizeof(req->work)); - req->flags |= REQ_F_WORK_INITIALIZED; -} - -/* - * Note: must call io_req_init_async() for the first time you - * touch any members of io_wq_work. 
- */ -static inline void io_req_init_async(struct io_kiocb *req) -{ - if (req->flags & REQ_F_WORK_INITIALIZED) - return; - - __io_req_init_async(req); -} - static void io_ring_ctx_ref_free(struct percpu_ref *ref) { struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); @@ -1196,13 +1169,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) static void io_req_clean_work(struct io_kiocb *req) { - if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return; - - if (req->work.creds) { - put_cred(req->work.creds); - req->work.creds = NULL; - } if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; struct io_uring_task *tctx = req->task->io_uring; @@ -1215,8 +1181,6 @@ static void io_req_clean_work(struct io_kiocb *req) if (atomic_read(&tctx->in_idle)) wake_up(&tctx->wait); } - - req->flags &= ~REQ_F_WORK_INITIALIZED; } static void io_req_track_inflight(struct io_kiocb *req) @@ -1224,7 +1188,6 @@ static void io_req_track_inflight(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; if (!(req->flags & REQ_F_INFLIGHT)) { - io_req_init_async(req); req->flags |= REQ_F_INFLIGHT; spin_lock_irq(&ctx->inflight_lock); @@ -1238,8 +1201,6 @@ static void io_prep_async_work(struct io_kiocb *req) const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; - io_req_init_async(req); - if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; @@ -1250,8 +1211,6 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!req->work.creds) - req->work.creds = get_current_cred(); } static void io_prep_async_link(struct io_kiocb *req) @@ -1835,6 +1794,18 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) return __io_req_find_next(req); } +static void ctx_flush_and_put(struct io_ring_ctx *ctx) +{ + if (!ctx) + return; + if (ctx->submit_state.comp.nr) { + mutex_lock(&ctx->uring_lock); + io_submit_flush_completions(&ctx->submit_state.comp, ctx); + mutex_unlock(&ctx->uring_lock); + } + percpu_ref_put(&ctx->refs); +} + static bool __tctx_task_work(struct io_uring_task *tctx) { struct io_ring_ctx *ctx = NULL; @@ -1852,30 +1823,20 @@ static bool __tctx_task_work(struct io_uring_task *tctx) node = list.first; while (node) { struct io_wq_work_node *next = node->next; - struct io_ring_ctx *this_ctx; struct io_kiocb *req; req = container_of(node, struct io_kiocb, io_task_work.node); - this_ctx = req->ctx; - req->task_work.func(&req->task_work); - node = next; - - if (!ctx) { - ctx = this_ctx; - } else if (ctx != this_ctx) { - mutex_lock(&ctx->uring_lock); - io_submit_flush_completions(&ctx->submit_state.comp, ctx); - mutex_unlock(&ctx->uring_lock); - ctx = this_ctx; + if (req->ctx != ctx) { + ctx_flush_and_put(ctx); + ctx = req->ctx; + percpu_ref_get(&ctx->refs); } - } - if (ctx && ctx->submit_state.comp.nr) { - mutex_lock(&ctx->uring_lock); - io_submit_flush_completions(&ctx->submit_state.comp, ctx); - mutex_unlock(&ctx->uring_lock); + req->task_work.func(&req->task_work); + node = next; } + ctx_flush_and_put(ctx); return list.first != NULL; } @@ -2000,7 +1961,7 @@ static void __io_req_task_submit(struct io_kiocb *req) /* ctx stays valid until unlock, even if we drop all ours ctx->refs */ mutex_lock(&ctx->uring_lock); - if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve) + if (!(current->flags & PF_EXITING) && !current->in_execve) __io_queue_sqe(req); else __io_req_task_cancel(req, -EFAULT); @@ -2462,23 +2423,32 @@ 
static bool io_resubmit_prep(struct io_kiocb *req) return false; return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false); } -#endif -static bool io_rw_reissue(struct io_kiocb *req) +static bool io_rw_should_reissue(struct io_kiocb *req) { -#ifdef CONFIG_BLOCK umode_t mode = file_inode(req->file)->i_mode; + struct io_ring_ctx *ctx = req->ctx; if (!S_ISBLK(mode) && !S_ISREG(mode)) return false; - if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker()) + if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && + !(ctx->flags & IORING_SETUP_IOPOLL))) return false; /* * If ref is dying, we might be running poll reap from the exit work. * Don't attempt to reissue from that path, just let it fail with * -EAGAIN. */ - if (percpu_ref_is_dying(&req->ctx->refs)) + if (percpu_ref_is_dying(&ctx->refs)) + return false; + return true; +} +#endif + +static bool io_rw_reissue(struct io_kiocb *req) +{ +#ifdef CONFIG_BLOCK + if (!io_rw_should_reissue(req)) return false; lockdep_assert_held(&req->ctx->uring_lock); @@ -2521,6 +2491,19 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +#ifdef CONFIG_BLOCK + /* Rewind iter, if we have one. iopoll path resubmits as usual */ + if (res == -EAGAIN && io_rw_should_reissue(req)) { + struct io_async_rw *rw = req->async_data; + + if (rw) + iov_iter_revert(&rw->iter, + req->result - iov_iter_count(&rw->iter)); + else if (!io_resubmit_prep(req)) + res = -EIO; + } +#endif + if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); @@ -3269,6 +3252,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) ret = io_iter_do_read(req, iter); if (ret == -EIOCBQUEUED) { + if (req->async_data) + iov_iter_revert(iter, io_size - iov_iter_count(iter)); goto out_free; } else if (ret == -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ @@ -3400,6 +3385,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) /* no retry on NONBLOCK nor RWF_NOWAIT */ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) goto done; + if (ret2 == -EIOCBQUEUED && req->async_data) + iov_iter_revert(iter, io_size - iov_iter_count(iter)); if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) @@ -3578,7 +3565,6 @@ static int __io_splice_prep(struct io_kiocb *req, * Splice operation will be punted aync, and here need to * modify io_wq_work.flags, so initialize io_wq_work firstly. 
*/ - io_req_init_async(req); req->work.flags |= IO_WQ_WORK_UNBOUND; } @@ -4993,6 +4979,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -EINVAL; return; } + /* double add on the same waitqueue head, ignore */ + if (poll->head == head) + return; poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { pt->error = -ENOMEM; @@ -5935,8 +5924,22 @@ static void __io_clean_op(struct io_kiocb *req) static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; + const struct cred *creds = NULL; int ret; + if (req->work.personality) { + const struct cred *new_creds; + + if (!(issue_flags & IO_URING_F_NONBLOCK)) + mutex_lock(&ctx->uring_lock); + new_creds = idr_find(&ctx->personality_idr, req->work.personality); + if (!(issue_flags & IO_URING_F_NONBLOCK)) + mutex_unlock(&ctx->uring_lock); + if (!new_creds) + return -EINVAL; + creds = override_creds(new_creds); + } + switch (req->opcode) { case IORING_OP_NOP: ret = io_nop(req, issue_flags); @@ -6043,6 +6046,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) break; } + if (creds) + revert_creds(creds); + if (ret) return ret; @@ -6206,18 +6212,10 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) static void __io_queue_sqe(struct io_kiocb *req) { struct io_kiocb *linked_timeout = io_prep_linked_timeout(req); - const struct cred *old_creds = NULL; int ret; - if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && - req->work.creds != current_cred()) - old_creds = override_creds(req->work.creds); - ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); - if (old_creds) - revert_creds(old_creds); - /* * We async punt it if the file wasn't marked NOWAIT, or if the file * doesn't support non-blocking read/write attempts @@ -6304,7 +6302,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, { struct io_submit_state *state; unsigned int sqe_flags; - int id, ret = 0; + int ret = 0; req->opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -6336,15 +6334,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, !io_op_defs[req->opcode].buffer_select) return -EOPNOTSUPP; - id = READ_ONCE(sqe->personality); - if (id) { - __io_req_init_async(req); - req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) - return -EINVAL; - get_cred(req->work.creds); - } - + req->work.list.next = NULL; + req->work.flags = 0; + req->work.personality = READ_ONCE(sqe->personality); state = &ctx->submit_state; /* @@ -6606,8 +6598,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && !ctx->sqo_dead && - likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -7846,7 +7837,7 @@ static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx) clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); reinit_completion(&sqd->completion); - ctx->sqo_dead = ctx->sqo_exec = 0; + ctx->sqo_exec = 0; sqd->task_pid = current->pid; current->flags |= PF_IO_WORKER; ret = io_wq_fork_thread(io_sq_thread, sqd); @@ -7947,6 +7938,7 @@ static void io_sq_offload_start(struct io_ring_ctx *ctx) { struct io_sq_data *sqd = ctx->sq_data; + ctx->flags &= ~IORING_SETUP_R_DISABLED; if (ctx->flags & 
IORING_SETUP_SQPOLL) complete(&sqd->startup); } @@ -8515,15 +8507,11 @@ static int io_remove_personalities(int id, void *p, void *data) static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) { - struct callback_head *work, *head, *next; + struct callback_head *work, *next; bool executed = false; do { - do { - head = NULL; - work = READ_ONCE(ctx->exit_task_work); - } while (cmpxchg(&ctx->exit_task_work, work, head) != work); - + work = xchg(&ctx->exit_task_work, NULL); if (!work) break; @@ -8552,7 +8540,6 @@ static void io_ring_exit_work(struct work_struct *work) */ do { io_uring_try_cancel_requests(ctx, NULL, NULL); - io_run_ctx_fallback(ctx); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -8561,10 +8548,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); - - if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) - ctx->sqo_dead = 1; - /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) @@ -8724,17 +8707,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, } } -static void io_disable_sqo_submit(struct io_ring_ctx *ctx) -{ - mutex_lock(&ctx->uring_lock); - ctx->sqo_dead = 1; - mutex_unlock(&ctx->uring_lock); - - /* make sure callers enter the ring to get error */ - if (ctx->rings) - io_ring_set_wakeup_flag(ctx); -} - /* * We need to iteratively cancel requests, in case a request has dependent * hard links. These persist even for failure of cancelations, hence keep @@ -8747,7 +8719,11 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, bool did_park = false; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - io_disable_sqo_submit(ctx); + /* never started, nothing to cancel */ + if (ctx->flags & IORING_SETUP_R_DISABLED) { + io_sq_offload_start(ctx); + return; + } did_park = io_sq_thread_park(ctx->sq_data); if (did_park) { task = ctx->sq_data->thread; @@ -8792,10 +8768,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) fput(file); return ret; } - - /* one and only SQPOLL file note, held by sqo_task */ - WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && - current != ctx->sqo_task); } tctx->last = file; } @@ -8868,7 +8840,6 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) if (!sqd) return; - io_disable_sqo_submit(ctx); if (!io_sq_thread_park(sqd)) return; tctx = ctx->sq_data->thread->io_uring; @@ -8913,7 +8884,6 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - /* trigger io_disable_sqo_submit() */ if (tctx->sqpoll) { struct file *file; unsigned long index; @@ -8948,52 +8918,6 @@ void __io_uring_task_cancel(void) __io_uring_free(current); } -static int io_uring_flush(struct file *file, void *data) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_ring_ctx *ctx = file->private_data; - - /* Ignore helper thread files exit */ - if (current->flags & PF_IO_WORKER) - return 0; - - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) { - io_uring_cancel_task_requests(ctx, NULL); - io_req_caches_free(ctx); - } - - io_run_ctx_fallback(ctx); - - if (!tctx) - return 0; - - /* we should have cancelled and erased it before PF_EXITING */ - WARN_ON_ONCE((current->flags & PF_EXITING) && - xa_load(&tctx->xa, (unsigned long)file)); - - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. 
If the task is exiting, drop regardless of count. - */ - if (atomic_long_read(&file->f_count) != 2) - return 0; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - /* there is only one file note, which is owned by sqo_task */ - WARN_ON_ONCE(ctx->sqo_task != current && - xa_load(&tctx->xa, (unsigned long)file)); - /* sqo_dead check is for when this happens after cancellation */ - WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead && - !xa_load(&tctx->xa, (unsigned long)file)); - - io_disable_sqo_submit(ctx); - } - - if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) - io_uring_del_task_file(file); - return 0; -} - static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff, size_t sz) { @@ -9072,22 +8996,14 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) do { if (!io_sqring_full(ctx)) break; - prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); - if (unlikely(ctx->sqo_dead)) { - ret = -EOWNERDEAD; - goto out; - } - if (!io_sqring_full(ctx)) break; - schedule(); } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } @@ -9169,8 +9085,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ctx->sqo_exec = 0; } ret = -EOWNERDEAD; - if (unlikely(ctx->sqo_dead)) - goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { @@ -9325,7 +9239,6 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) static const struct file_operations io_uring_fops = { .release = io_uring_release, - .flush = io_uring_flush, .mmap = io_uring_mmap, #ifndef CONFIG_MMU .get_unmapped_area = io_uring_nommu_get_unmapped_area, @@ -9480,7 +9393,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->compat = in_compat_syscall(); if (!capable(CAP_IPC_LOCK)) ctx->user = get_uid(current_user()); - ctx->sqo_task = current; /* * This is just grabbed for accounting purposes. When a process exits, @@ -9543,7 +9455,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { - io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; @@ -9552,7 +9463,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: - io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } @@ -9720,10 +9630,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (ctx->restrictions.registered) ctx->restricted = 1; - ctx->flags &= ~IORING_SETUP_R_DISABLED; - io_sq_offload_start(ctx); - return 0; }
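
A note for readers on the personality plumbing above, appended below the patch rather than in it: the id that io_init_req() now stashes in req->work.personality, and that io_issue_sqe() resolves against ctx->personality_idr before overriding credentials around the opcode switch, is the same id userspace gets back from IORING_REGISTER_PERSONALITY. The short liburing program below shows that user-facing side. It is a minimal sketch only: it assumes a kernel with personality support and liburing installed, uses a NOP request purely as a placeholder opcode, and keeps error handling to a minimum; it is not part of the patch.

/* Register the task's current credentials as a personality and tag one SQE
 * with the returned id; the kernel resolves that id per request at issue time.
 * Build with: gcc -o personality_demo personality_demo.c -luring
 */
#include <stdio.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret, personality;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	/* Snapshot the current credentials; returns an id > 0 on success. */
	personality = io_uring_register_personality(&ring);
	if (personality < 0) {
		fprintf(stderr, "register_personality: %s\n", strerror(-personality));
		return 1;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);
	/* This is the id the kernel looks up in ctx->personality_idr. */
	sqe->personality = personality;

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	printf("nop completed with res=%d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_unregister_personality(&ring, personality);
	io_uring_queue_exit(&ring);
	return 0;
}

On the kernel side, the visible effect of the hunks above is that the credentials for that id are now looked up at issue time (under uring_lock when the blocking io-wq path calls in) instead of being pinned with get_cred() at prep time, which is what lets REQ_F_WORK_INITIALIZED and the put_cred() in io_req_clean_work() go away.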