* io_kiocb alloc cache
*/
void *reqs[IO_IOPOLL_BATCH];
- unsigned int free_reqs;
- unsigned int cur_req;
+ unsigned int free_reqs;
/*
* File reference cache
struct io_uring_files_update *ip,
unsigned nr_args);
static int io_grab_files(struct io_kiocb *req);
+static void io_ring_file_ref_flush(struct fixed_file_data *data);
static struct kmem_cache *req_cachep;
ret = 1;
}
state->free_reqs = ret - 1;
- state->cur_req = 1;
- req = state->reqs[0];
+ req = state->reqs[ret - 1];
} else {
- req = state->reqs[state->cur_req];
state->free_reqs--;
- state->cur_req++;
+ req = state->reqs[state->free_reqs];
}
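A minimal standalone sketch of the allocation pattern these hunks switch to (hypothetical cache_pop() helper, not from the patch): with cur_req gone, free_reqs doubles as both the count of cached requests and the index of the next one to hand out, so the cache is consumed LIFO from the top of reqs[] and any leftovers can be bulk-freed starting at reqs[0].

	struct alloc_cache {
		void *reqs[IO_IOPOLL_BATCH];
		unsigned int free_reqs;	/* entries still cached in reqs[0..free_reqs-1] */
	};

	/* illustrative only: pop one request off the top of the cache */
	static void *cache_pop(struct alloc_cache *c)
	{
		if (!c->free_reqs)
			return NULL;	/* caller refills via kmem_cache_alloc_bulk() */
		c->free_reqs--;
		return c->reqs[c->free_reqs];
	}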
got_it:
req->flags |= REQ_F_CUR_POS;
kiocb->ki_pos = req->file->f_pos;
}
- kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+ kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+ ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+ if (unlikely(ret))
+ return ret;
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
} else
kiocb->ki_ioprio = get_current_ioprio();
- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
- if (unlikely(ret))
- return ret;
-
/* don't allow async punt if RWF_NOWAIT was requested */
if ((kiocb->ki_flags & IOCB_NOWAIT) ||
(req->file->f_flags & O_NONBLOCK))
blk_finish_plug(&state->plug);
io_file_put(state);
if (state->free_reqs)
- kmem_cache_free_bulk(req_cachep, state->free_reqs,
- &state->reqs[state->cur_req]);
+ kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
}
/*
* reap events and wake us up.
*/
if (inflight ||
- (!time_after(jiffies, timeout) && ret != -EBUSY)) {
+ (!time_after(jiffies, timeout) && ret != -EBUSY &&
+ !percpu_ref_is_dying(&ctx->refs))) {
cond_resched();
continue;
}
if (!data)
return -ENXIO;
- /* protect against inflight atomic switch, which drops the ref */
- percpu_ref_get(&data->refs);
- /* wait for existing switches */
- flush_work(&data->ref_work);
percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
- wait_for_completion(&data->done);
- percpu_ref_put(&data->refs);
- /* flush potential new switch */
flush_work(&data->ref_work);
+ wait_for_completion(&data->done);
+ io_ring_file_ref_flush(data);
percpu_ref_exit(&data->refs);
__io_sqe_files_unregister(ctx);
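	/*
	 * Reviewer note (not part of the patch): the ref is marked dying
	 * first, so io_file_data_ref_zero() below will no longer queue
	 * switch work; flush_work() then drains any switch already queued,
	 * wait_for_completion() waits for the kill confirm callback, and
	 * io_ring_file_ref_flush() drains the remaining put_llist entries
	 * inline, since nothing else will process them at this point.
	 */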
struct completion *done;
};
-static void io_ring_file_ref_switch(struct work_struct *work)
+static void io_ring_file_ref_flush(struct fixed_file_data *data)
{
struct io_file_put *pfile, *tmp;
- struct fixed_file_data *data;
struct llist_node *node;
- data = container_of(work, struct fixed_file_data, ref_work);
-
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
kfree(pfile);
}
}
+}
+
+static void io_ring_file_ref_switch(struct work_struct *work)
+{
+ struct fixed_file_data *data;
+ data = container_of(work, struct fixed_file_data, ref_work);
+ io_ring_file_ref_flush(data);
percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
data = container_of(ref, struct fixed_file_data, refs);
- /* we can't safely switch from inside this context, punt to wq */
- queue_work(system_wq, &data->ref_work);
+ /*
+ * We can't safely switch from inside this context, punt to wq. If
+ * the table ref is going away, the table is being unregistered.
+ * Don't queue up the async work for that case, the caller will
+ * handle it.
+ */
+ if (!percpu_ref_is_dying(&data->refs))
+ queue_work(system_wq, &data->ref_work);
}
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
for (j = 0; j < imu->nr_bvecs; j++)
- put_user_page(imu->bvec[j].bv_page);
+ unpin_user_page(imu->bvec[j].bv_page);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
ret = 0;
down_read(&current->mm->mmap_sem);
- pret = get_user_pages(ubuf, nr_pages,
+ pret = pin_user_pages(ubuf, nr_pages,
FOLL_WRITE | FOLL_LONGTERM,
pages, vmas);
if (pret == nr_pages) {
* release any pages we did get
*/
if (pret > 0)
- put_user_pages(pages, pret);
+ unpin_user_pages(pages, pret);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
kvfree(imu->bvec);
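A minimal sketch of the pin/unpin pairing these hunks convert to (hypothetical pin_user_buf() helper, error handling trimmed): pages obtained with the FOLL_PIN-based pin_user_pages() must be released with unpin_user_page() or unpin_user_pages(), which replaces the old get_user_pages() / put_user_page() pairing used for these long-term pinned buffers.

	static int pin_user_buf(unsigned long ubuf, int nr_pages, struct page **pages)
	{
		int pret;

		down_read(&current->mm->mmap_sem);
		pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
				      pages, NULL);
		up_read(&current->mm->mmap_sem);
		if (pret < 0)
			return pret;
		if (pret != nr_pages) {
			/* partial pin: release what we did get, as the patch does */
			unpin_user_pages(pages, pret);
			return -EFAULT;
		}
		return 0;
	}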
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
+ /*
+ * Wait for sq thread to idle, if we have one. It won't spin on new
+ * work after we've killed the ctx ref above. This is important to do
+ * before we cancel existing commands, as the thread could otherwise
+ * be queueing new work post that. If that's work we need to cancel,
+ * it could cause shutdown to hang.
+ */
+ while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
+ cpu_relax();
+
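	/*
	 * Reviewer note (not part of the patch): this busy-wait pairs with
	 * the percpu_ref_is_dying() check added to the io_sq_thread() loop
	 * above. Once the ctx ref has been killed, the sq thread stops
	 * spinning and parks itself on ctx->sqo_wait, at which point
	 * wq_has_sleeper() becomes true and this loop exits.
	 */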
io_kill_timeouts(ctx);
io_poll_remove_all(ctx);