struct io_uring_files_update *ip,
unsigned nr_args);
static int io_grab_files(struct io_kiocb *req);
+static void io_ring_file_ref_flush(struct fixed_file_data *data);
static struct kmem_cache *req_cachep;
* reap events and wake us up.
*/
if (inflight ||
- (!time_after(jiffies, timeout) && ret != -EBUSY)) {
+ (!time_after(jiffies, timeout) && ret != -EBUSY &&
+ !percpu_ref_is_dying(&ctx->refs))) {
cond_resched();
continue;
}
if (!data)
return -ENXIO;
- /* protect against inflight atomic switch, which drops the ref */
- percpu_ref_get(&data->refs);
- /* wait for existing switches */
- flush_work(&data->ref_work);
percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
- wait_for_completion(&data->done);
- percpu_ref_put(&data->refs);
- /* flush potential new switch */
flush_work(&data->ref_work);
+ wait_for_completion(&data->done);
+ io_ring_file_ref_flush(data);
percpu_ref_exit(&data->refs);
__io_sqe_files_unregister(ctx);
struct completion *done;
};
-static void io_ring_file_ref_switch(struct work_struct *work)
+static void io_ring_file_ref_flush(struct fixed_file_data *data)
{
struct io_file_put *pfile, *tmp;
- struct fixed_file_data *data;
struct llist_node *node;
- data = container_of(work, struct fixed_file_data, ref_work);
-
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
kfree(pfile);
}
}
+}
+static void io_ring_file_ref_switch(struct work_struct *work)
+{
+ struct fixed_file_data *data;
+
+ data = container_of(work, struct fixed_file_data, ref_work);
+ io_ring_file_ref_flush(data);
percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
data = container_of(ref, struct fixed_file_data, refs);
- /* we can't safely switch from inside this context, punt to wq */
- queue_work(system_wq, &data->ref_work);
+ /*
+ * We can't safely switch from inside this context, punt to wq. If
+ * the table ref is going away, the table is being unregistered.
+ * Don't queue up the async work for that case, the caller will
+ * handle it.
+ */
+ if (!percpu_ref_is_dying(&data->refs))
+ queue_work(system_wq, &data->ref_work);
}
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
for (j = 0; j < imu->nr_bvecs; j++)
- put_user_page(imu->bvec[j].bv_page);
+ unpin_user_page(imu->bvec[j].bv_page);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
ret = 0;
down_read(¤t->mm->mmap_sem);
- pret = get_user_pages(ubuf, nr_pages,
+ pret = pin_user_pages(ubuf, nr_pages,
FOLL_WRITE | FOLL_LONGTERM,
pages, vmas);
if (pret == nr_pages) {
* release any pages we did get
*/
if (pret > 0)
- put_user_pages(pages, pret);
+ unpin_user_pages(pages, pret);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
kvfree(imu->bvec);
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
+ /*
+ * Wait for sq thread to idle, if we have one. It won't spin on new
+ * work after we've killed the ctx ref above. This is important to do
+ * before we cancel existing commands, as the thread could otherwise
+ * be queueing new work post that. If that's work we need to cancel,
+ * it could cause shutdown to hang.
+ */
+ while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
+ cpu_relax();
+
io_kill_timeouts(ctx);
io_poll_remove_all(ctx);