diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1580f1e..77f22c3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -585,8 +585,7 @@ struct io_submit_state {
         * io_kiocb alloc cache
         */
        void                    *reqs[IO_IOPOLL_BATCH];
-       unsigned                int free_reqs;
-       unsigned                int cur_req;
+       unsigned int            free_reqs;
 
        /*
         * File reference cache
@@ -754,6 +753,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
                                 struct io_uring_files_update *ip,
                                 unsigned nr_args);
 static int io_grab_files(struct io_kiocb *req);
+static void io_ring_file_ref_flush(struct fixed_file_data *data);
 
 static struct kmem_cache *req_cachep;
 
@@ -1190,12 +1190,10 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
                        ret = 1;
                }
                state->free_reqs = ret - 1;
-               state->cur_req = 1;
-               req = state->reqs[0];
+               req = state->reqs[ret - 1];
        } else {
-               req = state->reqs[state->cur_req];
                state->free_reqs--;
-               state->cur_req++;
+               req = state->reqs[state->free_reqs];
        }
 
 got_it:
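
Together with the io_submit_state hunk at the top, this turns the request alloc cache into a plain LIFO: free_reqs is both the count of cached requests and the index one past the top of the stack, so allocation pops reqs[free_reqs - 1] and the separate cur_req cursor is no longer needed. A minimal userspace sketch of that pattern (the struct and function names are illustrative, not io_uring's):

#include <stddef.h>

#define BATCH   32

struct req_cache {
        void *reqs[BATCH];
        unsigned int free_reqs;         /* reqs[0..free_reqs-1] are cached */
};

/* Pop the most recently cached request, or NULL if the cache is empty. */
static void *req_cache_pop(struct req_cache *c)
{
        if (!c->free_reqs)
                return NULL;
        c->free_reqs--;
        return c->reqs[c->free_reqs];
}
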
@@ -1870,8 +1868,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                req->flags |= REQ_F_CUR_POS;
                kiocb->ki_pos = req->file->f_pos;
        }
-       kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
        kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+       kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+       ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+       if (unlikely(ret))
+               return ret;
 
        ioprio = READ_ONCE(sqe->ioprio);
        if (ioprio) {
@@ -1883,10 +1884,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        } else
                kiocb->ki_ioprio = get_current_ioprio();
 
-       ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
-       if (unlikely(ret))
-               return ret;
-
        /* don't allow async punt if RWF_NOWAIT was requested */
        if ((kiocb->ki_flags & IOCB_NOWAIT) ||
            (req->file->f_flags & O_NONBLOCK))
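
This hunk mostly groups the two writers of kiocb->ki_flags: iocb_flags() supplies the per-file defaults and kiocb_set_rw_flags() then validates and ORs in the per-request RWF_* bits read from the SQE, so the base assignment naturally comes first. A toy userspace sketch of that layering, with made-up flag names (not the kernel's RWF_* macros):

#include <errno.h>

#define BASE_NOWAIT             0x1     /* stands in for a file-wide default */
#define RW_FLAG_HIPRI           0x2     /* stands in for a per-request flag */
#define SUPPORTED_RW_FLAGS      (RW_FLAG_HIPRI)

/* Base flags first, then validate and OR in the per-call flags. */
static int set_rw_flags(unsigned int *ki_flags, unsigned int base,
                        unsigned int rw_flags)
{
        *ki_flags = base;                       /* iocb_flags() equivalent */
        if (rw_flags & ~SUPPORTED_RW_FLAGS)
                return -EOPNOTSUPP;             /* unknown bits are rejected */
        *ki_flags |= rw_flags;                  /* kiocb_set_rw_flags() equivalent */
        return 0;
}
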
@@ -4849,8 +4846,7 @@ static void io_submit_state_end(struct io_submit_state *state)
        blk_finish_plug(&state->plug);
        io_file_put(state);
        if (state->free_reqs)
-               kmem_cache_free_bulk(req_cachep, state->free_reqs,
-                                       &state->reqs[state->cur_req]);
+               kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
 }
 
 /*
@@ -5075,7 +5071,8 @@ static int io_sq_thread(void *data)
                         * reap events and wake us up.
                         */
                        if (inflight ||
-                           (!time_after(jiffies, timeout) && ret != -EBUSY)) {
+                           (!time_after(jiffies, timeout) && ret != -EBUSY &&
+                           !percpu_ref_is_dying(&ctx->refs))) {
                                cond_resched();
                                continue;
                        }
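
This is one half of the shutdown handshake: once io_ring_ctx_wait_and_kill() has killed ctx->refs, percpu_ref_is_dying() turns true, the SQPOLL thread stops treating "no work yet" as a reason to keep spinning, and it falls through to sleeping on ctx->sqo_wait, which is exactly what the new wait loop in the final hunk polls for (see the sketch after that hunk).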
@@ -5265,15 +5262,10 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
        if (!data)
                return -ENXIO;
 
-       /* protect against inflight atomic switch, which drops the ref */
-       percpu_ref_get(&data->refs);
-       /* wait for existing switches */
-       flush_work(&data->ref_work);
        percpu_ref_kill_and_confirm(&data->refs, io_file_ref_kill);
-       wait_for_completion(&data->done);
-       percpu_ref_put(&data->refs);
-       /* flush potential new switch */
        flush_work(&data->ref_work);
+       wait_for_completion(&data->done);
+       io_ring_file_ref_flush(data);
        percpu_ref_exit(&data->refs);
 
        __io_sqe_files_unregister(ctx);
@@ -5511,14 +5503,11 @@ struct io_file_put {
        struct completion *done;
 };
 
-static void io_ring_file_ref_switch(struct work_struct *work)
+static void io_ring_file_ref_flush(struct fixed_file_data *data)
 {
        struct io_file_put *pfile, *tmp;
-       struct fixed_file_data *data;
        struct llist_node *node;
 
-       data = container_of(work, struct fixed_file_data, ref_work);
-
        while ((node = llist_del_all(&data->put_llist)) != NULL) {
                llist_for_each_entry_safe(pfile, tmp, node, llist) {
                        io_ring_file_put(data->ctx, pfile->file);
@@ -5528,7 +5517,14 @@ static void io_ring_file_ref_switch(struct work_struct *work)
                                kfree(pfile);
                }
        }
+}
+
+static void io_ring_file_ref_switch(struct work_struct *work)
+{
+       struct fixed_file_data *data;
 
+       data = container_of(work, struct fixed_file_data, ref_work);
+       io_ring_file_ref_flush(data);
        percpu_ref_get(&data->refs);
        percpu_ref_switch_to_percpu(&data->refs);
 }
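
Splitting io_ring_file_ref_flush() out of the work handler lets io_sqe_files_unregister() drain pending file puts directly instead of relying on another pass through the workqueue. The drain itself is the standard lock-free llist consumer: detach the whole list with llist_del_all(), then walk it with llist_for_each_entry_safe(). A generic kernel-style sketch of both ends of that pattern (the struct and function names are illustrative, not io_uring's):

#include <linux/llist.h>
#include <linux/slab.h>

struct put_entry {
        struct llist_node llist;
        void *payload;
};

static LLIST_HEAD(put_list);

/* Producer side: llist_add() is safe from any context, no locking needed. */
static void defer_put(struct put_entry *e)
{
        llist_add(&e->llist, &put_list);
}

/* Consumer side: detach everything at once, then walk and release it. */
static void drain_puts(void)
{
        struct put_entry *e, *tmp;
        struct llist_node *node;

        while ((node = llist_del_all(&put_list)) != NULL) {
                llist_for_each_entry_safe(e, tmp, node, llist)
                        kfree(e);
        }
}
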
@@ -5539,8 +5535,14 @@ static void io_file_data_ref_zero(struct percpu_ref *ref)
 
        data = container_of(ref, struct fixed_file_data, refs);
 
-       /* we can't safely switch from inside this context, punt to wq */
-       queue_work(system_wq, &data->ref_work);
+       /*
+        * We can't safely switch from inside this context, punt to wq. If
+        * the table ref is going away, the table is being unregistered.
+        * Don't queue up the async work for that case, the caller will
+        * handle it.
+        */
+       if (!percpu_ref_is_dying(&data->refs))
+               queue_work(system_wq, &data->ref_work);
 }
 
 static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
@@ -6039,7 +6041,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
                struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
 
                for (j = 0; j < imu->nr_bvecs; j++)
-                       put_user_page(imu->bvec[j].bv_page);
+                       unpin_user_page(imu->bvec[j].bv_page);
 
                if (ctx->account_mem)
                        io_unaccount_mem(ctx->user, imu->nr_bvecs);
@@ -6160,7 +6162,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 
                ret = 0;
                down_read(&current->mm->mmap_sem);
-               pret = get_user_pages(ubuf, nr_pages,
+               pret = pin_user_pages(ubuf, nr_pages,
                                      FOLL_WRITE | FOLL_LONGTERM,
                                      pages, vmas);
                if (pret == nr_pages) {
@@ -6184,7 +6186,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
                         * release any pages we did get
                         */
                        if (pret > 0)
-                               put_user_pages(pages, pret);
+                               unpin_user_pages(pages, pret);
                        if (ctx->account_mem)
                                io_unaccount_mem(ctx->user, nr_pages);
                        kvfree(imu->bvec);
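
The three buffer-registration hunks are the mechanical conversion from get_user_pages()/put_user_page() to pin_user_pages()/unpin_user_pages(): pages held long term for I/O are tracked as pinned and must be released with the matching unpin helpers. A condensed kernel-style sketch of the pairing as it looks in this kernel version (the wrapper name and error handling are illustrative):

#include <linux/mm.h>
#include <linux/sched.h>

/*
 * Sketch only: pin nr_pages of a user buffer at addr for long-term I/O.
 * Accounting and bvec setup from the real register path are omitted.
 */
static long pin_user_buffer(unsigned long addr, unsigned long nr_pages,
                            struct page **pages)
{
        long pinned;

        down_read(&current->mm->mmap_sem);
        pinned = pin_user_pages(addr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
                                pages, NULL);
        up_read(&current->mm->mmap_sem);

        if (pinned > 0 && pinned != nr_pages) {
                /* Partial pin: release what we got and report failure. */
                unpin_user_pages(pages, pinned);
                return -EFAULT;
        }
        return pinned;
}
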
@@ -6329,6 +6331,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
        percpu_ref_kill(&ctx->refs);
        mutex_unlock(&ctx->uring_lock);
 
+       /*
+        * Wait for sq thread to idle, if we have one. It won't spin on new
+        * work after we've killed the ctx ref above. This is important to do
+        * before we cancel existing commands, as the thread could otherwise
+        * be queueing new work post that. If that's work we need to cancel,
+        * it could cause shutdown to hang.
+        */
+       while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait))
+               cpu_relax();
+
        io_kill_timeouts(ctx);
        io_poll_remove_all(ctx);
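
This busy-wait is the other half of the handshake with the percpu_ref_is_dying() check in io_sq_thread(): shutdown spins until the SQPOLL thread has parked itself on ctx->sqo_wait and therefore cannot be queueing new work behind the cancellation that follows. wq_has_sleeper() issues a full barrier before checking the waitqueue, so a thread that has passed prepare_to_wait() is reliably observed. A simplified sketch of the two sides, using an illustrative struct rather than io_ring_ctx:

#include <linux/sched.h>
#include <linux/wait.h>

struct poller {
        struct task_struct *thread;
        struct wait_queue_head wait;    /* set up with init_waitqueue_head() */
};

/* Polling-thread side: give up the spin and park on the waitqueue. */
static void poller_idle(struct poller *p)
{
        DEFINE_WAIT(w);

        prepare_to_wait(&p->wait, &w, TASK_INTERRUPTIBLE);
        /* Real code would recheck for pending work here before sleeping. */
        schedule();
        finish_wait(&p->wait, &w);
}

/* Teardown side: wait until the thread is visibly asleep on the queue. */
static void wait_for_poller_idle(struct poller *p)
{
        while (p->thread && !wq_has_sleeper(&p->wait))
                cpu_relax();
}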