io_uring: disable multishot poll for double poll add cases

[linux-2.6-microblaze.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index 880c774..4803e31 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -488,20 +488,16 @@ struct io_poll_iocb {
         __poll_t                        events;
         bool                            done;
         bool                            canceled;
-       bool                            update_events;
-       bool                            update_user_data;
-       union {
-               struct wait_queue_entry wait;
-               struct {
-                       u64             old_user_data;
-                       u64             new_user_data;
-               };
-       };
+       struct wait_queue_entry         wait;
  };
  
-struct io_poll_remove {
+struct io_poll_update {
         struct file                     *file;
-       u64                             addr;
+       u64                             old_user_data;
+       u64                             new_user_data;
+       __poll_t                        events;
+       bool                            update_events;
+       bool                            update_user_data;
  };
  
  struct io_close {
@@ -789,7 +785,7 @@ struct io_kiocb {
                 struct file             *file;
                 struct io_rw            rw;
                 struct io_poll_iocb     poll;
-               struct io_poll_remove   poll_remove;
+               struct io_poll_update   poll_update;
                 struct io_accept        accept;
                 struct io_sync          sync;
                 struct io_cancel        cancel;
@@ -1081,6 +1077,18 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req)
         }
  }
  
+static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
+{
+       bool got = percpu_ref_tryget(ref);
+
+       /* already at zero, wait for ->release() */
+       if (!got)
+               wait_for_completion(compl);
+       percpu_ref_resurrect(ref);
+       if (got)
+               percpu_ref_put(ref);
+}
+
  static bool io_match_task(struct io_kiocb *head,
                           struct task_struct *task,
                           struct files_struct *files)
@@ -1101,7 +1109,7 @@ static bool io_match_task(struct io_kiocb *head,
  
  static inline void req_set_fail_links(struct io_kiocb *req)
  {
-       if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
+       if (req->flags & REQ_F_LINK)
                 req->flags |= REQ_F_FAIL_LINK;
  }
  
@@ -1250,12 +1258,11 @@ static void io_queue_async_work(struct io_kiocb *req)
  }
  
  static void io_kill_timeout(struct io_kiocb *req, int status)
+       __must_hold(&req->ctx->completion_lock)
  {
         struct io_timeout_data *io = req->async_data;
-       int ret;
  
-       ret = hrtimer_try_to_cancel(&io->timer);
-       if (ret != -1) {
+       if (hrtimer_try_to_cancel(&io->timer) != -1) {
                 atomic_set(&req->ctx->cq_timeouts,
                         atomic_read(&req->ctx->cq_timeouts) + 1);
                 list_del_init(&req->timeout.list);
@@ -1497,32 +1504,28 @@ static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
                                      unsigned int cflags)
  {
         struct io_ring_ctx *ctx = req->ctx;
+       struct io_overflow_cqe *ocqe;
  
-       if (!atomic_read(&req->task->io_uring->in_idle)) {
-               struct io_overflow_cqe *ocqe;
-
-               ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
-               if (!ocqe)
-                       goto overflow;
-               if (list_empty(&ctx->cq_overflow_list)) {
-                       set_bit(0, &ctx->sq_check_overflow);
-                       set_bit(0, &ctx->cq_check_overflow);
-                       ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
-               }
-               ocqe->cqe.user_data = req->user_data;
-               ocqe->cqe.res = res;
-               ocqe->cqe.flags = cflags;
-               list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
-               return true;
+       ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+       if (!ocqe) {
+               /*
+                * We could not allocate an overflow entry, so this
+                * completion has to be dropped on the floor. Account for
+                * the loss in the ring's cq_overflow counter.
+                */
+               WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
+               return false;
         }
-overflow:
-       /*
-        * If we're in ring overflow flush mode, or in task cancel mode,
-        * or cannot allocate an overflow entry, then we need to drop it
-        * on the floor.
-        */
-       WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
-       return false;
+       if (list_empty(&ctx->cq_overflow_list)) {
+               set_bit(0, &ctx->sq_check_overflow);
+               set_bit(0, &ctx->cq_check_overflow);
+               ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+       }
+       ocqe->cqe.user_data = req->user_data;
+       ocqe->cqe.res = res;
+       ocqe->cqe.flags = cflags;
+       list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
+       return true;
  }
  
  static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
@@ -1772,12 +1775,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
          */
         if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
                 struct io_timeout_data *io = link->async_data;
-               int ret;
  
                 io_remove_next_linked(req);
                 link->timeout.head = NULL;
-               ret = hrtimer_try_to_cancel(&io->timer);
-               if (ret != -1) {
+               if (hrtimer_try_to_cancel(&io->timer) != -1) {
                         io_cqring_fill_event(link, -ECANCELED, 0);
                         io_put_req_deferred(link, 1);
                         return true;
@@ -1810,7 +1811,8 @@ static bool io_disarm_next(struct io_kiocb *req)
  
         if (likely(req->flags & REQ_F_LINK_TIMEOUT))
                 posted = io_kill_linked_timeout(req);
-       if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
+       if (unlikely((req->flags & REQ_F_FAIL_LINK) &&
+                    !(req->flags & REQ_F_HARDLINK))) {
                 posted |= (req->link != NULL);
                 io_fail_links(req);
         }
@@ -2318,27 +2320,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
         return ret;
  }
  
-/*
- * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
- * non-spinning poll check - we'll still enter the driver poll loop, but only
- * as a non-spinning completion check.
- */
-static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                               long min)
-{
-       while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
-               int ret;
-
-               ret = io_do_iopoll(ctx, nr_events, min);
-               if (ret < 0)
-                       return ret;
-               if (*nr_events >= min)
-                       return 0;
-       }
-
-       return 1;
-}
-
  /*
   * We can't just wait for polled events to come to us, we have to actively
   * find and complete them.
@@ -2374,7 +2355,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
  static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
  {
         unsigned int nr_events = 0;
-       int iters = 0, ret = 0;
+       int ret = 0;
  
         /*
          * We disallow the app entering submit/complete with polling, but we
@@ -2382,17 +2363,16 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
          * that got punted to a workqueue.
          */
         mutex_lock(&ctx->uring_lock);
+       /*
+        * Don't enter poll loop if we already have events pending.
+        * If we do, we can potentially be spinning for commands that
+        * already triggered a CQE (eg in error).
+        */
+       if (test_bit(0, &ctx->cq_check_overflow))
+               __io_cqring_overflow_flush(ctx, false);
+       if (io_cqring_events(ctx))
+               goto out;
         do {
-               /*
-                * Don't enter poll loop if we already have events pending.
-                * If we do, we can potentially be spinning for commands that
-                * already triggered a CQE (eg in error).
-                */
-               if (test_bit(0, &ctx->cq_check_overflow))
-                       __io_cqring_overflow_flush(ctx, false);
-               if (io_cqring_events(ctx))
-                       break;
-
                 /*
                  * If a submit got punted to a workqueue, we can have the
                  * application entering polling for a command before it gets
@@ -2403,18 +2383,17 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                  * forever, while the workqueue is stuck trying to acquire the
                  * very same mutex.
                  */
-               if (!(++iters & 7)) {
+               if (list_empty(&ctx->iopoll_list)) {
                         mutex_unlock(&ctx->uring_lock);
                         io_run_task_work();
                         mutex_lock(&ctx->uring_lock);
-               }
-
-               ret = io_iopoll_getevents(ctx, &nr_events, min);
-               if (ret <= 0)
-                       break;
-               ret = 0;
-       } while (min && !nr_events && !need_resched());
  
+                       if (list_empty(&ctx->iopoll_list))
+                               break;
+               }
+               ret = io_do_iopoll(ctx, &nr_events, min);
+       } while (!ret && nr_events < min && !need_resched());
+out:
         mutex_unlock(&ctx->uring_lock);
         return ret;
  }
@@ -2525,7 +2504,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
  /*
   * After the iocb has been issued, it's safe to be found on the poll list.
   * Adding the kiocb to the list AFTER submission ensures that we don't
- * find it from a io_iopoll_getevents() thread before the issuer is done
+ * find it from an io_do_iopoll() thread before the issuer is done
   * accessing the kiocb cookie.
   */
  static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
@@ -4971,7 +4950,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
         poll->head = NULL;
         poll->done = false;
         poll->canceled = false;
-       poll->update_events = poll->update_user_data = false;
  #define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
         /* mask in events that we always want/need */
         poll->events = events | IO_POLL_UNMASK;
@@ -4998,6 +4976,12 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                         pt->error = -EINVAL;
                         return;
                 }
+               /*
+                * Can't handle multishot for double wait for now, turn it
+                * into one-shot mode.
+                */
+               if (!(req->poll.events & EPOLLONESHOT))
+                       req->poll.events |= EPOLLONESHOT;
                 /* double add on the same waitqueue head, ignore */
                 if (poll->head == head)
                         return;
@@ -5205,21 +5189,16 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
         bool do_complete;
  
         io_poll_remove_double(req);
+       do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
  
-       if (req->opcode == IORING_OP_POLL_ADD) {
-               do_complete = __io_poll_remove_one(req, &req->poll, true);
-       } else {
+       if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
                 struct async_poll *apoll = req->apoll;
  
                 /* non-poll requests have submit ref still */
-               do_complete = __io_poll_remove_one(req, &apoll->poll, true);
-               if (do_complete) {
-                       req_ref_put(req);
-                       kfree(apoll->double_poll);
-                       kfree(apoll);
-               }
+               req_ref_put(req);
+               kfree(apoll->double_poll);
+               kfree(apoll);
         }
-
         return do_complete;
  }
  
@@ -5267,7 +5246,8 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
         return posted != 0;
  }
  
-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr)
+static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
+                                    bool poll_only)
         __must_hold(&ctx->completion_lock)
  {
         struct hlist_head *list;
@@ -5277,18 +5257,20 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr)
         hlist_for_each_entry(req, list, hash_node) {
                 if (sqe_addr != req->user_data)
                         continue;
+               if (poll_only && req->opcode != IORING_OP_POLL_ADD)
+                       continue;
                 return req;
         }
-
         return NULL;
  }
  
-static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
+static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
+                         bool poll_only)
         __must_hold(&ctx->completion_lock)
  {
         struct io_kiocb *req;
  
-       req = io_poll_find(ctx, sqe_addr);
+       req = io_poll_find(ctx, sqe_addr, poll_only);
         if (!req)
                 return -ENOENT;
         if (io_poll_remove_one(req))
@@ -5297,35 +5279,50 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
         return -EALREADY;
  }
  
-static int io_poll_remove_prep(struct io_kiocb *req,
+static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
+                                    unsigned int flags)
+{
+       u32 events;
+
+       events = READ_ONCE(sqe->poll32_events);
+#ifdef __BIG_ENDIAN
+       events = swahw32(events);
+#endif
+       if (!(flags & IORING_POLL_ADD_MULTI))
+               events |= EPOLLONESHOT;
+       return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
+}
+
+static int io_poll_update_prep(struct io_kiocb *req,
                                const struct io_uring_sqe *sqe)
  {
+       struct io_poll_update *upd = &req->poll_update;
+       u32 flags;
+
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
-       if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
-           sqe->poll_events)
+       if (sqe->ioprio || sqe->buf_index)
+               return -EINVAL;
+       flags = READ_ONCE(sqe->len);
+       if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
+                     IORING_POLL_ADD_MULTI))
+               return -EINVAL;
+       /* meaningless without update */
+       if (flags == IORING_POLL_ADD_MULTI)
                 return -EINVAL;
  
-       req->poll_remove.addr = READ_ONCE(sqe->addr);
-       return 0;
-}
-
-/*
- * Find a running poll command that matches one specified in sqe->addr,
- * and remove it if found.
- */
-static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
-{
-       struct io_ring_ctx *ctx = req->ctx;
-       int ret;
+       upd->old_user_data = READ_ONCE(sqe->addr);
+       upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+       upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
  
-       spin_lock_irq(&ctx->completion_lock);
-       ret = io_poll_cancel(ctx, req->poll_remove.addr);
-       spin_unlock_irq(&ctx->completion_lock);
+       upd->new_user_data = READ_ONCE(sqe->off);
+       if (!upd->update_user_data && upd->new_user_data)
+               return -EINVAL;
+       if (upd->update_events)
+               upd->events = io_poll_parse_events(sqe, flags);
+       else if (sqe->poll32_events)
+               return -EINVAL;
  
-       if (ret < 0)
-               req_set_fail_links(req);
-       __io_req_complete(req, issue_flags, ret, 0);
         return 0;
  }
  
@@ -5349,40 +5346,21 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
  static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
         struct io_poll_iocb *poll = &req->poll;
-       u32 events, flags;
+       u32 flags;
  
         if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
-       if (sqe->ioprio || sqe->buf_index)
+       if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr)
                 return -EINVAL;
         flags = READ_ONCE(sqe->len);
-       if (flags & ~(IORING_POLL_ADD_MULTI | IORING_POLL_UPDATE_EVENTS |
-                       IORING_POLL_UPDATE_USER_DATA))
-               return -EINVAL;
-       events = READ_ONCE(sqe->poll32_events);
-#ifdef __BIG_ENDIAN
-       events = swahw32(events);
-#endif
-       if (!(flags & IORING_POLL_ADD_MULTI))
-               events |= EPOLLONESHOT;
-       poll->update_events = poll->update_user_data = false;
-       if (flags & IORING_POLL_UPDATE_EVENTS) {
-               poll->update_events = true;
-               poll->old_user_data = READ_ONCE(sqe->addr);
-       }
-       if (flags & IORING_POLL_UPDATE_USER_DATA) {
-               poll->update_user_data = true;
-               poll->new_user_data = READ_ONCE(sqe->off);
-       }
-       if (!(poll->update_events || poll->update_user_data) &&
-            (sqe->off || sqe->addr))
+       if (flags & ~IORING_POLL_ADD_MULTI)
                 return -EINVAL;
-       poll->events = demangle_poll(events) |
-                               (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
+
+       poll->events = io_poll_parse_events(sqe, flags);
         return 0;
  }
  
-static int __io_poll_add(struct io_kiocb *req)
+static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
  {
         struct io_poll_iocb *poll = &req->poll;
         struct io_ring_ctx *ctx = req->ctx;
@@ -5408,7 +5386,7 @@ static int __io_poll_add(struct io_kiocb *req)
         return ipt.error;
  }
  
-static int io_poll_update(struct io_kiocb *req)
+static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
  {
         struct io_ring_ctx *ctx = req->ctx;
         struct io_kiocb *preq;
@@ -5416,13 +5394,15 @@ static int io_poll_update(struct io_kiocb *req)
         int ret;
  
         spin_lock_irq(&ctx->completion_lock);
-       preq = io_poll_find(ctx, req->poll.old_user_data);
+       preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
         if (!preq) {
                 ret = -ENOENT;
                 goto err;
-       } else if (preq->opcode != IORING_OP_POLL_ADD) {
-               /* don't allow internal poll updates */
-               ret = -EACCES;
+       }
+
+       if (!req->poll_update.update_events && !req->poll_update.update_user_data) {
+               completing = true;
+               ret = io_poll_remove_one(preq) ? 0 : -EALREADY;
                 goto err;
         }
  
@@ -5446,21 +5426,20 @@ err:
                 return 0;
         }
         /* only mask one event flags, keep behavior flags */
-       if (req->poll.update_events) {
+       if (req->poll_update.update_events) {
                 preq->poll.events &= ~0xffff;
-               preq->poll.events |= req->poll.events & 0xffff;
+               preq->poll.events |= req->poll_update.events & 0xffff;
                 preq->poll.events |= IO_POLL_UNMASK;
         }
-       if (req->poll.update_user_data)
-               preq->user_data = req->poll.new_user_data;
-
+       if (req->poll_update.update_user_data)
+               preq->user_data = req->poll_update.new_user_data;
         spin_unlock_irq(&ctx->completion_lock);
  
         /* complete update request, we're done with it */
         io_req_complete(req, ret);
  
         if (!completing) {
-               ret = __io_poll_add(preq);
+               ret = io_poll_add(preq, issue_flags);
                 if (ret < 0) {
                         req_set_fail_links(preq);
                         io_req_complete(preq, ret);
@@ -5469,13 +5448,6 @@ err:
         return 0;
  }
  
-static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
-{
-       if (!req->poll.update_events && !req->poll.update_user_data)
-               return __io_poll_add(req);
-       return io_poll_update(req);
-}
-
  static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
  {
         struct io_timeout_data *data = container_of(timer,
@@ -5505,21 +5477,18 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
  {
         struct io_timeout_data *io;
         struct io_kiocb *req;
-       int ret = -ENOENT;
+       bool found = false;
  
         list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
-               if (user_data == req->user_data) {
-                       ret = 0;
+               found = user_data == req->user_data;
+               if (found)
                         break;
-               }
         }
-
-       if (ret == -ENOENT)
-               return ERR_PTR(ret);
+       if (!found)
+               return ERR_PTR(-ENOENT);
  
         io = req->async_data;
-       ret = hrtimer_try_to_cancel(&io->timer);
-       if (ret == -1)
+       if (hrtimer_try_to_cancel(&io->timer) == -1)
                 return ERR_PTR(-EALREADY);
         list_del_init(&req->timeout.list);
         return req;
@@ -5755,7 +5724,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
         ret = io_timeout_cancel(ctx, sqe_addr);
         if (ret != -ENOENT)
                 goto done;
-       ret = io_poll_cancel(ctx, sqe_addr);
+       ret = io_poll_cancel(ctx, sqe_addr, false);
  done:
         if (!ret)
                 ret = success_ret;
@@ -5797,7 +5766,7 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
         ret = io_timeout_cancel(ctx, sqe_addr);
         if (ret != -ENOENT)
                 goto done;
-       ret = io_poll_cancel(ctx, sqe_addr);
+       ret = io_poll_cancel(ctx, sqe_addr, false);
         if (ret != -ENOENT)
                 goto done;
         spin_unlock_irq(&ctx->completion_lock);
@@ -5883,7 +5852,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
         case IORING_OP_POLL_ADD:
                 return io_poll_add_prep(req, sqe);
         case IORING_OP_POLL_REMOVE:
-               return io_poll_remove_prep(req, sqe);
+               return io_poll_update_prep(req, sqe);
         case IORING_OP_FSYNC:
                 return io_fsync_prep(req, sqe);
         case IORING_OP_SYNC_FILE_RANGE:
@@ -6114,7 +6083,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
                 ret = io_poll_add(req, issue_flags);
                 break;
         case IORING_OP_POLL_REMOVE:
-               ret = io_poll_remove(req, issue_flags);
+               ret = io_poll_update(req, issue_flags);
                 break;
         case IORING_OP_SYNC_FILE_RANGE:
                 ret = io_sync_file_range(req, issue_flags);
@@ -7076,6 +7045,10 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
                         fput(file);
         }
  #endif
+       io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+       kfree(ctx->file_data);
+       ctx->file_data = NULL;
+       ctx->nr_user_files = 0;
  }
  
  static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
@@ -7182,21 +7155,14 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
  
  static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
  {
-       struct io_rsrc_data *data = ctx->file_data;
         int ret;
  
-       if (!data)
+       if (!ctx->file_data)
                 return -ENXIO;
-       ret = io_rsrc_ref_quiesce(data, ctx);
-       if (ret)
-               return ret;
-
-       __io_sqe_files_unregister(ctx);
-       io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
-       kfree(data);
-       ctx->file_data = NULL;
-       ctx->nr_user_files = 0;
-       return 0;
+       ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+       if (!ret)
+               __io_sqe_files_unregister(ctx);
+       return ret;
  }
  
  static void io_sq_thread_unpark(struct io_sq_data *sqd)
@@ -7229,9 +7195,10 @@ static void io_sq_thread_park(struct io_sq_data *sqd)
  static void io_sq_thread_stop(struct io_sq_data *sqd)
  {
         WARN_ON_ONCE(sqd->thread == current);
+       WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
  
-       mutex_lock(&sqd->lock);
         set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+       mutex_lock(&sqd->lock);
         if (sqd->thread)
                 wake_up_process(sqd->thread);
         mutex_unlock(&sqd->lock);
@@ -7645,7 +7612,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
  
         ret = io_sqe_files_scm(ctx);
         if (ret) {
-               io_sqe_files_unregister(ctx);
+               __io_sqe_files_unregister(ctx);
                 return ret;
         }
  
@@ -8446,7 +8413,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
         }
  
         mutex_lock(&ctx->uring_lock);
-       io_sqe_files_unregister(ctx);
+       if (ctx->file_data) {
+               if (!atomic_dec_and_test(&ctx->file_data->refs))
+                       wait_for_completion(&ctx->file_data->done);
+               __io_sqe_files_unregister(ctx);
+       }
         if (ctx->rings)
                 __io_cqring_overflow_flush(ctx, true);
         mutex_unlock(&ctx->uring_lock);
@@ -8589,6 +8560,9 @@ static void io_ring_exit_work(struct work_struct *work)
                 WARN_ON_ONCE(time_after(jiffies, timeout));
         } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
  
+       init_completion(&exit.completion);
+       init_task_work(&exit.task_work, io_tctx_exit_cb);
+       exit.ctx = ctx;
         /*
          * Some may use context even when all refs and requests have been put,
          * and they are free to do so while still holding uring_lock or
@@ -8601,9 +8575,8 @@ static void io_ring_exit_work(struct work_struct *work)
  
                 node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
                                         ctx_node);
-               exit.ctx = ctx;
-               init_completion(&exit.completion);
-               init_task_work(&exit.task_work, io_tctx_exit_cb);
+               /* don't spin on a single task if cancellation failed */
+               list_rotate_left(&ctx->tctx_list);
                 ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
                 if (WARN_ON_ONCE(ret))
                         continue;
@@ -8611,7 +8584,6 @@ static void io_ring_exit_work(struct work_struct *work)
  
                 mutex_unlock(&ctx->uring_lock);
                 wait_for_completion(&exit.completion);
-               cond_resched();
                 mutex_lock(&ctx->uring_lock);
         }
         mutex_unlock(&ctx->uring_lock);
@@ -9786,12 +9758,11 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                         if (ret < 0)
                                 break;
                 } while (1);
-
                 mutex_lock(&ctx->uring_lock);
  
                 if (ret) {
-                       percpu_ref_resurrect(&ctx->refs);
-                       goto out_quiesce;
+                       io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
+                       return ret;
                 }
         }
  
@@ -9884,7 +9855,6 @@ out:
         if (io_register_op_must_quiesce(opcode)) {
                 /* bring the ctx back to life */
                 percpu_ref_reinit(&ctx->refs);
-out_quiesce:
                 reinit_completion(&ctx->ref_comp);
         }
         return ret;