Merge tag 'net-5.14-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

[linux-2.6-microblaze.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index d94fb58..bf548af 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req)
  {
         struct io_kiocb *cur;
  
-       io_for_each_link(cur, req)
-               io_prep_async_work(cur);
+       if (req->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = req->ctx;
+
+               spin_lock_irq(&ctx->completion_lock);
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+               spin_unlock_irq(&ctx->completion_lock);
+       } else {
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+       }
  }
  
  static void io_queue_async_work(struct io_kiocb *req)
@@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req)
  
         /* init ->work of the whole link before punting */
         io_prep_async_link(req);
+
+       /*
+        * Not expected to happen, but if we do have a bug where this _can_
+        * happen, catch it here and ensure the request is marked as
+        * canceled. That will make io-wq go through the usual work cancel
+        * procedure rather than attempt to run this request (or create a new
+        * worker for it).
+        */
+       if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+               req->work.flags |= IO_WQ_WORK_CANCEL;
+
         trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                         &req->work, req->flags);
         io_wq_enqueue(tctx->io_wq, &req->work);
@@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb)
                         node = next;
                 }
                 if (wq_list_empty(&tctx->task_list)) {
+                       spin_lock_irq(&tctx->task_lock);
                         clear_bit(0, &tctx->task_state);
-                       if (wq_list_empty(&tctx->task_list))
+                       if (wq_list_empty(&tctx->task_list)) {
+                               spin_unlock_irq(&tctx->task_lock);
                                 break;
+                       }
+                       spin_unlock_irq(&tctx->task_lock);
                         /* another tctx_task_work() is enqueued, yield */
                         if (test_and_set_bit(0, &tctx->task_state))
                                 break;
@@ -2016,7 +2040,7 @@ static void io_req_task_submit(struct io_kiocb *req)
  
         /* ctx stays valid until unlock, even if we drop all ours ctx->refs */
         mutex_lock(&ctx->uring_lock);
-       if (!(current->flags & PF_EXITING) && !current->in_execve)
+       if (!(req->task->flags & PF_EXITING) && !req->task->in_execve)
                 __io_queue_sqe(req);
         else
                 io_req_complete_failed(req, -EFAULT);
@@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req)
         io_req_task_work_add(req);
  }
  
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+       req->io_task_work.func = io_queue_async_work;
+       io_req_task_work_add(req);
+}
+
  static inline void io_queue_next(struct io_kiocb *req)
  {
         struct io_kiocb *nxt = io_req_find_next(req);
@@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void)
   * Find and free completed poll iocbs
   */
  static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                              struct list_head *done)
+                              struct list_head *done, bool resubmit)
  {
         struct req_batch rb;
         struct io_kiocb *req;
@@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 req = list_first_entry(done, struct io_kiocb, inflight_entry);
                 list_del(&req->inflight_entry);
  
-               if (READ_ONCE(req->result) == -EAGAIN &&
+               if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
                     !(req->flags & REQ_F_DONT_REISSUE)) {
                         req->iopoll_completed = 0;
                         req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                         continue;
                 }
  
@@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
  }
  
  static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                       long min)
+                       long min, bool resubmit)
  {
         struct io_kiocb *req, *tmp;
         LIST_HEAD(done);
@@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
         }
  
         if (!list_empty(&done))
-               io_iopoll_complete(ctx, nr_events, &done);
+               io_iopoll_complete(ctx, nr_events, &done, resubmit);
  
         return ret;
  }
@@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
         while (!list_empty(&ctx->iopoll_list)) {
                 unsigned int nr_events = 0;
  
-               io_do_iopoll(ctx, &nr_events, 0);
+               io_do_iopoll(ctx, &nr_events, 0, false);
  
                 /* let it sleep and repeat later if can't complete a request */
                 if (nr_events == 0)
@@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                             list_empty(&ctx->iopoll_list))
                                 break;
                 }
-               ret = io_do_iopoll(ctx, &nr_events, min);
+               ret = io_do_iopoll(ctx, &nr_events, min, true);
         } while (!ret && nr_events < min && !need_resched());
  out:
         mutex_unlock(&ctx->uring_lock);
@@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
          */
         if (percpu_ref_is_dying(&ctx->refs))
                 return false;
+       /*
+        * Play it safe and assume not safe to re-import and reissue if we're
+        * not in the original thread group (or in task context).
+        */
+       if (!same_thread_group(req->task, current) || !in_task())
+               return false;
         return true;
  }
  #else
@@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
                 req->flags &= ~REQ_F_REISSUE;
                 if (io_resubmit_prep(req)) {
                         req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                 } else {
                         int cflags = 0;
  
@@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv);
  struct io_poll_table {
         struct poll_table_struct pt;
         struct io_kiocb *req;
+       int nr_entries;
         int error;
  };
  
@@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
         if (req->poll.events & EPOLLONESHOT)
                 flags = 0;
         if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-               io_poll_remove_waitqs(req);
                 req->poll.done = true;
                 flags = 0;
         }
@@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req)
  
                 done = io_poll_complete(req, req->result);
                 if (done) {
+                       io_poll_remove_double(req);
                         hash_del(&req->hash_node);
                 } else {
                         req->result = 0;
@@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
         struct io_kiocb *req = pt->req;
  
         /*
-        * If poll->head is already set, it's because the file being polled
-        * uses multiple waitqueues for poll handling (eg one for read, one
-        * for write). Setup a separate io_poll_iocb if this happens.
+        * The file being polled uses multiple waitqueues for poll handling
+        * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+        * if this happens.
          */
-       if (unlikely(poll->head)) {
+       if (unlikely(pt->nr_entries)) {
                 struct io_poll_iocb *poll_one = poll;
  
                 /* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                 *poll_ptr = poll;
         }
  
-       pt->error = 0;
+       pt->nr_entries++;
         poll->head = head;
  
         if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
  
         ipt->pt._key = mask;
         ipt->req = req;
-       ipt->error = -EINVAL;
+       ipt->error = 0;
+       ipt->nr_entries = 0;
  
         mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+       if (unlikely(!ipt->nr_entries) && !ipt->error)
+               ipt->error = -EINVAL;
  
         spin_lock_irq(&ctx->completion_lock);
+       if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+               io_poll_remove_double(req);
         if (likely(poll->head)) {
                 spin_lock(&poll->head->lock);
                 if (unlikely(list_empty(&poll->wait.entry))) {
@@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req)
         ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
                                         io_async_wake);
         if (ret || ipt.error) {
-               io_poll_remove_double(req);
                 spin_unlock_irq(&ctx->completion_lock);
                 if (ret)
                         return IO_APOLL_READY;
@@ -6019,11 +6060,13 @@ static bool io_drain_req(struct io_kiocb *req)
  
         ret = io_req_prep_async(req);
         if (ret)
-               return ret;
+               goto fail;
         io_prep_async_link(req);
         de = kmalloc(sizeof(*de), GFP_KERNEL);
         if (!de) {
-               io_req_complete_failed(req, -ENOMEM);
+               ret = -ENOMEM;
+fail:
+               io_req_complete_failed(req, ret);
                 return true;
         }
  
@@ -6790,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
  
                 mutex_lock(&ctx->uring_lock);
                 if (!list_empty(&ctx->iopoll_list))
-                       io_do_iopoll(ctx, &nr_events, 0);
+                       io_do_iopoll(ctx, &nr_events, 0, true);
  
                 /*
                  * Don't submit if refs are dying, good for io_uring_register(),
@@ -7897,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
         struct io_wq_data data;
         unsigned int concurrency;
  
+       mutex_lock(&ctx->uring_lock);
         hash = ctx->hash_map;
         if (!hash) {
                 hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-               if (!hash)
+               if (!hash) {
+                       mutex_unlock(&ctx->uring_lock);
                         return ERR_PTR(-ENOMEM);
+               }
                 refcount_set(&hash->refs, 1);
                 init_waitqueue_head(&hash->wait);
                 ctx->hash_map = hash;
         }
+       mutex_unlock(&ctx->uring_lock);
  
         data.hash = hash;
         data.task = task;
@@ -7979,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                 f = fdget(p->wq_fd);
                 if (!f.file)
                         return -ENXIO;
-               fdput(f);
-               if (f.file->f_op != &io_uring_fops)
+               if (f.file->f_op != &io_uring_fops) {
+                       fdput(f);
                         return -EINVAL;
+               }
+               fdput(f);
         }
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 struct task_struct *tsk;