io_uring: drop req/tctx io_identity separately

[linux-2.6-microblaze.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index b42dfa0..728f3a3 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -995,20 +995,33 @@ static void io_sq_thread_drop_mm(void)
         if (mm) {
                 kthread_unuse_mm(mm);
                 mmput(mm);
+               current->mm = NULL;
         }
  }
  
  static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
  {
-       if (!current->mm) {
-               if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
-                            !ctx->sqo_task->mm ||
-                            !mmget_not_zero(ctx->sqo_task->mm)))
-                       return -EFAULT;
-               kthread_use_mm(ctx->sqo_task->mm);
+       struct mm_struct *mm;
+
+       if (current->mm)
+               return 0;
+
+       /* Should never happen */
+       if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL)))
+               return -EFAULT;
+
+       task_lock(ctx->sqo_task);
+       mm = ctx->sqo_task->mm;
+       if (unlikely(!mm || !mmget_not_zero(mm)))
+               mm = NULL;
+       task_unlock(ctx->sqo_task);
+
+       if (mm) {
+               kthread_use_mm(mm);
+               return 0;
         }
  
-       return 0;
+       return -EFAULT;
  }
  
  static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
@@ -1274,9 +1287,12 @@ static bool io_identity_cow(struct io_kiocb *req)
         /* add one for this request */
         refcount_inc(&id->count);
  
-       /* drop old identity, assign new one. one ref for req, one for tctx */
-       if (req->work.identity != tctx->identity &&
-           refcount_sub_and_test(2, &req->work.identity->count))
+       /* drop tctx and req identity references, if needed */
+       if (tctx->identity != &tctx->__identity &&
+           refcount_dec_and_test(&tctx->identity->count))
+               kfree(tctx->identity);
+       if (req->work.identity != &tctx->__identity &&
+           refcount_dec_and_test(&req->work.identity->count))
                 kfree(req->work.identity);
  
         req->work.identity = id;
@@ -1365,6 +1381,9 @@ static void io_prep_async_work(struct io_kiocb *req)
         io_req_init_async(req);
         id = req->work.identity;
  
+       if (req->flags & REQ_F_FORCE_ASYNC)
+               req->work.flags |= IO_WQ_WORK_CONCURRENT;
+
         if (req->flags & REQ_F_ISREG) {
                 if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
                         io_wq_hash_work(&req->work, file_inode(req->file));
@@ -1665,7 +1684,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
                 WRITE_ONCE(cqe->user_data, req->user_data);
                 WRITE_ONCE(cqe->res, res);
                 WRITE_ONCE(cqe->flags, cflags);
-       } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
+       } else if (ctx->cq_overflow_flushed ||
+                  atomic_read(&req->task->io_uring->in_idle)) {
                 /*
                  * If we're in ring overflow flush mode, or in task cancel mode,
                  * then we cannot store the request for later flushing, we need
@@ -1835,7 +1855,7 @@ static void __io_free_req(struct io_kiocb *req)
         io_dismantle_req(req);
  
         percpu_counter_dec(&tctx->inflight);
-       if (tctx->in_idle)
+       if (atomic_read(&tctx->in_idle))
                 wake_up(&tctx->wait);
         put_task_struct(req->task);
  
@@ -1846,59 +1866,39 @@ static void __io_free_req(struct io_kiocb *req)
         percpu_ref_put(&ctx->refs);
  }
  
-static bool io_link_cancel_timeout(struct io_kiocb *req)
+static void io_kill_linked_timeout(struct io_kiocb *req)
  {
-       struct io_timeout_data *io = req->async_data;
         struct io_ring_ctx *ctx = req->ctx;
-       int ret;
-
-       ret = hrtimer_try_to_cancel(&io->timer);
-       if (ret != -1) {
-               io_cqring_fill_event(req, -ECANCELED);
-               io_commit_cqring(ctx);
-               req->flags &= ~REQ_F_LINK_HEAD;
-               io_put_req_deferred(req, 1);
-               return true;
-       }
-
-       return false;
-}
-
-static bool __io_kill_linked_timeout(struct io_kiocb *req)
-{
         struct io_kiocb *link;
-       bool wake_ev;
+       bool cancelled = false;
+       unsigned long flags;
  
-       if (list_empty(&req->link_list))
-               return false;
-       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
-       if (link->opcode != IORING_OP_LINK_TIMEOUT)
-               return false;
+       spin_lock_irqsave(&ctx->completion_lock, flags);
+       link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
+                                       link_list);
         /*
          * Can happen if a linked timeout fired and link had been like
          * req -> link t-out -> link t-out [-> ...]
          */
-       if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE))
-               return false;
+       if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
+               struct io_timeout_data *io = link->async_data;
+               int ret;
  
-       list_del_init(&link->link_list);
-       wake_ev = io_link_cancel_timeout(link);
+               list_del_init(&link->link_list);
+               ret = hrtimer_try_to_cancel(&io->timer);
+               if (ret != -1) {
+                       io_cqring_fill_event(link, -ECANCELED);
+                       io_commit_cqring(ctx);
+                       cancelled = true;
+               }
+       }
         req->flags &= ~REQ_F_LINK_TIMEOUT;
-       return wake_ev;
-}
-
-static void io_kill_linked_timeout(struct io_kiocb *req)
-{
-       struct io_ring_ctx *ctx = req->ctx;
-       unsigned long flags;
-       bool wake_ev;
-
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       wake_ev = __io_kill_linked_timeout(req);
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
  
-       if (wake_ev)
+       if (cancelled) {
                 io_cqring_ev_posted(ctx);
+               io_put_req(link);
+       }
  }
  
  static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
@@ -4977,8 +4977,10 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
                 /* make sure double remove sees this as being gone */
                 wait->private = NULL;
                 spin_unlock(&poll->head->lock);
-               if (!done)
-                       __io_async_wake(req, poll, mask, io_poll_task_func);
+               if (!done) {
+                       /* use wait func handler, so it matches the request type */
+                       poll->wait.func(&poll->wait, mode, sync, key);
+               }
         }
         refcount_dec(&req->refs);
         return 1;
@@ -6180,7 +6182,6 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
  static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
  {
         struct io_kiocb *linked_timeout;
-       struct io_kiocb *nxt;
         const struct cred *old_creds = NULL;
         int ret;
  
@@ -6206,7 +6207,6 @@ again:
          */
         if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
                 if (!io_arm_poll_handler(req)) {
-punt:
                         /*
                          * Queued up for async execution, worker will release
                          * submit reference when the iocb is actually submitted.
@@ -6216,33 +6216,25 @@ punt:
  
                 if (linked_timeout)
                         io_queue_linked_timeout(linked_timeout);
-               goto exit;
-       }
+       } else if (likely(!ret)) {
+               /* drop submission reference */
+               req = io_put_req_find_next(req);
+               if (linked_timeout)
+                       io_queue_linked_timeout(linked_timeout);
  
-       if (unlikely(ret)) {
+               if (req) {
+                       if (!(req->flags & REQ_F_FORCE_ASYNC))
+                               goto again;
+                       io_queue_async_work(req);
+               }
+       } else {
                 /* un-prep timeout, so it'll be killed as any other linked */
                 req->flags &= ~REQ_F_LINK_TIMEOUT;
                 req_set_fail_links(req);
                 io_put_req(req);
                 io_req_complete(req, ret);
-               goto exit;
         }
  
-       /* drop submission reference */
-       nxt = io_put_req_find_next(req);
-       if (linked_timeout)
-               io_queue_linked_timeout(linked_timeout);
-
-       if (nxt) {
-               req = nxt;
-
-               if (req->flags & REQ_F_FORCE_ASYNC) {
-                       linked_timeout = NULL;
-                       goto punt;
-               }
-               goto again;
-       }
-exit:
         if (old_creds)
                 revert_creds(old_creds);
  }
@@ -6266,13 +6258,6 @@ fail_req:
                         if (unlikely(ret))
                                 goto fail_req;
                 }
-
-               /*
-                * Never try inline submit of IOSQE_ASYNC is set, go straight
-                * to async execution.
-                */
-               io_req_init_async(req);
-               req->work.flags |= IO_WQ_WORK_CONCURRENT;
                 io_queue_async_work(req);
         } else {
                 if (sqe) {
@@ -7727,7 +7712,8 @@ static int io_uring_alloc_task_context(struct task_struct *task)
         xa_init(&tctx->xa);
         init_waitqueue_head(&tctx->wait);
         tctx->last = NULL;
-       tctx->in_idle = 0;
+       atomic_set(&tctx->in_idle, 0);
+       tctx->sqpoll = false;
         io_init_identity(&tctx->__identity);
         tctx->identity = &tctx->__identity;
         task->io_uring = tctx;
@@ -8630,8 +8616,11 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
  {
         struct task_struct *task = current;
  
-       if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data)
+       if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
                 task = ctx->sq_data->thread;
+               atomic_inc(&task->io_uring->in_idle);
+               io_sq_thread_park(ctx->sq_data);
+       }
  
         io_cqring_overflow_flush(ctx, true, task, files);
  
@@ -8639,12 +8628,23 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
                 io_run_task_work();
                 cond_resched();
         }
+
+       if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+               atomic_dec(&task->io_uring->in_idle);
+               /*
+                * If the files that are going away are the ones in the thread
+                * identity, clear them out.
+                */
+               if (task->io_uring->identity->files == files)
+                       task->io_uring->identity->files = NULL;
+               io_sq_thread_unpark(ctx->sq_data);
+       }
  }
  
  /*
   * Note that this task has used io_uring. We use it for cancelation purposes.
   */
-static int io_uring_add_task_file(struct file *file)
+static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
  {
         struct io_uring_task *tctx = current->io_uring;
  
@@ -8666,6 +8666,14 @@ static int io_uring_add_task_file(struct file *file)
                 tctx->last = file;
         }
  
+       /*
+        * Setting ->sqpoll is race safe in that the task itself is doing it,
+        * hence it cannot be going through the exit/cancel paths at the same
+        * time. ->sqpoll cannot be modified while exit/cancel is running.
+        */
+       if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
+               tctx->sqpoll = true;
+
         return 0;
  }
  
@@ -8707,7 +8715,7 @@ void __io_uring_files_cancel(struct files_struct *files)
         unsigned long index;
  
         /* make sure overflow events are dropped */
-       tctx->in_idle = true;
+       atomic_inc(&tctx->in_idle);
  
         xa_for_each(&tctx->xa, index, file) {
                 struct io_ring_ctx *ctx = file->private_data;
@@ -8716,6 +8724,35 @@ void __io_uring_files_cancel(struct files_struct *files)
                 if (files)
                         io_uring_del_task_file(file);
         }
+
+       atomic_dec(&tctx->in_idle);
+}
+
+static s64 tctx_inflight(struct io_uring_task *tctx)
+{
+       unsigned long index;
+       struct file *file;
+       s64 inflight;
+
+       inflight = percpu_counter_sum(&tctx->inflight);
+       if (!tctx->sqpoll)
+               return inflight;
+
+       /*
+        * If we have SQPOLL rings, then we need to iterate and find them, and
+        * add the pending count for those.
+        */
+       xa_for_each(&tctx->xa, index, file) {
+               struct io_ring_ctx *ctx = file->private_data;
+
+               if (ctx->flags & IORING_SETUP_SQPOLL) {
+                       struct io_uring_task *__tctx = ctx->sqo_task->io_uring;
+
+                       inflight += percpu_counter_sum(&__tctx->inflight);
+               }
+       }
+
+       return inflight;
  }
  
  /*
@@ -8729,11 +8766,11 @@ void __io_uring_task_cancel(void)
         s64 inflight;
  
         /* make sure overflow events are dropped */
-       tctx->in_idle = true;
+       atomic_inc(&tctx->in_idle);
  
         do {
                 /* read completions before cancelations */
-               inflight = percpu_counter_sum(&tctx->inflight);
+               inflight = tctx_inflight(tctx);
                 if (!inflight)
                         break;
                 __io_uring_files_cancel(NULL);
@@ -8744,13 +8781,13 @@ void __io_uring_task_cancel(void)
                  * If we've seen completions, retry. This avoids a race where
                  * a completion comes in before we did prepare_to_wait().
                  */
-               if (inflight != percpu_counter_sum(&tctx->inflight))
+               if (inflight != tctx_inflight(tctx))
                         continue;
                 schedule();
         } while (1);
  
         finish_wait(&tctx->wait, &wait);
-       tctx->in_idle = false;
+       atomic_dec(&tctx->in_idle);
  }
  
  static int io_uring_flush(struct file *file, void *data)
@@ -8895,7 +8932,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                         io_sqpoll_wait_sq(ctx);
                 submitted = to_submit;
         } else if (to_submit) {
-               ret = io_uring_add_task_file(f.file);
+               ret = io_uring_add_task_file(ctx, f.file);
                 if (unlikely(ret))
                         goto out;
                 mutex_lock(&ctx->uring_lock);
@@ -9124,7 +9161,7 @@ err_fd:
  #if defined(CONFIG_UNIX)
         ctx->ring_sock->file = file;
  #endif
-       if (unlikely(io_uring_add_task_file(file))) {
+       if (unlikely(io_uring_add_task_file(ctx, file))) {
                 file = ERR_PTR(-ENOMEM);
                 goto err_fd;
         }