Merge branches 'clk-range', 'clk-uniphier', 'clk-apple' and 'clk-qcom' into clk-next

[linux-2.6-microblaze.git] / fs / io_uring.c
diff --git a/fs/io_uring.c b/fs/io_uring.c

index de9c9de..77b9c7e 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
         return atomic_dec_and_test(&req->refs);
  }
  
-static inline void req_ref_put(struct io_kiocb *req)
-{
-       WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
-       WARN_ON_ONCE(req_ref_put_and_test(req));
-}
-
  static inline void req_ref_get(struct io_kiocb *req)
  {
         WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
@@ -5234,7 +5228,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
                 min_ret = iov_iter_count(&msg.msg_iter);
  
         ret = sock_recvmsg(sock, &msg, flags);
-out_free:
         if (ret < min_ret) {
                 if (ret == -EAGAIN && force_nonblock)
                         return -EAGAIN;
@@ -5242,9 +5235,9 @@ out_free:
                         ret = -EINTR;
                 req_set_fail(req);
         } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+out_free:
                 req_set_fail(req);
         }
-
         __io_req_complete(req, issue_flags, ret, io_put_kbuf(req));
         return 0;
  }
@@ -5468,12 +5461,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
  
  static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
  {
-       struct wait_queue_head *head = poll->head;
+       struct wait_queue_head *head = smp_load_acquire(&poll->head);
  
-       spin_lock_irq(&head->lock);
-       list_del_init(&poll->wait.entry);
-       poll->head = NULL;
-       spin_unlock_irq(&head->lock);
+       if (head) {
+               spin_lock_irq(&head->lock);
+               list_del_init(&poll->wait.entry);
+               poll->head = NULL;
+               spin_unlock_irq(&head->lock);
+       }
  }
  
  static void io_poll_remove_entries(struct io_kiocb *req)
@@ -5481,10 +5476,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
         struct io_poll_iocb *poll = io_poll_get_single(req);
         struct io_poll_iocb *poll_double = io_poll_get_double(req);
  
-       if (poll->head)
-               io_poll_remove_entry(poll);
-       if (poll_double && poll_double->head)
+       /*
+        * While we hold the waitqueue lock and the waitqueue is nonempty,
+        * wake_up_pollfree() will wait for us.  However, taking the waitqueue
+        * lock in the first place can race with the waitqueue being freed.
+        *
+        * We solve this as eventpoll does: by taking advantage of the fact that
+        * all users of wake_up_pollfree() will RCU-delay the actual free.  If
+        * we enter rcu_read_lock() and see that the pointer to the queue is
+        * non-NULL, we can then lock it without the memory being freed out from
+        * under us.
+        *
+        * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+        * case the caller deletes the entry from the queue, leaving it empty.
+        * In that case, only RCU prevents the queue memory from being freed.
+        */
+       rcu_read_lock();
+       io_poll_remove_entry(poll);
+       if (poll_double)
                 io_poll_remove_entry(poll_double);
+       rcu_read_unlock();
  }
  
  /*
@@ -5624,6 +5635,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                                                  wait);
         __poll_t mask = key_to_poll(key);
  
+       if (unlikely(mask & POLLFREE)) {
+               io_poll_mark_cancelled(req);
+               /* we have to kick tw in case it's not already */
+               io_poll_execute(req, 0);
+
+               /*
+                * If the waitqueue is being freed early but someone already
+                * holds ownership over it, we have to tear down the request as
+                * best we can. That means immediately removing the request from
+                * its waitqueue and preventing all further accesses to the
+                * waitqueue via the request.
+                */
+               list_del_init(&poll->wait.entry);
+
+               /*
+                * Careful: this *must* be the last step, since as soon
+                * as poll->head is NULL'ed out, the request can be
+                * completed and freed, since io_poll_remove_entry()
+                * will no longer need to take the waitqueue lock.
+                */
+               smp_store_release(&poll->head, NULL);
+               return 1;
+       }
+
         /* for instances that support it check for an event match first */
         if (mask && !(mask & poll->events))
                 return 0;
@@ -6350,16 +6385,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
         WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
  
         ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
-       if (ret != -ENOENT)
-               return ret;
+       /*
+        * Fall through even for -EALREADY, as we may have a poll armed
+        * that needs unarming.
+        */
+       if (!ret)
+               return 0;
  
         spin_lock(&ctx->completion_lock);
+       ret = io_poll_cancel(ctx, sqe_addr, false);
+       if (ret != -ENOENT)
+               goto out;
+
         spin_lock_irq(&ctx->timeout_lock);
         ret = io_timeout_cancel(ctx, sqe_addr);
         spin_unlock_irq(&ctx->timeout_lock);
-       if (ret != -ENOENT)
-               goto out;
-       ret = io_poll_cancel(ctx, sqe_addr, false);
  out:
         spin_unlock(&ctx->completion_lock);
         return ret;
@@ -7781,10 +7821,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
         struct io_ring_ctx *ctx = node->rsrc_data->ctx;
         unsigned long flags;
         bool first_add = false;
+       unsigned long delay = HZ;
  
         spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
         node->done = true;
  
+       /* if we are mid-quiesce then do not delay */
+       if (node->rsrc_data->quiesce)
+               delay = 0;
+
         while (!list_empty(&ctx->rsrc_ref_list)) {
                 node = list_first_entry(&ctx->rsrc_ref_list,
                                             struct io_rsrc_node, node);
@@ -7797,10 +7842,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref)
         spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
  
         if (first_add)
-               mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+               mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
  }
  
-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+static struct io_rsrc_node *io_rsrc_node_alloc(void)
  {
         struct io_rsrc_node *ref_node;
  
@@ -7851,7 +7896,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
  {
         if (ctx->rsrc_backup_node)
                 return 0;
-       ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+       ctx->rsrc_backup_node = io_rsrc_node_alloc();
         return ctx->rsrc_backup_node ? 0 : -ENOMEM;
  }
  
@@ -8887,10 +8932,9 @@ static void io_mem_free(void *ptr)
  
  static void *io_mem_alloc(size_t size)
  {
-       gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
-                               __GFP_NORETRY | __GFP_ACCOUNT;
+       gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
  
-       return (void *) __get_free_pages(gfp_flags, get_order(size));
+       return (void *) __get_free_pages(gfp, get_order(size));
  }
  
  static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,