Merge tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block
diff --git a/mm/filemap.c b/mm/filemap.c
index 385759c..9f131f1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -987,44 +987,46 @@ void __init pagecache_init(void)
        page_writeback_init();
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
-       struct page *page;
-       int bit_nr;
-       int page_match;
-};
-
-struct wait_page_queue {
-       struct page *page;
-       int bit_nr;
-       wait_queue_entry_t wait;
-};
-
 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
+       int ret;
        struct wait_page_key *key = arg;
        struct wait_page_queue *wait_page
                = container_of(wait, struct wait_page_queue, wait);
 
-       if (wait_page->page != key->page)
-              return 0;
-       key->page_match = 1;
-
-       if (wait_page->bit_nr != key->bit_nr)
+       if (!wake_page_match(wait_page, key))
                return 0;
 
        /*
-        * Stop walking if it's locked.
-        * Is this safe if put_and_wait_on_page_locked() is in use?
-        * Yes: the waker must hold a reference to this page, and if PG_locked
-        * has now already been set by another task, that task must also hold
-        * a reference to the *same usage* of this page; so there is no need
-        * to walk on to wake even the put_and_wait_on_page_locked() callers.
+        * If it's an exclusive wait, we try to get the bit for it,
+        * and stop walking if we can't.
+        *
+        * If it's a non-exclusive wait, then the fact that this
+        * wake function was called means that the bit already
+        * was cleared, and we don't care if somebody then
+        * re-took it.
         */
-       if (test_bit(key->bit_nr, &key->page->flags))
-               return -1;
+       ret = 0;
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(key->bit_nr, &key->page->flags))
+                       return -1;
+               ret = 1;
+       }
+       wait->flags |= WQ_FLAG_WOKEN;
 
-       return autoremove_wake_function(wait, mode, sync, key);
+       wake_up_state(wait->private, mode);
+
+       /*
+        * Ok, we have successfully done what we're waiting for,
+        * and we can unconditionally remove the wait entry.
+        *
+        * Note that this has to be the absolute last thing we do,
+        * since after list_del_init(&wait->entry) the wait entry
+        * might be de-allocated and the process might even have
+        * exited.
+        */
+       list_del_init_careful(&wait->entry);
+       return ret;
 }
 
 static void wake_up_page_bit(struct page *page, int bit_nr)
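
The open-coded page/bit matching that the removed lines above used to do now
lives in a wake_page_match() helper shared with the new async waiters (the
struct definitions removed at the top of this hunk move with it). A sketch of
that helper, reconstructed from the removed lines; it sits in
include/linux/pagemap.h in this series and the exact header version may
differ in minor details:

static inline int wake_page_match(struct wait_page_queue *wait_page,
                                  struct wait_page_key *key)
{
        if (wait_page->page != key->page)
                return 0;
        key->page_match = 1;

        if (wait_page->bit_nr != key->bit_nr)
                return 0;

        return 1;
}
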
@@ -1103,16 +1105,31 @@ enum behavior {
                         */
 };
 
+/*
+ * Attempt to check (or get) the page bit, and mark the
+ * waiter woken if successful.
+ */
+static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+                                       struct wait_queue_entry *wait)
+{
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(bit_nr, &page->flags))
+                       return false;
+       } else if (test_bit(bit_nr, &page->flags))
+               return false;
+
+       wait->flags |= WQ_FLAG_WOKEN;
+       return true;
+}
+
 static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        struct page *page, int bit_nr, int state, enum behavior behavior)
 {
        struct wait_page_queue wait_page;
        wait_queue_entry_t *wait = &wait_page.wait;
-       bool bit_is_set;
        bool thrashing = false;
        bool delayacct = false;
        unsigned long pflags;
-       int ret = 0;
 
        if (bit_nr == PG_locked &&
            !PageUptodate(page) && PageWorkingset(page)) {
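
trylock_page_bit_common() mirrors the wake-side logic above: an exclusive
waiter has to actually take the bit with test_and_set_bit(), while a shared
waiter only needs to see it clear. Whether a waiter is exclusive is carried
in wait->flags; the unchanged setup code just before the next hunk presumably
still initializes the entry along these lines (a sketch, not part of this
diff):

        init_wait(wait);
        wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
        wait->func = wake_page_function;
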
@@ -1130,48 +1147,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        wait_page.page = page;
        wait_page.bit_nr = bit_nr;
 
-       for (;;) {
-               spin_lock_irq(&q->lock);
+       /*
+        * Do one last check whether we can get the
+        * page bit synchronously.
+        *
+        * Do the SetPageWaiters() marking before that
+        * to let any waker we _just_ missed know they
+        * need to wake us up (otherwise they'll never
+        * even go to the slow case that looks at the
+        * page queue), and add ourselves to the wait
+        * queue if we need to sleep.
+        *
+        * This part needs to be done under the queue
+        * lock to avoid races.
+        */
+       spin_lock_irq(&q->lock);
+       SetPageWaiters(page);
+       if (!trylock_page_bit_common(page, bit_nr, wait))
+               __add_wait_queue_entry_tail(q, wait);
+       spin_unlock_irq(&q->lock);
 
-               if (likely(list_empty(&wait->entry))) {
-                       __add_wait_queue_entry_tail(q, wait);
-                       SetPageWaiters(page);
-               }
+       /*
+        * From now on, all the logic will be based on
+        * the WQ_FLAG_WOKEN flag, and the page
+        * bit testing (and setting) will be - or has
+        * already been - done by the wake function.
+        *
+        * We can drop our reference to the page.
+        */
+       if (behavior == DROP)
+               put_page(page);
 
+       for (;;) {
                set_current_state(state);
 
-               spin_unlock_irq(&q->lock);
-
-               bit_is_set = test_bit(bit_nr, &page->flags);
-               if (behavior == DROP)
-                       put_page(page);
-
-               if (likely(bit_is_set))
-                       io_schedule();
-
-               if (behavior == EXCLUSIVE) {
-                       if (!test_and_set_bit_lock(bit_nr, &page->flags))
-                               break;
-               } else if (behavior == SHARED) {
-                       if (!test_bit(bit_nr, &page->flags))
-                               break;
-               }
-
-               if (signal_pending_state(state, current)) {
-                       ret = -EINTR;
+               if (signal_pending_state(state, current))
                        break;
-               }
 
-               if (behavior == DROP) {
-                       /*
-                        * We can no longer safely access page->flags:
-                        * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
-                        * there is a risk of waiting forever on a page reused
-                        * for something that keeps it locked indefinitely.
-                        * But best check for -EINTR above before breaking.
-                        */
+               if (wait->flags & WQ_FLAG_WOKEN)
                        break;
-               }
+
+               io_schedule();
        }
 
        finish_wait(q, wait);
@@ -1190,7 +1206,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
         * bother with signals either.
         */
 
-       return ret;
+       return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
 void wait_on_page_bit(struct page *page, int bit_nr)
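
For reference, the thin wrappers around wait_on_page_bit_common() map the
enum behavior values onto real callers: SHARED for the plain waiters,
EXCLUSIVE for the lock takers. A sketch of the surrounding, unchanged code
(reconstructed from memory, details may differ):

void wait_on_page_bit(struct page *page, int bit_nr)
{
        wait_queue_head_t *q = page_waitqueue(page);
        wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
}

int wait_on_page_bit_killable(struct page *page, int bit_nr)
{
        wait_queue_head_t *q = page_waitqueue(page);
        return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
}

void __lock_page(struct page *__page)
{
        struct page *page = compound_head(__page);
        wait_queue_head_t *q = page_waitqueue(page);

        wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
                                EXCLUSIVE);
}

With the rewrite, the 0/-EINTR result is derived purely from WQ_FLAG_WOKEN,
so these wrappers need no changes of their own.
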
@@ -1207,6 +1223,44 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
+static int __wait_on_page_locked_async(struct page *page,
+                                      struct wait_page_queue *wait, bool set)
+{
+       struct wait_queue_head *q = page_waitqueue(page);
+       int ret = 0;
+
+       wait->page = page;
+       wait->bit_nr = PG_locked;
+
+       spin_lock_irq(&q->lock);
+       __add_wait_queue_entry_tail(q, &wait->wait);
+       SetPageWaiters(page);
+       if (set)
+               ret = !trylock_page(page);
+       else
+               ret = PageLocked(page);
+       /*
+        * If we were successful now, we know we're still on the
+        * waitqueue as we're still under the lock. This means it's
+        * safe to remove the entry and return success; we know the
+        * callback isn't going to trigger.
+        */
+       if (!ret)
+               __remove_wait_queue(q, &wait->wait);
+       else
+               ret = -EIOCBQUEUED;
+       spin_unlock_irq(&q->lock);
+       return ret;
+}
+
+static int wait_on_page_locked_async(struct page *page,
+                                    struct wait_page_queue *wait)
+{
+       if (!PageLocked(page))
+               return 0;
+       return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
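
The async variants hand the waiting over to the caller: the caller embeds a
struct wait_page_queue, points its wake function at a callback of its own,
and treats -EIOCBQUEUED as "the callback will run once PG_locked clears".
The real consumer is io_uring's async buffered-read path; the sketch below is
hypothetical (my_request and my_retry_callback are made-up names) and only
illustrates the expected calling convention:

struct my_request {
        struct kiocb            kiocb;
        struct wait_page_queue  wpq;    /* embedded wait entry */
        /* ... per-request state ... */
};

static int my_retry_callback(wait_queue_entry_t *wait, unsigned mode,
                             int sync, void *arg)
{
        struct wait_page_key *key = arg;
        struct wait_page_queue *wpq
                = container_of(wait, struct wait_page_queue, wait);

        /* only react to wakeups for the page/bit we queued on */
        if (!wake_page_match(wpq, key))
                return 0;

        list_del_init(&wait->entry);
        /* ... arrange for the request owning wpq to be retried ... */
        return 1;
}

/* submission side (sketch): */
        req->wpq.wait.func = my_retry_callback;
        req->wpq.wait.private = req;
        req->wpq.wait.flags = 0;
        INIT_LIST_HEAD(&req->wpq.wait.entry);
        req->kiocb.ki_flags |= IOCB_WAITQ;
        req->kiocb.ki_waitq = &req->wpq;
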
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1369,6 +1423,11 @@ int __lock_page_killable(struct page *__page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+{
+       return __wait_on_page_locked_async(page, wait, true);
+}
+
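
For the lock side, __lock_page_async() is the slow path behind a small
lock_page_async() helper (added to include/linux/pagemap.h in the same
series). Roughly, and hedged since the header is not part of this hunk:

static inline int lock_page_async(struct page *page,
                                  struct wait_page_queue *wait)
{
        if (!trylock_page(page))
                return __lock_page_async(page, wait);
        return 0;
}

That helper is what the IOCB_WAITQ branch in generic_file_buffered_read()
calls further down in this diff.
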
 /*
  * Return values:
  * 1 - page is locked; mmap_lock is still held.
@@ -2028,7 +2087,7 @@ find_page:
 
                page = find_get_page(mapping, index);
                if (!page) {
-                       if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
+                       if (iocb->ki_flags & IOCB_NOIO)
                                goto would_block;
                        page_cache_sync_readahead(mapping,
                                        ra, filp,
@@ -2047,17 +2106,25 @@ find_page:
                                        index, last_index - index);
                }
                if (!PageUptodate(page)) {
-                       if (iocb->ki_flags & IOCB_NOWAIT) {
-                               put_page(page);
-                               goto would_block;
-                       }
-
                        /*
                         * See comment in do_read_cache_page on why
                         * wait_on_page_locked is used to avoid unnecessary
                         * serialisations and why it's safe.
                         */
-                       error = wait_on_page_locked_killable(page);
+                       if (iocb->ki_flags & IOCB_WAITQ) {
+                               if (written) {
+                                       put_page(page);
+                                       goto out;
+                               }
+                               error = wait_on_page_locked_async(page,
+                                                               iocb->ki_waitq);
+                       } else {
+                               if (iocb->ki_flags & IOCB_NOWAIT) {
+                                       put_page(page);
+                                       goto would_block;
+                               }
+                               error = wait_on_page_locked_killable(page);
+                       }
                        if (unlikely(error))
                                goto readpage_error;
                        if (PageUptodate(page))
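
An -EIOCBQUEUED from wait_on_page_locked_async() escapes through the
readpage_error/out labels and becomes the return value of the read, which is
why the hunk above bails out with the bytes already copied (goto out) before
queueing when written != 0. A caller driving the iocb asynchronously would
handle it along these lines (hypothetical sketch; the real handling lives in
io_uring):

        ret = call_read_iter(file, kiocb, &iter);
        if (ret == -EIOCBQUEUED) {
                /*
                 * The wait_page_queue callback is armed; it will fire when
                 * PG_locked clears and the read can be re-issued then.
                 */
                return 0;
        }
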
@@ -2145,7 +2212,10 @@ page_ok:
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
-               error = lock_page_killable(page);
+               if (iocb->ki_flags & IOCB_WAITQ)
+                       error = lock_page_async(page, iocb->ki_waitq);
+               else
+                       error = lock_page_killable(page);
                if (unlikely(error))
                        goto readpage_error;
 
@@ -2164,7 +2234,7 @@ page_not_up_to_date_locked:
                }
 
 readpage:
-               if (iocb->ki_flags & IOCB_NOIO) {
+               if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
                        unlock_page(page);
                        put_page(page);
                        goto would_block;
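
Nothing changes at the user-space API level; the payoff of the IOCB_WAITQ
plumbing is that a plain buffered read submitted through io_uring can now be
retried from the page waitqueue callback instead of always being punted to an
io-wq worker thread when the page isn't up to date yet. A minimal liburing
test (assumes liburing is installed and /etc/hostname exists; the file name is
arbitrary):

/* gcc -Wall -o bufread bufread.c -luring */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        char buf[4096];
        int fd;

        fd = open("/etc/hostname", O_RDONLY);   /* buffered: no O_DIRECT */
        if (fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
        io_uring_submit(&ring);

        if (io_uring_wait_cqe(&ring, &cqe) == 0) {
                printf("read %d bytes\n", cqe->res);
                io_uring_cqe_seen(&ring, cqe);
        }

        io_uring_queue_exit(&ring);
        close(fd);
        return 0;
}
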