Merge tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block
diff --git a/mm/filemap.c b/mm/filemap.c
index 385759c..9f131f1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -987,44 +987,46 @@ void __init pagecache_init(void)
        page_writeback_init();
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
-struct wait_page_key {
-       struct page *page;
-       int bit_nr;
-       int page_match;
-};
-
-struct wait_page_queue {
-       struct page *page;
-       int bit_nr;
-       wait_queue_entry_t wait;
-};
-
 static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
 {
+       int ret;
        struct wait_page_key *key = arg;
        struct wait_page_queue *wait_page
                = container_of(wait, struct wait_page_queue, wait);
 
-       if (wait_page->page != key->page)
-              return 0;
-       key->page_match = 1;
-
-       if (wait_page->bit_nr != key->bit_nr)
+       if (!wake_page_match(wait_page, key))
                return 0;
 
        /*
-        * Stop walking if it's locked.
-        * Is this safe if put_and_wait_on_page_locked() is in use?
-        * Yes: the waker must hold a reference to this page, and if PG_locked
-        * has now already been set by another task, that task must also hold
-        * a reference to the *same usage* of this page; so there is no need
-        * to walk on to wake even the put_and_wait_on_page_locked() callers.
+        * If it's an exclusive wait, we try to get the bit for it,
+        * and stop walking if we can't.
+        *
+        * If it's a non-exclusive wait, then the fact that this
+        * wake function was called means that the bit already
+        * was cleared, and we don't care if somebody then
+        * re-took it.
         */
-       if (test_bit(key->bit_nr, &key->page->flags))
-               return -1;
+       ret = 0;
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(key->bit_nr, &key->page->flags))
+                       return -1;
+               ret = 1;
+       }
+       wait->flags |= WQ_FLAG_WOKEN;
 
-       return autoremove_wake_function(wait, mode, sync, key);
+       wake_up_state(wait->private, mode);
+
+       /*
+        * Ok, we have successfully done what we're waiting for,
+        * and we can unconditionally remove the wait entry.
+        *
+        * Note that this has to be the absolute last thing we do,
+        * since after list_del_init(&wait->entry) the wait entry
+        * might be de-allocated and the process might even have
+        * exited.
+        */
+       list_del_init_careful(&wait->entry);
+       return ret;
 }
 
 static void wake_up_page_bit(struct page *page, int bit_nr)
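
The open-coded page/bit matching that the removed lines above used to do now
lives in a wake_page_match() helper shared with the new async waiters (the
struct definitions removed at the top of this hunk move with it). A sketch of
that helper, reconstructed from the removed lines; it sits in
include/linux/pagemap.h in this series and the exact header version may
differ in minor details:

static inline int wake_page_match(struct wait_page_queue *wait_page,
                                  struct wait_page_key *key)
{
        if (wait_page->page != key->page)
                return 0;
        key->page_match = 1;

        if (wait_page->bit_nr != key->bit_nr)
                return 0;

        return 1;
}
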
@@ -1103,16 +1105,31 @@ enum behavior {
                         */
 };
 
+/*
+ * Attempt to check (or get) the page bit, and mark the
+ * waiter woken if successful.
+ */
+static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+                                       struct wait_queue_entry *wait)
+{
+       if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+               if (test_and_set_bit(bit_nr, &page->flags))
+                       return false;
+       } else if (test_bit(bit_nr, &page->flags))
+               return false;
+
+       wait->flags |= WQ_FLAG_WOKEN;
+       return true;
+}
+
 static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        struct page *page, int bit_nr, int state, enum behavior behavior)
 {
        struct wait_page_queue wait_page;
        wait_queue_entry_t *wait = &wait_page.wait;
-       bool bit_is_set;
        bool thrashing = false;
        bool delayacct = false;
        unsigned long pflags;
-       int ret = 0;
 
        if (bit_nr == PG_locked &&
            !PageUptodate(page) && PageWorkingset(page)) {
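
trylock_page_bit_common() mirrors the wake-side logic above: an exclusive
waiter has to actually take the bit with test_and_set_bit(), while a shared
waiter only needs to see it clear. Whether a waiter is exclusive is carried
in wait->flags; the unchanged setup code just before the next hunk presumably
still initializes the entry along these lines (a sketch, not part of this
diff):

        init_wait(wait);
        wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
        wait->func = wake_page_function;
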
@@ -1130,48 +1147,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
        wait_page.page = page;
        wait_page.bit_nr = bit_nr;
 
-       for (;;) {
-               spin_lock_irq(&q->lock);
+       /*
+        * Do one last check whether we can get the
+        * page bit synchronously.
+        *
+        * Do the SetPageWaiters() marking before that
+        * to let any waker we _just_ missed know they
+        * need to wake us up (otherwise they'll never
+        * even go to the slow case that looks at the
+        * page queue), and add ourselves to the wait
+        * queue if we need to sleep.
+        *
+        * This part needs to be done under the queue
+        * lock to avoid races.
+        */
+       spin_lock_irq(&q->lock);
+       SetPageWaiters(page);
+       if (!trylock_page_bit_common(page, bit_nr, wait))
+               __add_wait_queue_entry_tail(q, wait);
+       spin_unlock_irq(&q->lock);
 
-               if (likely(list_empty(&wait->entry))) {
-                       __add_wait_queue_entry_tail(q, wait);
-                       SetPageWaiters(page);
-               }
+       /*
+        * From now on, all the logic will be based on
+        * the WQ_FLAG_WOKEN flag, and the page
+        * bit testing (and setting) will be - or has
+        * already been - done by the wake function.
+        *
+        * We can drop our reference to the page.
+        */
+       if (behavior == DROP)
+               put_page(page);
 
+       for (;;) {
                set_current_state(state);
 
-               spin_unlock_irq(&q->lock);
-
-               bit_is_set = test_bit(bit_nr, &page->flags);
-               if (behavior == DROP)
-                       put_page(page);
-
-               if (likely(bit_is_set))
-                       io_schedule();
-
-               if (behavior == EXCLUSIVE) {
-                       if (!test_and_set_bit_lock(bit_nr, &page->flags))
-                               break;
-               } else if (behavior == SHARED) {
-                       if (!test_bit(bit_nr, &page->flags))
-                               break;
-               }
-
-               if (signal_pending_state(state, current)) {
-                       ret = -EINTR;
+               if (signal_pending_state(state, current))
                        break;
-               }
 
-               if (behavior == DROP) {
-                       /*
-                        * We can no longer safely access page->flags:
-                        * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
-                        * there is a risk of waiting forever on a page reused
-                        * for something that keeps it locked indefinitely.
-                        * But best check for -EINTR above before breaking.
-                        */
+               if (wait->flags & WQ_FLAG_WOKEN)
                        break;
-               }
+
+               io_schedule();
        }
 
        finish_wait(q, wait);
@@ -1190,7 +1206,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
         * bother with signals either.
         */
 
-       return ret;
+       return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
 void wait_on_page_bit(struct page *page, int bit_nr)
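
For reference, the thin wrappers around wait_on_page_bit_common() map the
enum behavior values onto real callers: SHARED for the plain waiters,
EXCLUSIVE for the lock takers. A sketch of the surrounding, unchanged code
(reconstructed from memory, details may differ):

void wait_on_page_bit(struct page *page, int bit_nr)
{
        wait_queue_head_t *q = page_waitqueue(page);
        wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
}

int wait_on_page_bit_killable(struct page *page, int bit_nr)
{
        wait_queue_head_t *q = page_waitqueue(page);
        return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
}

void __lock_page(struct page *__page)
{
        struct page *page = compound_head(__page);
        wait_queue_head_t *q = page_waitqueue(page);

        wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
                                EXCLUSIVE);
}

With the rewrite, the 0/-EINTR result is derived purely from WQ_FLAG_WOKEN,
so these wrappers need no changes of their own.
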
@@ -1207,6 +1223,44 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
+static int __wait_on_page_locked_async(struct page *page,
+                                      struct wait_page_queue *wait, bool set)
+{
+       struct wait_queue_head *q = page_waitqueue(page);
+       int ret = 0;
+
+       wait->page = page;
+       wait->bit_nr = PG_locked;
+
+       spin_lock_irq(&q->lock);
+       __add_wait_queue_entry_tail(q, &wait->wait);
+       SetPageWaiters(page);
+       if (set)
+               ret = !trylock_page(page);
+       else
+               ret = PageLocked(page);
+       /*
+        * If we were successful now, we know we're still on the
+        * waitqueue as we're still under the lock. This means it's
+        * safe to remove the entry and return success; we know the
+        * callback isn't going to trigger.
+        */
+       if (!ret)
+               __remove_wait_queue(q, &wait->wait);
+       else
+               ret = -EIOCBQUEUED;
+       spin_unlock_irq(&q->lock);
+       return ret;
+}
+
+static int wait_on_page_locked_async(struct page *page,
+                                    struct wait_page_queue *wait)
+{
+       if (!PageLocked(page))
+               return 0;
+       return __wait_on_page_locked_async(compound_head(page), wait, false);
+}
+
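
The async variants hand the waiting over to the caller: the caller embeds a
struct wait_page_queue, points its wake function at a callback of its own,
and treats -EIOCBQUEUED as "the callback will run once PG_locked clears".
The real consumer is io_uring's async buffered-read path; the sketch below is
hypothetical (my_request and my_retry_callback are made-up names) and only
illustrates the expected calling convention:

struct my_request {
        struct kiocb            kiocb;
        struct wait_page_queue  wpq;    /* embedded wait entry */
        /* ... per-request state ... */
};

static int my_retry_callback(wait_queue_entry_t *wait, unsigned mode,
                             int sync, void *arg)
{
        struct wait_page_key *key = arg;
        struct wait_page_queue *wpq
                = container_of(wait, struct wait_page_queue, wait);

        /* only react to wakeups for the page/bit we queued on */
        if (!wake_page_match(wpq, key))
                return 0;

        list_del_init(&wait->entry);
        /* ... arrange for the request owning wpq to be retried ... */
        return 1;
}

/* submission side (sketch): */
        req->wpq.wait.func = my_retry_callback;
        req->wpq.wait.private = req;
        req->wpq.wait.flags = 0;
        INIT_LIST_HEAD(&req->wpq.wait.entry);
        req->kiocb.ki_flags |= IOCB_WAITQ;
        req->kiocb.ki_waitq = &req->wpq;
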
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -1369,6 +1423,11 @@ int __lock_page_killable(struct page *__page)
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
+int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+{
+       return __wait_on_page_locked_async(page, wait, true);
+}
+
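
For the lock side, __lock_page_async() is the slow path behind a small
lock_page_async() helper (added to include/linux/pagemap.h in the same
series). Roughly, and hedged since the header is not part of this hunk:

static inline int lock_page_async(struct page *page,
                                  struct wait_page_queue *wait)
{
        if (!trylock_page(page))
                return __lock_page_async(page, wait);
        return 0;
}

That helper is what the IOCB_WAITQ branch in generic_file_buffered_read()
calls further down in this diff.
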
 /*
  * Return values:
  * 1 - page is locked; mmap_lock is still held.
@@ -2028,7 +2087,7 @@ find_page:
 
                page = find_get_page(mapping, index);
                if (!page) {
-                       if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
+                       if (iocb->ki_flags & IOCB_NOIO)
                                goto would_block;
                        page_cache_sync_readahead(mapping,
                                        ra, filp,
@@ -2047,17 +2106,25 @@ find_page:
                                        index, last_index - index);
                }
                if (!PageUptodate(page)) {
-                       if (iocb->ki_flags & IOCB_NOWAIT) {
-                               put_page(page);
-                               goto would_block;
-                       }
-
                        /*
                         * See comment in do_read_cache_page on why
                         * wait_on_page_locked is used to avoid unnecessary
                         * serialisations and why it's safe.
                         */
-                       error = wait_on_page_locked_killable(page);
+                       if (iocb->ki_flags & IOCB_WAITQ) {
+                               if (written) {
+                                       put_page(page);
+                                       goto out;
+                               }
+                               error = wait_on_page_locked_async(page,
+                                                               iocb->ki_waitq);
+                       } else {
+                               if (iocb->ki_flags & IOCB_NOWAIT) {
+                                       put_page(page);
+                                       goto would_block;
+                               }
+                               error = wait_on_page_locked_killable(page);
+                       }
                        if (unlikely(error))
                                goto readpage_error;
                        if (PageUptodate(page))
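
An -EIOCBQUEUED from wait_on_page_locked_async() escapes through the
readpage_error/out labels and becomes the return value of the read, which is
why the hunk above bails out with the bytes already copied (goto out) before
queueing when written != 0. A caller driving the iocb asynchronously would
handle it along these lines (hypothetical sketch; the real handling lives in
io_uring):

        ret = call_read_iter(file, kiocb, &iter);
        if (ret == -EIOCBQUEUED) {
                /*
                 * The wait_page_queue callback is armed; it will fire when
                 * PG_locked clears and the read can be re-issued then.
                 */
                return 0;
        }
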
@@ -2145,7 +2212,10 @@ page_ok:
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
-               error = lock_page_killable(page);
+               if (iocb->ki_flags & IOCB_WAITQ)
+                       error = lock_page_async(page, iocb->ki_waitq);
+               else
+                       error = lock_page_killable(page);
                if (unlikely(error))
                        goto readpage_error;
 
@@ -2164,7 +2234,7 @@ page_not_up_to_date_locked:
                }
 
 readpage:
-               if (iocb->ki_flags & IOCB_NOIO) {
+               if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
                        unlock_page(page);
                        put_page(page);
                        goto would_block;
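
Nothing changes at the user-space API level; the payoff of the IOCB_WAITQ
plumbing is that a plain buffered read submitted through io_uring can now be
retried from the page waitqueue callback instead of always being punted to an
io-wq worker thread when the page isn't up to date yet. A minimal liburing
test (assumes liburing is installed and /etc/hostname exists; the file name is
arbitrary):

/* gcc -Wall -o bufread bufread.c -luring */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;
        char buf[4096];
        int fd;

        fd = open("/etc/hostname", O_RDONLY);   /* buffered: no O_DIRECT */
        if (fd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
        io_uring_submit(&ring);

        if (io_uring_wait_cqe(&ring, &cqe) == 0) {
                printf("read %d bytes\n", cqe->res);
                io_uring_cqe_seen(&ring, cqe);
        }

        io_uring_queue_exit(&ring);
        close(fd);
        return 0;
}
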