mm: put_and_wait_on_page_locked() while page is migrated

author Hugh Dickins <hughd@google.com>

Fri, 28 Dec 2018 08:36:14 +0000 (00:36 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 28 Dec 2018 20:11:48 +0000 (12:11 -0800)
author Hugh Dickins <hughd@google.com>
Fri, 28 Dec 2018 08:36:14 +0000 (00:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Dec 2018 20:11:48 +0000 (12:11 -0800)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h

index 226f96f..e2d7039 100644 (file)
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -537,6 +537,8 @@ static inline int wait_on_page_locked_killable(struct page *page)
         return wait_on_page_bit_killable(compound_head(page), PG_locked);
  }
  
+extern void put_and_wait_on_page_locked(struct page *page);
+
  /* 
   * Wait for a page to complete writeback
   */
diff --git a/mm/filemap.c b/mm/filemap.c

index 81adec8..d2df272 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -981,7 +981,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
         if (wait_page->bit_nr != key->bit_nr)
                 return 0;
  
-       /* Stop walking if it's locked */
+       /*
+        * Stop walking if it's locked.
+        * Is this safe if put_and_wait_on_page_locked() is in use?
+        * Yes: the waker must hold a reference to this page, and if PG_locked
+        * has now already been set by another task, that task must also hold
+        * a reference to the *same usage* of this page; so there is no need
+        * to walk on to wake even the put_and_wait_on_page_locked() callers.
+        */
         if (test_bit(key->bit_nr, &key->page->flags))
                 return -1;
  
@@ -1049,25 +1056,44 @@ static void wake_up_page(struct page *page, int bit)
         wake_up_page_bit(page, bit);
  }
  
+/*
+ * A choice of three behaviors for wait_on_page_bit_common():
+ */
+enum behavior {
+       EXCLUSIVE,      /* Hold ref to page and take the bit when woken, like
+                        * __lock_page() waiting on then setting PG_locked.
+                        */
+       SHARED,         /* Hold ref to page and check the bit when woken, like
+                        * wait_on_page_writeback() waiting on PG_writeback.
+                        */
+       DROP,           /* Drop ref to page before wait, no check when woken,
+                        * like put_and_wait_on_page_locked() on PG_locked.
+                        */
+};
+
  static inline int wait_on_page_bit_common(wait_queue_head_t *q,
-               struct page *page, int bit_nr, int state, bool lock)
+       struct page *page, int bit_nr, int state, enum behavior behavior)
  {
         struct wait_page_queue wait_page;
         wait_queue_entry_t *wait = &wait_page.wait;
+       bool bit_is_set;
         bool thrashing = false;
+       bool delayacct = false;
         unsigned long pflags;
         int ret = 0;
  
         if (bit_nr == PG_locked &&
             !PageUptodate(page) && PageWorkingset(page)) {
-               if (!PageSwapBacked(page))
+               if (!PageSwapBacked(page)) {
                         delayacct_thrashing_start();
+                       delayacct = true;
+               }
                 psi_memstall_enter(&pflags);
                 thrashing = true;
         }
  
         init_wait(wait);
-       wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
+       wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
         wait->func = wake_page_function;
         wait_page.page = page;
         wait_page.bit_nr = bit_nr;
@@ -1084,14 +1110,17 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
  
                 spin_unlock_irq(&q->lock);
  
-               if (likely(test_bit(bit_nr, &page->flags))) {
+               bit_is_set = test_bit(bit_nr, &page->flags);
+               if (behavior == DROP)
+                       put_page(page);
+
+               if (likely(bit_is_set))
                         io_schedule();
-               }
  
-               if (lock) {
+               if (behavior == EXCLUSIVE) {
                         if (!test_and_set_bit_lock(bit_nr, &page->flags))
                                 break;
-               } else {
+               } else if (behavior == SHARED) {
                         if (!test_bit(bit_nr, &page->flags))
                                 break;
                 }
@@ -1100,12 +1129,23 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
                         ret = -EINTR;
                         break;
                 }
+
+               if (behavior == DROP) {
+                       /*
+                        * We can no longer safely access page->flags:
+                        * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
+                        * there is a risk of waiting forever on a page reused
+                        * for something that keeps it locked indefinitely.
+                        * But best check for -EINTR above before breaking.
+                        */
+                       break;
+               }
         }
  
         finish_wait(q, wait);
  
         if (thrashing) {
-               if (!PageSwapBacked(page))
+               if (delayacct)
                         delayacct_thrashing_end();
                 psi_memstall_leave(&pflags);
         }
@@ -1124,17 +1164,36 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
  void wait_on_page_bit(struct page *page, int bit_nr)
  {
         wait_queue_head_t *q = page_waitqueue(page);
-       wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
+       wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
  }
  EXPORT_SYMBOL(wait_on_page_bit);
  
  int wait_on_page_bit_killable(struct page *page, int bit_nr)
  {
         wait_queue_head_t *q = page_waitqueue(page);
-       return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
+       return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
  }
  EXPORT_SYMBOL(wait_on_page_bit_killable);
  
+/**
+ * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
+ * @page: The page to wait for.
+ *
+ * The caller should hold a reference on @page.  They expect the page to
+ * become unlocked relatively soon, but do not wish to hold up migration
+ * (for example) by holding the reference while waiting for the page to
+ * come unlocked.  After this function returns, the caller should not
+ * dereference @page.
+ */
+void put_and_wait_on_page_locked(struct page *page)
+{
+       wait_queue_head_t *q;
+
+       page = compound_head(page);
+       q = page_waitqueue(page);
+       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
+}
+
  /**
   * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
   * @page: Page defining the wait queue of interest
@@ -1264,7 +1323,8 @@ void __lock_page(struct page *__page)
  {
         struct page *page = compound_head(__page);
         wait_queue_head_t *q = page_waitqueue(page);
-       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
+       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
+                               EXCLUSIVE);
  }
  EXPORT_SYMBOL(__lock_page);
  
@@ -1272,7 +1332,8 @@ int __lock_page_killable(struct page *__page)
  {
         struct page *page = compound_head(__page);
         wait_queue_head_t *q = page_waitqueue(page);
-       return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
+       return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
+                                       EXCLUSIVE);
  }
  EXPORT_SYMBOL_GPL(__lock_page_killable);
  
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index da6682b..0c0e184 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1490,8 +1490,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                 if (!get_page_unless_zero(page))
                         goto out_unlock;
                 spin_unlock(vmf->ptl);
-               wait_on_page_locked(page);
-               put_page(page);
+               put_and_wait_on_page_locked(page);
                 goto out;
         }
  
@@ -1527,8 +1526,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                 if (!get_page_unless_zero(page))
                         goto out_unlock;
                 spin_unlock(vmf->ptl);
-               wait_on_page_locked(page);
-               put_page(page);
+               put_and_wait_on_page_locked(page);
                 goto out;
         }
  
diff --git a/mm/migrate.c b/mm/migrate.c

index f7e4bfd..acda06f 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -327,16 +327,13 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
  
         /*
          * Once page cache replacement of page migration started, page_count
-        * *must* be zero. And, we don't want to call wait_on_page_locked()
-        * against a page without get_page().
-        * So, we use get_page_unless_zero(), here. Even failed, page fault
-        * will occur again.
+        * is zero; but we must not call put_and_wait_on_page_locked() without
+        * a ref. Use get_page_unless_zero(), and just fault again if it fails.
          */
         if (!get_page_unless_zero(page))
                 goto out;
         pte_unmap_unlock(ptep, ptl);
-       wait_on_page_locked(page);
-       put_page(page);
+       put_and_wait_on_page_locked(page);
         return;
  out:
         pte_unmap_unlock(ptep, ptl);
@@ -370,8 +367,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
         if (!get_page_unless_zero(page))
                 goto unlock;
         spin_unlock(ptl);
-       wait_on_page_locked(page);
-       put_page(page);
+       put_and_wait_on_page_locked(page);
         return;
  unlock:
         spin_unlock(ptl);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index bd8971a..a714c4f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1460,14 +1460,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         count_memcg_page_event(page, PGLAZYFREED);
                 } else if (!mapping || !__remove_mapping(mapping, page, true))
                         goto keep_locked;
-               /*
-                * At this point, we have no other references and there is
-                * no way to pick any more up (removed from LRU, removed
-                * from pagecache). Can use non-atomic bitops now (and
-                * we obviously don't have to worry about waking up a process
-                * waiting on the page lock, because there are no references.
-                */
-               __ClearPageLocked(page);
+
+               unlock_page(page);
  free_it:
                 nr_reclaimed++;
author	Hugh Dickins <hughd@google.com>
	Fri, 28 Dec 2018 08:36:14 +0000 (00:36 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 28 Dec 2018 20:11:48 +0000 (12:11 -0800)
include/linux/pagemap.h		patch | blob | history
mm/filemap.c		patch | blob | history
mm/huge_memory.c		patch | blob | history
mm/migrate.c		patch | blob | history
mm/vmscan.c		patch | blob | history