hugetlbfs: fix anon huge page migration race

author Mike Kravetz <mike.kravetz@oracle.com>

Sat, 14 Nov 2020 06:52:16 +0000 (22:52 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 14 Nov 2020 19:26:04 +0000 (11:26 -0800)
author Mike Kravetz <mike.kravetz@oracle.com>
Sat, 14 Nov 2020 06:52:16 +0000 (22:52 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 14 Nov 2020 19:26:04 +0000 (11:26 -0800)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 5a620f6..37f15c3 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1567,104 +1567,24 @@ int PageHeadHuge(struct page *page_head)
         return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR;
  }
  
-/*
- * Find address_space associated with hugetlbfs page.
- * Upon entry page is locked and page 'was' mapped although mapped state
- * could change.  If necessary, use anon_vma to find vma and associated
- * address space.  The returned mapping may be stale, but it can not be
- * invalid as page lock (which is held) is required to destroy mapping.
- */
-static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)
-{
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff_start, pgoff_end;
-       struct anon_vma_chain *avc;
-       struct address_space *mapping = page_mapping(hpage);
-
-       /* Simple file based mapping */
-       if (mapping)
-               return mapping;
-
-       /*
-        * Even anonymous hugetlbfs mappings are associated with an
-        * underlying hugetlbfs file (see hugetlb_file_setup in mmap
-        * code).  Find a vma associated with the anonymous vma, and
-        * use the file pointer to get address_space.
-        */
-       anon_vma = page_lock_anon_vma_read(hpage);
-       if (!anon_vma)
-               return mapping;  /* NULL */
-
-       /* Use first found vma */
-       pgoff_start = page_to_pgoff(hpage);
-       pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
-                                       pgoff_start, pgoff_end) {
-               struct vm_area_struct *vma = avc->vma;
-
-               mapping = vma->vm_file->f_mapping;
-               break;
-       }
-
-       anon_vma_unlock_read(anon_vma);
-       return mapping;
-}
-
  /*
   * Find and lock address space (mapping) in write mode.
   *
- * Upon entry, the page is locked which allows us to find the mapping
- * even in the case of an anon page.  However, locking order dictates
- * the i_mmap_rwsem be acquired BEFORE the page lock.  This is hugetlbfs
- * specific.  So, we first try to lock the sema while still holding the
- * page lock.  If this works, great!  If not, then we need to drop the
- * page lock and then acquire i_mmap_rwsem and reacquire page lock.  Of
- * course, need to revalidate state along the way.
+ * Upon entry, the page is locked which means that page_mapping() is
+ * stable.  Due to locking order, we can only trylock_write.  If we can
+ * not get the lock, simply return NULL to caller.
   */
  struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
  {
-       struct address_space *mapping, *mapping2;
+       struct address_space *mapping = page_mapping(hpage);
  
-       mapping = _get_hugetlb_page_mapping(hpage);
-retry:
         if (!mapping)
                 return mapping;
  
-       /*
-        * If no contention, take lock and return
-        */
         if (i_mmap_trylock_write(mapping))
                 return mapping;
  
-       /*
-        * Must drop page lock and wait on mapping sema.
-        * Note:  Once page lock is dropped, mapping could become invalid.
-        * As a hack, increase map count until we lock page again.
-        */
-       atomic_inc(&hpage->_mapcount);
-       unlock_page(hpage);
-       i_mmap_lock_write(mapping);
-       lock_page(hpage);
-       atomic_add_negative(-1, &hpage->_mapcount);
-
-       /* verify page is still mapped */
-       if (!page_mapped(hpage)) {
-               i_mmap_unlock_write(mapping);
-               return NULL;
-       }
-
-       /*
-        * Get address space again and verify it is the same one
-        * we locked.  If not, drop lock and retry.
-        */
-       mapping2 = _get_hugetlb_page_mapping(hpage);
-       if (mapping2 != mapping) {
-               i_mmap_unlock_write(mapping);
-               mapping = mapping2;
-               goto retry;
-       }
-
-       return mapping;
+       return NULL;
  }
  
  pgoff_t __basepage_index(struct page *page)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c

index c0bb186..5d880d4 100644 (file)
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1057,27 +1057,25 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         if (!PageHuge(hpage)) {
                 unmap_success = try_to_unmap(hpage, ttu);
         } else {
-               /*
-                * For hugetlb pages, try_to_unmap could potentially call
-                * huge_pmd_unshare.  Because of this, take semaphore in
-                * write mode here and set TTU_RMAP_LOCKED to indicate we
-                * have taken the lock at this higer level.
-                *
-                * Note that the call to hugetlb_page_mapping_lock_write
-                * is necessary even if mapping is already set.  It handles
-                * ugliness of potentially having to drop page lock to obtain
-                * i_mmap_rwsem.
-                */
-               mapping = hugetlb_page_mapping_lock_write(hpage);
-
-               if (mapping) {
-                       unmap_success = try_to_unmap(hpage,
+               if (!PageAnon(hpage)) {
+                       /*
+                        * For hugetlb pages in shared mappings, try_to_unmap
+                        * could potentially call huge_pmd_unshare.  Because of
+                        * this, take semaphore in write mode here and set
+                        * TTU_RMAP_LOCKED to indicate we have taken the lock
+                        * at this higer level.
+                        */
+                       mapping = hugetlb_page_mapping_lock_write(hpage);
+                       if (mapping) {
+                               unmap_success = try_to_unmap(hpage,
                                                      ttu|TTU_RMAP_LOCKED);
-                       i_mmap_unlock_write(mapping);
+                               i_mmap_unlock_write(mapping);
+                       } else {
+                               pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
+                               unmap_success = false;
+                       }
                 } else {
-                       pr_info("Memory failure: %#lx: could not find mapping for mapped huge page\n",
-                               pfn);
-                       unmap_success = false;
+                       unmap_success = try_to_unmap(hpage, ttu);
                 }
         }
         if (!unmap_success)
diff --git a/mm/migrate.c b/mm/migrate.c

index 5ca5842..5795cb8 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1328,34 +1328,38 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                 goto put_anon;
  
         if (page_mapped(hpage)) {
-               /*
-                * try_to_unmap could potentially call huge_pmd_unshare.
-                * Because of this, take semaphore in write mode here and
-                * set TTU_RMAP_LOCKED to let lower levels know we have
-                * taken the lock.
-                */
-               mapping = hugetlb_page_mapping_lock_write(hpage);
-               if (unlikely(!mapping))
-                       goto unlock_put_anon;
+               bool mapping_locked = false;
+               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK|
+                                       TTU_IGNORE_ACCESS;
+
+               if (!PageAnon(hpage)) {
+                       /*
+                        * In shared mappings, try_to_unmap could potentially
+                        * call huge_pmd_unshare.  Because of this, take
+                        * semaphore in write mode here and set TTU_RMAP_LOCKED
+                        * to let lower levels know we have taken the lock.
+                        */
+                       mapping = hugetlb_page_mapping_lock_write(hpage);
+                       if (unlikely(!mapping))
+                               goto unlock_put_anon;
+
+                       mapping_locked = true;
+                       ttu |= TTU_RMAP_LOCKED;
+               }
  
-               try_to_unmap(hpage,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
-                       TTU_RMAP_LOCKED);
+               try_to_unmap(hpage, ttu);
                 page_was_mapped = 1;
-               /*
-                * Leave mapping locked until after subsequent call to
-                * remove_migration_ptes()
-                */
+
+               if (mapping_locked)
+                       i_mmap_unlock_write(mapping);
         }
  
         if (!page_mapped(hpage))
                 rc = move_to_new_page(new_hpage, hpage, mode);
  
-       if (page_was_mapped) {
+       if (page_was_mapped)
                 remove_migration_ptes(hpage,
-                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true);
-               i_mmap_unlock_write(mapping);
-       }
+                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
  
  unlock_put_anon:
         unlock_page(new_hpage);
diff --git a/mm/rmap.c b/mm/rmap.c

index 1b84945..31b2932 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1413,9 +1413,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 /*
                  * If sharing is possible, start and end will be adjusted
                  * accordingly.
-                *
-                * If called for a huge page, caller must hold i_mmap_rwsem
-                * in write mode as it is possible to call huge_pmd_unshare.
                  */
                 adjust_range_if_pmd_sharing_possible(vma, &range.start,
                                                      &range.end);
@@ -1462,7 +1459,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
                 address = pvmw.address;
  
-               if (PageHuge(page)) {
+               if (PageHuge(page) && !PageAnon(page)) {
                         /*
                          * To call huge_pmd_unshare, i_mmap_rwsem must be
                          * held in write mode.  Caller needs to explicitly
author	Mike Kravetz <mike.kravetz@oracle.com>
	Sat, 14 Nov 2020 06:52:16 +0000 (22:52 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 14 Nov 2020 19:26:04 +0000 (11:26 -0800)
mm/hugetlb.c		patch \| blob \| history
mm/memory-failure.c		patch \| blob \| history
mm/migrate.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history