mm/rmap: introduce and use hugetlb_try_share_anon_rmap()
author David Hildenbrand <david@redhat.com>
Wed, 20 Dec 2023 22:44:29 +0000 (23:44 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 29 Dec 2023 19:58:48 +0000 (11:58 -0800)
hugetlb rmap handling differs quite a lot from "ordinary" rmap code.  For
example, hugetlb currently only supports entire mappings, and treats any
mapping as mapped using a single "logical PTE".  Let's move it out of the
way so we can overhaul our "ordinary" rmap implementation/interface.

So let's introduce and use hugetlb_try_share_anon_rmap() to make all hugetlb
handling use dedicated hugetlb_* rmap functions.

Add sanity checks that we end up with the right folios in the right
functions.

Note that try_to_unmap_one() does not need care.  Easy to spot because
among all that nasty hugetlb special-casing in that function, we're not
using set_huge_pte_at() on the anon path -- well, and that code assumes
that we would want to swapout.

Link: https://lkml.kernel.org/r/20231220224504.646757-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Yin Fengwei <fengwei.yin@intel.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/rmap.h
mm/rmap.c

index 5f26752..d6fefa0 100644 (file)
@@ -227,6 +227,30 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
        return 0;
 }
 
+/* See page_try_share_anon_rmap() */
+static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
+{
+       VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+       VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+       VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);
+
+       /* Paired with the memory barrier in try_grab_folio(). */
+       if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+               smp_mb();
+
+       if (unlikely(folio_maybe_dma_pinned(folio)))
+               return -EBUSY;
+       ClearPageAnonExclusive(&folio->page);
+
+       /*
+        * This is conceptually a smp_wmb() paired with the smp_rmb() in
+        * gup_must_unshare().
+        */
+       if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+               smp_mb__after_atomic();
+       return 0;
+}
+
 static inline void hugetlb_add_file_rmap(struct folio *folio)
 {
        VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
@@ -331,6 +355,7 @@ dup:
  */
 static inline int page_try_share_anon_rmap(struct page *page)
 {
+       VM_WARN_ON(folio_test_hugetlb(page_folio(page)));
        VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page);
 
        /* device private pages cannot get pinned via GUP. */
index a57ec92..c229e48 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2149,13 +2149,18 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                                       !anon_exclusive, subpage);
 
                        /* See page_try_share_anon_rmap(): clear PTE first. */
-                       if (anon_exclusive &&
-                           page_try_share_anon_rmap(subpage)) {
-                               if (folio_test_hugetlb(folio))
+                       if (folio_test_hugetlb(folio)) {
+                               if (anon_exclusive &&
+                                   hugetlb_try_share_anon_rmap(folio)) {
                                        set_huge_pte_at(mm, address, pvmw.pte,
                                                        pteval, hsz);
-                               else
-                                       set_pte_at(mm, address, pvmw.pte, pteval);
+                                       ret = false;
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       break;
+                               }
+                       } else if (anon_exclusive &&
+                                  page_try_share_anon_rmap(subpage)) {
+                               set_pte_at(mm, address, pvmw.pte, pteval);
                                ret = false;
                                page_vma_mapped_walk_done(&pvmw);
                                break;