mm/rmap: add support for PUD sized mappings to rmap
authorAlistair Popple <apopple@nvidia.com>
Fri, 28 Feb 2025 03:31:09 +0000 (14:31 +1100)
committerAndrew Morton <akpm@linux-foundation.org>
Tue, 18 Mar 2025 05:06:39 +0000 (22:06 -0700)
The rmap doesn't currently support adding a PUD mapping of a folio.  This
patch adds support for entire PUD mappings of folios, primarily to allow
for more standard refcounting of device DAX folios.  Currently DAX is the
only user of this and it doesn't require support for partially mapped
PUD-sized folios so we don't support that for now.

Link: https://lkml.kernel.org/r/248582c07896e30627d1aeaeebc6949cfd91b851.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/rmap.h
mm/rmap.c

index 69e9a43..6abf796 100644 (file)
@@ -192,6 +192,7 @@ typedef int __bitwise rmap_t;
 enum rmap_level {
        RMAP_LEVEL_PTE = 0,
        RMAP_LEVEL_PMD,
+       RMAP_LEVEL_PUD,
 };
 
 static inline void __folio_rmap_sanity_checks(const struct folio *folio,
@@ -228,6 +229,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio,
                VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
                VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
                break;
+       case RMAP_LEVEL_PUD:
+               /*
+                * Assume that we are creating a single "entire" mapping of the
+                * folio.
+                */
+               VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio);
+               VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio);
+               break;
        default:
                VM_WARN_ON_ONCE(true);
        }
@@ -251,12 +260,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
        folio_add_file_rmap_ptes(folio, page, 1, vma)
 void folio_add_file_rmap_pmd(struct folio *, struct page *,
                struct vm_area_struct *);
+void folio_add_file_rmap_pud(struct folio *, struct page *,
+               struct vm_area_struct *);
 void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
                struct vm_area_struct *);
 #define folio_remove_rmap_pte(folio, page, vma) \
        folio_remove_rmap_ptes(folio, page, 1, vma)
 void folio_remove_rmap_pmd(struct folio *, struct page *,
                struct vm_area_struct *);
+void folio_remove_rmap_pud(struct folio *, struct page *,
+               struct vm_area_struct *);
 
 void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
                unsigned long address, rmap_t flags);
@@ -341,6 +354,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                break;
        case RMAP_LEVEL_PMD:
+       case RMAP_LEVEL_PUD:
                atomic_inc(&folio->_entire_mapcount);
                atomic_inc(&folio->_large_mapcount);
                break;
@@ -437,6 +451,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                break;
        case RMAP_LEVEL_PMD:
+       case RMAP_LEVEL_PUD:
                if (PageAnonExclusive(page)) {
                        if (unlikely(maybe_pinned))
                                return -EBUSY;
index 333ecac..bcec867 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1269,12 +1269,19 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                break;
        case RMAP_LEVEL_PMD:
+       case RMAP_LEVEL_PUD:
                first = atomic_inc_and_test(&folio->_entire_mapcount);
                if (first) {
                        nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped);
                        if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) {
-                               *nr_pmdmapped = folio_nr_pages(folio);
-                               nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
+                               nr_pages = folio_nr_pages(folio);
+                               /*
+                                * We only track PMD mappings of PMD-sized
+                                * folios separately.
+                                */
+                               if (level == RMAP_LEVEL_PMD)
+                                       *nr_pmdmapped = nr_pages;
+                               nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
                                /* Raced ahead of a remove and another add? */
                                if (unlikely(nr < 0))
                                        nr = 0;
@@ -1420,6 +1427,13 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
                case RMAP_LEVEL_PMD:
                        SetPageAnonExclusive(page);
                        break;
+               case RMAP_LEVEL_PUD:
+                       /*
+                        * Keep the compiler happy, we don't support anonymous
+                        * PUD mappings.
+                        */
+                       WARN_ON_ONCE(1);
+                       break;
                }
        }
        for (i = 0; i < nr_pages; i++) {
@@ -1613,6 +1627,27 @@ void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
 #endif
 }
 
+/**
+ * folio_add_file_rmap_pud - add a PUD mapping to a page range of a folio
+ * @folio:     The folio to add the mapping to
+ * @page:      The first page to add
+ * @vma:       The vm area in which the mapping is added
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PUD_NR)
+ *
+ * The mapping is added through __folio_add_file_rmap() at RMAP_LEVEL_PUD.
+ * This is only compiled in when both CONFIG_TRANSPARENT_HUGEPAGE and
+ * CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD are defined; otherwise no
+ * mapping is added and the call only triggers WARN_ON_ONCE().
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
+               struct vm_area_struct *vma)
+{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+       defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+       __folio_add_file_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
+#else
+       WARN_ON_ONCE(true);
+#endif
+}
+
 static __always_inline void __folio_remove_rmap(struct folio *folio,
                struct page *page, int nr_pages, struct vm_area_struct *vma,
                enum rmap_level level)
@@ -1642,13 +1677,16 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
                partially_mapped = nr && atomic_read(mapped);
                break;
        case RMAP_LEVEL_PMD:
+       case RMAP_LEVEL_PUD:
                atomic_dec(&folio->_large_mapcount);
                last = atomic_add_negative(-1, &folio->_entire_mapcount);
                if (last) {
                        nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
                        if (likely(nr < ENTIRELY_MAPPED)) {
-                               nr_pmdmapped = folio_nr_pages(folio);
-                               nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
+                               nr_pages = folio_nr_pages(folio);
+                               if (level == RMAP_LEVEL_PMD)
+                                       nr_pmdmapped = nr_pages;
+                               nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
                                /* Raced ahead of another remove and an add? */
                                if (unlikely(nr < 0))
                                        nr = 0;
@@ -1722,6 +1760,27 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
 #endif
 }
 
+/**
+ * folio_remove_rmap_pud - remove a PUD mapping from a page range of a folio
+ * @folio:     The folio to remove the mapping from
+ * @page:      The first page to remove
+ * @vma:       The vm area from which the mapping is removed
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PUD_NR)
+ *
+ * The mapping is removed through __folio_remove_rmap() at RMAP_LEVEL_PUD.
+ * This is only compiled in when both CONFIG_TRANSPARENT_HUGEPAGE and
+ * CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD are defined; otherwise no
+ * mapping is removed and the call only triggers WARN_ON_ONCE().
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_pud(struct folio *folio, struct page *page,
+               struct vm_area_struct *vma)
+{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+       defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+       __folio_remove_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
+#else
+       WARN_ON_ONCE(true);
+#endif
+}
+
 /* We support batch unmapping of PTEs for lazyfree large folios */
 static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
                        struct folio *folio, pte_t *ptep)