mm/vmalloc: enable mapping of huge pages at pte level in vmap
author: Christophe Leroy <christophe.leroy@csgroup.eu>
Thu, 1 Jul 2021 01:48:06 +0000 (18:48 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Jul 2021 03:47:26 +0000 (20:47 -0700)
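On some architectures, such as powerpc, there are huge page sizes that
are mapped at the PTE level.

Enable vmap to create such mappings.

To do so, an architecture can provide arch_vmap_pte_range_map_size(),
which returns the size of the page to map at the PTE level.  When the
architecture does not provide it, the generic fallback returns PAGE_SIZE.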

Link: https://lkml.kernel.org/r/fb3ccc73377832ac6708181ec419128a2f98ce36.1620795204.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Uladzislau Rezki <uladzislau.rezki@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/vmalloc.h
mm/vmalloc.c

index bfaaf0b..54ec073 100644 (file)
@@ -104,6 +104,14 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 }
 #endif
 
+#ifndef arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+                                                        u64 pfn, unsigned int max_page_shift)
+{
+       return PAGE_SIZE;
+}
+#endif
+
 /*
  *     Highlevel APIs for driver use
  */
index b2ec7f7..fe0af8d 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/overflow.h>
 #include <linux/pgtable.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
@@ -83,10 +84,11 @@ static void free_work(struct work_struct *w)
 /*** Page table manipulation functions ***/
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        phys_addr_t phys_addr, pgprot_t prot,
-                       pgtbl_mod_mask *mask)
+                       unsigned int max_page_shift, pgtbl_mod_mask *mask)
 {
        pte_t *pte;
        u64 pfn;
+       unsigned long size = PAGE_SIZE;
 
        pfn = phys_addr >> PAGE_SHIFT;
        pte = pte_alloc_kernel_track(pmd, addr, mask);
@@ -94,9 +96,22 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                return -ENOMEM;
        do {
                BUG_ON(!pte_none(*pte));
+
+#ifdef CONFIG_HUGETLB_PAGE
+               size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
+               if (size != PAGE_SIZE) {
+                       pte_t entry = pfn_pte(pfn, prot);
+
+                       entry = pte_mkhuge(entry);
+                       entry = arch_make_huge_pte(entry, ilog2(size), 0);
+                       set_huge_pte_at(&init_mm, addr, pte, entry);
+                       pfn += PFN_DOWN(size);
+                       continue;
+               }
+#endif
                set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
                pfn++;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (pte += PFN_DOWN(size), addr += size, addr != end);
        *mask |= PGTBL_PTE_MODIFIED;
        return 0;
 }
@@ -145,7 +160,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
                        continue;
                }
 
-               if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+               if (vmap_pte_range(pmd, addr, next, phys_addr, prot, max_page_shift, mask))
                        return -ENOMEM;
        } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
        return 0;