arm: implement the new page table range API
author Matthew Wilcox (Oracle) <willy@infradead.org>
Wed, 2 Aug 2023 15:13:37 +0000 (16:13 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 24 Aug 2023 23:20:20 +0000 (16:20 -0700)
Add set_ptes(), update_mmu_cache_range(), flush_dcache_folio() and
flush_icache_pages().  Change the PG_dcache_clean flag from being per-page
to per-folio, which makes __dma_page_dev_to_cpu() a bit more exciting.
Also add flush_cache_pages(), even though this isn't used by generic code
(yet?).
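
For a sense of the caller-side contract (an illustrative userspace model,
not part of this patch; all names below are stand-ins): where a caller
previously installed one PTE per page in a loop, it can now hand the whole
run to a single call, with the page frame advancing internally.

    #include <assert.h>

    #define PAGE_SIZE 4096UL

    static unsigned long table[8];      /* toy "page table", one word per PTE */

    /* stand-in for set_ptes(): install nr PTEs for consecutive page frames */
    static void set_ptes_model(unsigned int idx, unsigned long first_pte,
                               unsigned int nr)
    {
            while (nr--) {
                    table[idx++] = first_pte;
                    first_pte += PAGE_SIZE;     /* advance to the next frame */
            }
    }

    int main(void)
    {
            unsigned int i;

            /* old style: one call per page, the caller advances the frame */
            for (i = 0; i < 4; i++)
                    set_ptes_model(i, 0x80000000UL + i * PAGE_SIZE, 1);

            /* new style: one call covers the whole four-page run */
            set_ptes_model(4, 0x90000000UL, 4);

            assert(table[3] == 0x80003000UL);
            assert(table[7] == 0x90003000UL);
            return 0;
    }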

[m.szyprowski@samsung.com: fix potential endless loop in __dma_page_dev_to_cpu()]
Link: https://lkml.kernel.org/r/20230809172737.3574190-1-m.szyprowski@samsung.com
[willy@infradead.org: fix folio conversion in __dma_page_dev_to_cpu()]
Link: https://lkml.kernel.org/r/20230823191852.1556561-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20230802151406.3735276-10-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
12 files changed:
arch/arm/include/asm/cacheflush.h
arch/arm/include/asm/pgtable.h
arch/arm/include/asm/tlbflush.h
arch/arm/mm/copypage-v4mc.c
arch/arm/mm/copypage-v6.c
arch/arm/mm/copypage-xscale.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/fault-armv.c
arch/arm/mm/flush.c
arch/arm/mm/mm.h
arch/arm/mm/mmu.c
arch/arm/mm/nommu.c

index a094f96..841e268 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -231,14 +231,15 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
                                        vma->vm_flags);
 }
 
-static inline void
-vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
+static inline void vivt_flush_cache_pages(struct vm_area_struct *vma,
+               unsigned long user_addr, unsigned long pfn, unsigned int nr)
 {
        struct mm_struct *mm = vma->vm_mm;
 
        if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
                unsigned long addr = user_addr & PAGE_MASK;
-               __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
+               __cpuc_flush_user_range(addr, addr + nr * PAGE_SIZE,
+                               vma->vm_flags);
        }
 }
 
@@ -247,15 +248,17 @@ vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
                vivt_flush_cache_mm(mm)
 #define flush_cache_range(vma,start,end) \
                vivt_flush_cache_range(vma,start,end)
-#define flush_cache_page(vma,addr,pfn) \
-               vivt_flush_cache_page(vma,addr,pfn)
+#define flush_cache_pages(vma, addr, pfn, nr) \
+               vivt_flush_cache_pages(vma, addr, pfn, nr)
 #else
-extern void flush_cache_mm(struct mm_struct *mm);
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
-extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
+void flush_cache_mm(struct mm_struct *mm);
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+void flush_cache_pages(struct vm_area_struct *vma, unsigned long user_addr,
+               unsigned long pfn, unsigned int nr);
 #endif
 
 #define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define flush_cache_page(vma, addr, pfn) flush_cache_pages(vma, addr, pfn, 1)
 
 /*
  * flush_icache_user_range is used when we want to ensure that the
@@ -289,7 +292,9 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
  * See update_mmu_cache for the user space part.
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *);
+void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *folio);
+#define flush_dcache_folio flush_dcache_folio
 
 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
@@ -321,6 +326,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
  * duplicate cache flushing elsewhere performed by flush_dcache_page().
  */
 #define flush_icache_page(vma,page)    do { } while (0)
+#define flush_icache_pages(vma, page, nr)      do { } while (0)
 
 /*
  * flush_cache_vmap() is used when creating mappings (eg, via vmap,
index 34662a9..ba573f2 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -207,8 +207,9 @@ static inline void __sync_icache_dcache(pte_t pteval)
 extern void __sync_icache_dcache(pte_t pteval);
 #endif
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-                     pte_t *ptep, pte_t pteval);
+void set_ptes(struct mm_struct *mm, unsigned long addr,
+                     pte_t *ptep, pte_t pteval, unsigned int nr);
+#define set_ptes set_ptes
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {
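
The trailing "#define set_ptes set_ptes" is the usual kernel idiom for
advertising an architecture override: the generic header only supplies its
fallback under "#ifndef set_ptes".  A minimal sketch of the mechanism
(userspace, reusing the name purely for illustration):

    #include <stdio.h>

    /* "arch" definition plus the marker macro */
    static void set_ptes(int nr) { printf("arch set_ptes(%d)\n", nr); }
    #define set_ptes set_ptes

    /* "generic" fallback: compiled only when no arch override exists */
    #ifndef set_ptes
    static void set_ptes(int nr) { printf("generic set_ptes(%d)\n", nr); }
    #endif

    int main(void)
    {
            set_ptes(3);        /* resolves to the "arch" version */
            return 0;
    }
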
index 0ccc985..38c6e4a 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -619,18 +619,22 @@ extern void flush_bp_all(void);
  * If PG_dcache_clean is not set for the page, we need to ensure that any
  * cache entries for the kernels virtual memory range are written
  * back to the page. On ARMv6 and later, the cache coherency is handled via
- * the set_pte_at() function.
+ * the set_ptes() function.
  */
 #if __LINUX_ARM_ARCH__ < 6
-extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-       pte_t *ptep);
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+               unsigned long addr, pte_t *ptep, unsigned int nr);
 #else
-static inline void update_mmu_cache(struct vm_area_struct *vma,
-                                   unsigned long addr, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+               struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+               unsigned int nr)
 {
 }
 #endif
 
+#define update_mmu_cache(vma, addr, ptep) \
+       update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #endif
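
Existing callers need no change: update_mmu_cache() forwards to the range
version with no struct vm_fault and a single page.  A toy model of that
forwarding pattern (userspace, illustrative; names are stand-ins):

    #include <stdio.h>

    struct vm_fault;    /* opaque here; non-fault callers just pass NULL */

    static void update_range_model(struct vm_fault *vmf, unsigned long addr,
                                   unsigned int nr)
    {
            printf("update %u pte(s) at %#lx, vmf %s\n", nr, addr,
                   vmf ? "present" : "NULL");
    }

    #define update_one_model(addr) update_range_model(NULL, addr, 1)

    int main(void)
    {
            update_one_model(0x4000UL);                 /* old-style caller */
            update_range_model(NULL, 0x8000UL, 4);      /* batched caller */
            return 0;
    }
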
index f1da3b4..7ddd82b 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -64,10 +64,11 @@ static void mc_copy_user_page(void *from, void *to)
 void v4_mc_copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
 {
+       struct folio *src = page_folio(from);
        void *kto = kmap_atomic(to);
 
-       if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-               __flush_dcache_page(page_mapping_file(from), from);
+       if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+               __flush_dcache_folio(folio_flush_mapping(src), src);
 
        raw_spin_lock(&minicache_lock);
 
index d8a115d..a1a71f3 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -69,11 +69,12 @@ static void discard_old_kernel_data(void *kto)
 static void v6_copy_user_highpage_aliasing(struct page *to,
        struct page *from, unsigned long vaddr, struct vm_area_struct *vma)
 {
+       struct folio *src = page_folio(from);
        unsigned int offset = CACHE_COLOUR(vaddr);
        unsigned long kfrom, kto;
 
-       if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-               __flush_dcache_page(page_mapping_file(from), from);
+       if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+               __flush_dcache_folio(folio_flush_mapping(src), src);
 
        /* FIXME: not highmem safe */
        discard_old_kernel_data(page_address(to));
index bcb4856..f1e29d3 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -84,10 +84,11 @@ static void mc_copy_user_page(void *from, void *to)
 void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
 {
+       struct folio *src = page_folio(from);
        void *kto = kmap_atomic(to);
 
-       if (!test_and_set_bit(PG_dcache_clean, &from->flags))
-               __flush_dcache_page(page_mapping_file(from), from);
+       if (!test_and_set_bit(PG_dcache_clean, &src->flags))
+               __flush_dcache_folio(folio_flush_mapping(src), src);
 
        raw_spin_lock(&minicache_lock);
 
index 033a1bc..5409225 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -709,19 +709,21 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
         * Mark the D-cache clean for these pages to avoid extra flushing.
         */
        if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
-               unsigned long pfn;
-               size_t left = size;
-
-               pfn = page_to_pfn(page) + off / PAGE_SIZE;
-               off %= PAGE_SIZE;
-               if (off) {
-                       pfn++;
-                       left -= PAGE_SIZE - off;
-               }
-               while (left >= PAGE_SIZE) {
-                       page = pfn_to_page(pfn++);
-                       set_bit(PG_dcache_clean, &page->flags);
-                       left -= PAGE_SIZE;
+               struct folio *folio = pfn_folio(paddr / PAGE_SIZE);
+               size_t offset = offset_in_folio(folio, paddr);
+
+               for (;;) {
+                       size_t sz = folio_size(folio) - offset;
+
+                       if (size < sz)
+                               break;
+                       if (!offset)
+                               set_bit(PG_dcache_clean, &folio->flags);
+                       offset = 0;
+                       size -= sz;
+                       if (!size)
+                               break;
+                       folio = folio_next(folio);
                }
        }
 }
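
The rewritten walk only marks a folio clean when the DMA range covers it
end to end: a folio entered part-way (offset != 0) or exited early
(size < sz) keeps its flag untouched, since other parts of it may still
hold stale cache lines.  A userspace model of the loop with invented folio
sizes (not kernel code):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define NFOLIOS 3

    /* invented layout: a small folio, a large one, another small one */
    static const size_t folio_bytes[NFOLIOS] = { 4096, 16384, 4096 };
    static bool clean[NFOLIOS];

    /* offset: where the range starts inside folio 0; size: bytes in range */
    static void walk(size_t offset, size_t size)
    {
            unsigned int i = 0;

            for (;;) {
                    size_t sz = folio_bytes[i] - offset;

                    if (size < sz)      /* range ends inside this folio */
                            break;
                    if (!offset)        /* covered from its first byte */
                            clean[i] = true;
                    offset = 0;
                    size -= sz;
                    if (!size)
                            break;
                    i++;                /* stands in for folio_next() */
            }
    }

    int main(void)
    {
            int i;

            /* start 2048 bytes into folio 0, end 1024 bytes into folio 2:
             * only folio 1 is covered end to end */
            walk(2048, 2048 + 16384 + 1024);
            for (i = 0; i < NFOLIOS; i++)
                    printf("folio %d: %s\n", i,
                           clean[i] ? "clean" : "left alone");
            return 0;
    }
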
index 7cb1254..2286c2e 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -180,12 +180,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
  *
  * Note that the pte lock will be held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-       pte_t *ptep)
+void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
+               unsigned long addr, pte_t *ptep, unsigned int nr)
 {
        unsigned long pfn = pte_pfn(*ptep);
        struct address_space *mapping;
-       struct page *page;
+       struct folio *folio;
 
        if (!pfn_valid(pfn))
                return;
@@ -194,13 +194,13 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
         * The zero page is never written to, so never has any dirty
         * cache lines, and therefore never needs to be flushed.
         */
-       page = pfn_to_page(pfn);
-       if (page == ZERO_PAGE(0))
+       if (is_zero_pfn(pfn))
                return;
 
-       mapping = page_mapping_file(page);
-       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-               __flush_dcache_page(mapping, page);
+       folio = page_folio(pfn_to_page(pfn));
+       mapping = folio_flush_mapping(folio);
+       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
+               __flush_dcache_folio(mapping, folio);
        if (mapping) {
                if (cache_is_vivt())
                        make_coherent(mapping, vma, addr, ptep, pfn);
index 2508be9..d19d140 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -95,10 +95,10 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
                __flush_icache_all();
 }
 
-void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
+void flush_cache_pages(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn, unsigned int nr)
 {
        if (cache_is_vivt()) {
-               vivt_flush_cache_page(vma, user_addr, pfn);
+               vivt_flush_cache_pages(vma, user_addr, pfn, nr);
                return;
        }
 
@@ -196,29 +196,31 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 #endif
 }
 
-void __flush_dcache_page(struct address_space *mapping, struct page *page)
+void __flush_dcache_folio(struct address_space *mapping, struct folio *folio)
 {
        /*
         * Writeback any data associated with the kernel mapping of this
         * page.  This ensures that data in the physical page is mutually
         * coherent with the kernels mapping.
         */
-       if (!PageHighMem(page)) {
-               __cpuc_flush_dcache_area(page_address(page), page_size(page));
+       if (!folio_test_highmem(folio)) {
+               __cpuc_flush_dcache_area(folio_address(folio),
+                                       folio_size(folio));
        } else {
                unsigned long i;
                if (cache_is_vipt_nonaliasing()) {
-                       for (i = 0; i < compound_nr(page); i++) {
-                               void *addr = kmap_atomic(page + i);
+                       for (i = 0; i < folio_nr_pages(folio); i++) {
+                               void *addr = kmap_local_folio(folio,
+                                                               i * PAGE_SIZE);
                                __cpuc_flush_dcache_area(addr, PAGE_SIZE);
-                               kunmap_atomic(addr);
+                               kunmap_local(addr);
                        }
                } else {
-                       for (i = 0; i < compound_nr(page); i++) {
-                               void *addr = kmap_high_get(page + i);
+                       for (i = 0; i < folio_nr_pages(folio); i++) {
+                               void *addr = kmap_high_get(folio_page(folio, i));
                                if (addr) {
                                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
-                                       kunmap_high(page + i);
+                                       kunmap_high(folio_page(folio, i));
                                }
                        }
                }
@@ -230,15 +232,14 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
         * userspace colour, which is congruent with page->index.
         */
        if (mapping && cache_is_vipt_aliasing())
-               flush_pfn_alias(page_to_pfn(page),
-                               page->index << PAGE_SHIFT);
+               flush_pfn_alias(folio_pfn(folio), folio_pos(folio));
 }
 
-static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
+static void __flush_dcache_aliases(struct address_space *mapping, struct folio *folio)
 {
        struct mm_struct *mm = current->active_mm;
-       struct vm_area_struct *mpnt;
-       pgoff_t pgoff;
+       struct vm_area_struct *vma;
+       pgoff_t pgoff, pgoff_end;
 
        /*
         * There are possible user space mappings of this page:
@@ -246,21 +247,36 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
         *   data in the current VM view associated with this page.
         * - aliasing VIPT: we only need to find one mapping of this page.
         */
-       pgoff = page->index;
+       pgoff = folio->index;
+       pgoff_end = pgoff + folio_nr_pages(folio) - 1;
 
        flush_dcache_mmap_lock(mapping);
-       vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long offset;
+       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff_end) {
+               unsigned long start, offset, pfn;
+               unsigned int nr;
 
                /*
                 * If this VMA is not in our MM, we can ignore it.
                 */
-               if (mpnt->vm_mm != mm)
+               if (vma->vm_mm != mm)
                        continue;
-               if (!(mpnt->vm_flags & VM_MAYSHARE))
+               if (!(vma->vm_flags & VM_MAYSHARE))
                        continue;
-               offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
-               flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
+
+               start = vma->vm_start;
+               pfn = folio_pfn(folio);
+               nr = folio_nr_pages(folio);
+               offset = pgoff - vma->vm_pgoff;
+               if (offset > -nr) {
+                       pfn -= offset;
+                       nr += offset;
+               } else {
+                       start += offset * PAGE_SIZE;
+               }
+               if (start + nr * PAGE_SIZE > vma->vm_end)
+                       nr = (vma->vm_end - start) / PAGE_SIZE;
+
+               flush_cache_pages(vma, start, pfn, nr);
        }
        flush_dcache_mmap_unlock(mapping);
 }
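
The offset arithmetic here leans on unsigned wraparound: offset wraps
"negative" (huge) when the folio begins before the VMA, and the test
offset > -nr is then true exactly when the folio still reaches into the
VMA.  A standalone model of the clamping (userspace, illustrative; the
explicit cast on -nr keeps the comparison faithful on hosts where long is
wider than int, which 32-bit ARM does not need):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static void clamp(unsigned long pgoff, unsigned long vm_pgoff,
                      unsigned long vm_start, unsigned long vm_end,
                      unsigned long pfn, unsigned int nr)
    {
            unsigned long start = vm_start;
            unsigned long offset = pgoff - vm_pgoff;    /* may wrap */

            if (offset > -(unsigned long)nr) {  /* folio starts before VMA */
                    pfn -= offset;              /* skip pages outside it */
                    nr += offset;
            } else {                            /* folio starts inside VMA */
                    start += offset * PAGE_SIZE;
            }
            if (start + nr * PAGE_SIZE > vm_end)
                    nr = (vm_end - start) / PAGE_SIZE;

            printf("flush %u page(s) from %#lx, first pfn %lu\n",
                   nr, start, pfn);
    }

    int main(void)
    {
            /* 4-page folio at pgoff 8, pfn 100; VMA [0x10000, 0x20000) */
            clamp(8, 10, 0x10000UL, 0x20000UL, 100, 4); /* 2 pages, pfn 102 */
            clamp(8,  6, 0x10000UL, 0x20000UL, 100, 4); /* 4 pages, 0x12000 */
            return 0;
    }
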
@@ -269,7 +285,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
 void __sync_icache_dcache(pte_t pteval)
 {
        unsigned long pfn;
-       struct page *page;
+       struct folio *folio;
        struct address_space *mapping;
 
        if (cache_is_vipt_nonaliasing() && !pte_exec(pteval))
@@ -279,14 +295,14 @@ void __sync_icache_dcache(pte_t pteval)
        if (!pfn_valid(pfn))
                return;
 
-       page = pfn_to_page(pfn);
+       folio = page_folio(pfn_to_page(pfn));
        if (cache_is_vipt_aliasing())
-               mapping = page_mapping_file(page);
+               mapping = folio_flush_mapping(folio);
        else
                mapping = NULL;
 
-       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-               __flush_dcache_page(mapping, page);
+       if (!test_and_set_bit(PG_dcache_clean, &folio->flags))
+               __flush_dcache_folio(mapping, folio);
 
        if (pte_exec(pteval))
                __flush_icache_all();
@@ -312,7 +328,7 @@ void __sync_icache_dcache(pte_t pteval)
  * Note that we disable the lazy flush for SMP configurations where
  * the cache maintenance operations are not automatically broadcasted.
  */
-void flush_dcache_page(struct page *page)
+void flush_dcache_folio(struct folio *folio)
 {
        struct address_space *mapping;
 
@@ -320,31 +336,36 @@ void flush_dcache_page(struct page *page)
         * The zero page is never written to, so never has any dirty
         * cache lines, and therefore never needs to be flushed.
         */
-       if (page == ZERO_PAGE(0))
+       if (is_zero_pfn(folio_pfn(folio)))
                return;
 
        if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
-               if (test_bit(PG_dcache_clean, &page->flags))
-                       clear_bit(PG_dcache_clean, &page->flags);
+               if (test_bit(PG_dcache_clean, &folio->flags))
+                       clear_bit(PG_dcache_clean, &folio->flags);
                return;
        }
 
-       mapping = page_mapping_file(page);
+       mapping = folio_flush_mapping(folio);
 
        if (!cache_ops_need_broadcast() &&
-           mapping && !page_mapcount(page))
-               clear_bit(PG_dcache_clean, &page->flags);
+           mapping && !folio_mapped(folio))
+               clear_bit(PG_dcache_clean, &folio->flags);
        else {
-               __flush_dcache_page(mapping, page);
+               __flush_dcache_folio(mapping, folio);
                if (mapping && cache_is_vivt())
-                       __flush_dcache_aliases(mapping, page);
+                       __flush_dcache_aliases(mapping, folio);
                else if (mapping)
                        __flush_icache_all();
-               set_bit(PG_dcache_clean, &page->flags);
+               set_bit(PG_dcache_clean, &folio->flags);
        }
 }
-EXPORT_SYMBOL(flush_dcache_page);
+EXPORT_SYMBOL(flush_dcache_folio);
 
+void flush_dcache_page(struct page *page)
+{
+       flush_dcache_folio(page_folio(page));
+}
+EXPORT_SYMBOL(flush_dcache_page);
 /*
  * Flush an anonymous page so that users of get_user_pages()
  * can safely access the data.  The expected sequence is:
index d7ffccb..4193163 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -45,7 +45,7 @@ struct mem_type {
 
 const struct mem_type *get_mem_type(unsigned int type);
 
-extern void __flush_dcache_page(struct address_space *mapping, struct page *page);
+void __flush_dcache_folio(struct address_space *mapping, struct folio *folio);
 
 /*
  * ARM specific vm_struct->flags bits.
index fdeaee3..674ed71 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1789,7 +1789,7 @@ void __init paging_init(const struct machine_desc *mdesc)
        bootmem_init();
 
        empty_zero_page = virt_to_page(zero_page);
-       __flush_dcache_page(NULL, empty_zero_page);
+       __flush_dcache_folio(NULL, page_folio(empty_zero_page));
 }
 
 void __init early_mm_init(const struct machine_desc *mdesc)
@@ -1798,8 +1798,8 @@ void __init early_mm_init(const struct machine_desc *mdesc)
        early_paging_init(mdesc);
 }
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-                             pte_t *ptep, pte_t pteval)
+void set_ptes(struct mm_struct *mm, unsigned long addr,
+                             pte_t *ptep, pte_t pteval, unsigned int nr)
 {
        unsigned long ext = 0;
 
@@ -1809,5 +1809,11 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr,
                ext |= PTE_EXT_NG;
        }
 
-       set_pte_ext(ptep, pteval, ext);
+       for (;;) {
+               set_pte_ext(ptep, pteval, ext);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               pte_val(pteval) += PAGE_SIZE;
+       }
 }
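
Two details worth noting in the loop above: the post-test shape avoids
stepping ptep and pteval past the final entry, and pte_val(pteval) +=
PAGE_SIZE advances the mapping one page frame per PTE because the PTE
keeps the frame address above the low flag bits.  A quick standalone
check of that bit-layout argument (userspace, illustrative):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u

    int main(void)
    {
            uint32_t pte = 0x80000000u | 0x3u;  /* frame | low flag bits */

            pte += PAGE_SIZE;
            /* flags intact, frame advanced by exactly one page */
            assert((pte & (PAGE_SIZE - 1)) == 0x3u);
            assert((pte & ~(uint32_t)(PAGE_SIZE - 1)) == 0x80001000u);
            return 0;
    }
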
index 43cfd06..c415f38 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -180,6 +180,12 @@ void setup_mm_for_reboot(void)
 {
 }
 
+void flush_dcache_folio(struct folio *folio)
+{
+       __cpuc_flush_dcache_area(folio_address(folio), folio_size(folio));
+}
+EXPORT_SYMBOL(flush_dcache_folio);
+
 void flush_dcache_page(struct page *page)
 {
        __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);