mm: fix misplaced unlock_page in do_wp_page()
linux-2.6-microblaze.git: mm/memory.c

diff --git a/mm/memory.c b/mm/memory.c
index 148eafb..f3eb559 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -73,6 +73,7 @@
 #include <linux/numa.h>
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
+#include <linux/vmalloc.h>
 
 #include <trace/events/kmem.h>
 
@@ -83,6 +84,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+#include "pgalloc-track.h"
 #include "internal.h"
 
 #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
@@ -693,85 +695,92 @@ out:
  * covered by this vma.
  */
 
-static inline unsigned long
-copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static unsigned long
+copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
                unsigned long addr, int *rss)
 {
        unsigned long vm_flags = vma->vm_flags;
        pte_t pte = *src_pte;
        struct page *page;
+       swp_entry_t entry = pte_to_swp_entry(pte);
+
+       if (likely(!non_swap_entry(entry))) {
+               if (swap_duplicate(entry) < 0)
+                       return entry.val;
+
+               /* make sure dst_mm is on swapoff's mmlist. */
+               if (unlikely(list_empty(&dst_mm->mmlist))) {
+                       spin_lock(&mmlist_lock);
+                       if (list_empty(&dst_mm->mmlist))
+                               list_add(&dst_mm->mmlist,
+                                               &src_mm->mmlist);
+                       spin_unlock(&mmlist_lock);
+               }
+               rss[MM_SWAPENTS]++;
+       } else if (is_migration_entry(entry)) {
+               page = migration_entry_to_page(entry);
 
-       /* pte contains position in swap or file, so copy. */
-       if (unlikely(!pte_present(pte))) {
-               swp_entry_t entry = pte_to_swp_entry(pte);
-
-               if (likely(!non_swap_entry(entry))) {
-                       if (swap_duplicate(entry) < 0)
-                               return entry.val;
-
-                       /* make sure dst_mm is on swapoff's mmlist. */
-                       if (unlikely(list_empty(&dst_mm->mmlist))) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&dst_mm->mmlist))
-                                       list_add(&dst_mm->mmlist,
-                                                       &src_mm->mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
-                       rss[MM_SWAPENTS]++;
-               } else if (is_migration_entry(entry)) {
-                       page = migration_entry_to_page(entry);
-
-                       rss[mm_counter(page)]++;
-
-                       if (is_write_migration_entry(entry) &&
-                                       is_cow_mapping(vm_flags)) {
-                               /*
-                                * COW mappings require pages in both
-                                * parent and child to be set to read.
-                                */
-                               make_migration_entry_read(&entry);
-                               pte = swp_entry_to_pte(entry);
-                               if (pte_swp_soft_dirty(*src_pte))
-                                       pte = pte_swp_mksoft_dirty(pte);
-                               if (pte_swp_uffd_wp(*src_pte))
-                                       pte = pte_swp_mkuffd_wp(pte);
-                               set_pte_at(src_mm, addr, src_pte, pte);
-                       }
-               } else if (is_device_private_entry(entry)) {
-                       page = device_private_entry_to_page(entry);
+               rss[mm_counter(page)]++;
 
+               if (is_write_migration_entry(entry) &&
+                               is_cow_mapping(vm_flags)) {
                        /*
-                        * Update rss count even for unaddressable pages, as
-                        * they should treated just like normal pages in this
-                        * respect.
-                        *
-                        * We will likely want to have some new rss counters
-                        * for unaddressable pages, at some point. But for now
-                        * keep things as they are.
+                        * COW mappings require pages in both
+                        * parent and child to be set to read.
                         */
-                       get_page(page);
-                       rss[mm_counter(page)]++;
-                       page_dup_rmap(page, false);
+                       make_migration_entry_read(&entry);
+                       pte = swp_entry_to_pte(entry);
+                       if (pte_swp_soft_dirty(*src_pte))
+                               pte = pte_swp_mksoft_dirty(pte);
+                       if (pte_swp_uffd_wp(*src_pte))
+                               pte = pte_swp_mkuffd_wp(pte);
+                       set_pte_at(src_mm, addr, src_pte, pte);
+               }
+       } else if (is_device_private_entry(entry)) {
+               page = device_private_entry_to_page(entry);
 
-                       /*
-                        * We do not preserve soft-dirty information, because so
-                        * far, checkpoint/restore is the only feature that
-                        * requires that. And checkpoint/restore does not work
-                        * when a device driver is involved (you cannot easily
-                        * save and restore device driver state).
-                        */
-                       if (is_write_device_private_entry(entry) &&
-                           is_cow_mapping(vm_flags)) {
-                               make_device_private_entry_read(&entry);
-                               pte = swp_entry_to_pte(entry);
-                               if (pte_swp_uffd_wp(*src_pte))
-                                       pte = pte_swp_mkuffd_wp(pte);
-                               set_pte_at(src_mm, addr, src_pte, pte);
-                       }
+               /*
+                * Update rss count even for unaddressable pages, as
+                * they should be treated just like normal pages in this
+                * respect.
+                *
+                * We will likely want to have some new rss counters
+                * for unaddressable pages, at some point. But for now
+                * keep things as they are.
+                */
+               get_page(page);
+               rss[mm_counter(page)]++;
+               page_dup_rmap(page, false);
+
+               /*
+                * We do not preserve soft-dirty information, because so
+                * far, checkpoint/restore is the only feature that
+                * requires that. And checkpoint/restore does not work
+                * when a device driver is involved (you cannot easily
+                * save and restore device driver state).
+                */
+               if (is_write_device_private_entry(entry) &&
+                   is_cow_mapping(vm_flags)) {
+                       make_device_private_entry_read(&entry);
+                       pte = swp_entry_to_pte(entry);
+                       if (pte_swp_uffd_wp(*src_pte))
+                               pte = pte_swp_mkuffd_wp(pte);
+                       set_pte_at(src_mm, addr, src_pte, pte);
                }
-               goto out_set_pte;
        }
+       set_pte_at(dst_mm, addr, dst_pte, pte);
+       return 0;
+}
+
+static inline void
+copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+               pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+               unsigned long addr, int *rss)
+{
+       unsigned long vm_flags = vma->vm_flags;
+       pte_t pte = *src_pte;
+       struct page *page;
 
        /*
         * If it's a COW mapping, write protect it both
@@ -805,9 +814,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                rss[mm_counter(page)]++;
        }
 
-out_set_pte:
        set_pte_at(dst_mm, addr, dst_pte, pte);
-       return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
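The hunk above only trims the tail of what is now copy_present_pte() (the old out_set_pte label goes away). The middle of that function falls between the two hunks and is not shown in this diff; judging from the surrounding context lines it reads roughly as follows. This is an illustrative reconstruction, so treat the exact statements as assumptions rather than part of the patch:

	/* Illustrative reconstruction of the unshown middle of copy_present_pte(). */
	/*
	 * If it's a COW mapping, write protect it both
	 * in the parent and the child.
	 */
	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
		ptep_set_wrprotect(src_mm, addr, src_pte);
		pte = pte_wrprotect(pte);
	}

	/* A shared mapping is marked clean in the child. */
	if (vm_flags & VM_SHARED)
		pte = pte_mkclean(pte);
	pte = pte_mkold(pte);

	page = vm_normal_page(vma, addr, pte);
	if (page) {
		get_page(page);
		page_dup_rmap(page, false);
		rss[mm_counter(page)]++;
	}

	set_pte_at(dst_mm, addr, dst_pte, pte);
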
@@ -849,10 +856,17 @@ again:
                        progress++;
                        continue;
                }
-               entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+               if (unlikely(!pte_present(*src_pte))) {
+                       entry.val = copy_nonpresent_pte(dst_mm, src_mm,
+                                                       dst_pte, src_pte,
                                                        vma, addr, rss);
-               if (entry.val)
-                       break;
+                       if (entry.val)
+                               break;
+                       progress += 8;
+                       continue;
+               }
+               copy_present_pte(dst_mm, src_mm, dst_pte, src_pte,
+                                vma, addr, rss);
                progress += 8;
        } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
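The hunks above split copy_one_pte() into two helpers: the pte_present() test moves out into copy_pte_range(), copy_nonpresent_pte() now deals only with swap, migration and device-private entries, and copy_present_pte() handles present PTEs. Assembled from the '+' and context lines, the inner loop of copy_pte_range() now reads roughly like this (a sketch for orientation, not an extra change):

	if (unlikely(!pte_present(*src_pte))) {
		/* Swap, migration or device-private entry; a non-zero
		 * return value propagates a failed swap_duplicate() back
		 * to the caller, which bails out of the copy loop. */
		entry.val = copy_nonpresent_pte(dst_mm, src_mm, dst_pte,
						src_pte, vma, addr, rss);
		if (entry.val)
			break;
		progress += 8;
		continue;
	}
	copy_present_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
	progress += 8;
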
@@ -2206,7 +2220,8 @@ EXPORT_SYMBOL(vm_iomap_memory);
 
 static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                     unsigned long addr, unsigned long end,
-                                    pte_fn_t fn, void *data, bool create)
+                                    pte_fn_t fn, void *data, bool create,
+                                    pgtbl_mod_mask *mask)
 {
        pte_t *pte;
        int err = 0;
@@ -2214,7 +2229,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
        if (create) {
                pte = (mm == &init_mm) ?
-                       pte_alloc_kernel(pmd, addr) :
+                       pte_alloc_kernel_track(pmd, addr, mask) :
                        pte_alloc_map_lock(mm, pmd, addr, &ptl);
                if (!pte)
                        return -ENOMEM;
@@ -2235,6 +2250,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                break;
                }
        } while (addr += PAGE_SIZE, addr != end);
+       *mask |= PGTBL_PTE_MODIFIED;
 
        arch_leave_lazy_mmu_mode();
 
@@ -2245,7 +2261,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 
 static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
                                     unsigned long addr, unsigned long end,
-                                    pte_fn_t fn, void *data, bool create)
+                                    pte_fn_t fn, void *data, bool create,
+                                    pgtbl_mod_mask *mask)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -2254,7 +2271,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
        BUG_ON(pud_huge(*pud));
 
        if (create) {
-               pmd = pmd_alloc(mm, pud, addr);
+               pmd = pmd_alloc_track(mm, pud, addr, mask);
                if (!pmd)
                        return -ENOMEM;
        } else {
@@ -2264,7 +2281,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
                next = pmd_addr_end(addr, end);
                if (create || !pmd_none_or_clear_bad(pmd)) {
                        err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
-                                                create);
+                                                create, mask);
                        if (err)
                                break;
                }
@@ -2274,14 +2291,15 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 
 static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
                                     unsigned long addr, unsigned long end,
-                                    pte_fn_t fn, void *data, bool create)
+                                    pte_fn_t fn, void *data, bool create,
+                                    pgtbl_mod_mask *mask)
 {
        pud_t *pud;
        unsigned long next;
        int err = 0;
 
        if (create) {
-               pud = pud_alloc(mm, p4d, addr);
+               pud = pud_alloc_track(mm, p4d, addr, mask);
                if (!pud)
                        return -ENOMEM;
        } else {
@@ -2291,7 +2309,7 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
                next = pud_addr_end(addr, end);
                if (create || !pud_none_or_clear_bad(pud)) {
                        err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
-                                                create);
+                                                create, mask);
                        if (err)
                                break;
                }
@@ -2301,14 +2319,15 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
 
 static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
                                     unsigned long addr, unsigned long end,
-                                    pte_fn_t fn, void *data, bool create)
+                                    pte_fn_t fn, void *data, bool create,
+                                    pgtbl_mod_mask *mask)
 {
        p4d_t *p4d;
        unsigned long next;
        int err = 0;
 
        if (create) {
-               p4d = p4d_alloc(mm, pgd, addr);
+               p4d = p4d_alloc_track(mm, pgd, addr, mask);
                if (!p4d)
                        return -ENOMEM;
        } else {
@@ -2318,7 +2337,7 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
                next = p4d_addr_end(addr, end);
                if (create || !p4d_none_or_clear_bad(p4d)) {
                        err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
-                                                create);
+                                                create, mask);
                        if (err)
                                break;
                }
@@ -2331,8 +2350,9 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
                                 void *data, bool create)
 {
        pgd_t *pgd;
-       unsigned long next;
+       unsigned long start = addr, next;
        unsigned long end = addr + size;
+       pgtbl_mod_mask mask = 0;
        int err = 0;
 
        if (WARN_ON(addr >= end))
@@ -2343,11 +2363,14 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
                next = pgd_addr_end(addr, end);
                if (!create && pgd_none_or_clear_bad(pgd))
                        continue;
-               err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
+               err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
 
+       if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+               arch_sync_kernel_mappings(start, start + size);
+
        return err;
 }
 
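The apply_to_*_range() changes above thread a pgtbl_mod_mask through every level so that __apply_to_page_range() can tell whether any page tables were newly allocated and, if so, call arch_sync_kernel_mappings() over the affected range. The *_alloc_track() helpers come from the newly included "pgalloc-track.h"; their shape is roughly the following, shown here as an assumed sketch for one level:

	/* Assumed sketch of one tracking helper: allocate the next-level
	 * table only if it is missing, and record that in the caller's
	 * mask so the top-level walker knows a sync may be needed
	 * (mask & ARCH_PAGE_TABLE_SYNC_MASK). */
	static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
					     unsigned long address,
					     pgtbl_mod_mask *mod_mask)
	{
		if (unlikely(pud_none(*pud))) {
			if (__pmd_alloc(mm, pud, address))
				return NULL;
			*mod_mask |= PGTBL_PUD_MODIFIED;
		}
		return pmd_offset(pud, address);
	}
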
@@ -2944,8 +2967,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
                 * page count reference, and the page is locked,
                 * it's dark out, and we're wearing sunglasses. Hit it.
                 */
-               wp_page_reuse(vmf);
                unlock_page(page);
+               wp_page_reuse(vmf);
                return VM_FAULT_WRITE;
        } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
                                        (VM_WRITE|VM_SHARED))) {
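
This final hunk is the change named in the subject line: the only difference is ordering, with unlock_page() now called before wp_page_reuse(), so the reuse path of do_wp_page() drops the page lock before reusing the page and returning. After the fix the tail of that path reads:

	/* Reuse path in do_wp_page() after this fix: drop the page lock
	 * first, then reuse the page for the write fault and return. */
	unlock_page(page);
	wp_page_reuse(vmf);
	return VM_FAULT_WRITE;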