diff --git a/mm/memory.c b/mm/memory.c
index 550405f..cbdc2cd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2260,26 +2260,17 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
        return 0;
 }
 
-/**
- * remap_pfn_range - remap kernel memory to userspace
- * @vma: user vma to map to
- * @addr: target page aligned user address to start at
- * @pfn: page frame number of kernel physical memory address
- * @size: size of mapping area
- * @prot: page protection flags for this mapping
- *
- * Note: this is only safe if the mm semaphore is held when called.
- *
- * Return: %0 on success, negative error code otherwise.
+/*
+ * Variant of remap_pfn_range that does not call track_pfn_remap.  The caller
+ * must have pre-validated the caching bits of the pgprot_t.
  */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
-                   unsigned long pfn, unsigned long size, pgprot_t prot)
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+               unsigned long pfn, unsigned long size, pgprot_t prot)
 {
        pgd_t *pgd;
        unsigned long next;
        unsigned long end = addr + PAGE_ALIGN(size);
        struct mm_struct *mm = vma->vm_mm;
-       unsigned long remap_pfn = pfn;
        int err;
 
        if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
@@ -2309,10 +2300,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                vma->vm_pgoff = pfn;
        }
 
-       err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
-       if (err)
-               return -EINVAL;
-
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
        BUG_ON(addr >= end);
@@ -2324,12 +2311,36 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                err = remap_p4d_range(mm, pgd, addr, next,
                                pfn + (addr >> PAGE_SHIFT), prot);
                if (err)
-                       break;
+                       return err;
        } while (pgd++, addr = next, addr != end);
 
+       return 0;
+}
+
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target page aligned user address to start at
+ * @pfn: page frame number of kernel physical memory address
+ * @size: size of mapping area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+                   unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+       int err;
+
+       err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
        if (err)
-               untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
+               return -EINVAL;
 
+       err = remap_pfn_range_notrack(vma, addr, pfn, size, prot);
+       if (err)
+               untrack_pfn(vma, pfn, PAGE_ALIGN(size));
        return err;
 }
 EXPORT_SYMBOL(remap_pfn_range);
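
/*
 * Illustrative sketch (not part of this patch): a typical caller of
 * remap_pfn_range() from a character device ->mmap handler.  The struct
 * sketch_dev, its phys_base field and sketch_mmap() are hypothetical;
 * only the remap_pfn_range() call reflects the API reworked above.
 */
static int sketch_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct sketch_dev *dev = file->private_data;    /* hypothetical device */
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long pfn = dev->phys_base >> PAGE_SHIFT;       /* hypothetical MMIO base */

        /* Returns 0 on success, a negative error code otherwise. */
        return remap_pfn_range(vma, vma->vm_start, pfn, size,
                               vma->vm_page_prot);
}
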
@@ -2446,13 +2457,21 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
        }
        do {
                next = pmd_addr_end(addr, end);
-               if (create || !pmd_none_or_clear_bad(pmd)) {
-                       err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
-                                                create, mask);
-                       if (err)
-                               break;
+               if (pmd_none(*pmd) && !create)
+                       continue;
+               if (WARN_ON_ONCE(pmd_leaf(*pmd)))
+                       return -EINVAL;
+               if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) {
+                       if (!create)
+                               continue;
+                       pmd_clear_bad(pmd);
                }
+               err = apply_to_pte_range(mm, pmd, addr, next,
+                                        fn, data, create, mask);
+               if (err)
+                       break;
        } while (pmd++, addr = next, addr != end);
+
        return err;
 }
 
@@ -2474,13 +2493,21 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
        }
        do {
                next = pud_addr_end(addr, end);
-               if (create || !pud_none_or_clear_bad(pud)) {
-                       err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
-                                                create, mask);
-                       if (err)
-                               break;
+               if (pud_none(*pud) && !create)
+                       continue;
+               if (WARN_ON_ONCE(pud_leaf(*pud)))
+                       return -EINVAL;
+               if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) {
+                       if (!create)
+                               continue;
+                       pud_clear_bad(pud);
                }
+               err = apply_to_pmd_range(mm, pud, addr, next,
+                                        fn, data, create, mask);
+               if (err)
+                       break;
        } while (pud++, addr = next, addr != end);
+
        return err;
 }
 
@@ -2502,13 +2529,21 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
        }
        do {
                next = p4d_addr_end(addr, end);
-               if (create || !p4d_none_or_clear_bad(p4d)) {
-                       err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
-                                                create, mask);
-                       if (err)
-                               break;
+               if (p4d_none(*p4d) && !create)
+                       continue;
+               if (WARN_ON_ONCE(p4d_leaf(*p4d)))
+                       return -EINVAL;
+               if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) {
+                       if (!create)
+                               continue;
+                       p4d_clear_bad(p4d);
                }
+               err = apply_to_pud_range(mm, p4d, addr, next,
+                                        fn, data, create, mask);
+               if (err)
+                       break;
        } while (p4d++, addr = next, addr != end);
+
        return err;
 }
 
@@ -2528,9 +2563,17 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end);
-               if (!create && pgd_none_or_clear_bad(pgd))
+               if (pgd_none(*pgd) && !create)
                        continue;
-               err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask);
+               if (WARN_ON_ONCE(pgd_leaf(*pgd)))
+                       return -EINVAL;
+               if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) {
+                       if (!create)
+                               continue;
+                       pgd_clear_bad(pgd);
+               }
+               err = apply_to_p4d_range(mm, pgd, addr, next,
+                                        fn, data, create, &mask);
                if (err)
                        break;
        } while (pgd++, addr = next, addr != end);
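
/*
 * Illustrative sketch (not part of this patch): apply_to_page_range() and
 * apply_to_existing_page_range() drive the page-table walk reworked above,
 * invoking a pte_fn_t callback on each PTE slot.  The callback and counter
 * below are hypothetical; the helper signatures are the existing kernel APIs.
 */
static int sketch_count_present(pte_t *pte, unsigned long addr, void *data)
{
        unsigned long *nr_present = data;

        if (!pte_none(ptep_get(pte)))
                (*nr_present)++;
        return 0;       /* a non-zero return aborts the walk with that error */
}

/* e.g. apply_to_existing_page_range(&init_mm, start, size, sketch_count_present, &nr); */
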
@@ -3309,28 +3352,26 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
                                                        vmf->address);
                        if (page) {
-                               int err;
-
                                __SetPageLocked(page);
                                __SetPageSwapBacked(page);
-                               set_page_private(page, entry.val);
 
-                               /* Tell memcg to use swap ownership records */
-                               SetPageSwapCache(page);
-                               err = mem_cgroup_charge(page, vma->vm_mm,
-                                                       GFP_KERNEL);
-                               ClearPageSwapCache(page);
-                               if (err) {
+                               if (mem_cgroup_swapin_charge_page(page,
+                                       vma->vm_mm, GFP_KERNEL, entry)) {
                                        ret = VM_FAULT_OOM;
                                        goto out_page;
                                }
+                               mem_cgroup_swapin_uncharge_swap(entry);
 
                                shadow = get_shadow_from_swap_cache(entry);
                                if (shadow)
                                        workingset_refault(page, shadow);
 
                                lru_cache_add(page);
+
+                               /* To provide entry to swap_readpage() */
+                               set_page_private(page, entry.val);
                                swap_readpage(page, true);
+                               set_page_private(page, 0);
                        }
                } else {
                        page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
@@ -4100,7 +4141,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
        int page_nid = NUMA_NO_NODE;
        int last_cpupid;
        int target_nid;
-       bool migrated = false;
        pte_t pte, old_pte;
        bool was_writable = pte_savedwrite(vmf->orig_pte);
        int flags = 0;
@@ -4117,29 +4157,17 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
                goto out;
        }
 
-       /*
-        * Make it present again, Depending on how arch implementes non
-        * accessible ptes, some can allow access by kernel mode.
-        */
-       old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+       /* Get the normal PTE  */
+       old_pte = ptep_get(vmf->pte);
        pte = pte_modify(old_pte, vma->vm_page_prot);
-       pte = pte_mkyoung(pte);
-       if (was_writable)
-               pte = pte_mkwrite(pte);
-       ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
-       update_mmu_cache(vma, vmf->address, vmf->pte);
 
        page = vm_normal_page(vma, vmf->address, pte);
-       if (!page) {
-               pte_unmap_unlock(vmf->pte, vmf->ptl);
-               return 0;
-       }
+       if (!page)
+               goto out_map;
 
        /* TODO: handle PTE-mapped THP */
-       if (PageCompound(page)) {
-               pte_unmap_unlock(vmf->pte, vmf->ptl);
-               return 0;
-       }
+       if (PageCompound(page))
+               goto out_map;
 
        /*
         * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
@@ -4149,7 +4177,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
         * pte_dirty has unpredictable behaviour between PTE scan updates,
         * background writeback, dirty balancing and application behaviour.
         */
-       if (!pte_write(pte))
+       if (!was_writable)
                flags |= TNF_NO_GROUP;
 
        /*
@@ -4163,24 +4191,45 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
        page_nid = page_to_nid(page);
        target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
                        &flags);
-       pte_unmap_unlock(vmf->pte, vmf->ptl);
        if (target_nid == NUMA_NO_NODE) {
                put_page(page);
-               goto out;
+               goto out_map;
        }
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
 
        /* Migrate to the requested node */
-       migrated = migrate_misplaced_page(page, vma, target_nid);
-       if (migrated) {
+       if (migrate_misplaced_page(page, vma, target_nid)) {
                page_nid = target_nid;
                flags |= TNF_MIGRATED;
-       } else
+       } else {
                flags |= TNF_MIGRATE_FAIL;
+               vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
+               spin_lock(vmf->ptl);
+               if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) {
+                       pte_unmap_unlock(vmf->pte, vmf->ptl);
+                       goto out;
+               }
+               goto out_map;
+       }
 
 out:
        if (page_nid != NUMA_NO_NODE)
                task_numa_fault(last_cpupid, page_nid, 1, flags);
        return 0;
+out_map:
+       /*
+        * Make it present again, depending on how arch implements
+        * non-accessible ptes, some can allow access by kernel mode.
+        */
+       old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+       pte = pte_modify(old_pte, vma->vm_page_prot);
+       pte = pte_mkyoung(pte);
+       if (was_writable)
+               pte = pte_mkwrite(pte);
+       ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
+       update_mmu_cache(vma, vmf->address, vmf->pte);
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
+       goto out;
 }
 
 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)