diff --git a/mm/memory.c b/mm/memory.c
index bcc4b07..8f1de81 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -433,35 +433,39 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
 {
-       spinlock_t *ptl;
-       pgtable_t new = pte_alloc_one(mm);
-       if (!new)
-               return -ENOMEM;
+       spinlock_t *ptl = pmd_lock(mm, pmd);
 
-       /*
-        * Ensure all pte setup (eg. pte page lock and page clearing) are
-        * visible before the pte is made visible to other CPUs by being
-        * put into page tables.
-        *
-        * The other side of the story is the pointer chasing in the page
-        * table walking code (when walking the page table without locking;
-        * ie. most of the time). Fortunately, these data accesses consist
-        * of a chain of data-dependent loads, meaning most CPUs (alpha
-        * being the notable exception) will already guarantee loads are
-        * seen in-order. See the alpha page table accessors for the
-        * smp_rmb() barriers in page table walking code.
-        */
-       smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
-
-       ptl = pmd_lock(mm, pmd);
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                mm_inc_nr_ptes(mm);
-               pmd_populate(mm, pmd, new);
-               new = NULL;
+               /*
+                * Ensure all pte setup (eg. pte page lock and page clearing) are
+                * visible before the pte is made visible to other CPUs by being
+                * put into page tables.
+                *
+                * The other side of the story is the pointer chasing in the page
+                * table walking code (when walking the page table without locking;
+                * ie. most of the time). Fortunately, these data accesses consist
+                * of a chain of data-dependent loads, meaning most CPUs (alpha
+                * being the notable exception) will already guarantee loads are
+                * seen in-order. See the alpha page table accessors for the
+                * smp_rmb() barriers in page table walking code.
+                */
+               smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
+               pmd_populate(mm, pmd, *pte);
+               *pte = NULL;
        }
        spin_unlock(ptl);
+}
+
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+{
+       pgtable_t new = pte_alloc_one(mm);
+       if (!new)
+               return -ENOMEM;
+
+       pmd_install(mm, pmd, &new);
        if (new)
                pte_free(mm, new);
        return 0;
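
Note: pmd_install() is shared with the fault-handling paths further down; the matching declaration is assumed to be added to include/linux/mm.h in the same series (that header is not part of this diff). A minimal sketch of the contract:

        /* Assumed declaration (include/linux/mm.h); pmd_install() consumes
         * *pte when it wins the race to populate the pmd and leaves it
         * untouched otherwise, so callers keep the "if (new) pte_free()"
         * fallback seen in __pte_alloc() above. */
        void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
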
@@ -473,10 +477,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&init_mm.page_table_lock);
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
+               smp_wmb(); /* See comment in pmd_install() */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
        }
@@ -1333,16 +1336,8 @@ again:
                        struct page *page;
 
                        page = vm_normal_page(vma, addr, ptent);
-                       if (unlikely(details) && page) {
-                               /*
-                                * unmap_shared_mapping_pages() wants to
-                                * invalidate cache without truncating:
-                                * unmap shared but keep private pages.
-                                */
-                               if (details->check_mapping &&
-                                   details->check_mapping != page_rmapping(page))
-                                       continue;
-                       }
+                       if (unlikely(zap_skip_check_mapping(details, page)))
+                               continue;
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
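
Note: zap_skip_check_mapping() is defined outside this file (presumably in mm/internal.h in this series); a sketch of the shape it would take, folding the removed open-coded check into a single predicate, with the renamed zap_mapping field assumed:

        /* Sketch only, assuming the mm/internal.h helper from this series:
         * skip the page when a mapping filter is set and the page belongs to
         * a different mapping, i.e. unmap shared pages but keep private ones. */
        static inline bool zap_skip_check_mapping(struct zap_details *details,
                                                  struct page *page)
        {
                if (!details || !page)
                        return false;

                return details->zap_mapping &&
                       (details->zap_mapping != page_rmapping(page));
        }
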
@@ -1375,17 +1370,8 @@ again:
                    is_device_exclusive_entry(entry)) {
                        struct page *page = pfn_swap_entry_to_page(entry);
 
-                       if (unlikely(details && details->check_mapping)) {
-                               /*
-                                * unmap_shared_mapping_pages() wants to
-                                * invalidate cache without truncating:
-                                * unmap shared but keep private pages.
-                                */
-                               if (details->check_mapping !=
-                                   page_rmapping(page))
-                                       continue;
-                       }
-
+                       if (unlikely(zap_skip_check_mapping(details, page)))
+                               continue;
                        pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
                        rss[mm_counter(page)]--;
 
@@ -2724,19 +2710,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
  * proceeding (but do_wp_page is only called after already making such a check;
  * and do_anonymous_page can safely check later on).
  */
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
-                               pte_t *page_table, pte_t orig_pte)
+static inline int pte_unmap_same(struct vm_fault *vmf)
 {
        int same = 1;
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
        if (sizeof(pte_t) > sizeof(unsigned long)) {
-               spinlock_t *ptl = pte_lockptr(mm, pmd);
+               spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
                spin_lock(ptl);
-               same = pte_same(*page_table, orig_pte);
+               same = pte_same(*vmf->pte, vmf->orig_pte);
                spin_unlock(ptl);
        }
 #endif
-       pte_unmap(page_table);
+       pte_unmap(vmf->pte);
+       vmf->pte = NULL;
        return same;
 }
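
Note: the reworked pte_unmap_same() takes everything it needs from struct vm_fault and additionally clears vmf->pte so later code does not reuse a stale mapping. The fields it relies on, paraphrased (this is not the full structure, whose layout lives in include/linux/mm.h):

        /* Paraphrased excerpt of struct vm_fault; illustrative, not complete. */
        struct vm_fault {
                struct vm_area_struct *vma;     /* faulting VMA */
                pmd_t *pmd;                     /* pmd covering the fault address */
                pte_t *pte;                     /* mapped pte, NULL once unmapped */
                pte_t orig_pte;                 /* pte value observed at fault time */
                /* ... */
        };
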
 
@@ -3321,20 +3307,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 }
 
 static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+                                           pgoff_t first_index,
+                                           pgoff_t last_index,
                                            struct zap_details *details)
 {
        struct vm_area_struct *vma;
        pgoff_t vba, vea, zba, zea;
 
-       vma_interval_tree_foreach(vma, root,
-                       details->first_index, details->last_index) {
-
+       vma_interval_tree_foreach(vma, root, first_index, last_index) {
                vba = vma->vm_pgoff;
                vea = vba + vma_pages(vma) - 1;
-               zba = details->first_index;
+               zba = first_index;
                if (zba < vba)
                        zba = vba;
-               zea = details->last_index;
+               zea = last_index;
                if (zea > vea)
                        zea = vea;
 
@@ -3360,18 +3346,22 @@ void unmap_mapping_page(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct zap_details details = { };
+       pgoff_t first_index;
+       pgoff_t last_index;
 
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(PageTail(page));
 
-       details.check_mapping = mapping;
-       details.first_index = page->index;
-       details.last_index = page->index + thp_nr_pages(page) - 1;
+       first_index = page->index;
+       last_index = page->index + thp_nr_pages(page) - 1;
+
+       details.zap_mapping = mapping;
        details.single_page = page;
 
        i_mmap_lock_write(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+               unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+                                        last_index, &details);
        i_mmap_unlock_write(mapping);
 }
 
@@ -3391,16 +3381,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
                pgoff_t nr, bool even_cows)
 {
        struct zap_details details = { };
+       pgoff_t first_index = start;
+       pgoff_t last_index = start + nr - 1;
 
-       details.check_mapping = even_cows ? NULL : mapping;
-       details.first_index = start;
-       details.last_index = start + nr - 1;
-       if (details.last_index < details.first_index)
-               details.last_index = ULONG_MAX;
+       details.zap_mapping = even_cows ? NULL : mapping;
+       if (last_index < first_index)
+               last_index = ULONG_MAX;
 
        i_mmap_lock_write(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+               unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+                                        last_index, &details);
        i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
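
Note: both unmap_mapping_page() and unmap_mapping_pages() now pass the index range as explicit parameters and set zap_mapping rather than check_mapping, so zap_details shrinks accordingly. Its assumed shape after this series (the real definition is in include/linux/mm.h and not shown here):

        /* Assumed reduced structure after dropping first_index/last_index: */
        struct zap_details {
                struct address_space *zap_mapping; /* if set, skip pages of other mappings */
                struct page *single_page;          /* only this page is to be unmapped */
        };
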
@@ -3488,7 +3479,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        vm_fault_t ret = 0;
        void *shadow = NULL;
 
-       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
+       if (!pte_unmap_same(vmf))
                goto out;
 
        entry = pte_to_swp_entry(vmf->orig_pte);
@@ -3853,7 +3844,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
                vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        ret = vma->vm_ops->fault(vmf);
@@ -3924,7 +3914,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
                vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -4037,17 +4026,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
                                return ret;
                }
 
-               if (vmf->prealloc_pte) {
-                       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-                       if (likely(pmd_none(*vmf->pmd))) {
-                               mm_inc_nr_ptes(vma->vm_mm);
-                               pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
-                               vmf->prealloc_pte = NULL;
-                       }
-                       spin_unlock(vmf->ptl);
-               } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
+               if (vmf->prealloc_pte)
+                       pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
+               else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
                        return VM_FAULT_OOM;
-               }
        }
 
        /* See comment in handle_pte_fault() */
@@ -4156,7 +4138,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
                vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
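
Note: the bare smp_wmb() after pte_alloc_one() is dropped from __do_fault(), do_set_pmd() and do_fault_around() because the publication ordering now lives inside pmd_install(). An illustrative sketch of the resulting caller pattern (mirroring finish_fault() above, not a new call site):

        /* Illustrative only: preallocate without a barrier, then let
         * pmd_install() order the pte page initialization before it is
         * made visible through the pmd. */
        if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
                vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;    /* no smp_wmb() needed here */
        }
        if (vmf->prealloc_pte)
                pmd_install(vmf->vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
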
@@ -4831,13 +4812,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&mm->page_table_lock);
-       if (pgd_present(*pgd))          /* Another has populated it */
+       if (pgd_present(*pgd)) {        /* Another has populated it */
                p4d_free(mm, new);
-       else
+       } else {
+               smp_wmb(); /* See comment in pmd_install() */
                pgd_populate(mm, pgd, new);
+       }
        spin_unlock(&mm->page_table_lock);
        return 0;
 }
@@ -4854,11 +4835,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&mm->page_table_lock);
        if (!p4d_present(*p4d)) {
                mm_inc_nr_puds(mm);
+               smp_wmb(); /* See comment in pmd_install() */
                p4d_populate(mm, p4d, new);
        } else  /* Another has populated it */
                pud_free(mm, new);
@@ -4879,14 +4859,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        ptl = pud_lock(mm, pud);
        if (!pud_present(*pud)) {
                mm_inc_nr_pmds(mm);
+               smp_wmb(); /* See comment in pmd_install() */
                pud_populate(mm, pud, new);
-       } else  /* Another has populated it */
+       } else {        /* Another has populated it */
                pmd_free(mm, new);
+       }
        spin_unlock(ptl);
        return 0;
 }
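
Note: __pte_alloc_kernel(), __p4d_alloc(), __pud_alloc() and __pmd_alloc() all follow the same pattern of narrowing the barrier to the branch that actually publishes the new table. The common shape, using hypothetical level_* helpers in place of the per-level lock/test/populate/free primitives (counter updates omitted):

        /* Hypothetical per-level pattern; placeholder names only. */
        spin_lock(lock);
        if (level_none(*entry)) {
                smp_wmb();                       /* order table init before publish */
                level_populate(mm, entry, new);  /* make the new table visible */
        } else {
                level_free(mm, new);             /* another thread populated it first */
        }
        spin_unlock(lock);
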
@@ -5423,7 +5403,6 @@ long copy_huge_page_from_user(struct page *dst_page,
                                unsigned int pages_per_huge_page,
                                bool allow_pagefault)
 {
-       void *src = (void *)usr_src;
        void *page_kaddr;
        unsigned long i, rc = 0;
        unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
@@ -5436,8 +5415,7 @@ long copy_huge_page_from_user(struct page *dst_page,
                else
                        page_kaddr = kmap_atomic(subpage);
                rc = copy_from_user(page_kaddr,
-                               (const void __user *)(src + i * PAGE_SIZE),
-                               PAGE_SIZE);
+                               usr_src + i * PAGE_SIZE, PAGE_SIZE);
                if (allow_pagefault)
                        kunmap(subpage);
                else
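
Note: the local void *src copy existed only to do the pointer arithmetic before casting back to a __user pointer; usr_src is already a __user pointer in the prototype (paraphrased below, not part of this diff), so copy_from_user() can take it directly:

        /* Paraphrased prototype for reference; declared in include/linux/mm.h. */
        long copy_huge_page_from_user(struct page *dst_page,
                                      const void __user *usr_src,
                                      unsigned int pages_per_huge_page,
                                      bool allow_pagefault);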