diff --git a/mm/memory.c b/mm/memory.c
index bcc4b07..8f1de81 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -433,35 +433,39 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
 {
-       spinlock_t *ptl;
-       pgtable_t new = pte_alloc_one(mm);
-       if (!new)
-               return -ENOMEM;
+       spinlock_t *ptl = pmd_lock(mm, pmd);
 
-       /*
-        * Ensure all pte setup (eg. pte page lock and page clearing) are
-        * visible before the pte is made visible to other CPUs by being
-        * put into page tables.
-        *
-        * The other side of the story is the pointer chasing in the page
-        * table walking code (when walking the page table without locking;
-        * ie. most of the time). Fortunately, these data accesses consist
-        * of a chain of data-dependent loads, meaning most CPUs (alpha
-        * being the notable exception) will already guarantee loads are
-        * seen in-order. See the alpha page table accessors for the
-        * smp_rmb() barriers in page table walking code.
-        */
-       smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
-
-       ptl = pmd_lock(mm, pmd);
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                mm_inc_nr_ptes(mm);
-               pmd_populate(mm, pmd, new);
-               new = NULL;
+               /*
+                * Ensure all pte setup (eg. pte page lock and page clearing) are
+                * visible before the pte is made visible to other CPUs by being
+                * put into page tables.
+                *
+                * The other side of the story is the pointer chasing in the page
+                * table walking code (when walking the page table without locking;
+                * ie. most of the time). Fortunately, these data accesses consist
+                * of a chain of data-dependent loads, meaning most CPUs (alpha
+                * being the notable exception) will already guarantee loads are
+                * seen in-order. See the alpha page table accessors for the
+                * smp_rmb() barriers in page table walking code.
+                */
+               smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
+               pmd_populate(mm, pmd, *pte);
+               *pte = NULL;
        }
        spin_unlock(ptl);
+}
+
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
+{
+       pgtable_t new = pte_alloc_one(mm);
+       if (!new)
+               return -ENOMEM;
+
+       pmd_install(mm, pmd, &new);
        if (new)
                pte_free(mm, new);
        return 0;
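
Note: pmd_install() is shared with the fault-handling paths further down; the matching declaration is assumed to be added to include/linux/mm.h in the same series (that header is not part of this diff). A minimal sketch of the contract:

        /* Assumed declaration (include/linux/mm.h); pmd_install() consumes
         * *pte when it wins the race to populate the pmd and leaves it
         * untouched otherwise, so callers keep the "if (new) pte_free()"
         * fallback seen in __pte_alloc() above. */
        void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
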
@@ -473,10 +477,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&init_mm.page_table_lock);
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
+               smp_wmb(); /* See comment in pmd_install() */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
        }
@@ -1333,16 +1336,8 @@ again:
                        struct page *page;
 
                        page = vm_normal_page(vma, addr, ptent);
-                       if (unlikely(details) && page) {
-                               /*
-                                * unmap_shared_mapping_pages() wants to
-                                * invalidate cache without truncating:
-                                * unmap shared but keep private pages.
-                                */
-                               if (details->check_mapping &&
-                                   details->check_mapping != page_rmapping(page))
-                                       continue;
-                       }
+                       if (unlikely(zap_skip_check_mapping(details, page)))
+                               continue;
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
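
Note: zap_skip_check_mapping() is defined outside this file (presumably in mm/internal.h in this series); a sketch of the shape it would take, folding the removed open-coded check into a single predicate, with the renamed zap_mapping field assumed:

        /* Sketch only, assuming the mm/internal.h helper from this series:
         * skip the page when a mapping filter is set and the page belongs to
         * a different mapping, i.e. unmap shared pages but keep private ones. */
        static inline bool zap_skip_check_mapping(struct zap_details *details,
                                                  struct page *page)
        {
                if (!details || !page)
                        return false;

                return details->zap_mapping &&
                       (details->zap_mapping != page_rmapping(page));
        }
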
@@ -1375,17 +1370,8 @@ again:
                    is_device_exclusive_entry(entry)) {
                        struct page *page = pfn_swap_entry_to_page(entry);
 
-                       if (unlikely(details && details->check_mapping)) {
-                               /*
-                                * unmap_shared_mapping_pages() wants to
-                                * invalidate cache without truncating:
-                                * unmap shared but keep private pages.
-                                */
-                               if (details->check_mapping !=
-                                   page_rmapping(page))
-                                       continue;
-                       }
-
+                       if (unlikely(zap_skip_check_mapping(details, page)))
+                               continue;
                        pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
                        rss[mm_counter(page)]--;
 
@@ -2724,19 +2710,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
  * proceeding (but do_wp_page is only called after already making such a check;
  * and do_anonymous_page can safely check later on).
  */
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
-                               pte_t *page_table, pte_t orig_pte)
+static inline int pte_unmap_same(struct vm_fault *vmf)
 {
        int same = 1;
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
        if (sizeof(pte_t) > sizeof(unsigned long)) {
-               spinlock_t *ptl = pte_lockptr(mm, pmd);
+               spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
                spin_lock(ptl);
-               same = pte_same(*page_table, orig_pte);
+               same = pte_same(*vmf->pte, vmf->orig_pte);
                spin_unlock(ptl);
        }
 #endif
-       pte_unmap(page_table);
+       pte_unmap(vmf->pte);
+       vmf->pte = NULL;
        return same;
 }
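
Note: the reworked pte_unmap_same() takes everything it needs from struct vm_fault and additionally clears vmf->pte so later code does not reuse a stale mapping. The fields it relies on, paraphrased (this is not the full structure, whose layout lives in include/linux/mm.h):

        /* Paraphrased excerpt of struct vm_fault; illustrative, not complete. */
        struct vm_fault {
                struct vm_area_struct *vma;     /* faulting VMA */
                pmd_t *pmd;                     /* pmd covering the fault address */
                pte_t *pte;                     /* mapped pte, NULL once unmapped */
                pte_t orig_pte;                 /* pte value observed at fault time */
                /* ... */
        };
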
 
@@ -3321,20 +3307,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 }
 
 static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+                                           pgoff_t first_index,
+                                           pgoff_t last_index,
                                            struct zap_details *details)
 {
        struct vm_area_struct *vma;
        pgoff_t vba, vea, zba, zea;
 
-       vma_interval_tree_foreach(vma, root,
-                       details->first_index, details->last_index) {
-
+       vma_interval_tree_foreach(vma, root, first_index, last_index) {
                vba = vma->vm_pgoff;
                vea = vba + vma_pages(vma) - 1;
-               zba = details->first_index;
+               zba = first_index;
                if (zba < vba)
                        zba = vba;
-               zea = details->last_index;
+               zea = last_index;
                if (zea > vea)
                        zea = vea;
 
@@ -3360,18 +3346,22 @@ void unmap_mapping_page(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct zap_details details = { };
+       pgoff_t first_index;
+       pgoff_t last_index;
 
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(PageTail(page));
 
-       details.check_mapping = mapping;
-       details.first_index = page->index;
-       details.last_index = page->index + thp_nr_pages(page) - 1;
+       first_index = page->index;
+       last_index = page->index + thp_nr_pages(page) - 1;
+
+       details.zap_mapping = mapping;
        details.single_page = page;
 
        i_mmap_lock_write(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+               unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+                                        last_index, &details);
        i_mmap_unlock_write(mapping);
 }
 
@@ -3391,16 +3381,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
                pgoff_t nr, bool even_cows)
 {
        struct zap_details details = { };
+       pgoff_t first_index = start;
+       pgoff_t last_index = start + nr - 1;
 
-       details.check_mapping = even_cows ? NULL : mapping;
-       details.first_index = start;
-       details.last_index = start + nr - 1;
-       if (details.last_index < details.first_index)
-               details.last_index = ULONG_MAX;
+       details.zap_mapping = even_cows ? NULL : mapping;
+       if (last_index < first_index)
+               last_index = ULONG_MAX;
 
        i_mmap_lock_write(mapping);
        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+               unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+                                        last_index, &details);
        i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
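
Note: both unmap_mapping_page() and unmap_mapping_pages() now pass the index range as explicit parameters and set zap_mapping rather than check_mapping, so zap_details shrinks accordingly. Its assumed shape after this series (the real definition is in include/linux/mm.h and not shown here):

        /* Assumed reduced structure after dropping first_index/last_index: */
        struct zap_details {
                struct address_space *zap_mapping; /* if set, skip pages of other mappings */
                struct page *single_page;          /* only this page is to be unmapped */
        };
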
@@ -3488,7 +3479,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        vm_fault_t ret = 0;
        void *shadow = NULL;
 
-       if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
+       if (!pte_unmap_same(vmf))
                goto out;
 
        entry = pte_to_swp_entry(vmf->orig_pte);
@@ -3853,7 +3844,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
                vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        ret = vma->vm_ops->fault(vmf);
@@ -3924,7 +3914,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
                vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -4037,17 +4026,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
                                return ret;
                }
 
-               if (vmf->prealloc_pte) {
-                       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-                       if (likely(pmd_none(*vmf->pmd))) {
-                               mm_inc_nr_ptes(vma->vm_mm);
-                               pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
-                               vmf->prealloc_pte = NULL;
-                       }
-                       spin_unlock(vmf->ptl);
-               } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
+               if (vmf->prealloc_pte)
+                       pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
+               else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
                        return VM_FAULT_OOM;
-               }
        }
 
        /* See comment in handle_pte_fault() */
@@ -4156,7 +4138,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
                vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;
-               smp_wmb(); /* See comment in __pte_alloc() */
        }
 
        return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
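
Note: the bare smp_wmb() after pte_alloc_one() is dropped from __do_fault(), do_set_pmd() and do_fault_around() because the publication ordering now lives inside pmd_install(). An illustrative sketch of the resulting caller pattern (mirroring finish_fault() above, not a new call site):

        /* Illustrative only: preallocate without a barrier, then let
         * pmd_install() order the pte page initialization before it is
         * made visible through the pmd. */
        if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) {
                vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
                if (!vmf->prealloc_pte)
                        return VM_FAULT_OOM;    /* no smp_wmb() needed here */
        }
        if (vmf->prealloc_pte)
                pmd_install(vmf->vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
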
@@ -4831,13 +4812,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&mm->page_table_lock);
-       if (pgd_present(*pgd))          /* Another has populated it */
+       if (pgd_present(*pgd)) {        /* Another has populated it */
                p4d_free(mm, new);
-       else
+       } else {
+               smp_wmb(); /* See comment in pmd_install() */
                pgd_populate(mm, pgd, new);
+       }
        spin_unlock(&mm->page_table_lock);
        return 0;
 }
@@ -4854,11 +4835,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        spin_lock(&mm->page_table_lock);
        if (!p4d_present(*p4d)) {
                mm_inc_nr_puds(mm);
+               smp_wmb(); /* See comment in pmd_install() */
                p4d_populate(mm, p4d, new);
        } else  /* Another has populated it */
                pud_free(mm, new);
@@ -4879,14 +4859,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
        if (!new)
                return -ENOMEM;
 
-       smp_wmb(); /* See comment in __pte_alloc */
-
        ptl = pud_lock(mm, pud);
        if (!pud_present(*pud)) {
                mm_inc_nr_pmds(mm);
+               smp_wmb(); /* See comment in pmd_install() */
                pud_populate(mm, pud, new);
-       } else  /* Another has populated it */
+       } else {        /* Another has populated it */
                pmd_free(mm, new);
+       }
        spin_unlock(ptl);
        return 0;
 }
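
Note: __pte_alloc_kernel(), __p4d_alloc(), __pud_alloc() and __pmd_alloc() all follow the same pattern of narrowing the barrier to the branch that actually publishes the new table. The common shape, using hypothetical level_* helpers in place of the per-level lock/test/populate/free primitives (counter updates omitted):

        /* Hypothetical per-level pattern; placeholder names only. */
        spin_lock(lock);
        if (level_none(*entry)) {
                smp_wmb();                       /* order table init before publish */
                level_populate(mm, entry, new);  /* make the new table visible */
        } else {
                level_free(mm, new);             /* another thread populated it first */
        }
        spin_unlock(lock);
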
@@ -5423,7 +5403,6 @@ long copy_huge_page_from_user(struct page *dst_page,
                                unsigned int pages_per_huge_page,
                                bool allow_pagefault)
 {
-       void *src = (void *)usr_src;
        void *page_kaddr;
        unsigned long i, rc = 0;
        unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
@@ -5436,8 +5415,7 @@ long copy_huge_page_from_user(struct page *dst_page,
                else
                        page_kaddr = kmap_atomic(subpage);
                rc = copy_from_user(page_kaddr,
-                               (const void __user *)(src + i * PAGE_SIZE),
-                               PAGE_SIZE);
+                               usr_src + i * PAGE_SIZE, PAGE_SIZE);
                if (allow_pagefault)
                        kunmap(subpage);
                else
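
Note: the local void *src copy existed only to do the pointer arithmetic before casting back to a __user pointer; usr_src is already a __user pointer in the prototype (paraphrased below, not part of this diff), so copy_from_user() can take it directly:

        /* Paraphrased prototype for reference; declared in include/linux/mm.h. */
        long copy_huge_page_from_user(struct page *dst_page,
                                      const void __user *usr_src,
                                      unsigned int pages_per_huge_page,
                                      bool allow_pagefault);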