[linux-2.6-microblaze.git] mm/memory.c
diff --git a/mm/memory.c b/mm/memory.c
index c387430..ff17850 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/export.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/pfn_t.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
@@ -566,7 +567,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        spinlock_t *ptl;
        pgtable_t new = pte_alloc_one(mm, address);
-       int wait_split_huge_page;
        if (!new)
                return -ENOMEM;
 
@@ -586,18 +586,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
        ptl = pmd_lock(mm, pmd);
-       wait_split_huge_page = 0;
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                atomic_long_inc(&mm->nr_ptes);
                pmd_populate(mm, pmd, new);
                new = NULL;
-       } else if (unlikely(pmd_trans_splitting(*pmd)))
-               wait_split_huge_page = 1;
+       }
        spin_unlock(ptl);
        if (new)
                pte_free(mm, new);
-       if (wait_split_huge_page)
-               wait_split_huge_page(vma->anon_vma, pmd);
        return 0;
 }
 
@@ -613,8 +609,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
-       } else
-               VM_BUG_ON(pmd_trans_splitting(*pmd));
+       }
        spin_unlock(&init_mm.page_table_lock);
        if (new)
                pte_free_kernel(&init_mm, new);
@@ -832,10 +827,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                } else if (is_migration_entry(entry)) {
                        page = migration_entry_to_page(entry);
 
-                       if (PageAnon(page))
-                               rss[MM_ANONPAGES]++;
-                       else
-                               rss[MM_FILEPAGES]++;
+                       rss[mm_counter(page)]++;
 
                        if (is_write_migration_entry(entry) &&
                                        is_cow_mapping(vm_flags)) {
@@ -873,11 +865,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        page = vm_normal_page(vma, addr, pte);
        if (page) {
                get_page(page);
-               page_dup_rmap(page);
-               if (PageAnon(page))
-                       rss[MM_ANONPAGES]++;
-               else
-                       rss[MM_FILEPAGES]++;
+               page_dup_rmap(page, false);
+               rss[mm_counter(page)]++;
        }
 
 out_set_pte:
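
The open-coded MM_ANONPAGES/MM_FILEPAGES accounting collapses into an mm_counter(page) lookup. A minimal sketch of the helpers this assumes (added to include/linux/mm.h by this series; approximate, not verbatim):

    static inline int mm_counter_file(struct page *page)
    {
            /* shmem/tmpfs pages are swap-backed and get their own counter */
            if (PageSwapBacked(page))
                    return MM_SHMEMPAGES;
            return MM_FILEPAGES;
    }

    static inline int mm_counter(struct page *page)
    {
            if (PageAnon(page))
                    return MM_ANONPAGES;
            return mm_counter_file(page);
    }

With this, copy_one_pte(), zap_pte_range() and the fault paths below pick the right rss counter from the page itself instead of branching on PageAnon() at every call site.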
@@ -961,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*src_pmd)) {
+               if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
                        int err;
                        VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
                        err = copy_huge_pmd(dst_mm, src_mm,
@@ -1113,9 +1102,8 @@ again:
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
-                       if (PageAnon(page))
-                               rss[MM_ANONPAGES]--;
-                       else {
+
+                       if (!PageAnon(page)) {
                                if (pte_dirty(ptent)) {
                                        force_flush = 1;
                                        set_page_dirty(page);
@@ -1123,9 +1111,9 @@ again:
                                if (pte_young(ptent) &&
                                    likely(!(vma->vm_flags & VM_SEQ_READ)))
                                        mark_page_accessed(page);
-                               rss[MM_FILEPAGES]--;
                        }
-                       page_remove_rmap(page);
+                       rss[mm_counter(page)]--;
+                       page_remove_rmap(page, false);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -1146,11 +1134,7 @@ again:
                        struct page *page;
 
                        page = migration_entry_to_page(entry);
-
-                       if (PageAnon(page))
-                               rss[MM_ANONPAGES]--;
-                       else
-                               rss[MM_FILEPAGES]--;
+                       rss[mm_counter(page)]--;
                }
                if (unlikely(!free_swap_and_cache(entry)))
                        print_bad_pte(vma, addr, ptent, NULL);
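
The rmap entry points now take an explicit 'compound' argument so the same functions cover both PTE-mapped and PMD-mapped (THP) pages; every caller in this file passes false. Assumed companion prototypes (include/linux/rmap.h, sketch):

    void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
                                unsigned long address, bool compound);
    void page_remove_rmap(struct page *, bool compound);

    /* and the fork-time helper used in copy_one_pte() above (sketch) */
    static inline void page_dup_rmap(struct page *page, bool compound)
    {
            atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
    }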
@@ -1193,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*pmd)) {
+               if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE) {
 #ifdef CONFIG_DEBUG_VM
                                if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -1204,7 +1188,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                                        BUG();
                                }
 #endif
-                               split_huge_page_pmd(vma, addr, pmd);
+                               split_huge_pmd(vma, pmd, addr);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
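
split_huge_page_pmd(vma, addr, pmd) is replaced by split_huge_pmd(vma, pmd, addr) (note the argument order), which only splits the PMD into a page table of PTEs; splitting the underlying compound page is now a separate operation. The wrapper assumed here looks roughly like (include/linux/huge_mm.h, sketch):

    #define split_huge_pmd(__vma, __pmd, __address)                         \
            do {                                                            \
                    pmd_t *____pmd = (__pmd);                               \
                    if (pmd_trans_huge(*____pmd) || pmd_devmap(*____pmd))   \
                            __split_huge_pmd(__vma, __pmd, __address);      \
            } while (0)

The added pmd_devmap() checks in copy_pmd_range() and zap_pmd_range() make device-DAX huge mappings take the same huge-PMD paths as transparent huge pages.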
@@ -1460,7 +1444,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
        /* Ok, finally just insert the thing.. */
        get_page(page);
-       inc_mm_counter_fast(mm, MM_FILEPAGES);
+       inc_mm_counter_fast(mm, mm_counter_file(page));
        page_add_file_rmap(page);
        set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -1517,7 +1501,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(vm_insert_page);
 
 static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-                       unsigned long pfn, pgprot_t prot)
+                       pfn_t pfn, pgprot_t prot)
 {
        struct mm_struct *mm = vma->vm_mm;
        int retval;
@@ -1533,7 +1517,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                goto out_unlock;
 
        /* Ok, finally just insert the thing.. */
-       entry = pte_mkspecial(pfn_pte(pfn, prot));
+       if (pfn_t_devmap(pfn))
+               entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
+       else
+               entry = pte_mkspecial(pfn_t_pte(pfn, prot));
        set_pte_at(mm, addr, pte, entry);
        update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
@@ -1580,17 +1567,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
-       if (track_pfn_insert(vma, &pgprot, pfn))
+       if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
                return -EINVAL;
 
-       ret = insert_pfn(vma, addr, pfn, pgprot);
+       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
 
        return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       unsigned long pfn)
+                       pfn_t pfn)
 {
        BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
 
@@ -1604,10 +1591,10 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
         * without pte special, it would there be refcounted as a normal page.
         */
-       if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
+       if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
                struct page *page;
 
-               page = pfn_to_page(pfn);
+               page = pfn_t_to_page(pfn);
                return insert_page(vma, addr, page, vma->vm_page_prot);
        }
        return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
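
vm_insert_mixed() and insert_pfn() switch from a bare unsigned long pfn to pfn_t, which carries type flags in the otherwise unused high bits so callers such as DAX can mark a mapping as device memory (PFN_DEV) with or without struct pages behind it (PFN_MAP). A rough sketch of the type (include/linux/pfn.h and include/linux/pfn_t.h; approximate):

    typedef struct {
            u64 val;        /* pfn in the low bits, flags in the high bits */
    } pfn_t;

    #define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
    #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
    #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))

    static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags)
    {
            pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };

            return pfn_t;
    }

    /* devmap: device memory that does have struct pages backing it */
    static inline bool pfn_t_devmap(pfn_t pfn)
    {
            const u64 flags = PFN_DEV|PFN_MAP;

            return (pfn.val & flags) == flags;
    }

insert_pfn() uses pfn_t_devmap() to choose between pte_mkdevmap() and pte_mkspecial(), while vm_insert_pfn() keeps its unsigned long interface and wraps the value with __pfn_to_pfn_t(pfn, PFN_DEV).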
@@ -1949,6 +1936,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
                copy_user_highpage(dst, src, va, vma);
 }
 
+static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
+{
+       struct file *vm_file = vma->vm_file;
+
+       if (vm_file)
+               return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO;
+
+       /*
+        * Special mappings (e.g. VDSO) do not have any file so fake
+        * a default GFP_KERNEL for them.
+        */
+       return GFP_KERNEL;
+}
+
 /*
  * Notify the address space that the page is about to become writable so that
  * it can prohibit this or wait for the page to get into an appropriate state.
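
__get_fault_gfp_mask() centralizes the allocation mask that the fault paths pass down to ->fault(), ->map_pages() and ->page_mkwrite() handlers, so filesystems and DAX no longer have to rederive it from the mapping. It assumes a companion vm_fault change along these lines (include/linux/mm.h, sketch of the relevant fields only):

    struct vm_fault {
            unsigned int flags;             /* FAULT_FLAG_xxx flags */
            gfp_t gfp_mask;                 /* gfp mask to be used for allocations */
            pgoff_t pgoff;                  /* logical page offset based on vma */
            void __user *virtual_address;   /* faulting virtual address */
            struct page *cow_page;          /* handler may choose to COW */
            struct page *page;              /* ->fault handlers return the page here */
            /* ->map_pages() fields (max_pgoff, pte) omitted from this sketch */
    };

The vmf.gfp_mask assignments added below (do_page_mkwrite(), __do_fault(), do_fault_around()) all feed from this helper.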
@@ -1964,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
        vmf.virtual_address = (void __user *)(address & PAGE_MASK);
        vmf.pgoff = page->index;
        vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+       vmf.gfp_mask = __get_fault_gfp_mask(vma);
        vmf.page = page;
        vmf.cow_page = NULL;
 
@@ -2083,7 +2085,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                cow_user_page(new_page, old_page, address, vma);
        }
 
-       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
                goto oom_free_new;
 
        __SetPageUptodate(new_page);
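
mem_cgroup_try_charge() and friends gain a trailing 'compound' argument (false everywhere in this file, since these paths handle order-0 pages) so that huge pages can be charged as HPAGE_PMD_NR units in a single call. Assumed companion prototypes (include/linux/memcontrol.h, sketch):

    int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                              gfp_t gfp_mask, struct mem_cgroup **memcgp,
                              bool compound);
    void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                                  bool lrucare, bool compound);
    void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
                                  bool compound);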
@@ -2097,7 +2099,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
        if (likely(pte_same(*page_table, orig_pte))) {
                if (old_page) {
                        if (!PageAnon(old_page)) {
-                               dec_mm_counter_fast(mm, MM_FILEPAGES);
+                               dec_mm_counter_fast(mm,
+                                               mm_counter_file(old_page));
                                inc_mm_counter_fast(mm, MM_ANONPAGES);
                        }
                } else {
@@ -2113,8 +2116,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                 * thread doing COW.
                 */
                ptep_clear_flush_notify(vma, address, page_table);
-               page_add_new_anon_rmap(new_page, vma, address);
-               mem_cgroup_commit_charge(new_page, memcg, false);
+               page_add_new_anon_rmap(new_page, vma, address, false);
+               mem_cgroup_commit_charge(new_page, memcg, false, false);
                lru_cache_add_active_or_unevictable(new_page, vma);
                /*
                 * We call the notify macro here because, when using secondary
@@ -2146,14 +2149,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                         * mapcount is visible. So transitively, TLBs to
                         * old page will be flushed before it can be reused.
                         */
-                       page_remove_rmap(old_page);
+                       page_remove_rmap(old_page, false);
                }
 
                /* Free the old page.. */
                new_page = old_page;
                page_copied = 1;
        } else {
-               mem_cgroup_cancel_charge(new_page, memcg);
+               mem_cgroup_cancel_charge(new_page, memcg, false);
        }
 
        if (new_page)
@@ -2168,7 +2171,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                 */
                if (page_copied && (vma->vm_flags & VM_LOCKED)) {
                        lock_page(old_page);    /* LRU manipulation */
-                       munlock_vma_page(old_page);
+                       if (PageMlocked(old_page))
+                               munlock_vma_page(old_page);
                        unlock_page(old_page);
                }
                page_cache_release(old_page);
@@ -2528,7 +2532,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_page;
        }
 
-       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) {
                ret = VM_FAULT_OOM;
                goto out_page;
        }
@@ -2562,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
-               exclusive = 1;
+               exclusive = RMAP_EXCLUSIVE;
        }
        flush_icache_page(vma, page);
        if (pte_swp_soft_dirty(orig_pte))
@@ -2570,10 +2574,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        set_pte_at(mm, address, page_table, pte);
        if (page == swapcache) {
                do_page_add_anon_rmap(page, vma, address, exclusive);
-               mem_cgroup_commit_charge(page, memcg, true);
+               mem_cgroup_commit_charge(page, memcg, true, false);
        } else { /* ksm created a completely new copy */
-               page_add_new_anon_rmap(page, vma, address);
-               mem_cgroup_commit_charge(page, memcg, false);
+               page_add_new_anon_rmap(page, vma, address, false);
+               mem_cgroup_commit_charge(page, memcg, false, false);
                lru_cache_add_active_or_unevictable(page, vma);
        }
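
'exclusive' is no longer a plain 0/1 value but an rmap flag, so do_page_add_anon_rmap() can carry a compound bit through the same argument in other callers. The flag values assumed here are illustrative only (include/linux/rmap.h, hypothetical sketch):

    /* flags for do_page_add_anon_rmap(); values shown for illustration */
    #define RMAP_EXCLUSIVE  0x01    /* page is exclusively mapped by this process */
    #define RMAP_COMPOUND   0x02    /* page is a compound (THP) page */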
 
@@ -2608,7 +2612,7 @@ unlock:
 out:
        return ret;
 out_nomap:
-       mem_cgroup_cancel_charge(page, memcg);
+       mem_cgroup_cancel_charge(page, memcg, false);
        pte_unmap_unlock(page_table, ptl);
 out_page:
        unlock_page(page);
@@ -2702,7 +2706,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!page)
                goto oom;
 
-       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
                goto oom_free_page;
 
        /*
@@ -2723,15 +2727,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {
                pte_unmap_unlock(page_table, ptl);
-               mem_cgroup_cancel_charge(page, memcg);
+               mem_cgroup_cancel_charge(page, memcg, false);
                page_cache_release(page);
                return handle_userfault(vma, address, flags,
                                        VM_UFFD_MISSING);
        }
 
        inc_mm_counter_fast(mm, MM_ANONPAGES);
-       page_add_new_anon_rmap(page, vma, address);
-       mem_cgroup_commit_charge(page, memcg, false);
+       page_add_new_anon_rmap(page, vma, address, false);
+       mem_cgroup_commit_charge(page, memcg, false, false);
        lru_cache_add_active_or_unevictable(page, vma);
 setpte:
        set_pte_at(mm, address, page_table, entry);
@@ -2742,7 +2746,7 @@ unlock:
        pte_unmap_unlock(page_table, ptl);
        return 0;
 release:
-       mem_cgroup_cancel_charge(page, memcg);
+       mem_cgroup_cancel_charge(page, memcg, false);
        page_cache_release(page);
        goto unlock;
 oom_free_page:
@@ -2767,6 +2771,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
        vmf.pgoff = pgoff;
        vmf.flags = flags;
        vmf.page = NULL;
+       vmf.gfp_mask = __get_fault_gfp_mask(vma);
        vmf.cow_page = cow_page;
 
        ret = vma->vm_ops->fault(vma, &vmf);
@@ -2818,9 +2823,9 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-               page_add_new_anon_rmap(page, vma, address);
+               page_add_new_anon_rmap(page, vma, address, false);
        } else {
-               inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES);
+               inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
                page_add_file_rmap(page);
        }
        set_pte_at(vma->vm_mm, address, pte, entry);
@@ -2933,6 +2938,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
        vmf.pgoff = pgoff;
        vmf.max_pgoff = max_pgoff;
        vmf.flags = flags;
+       vmf.gfp_mask = __get_fault_gfp_mask(vma);
        vma->vm_ops->map_pages(vma, &vmf);
 }
 
@@ -2993,7 +2999,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!new_page)
                return VM_FAULT_OOM;
 
-       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
                page_cache_release(new_page);
                return VM_FAULT_OOM;
        }
@@ -3022,7 +3028,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                goto uncharge_out;
        }
        do_set_pte(vma, address, new_page, pte, true, true);
-       mem_cgroup_commit_charge(new_page, memcg, false);
+       mem_cgroup_commit_charge(new_page, memcg, false, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
        if (fault_page) {
@@ -3037,7 +3043,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        }
        return ret;
 uncharge_out:
-       mem_cgroup_cancel_charge(new_page, memcg);
+       mem_cgroup_cancel_charge(new_page, memcg, false);
        page_cache_release(new_page);
        return ret;
 }
@@ -3089,7 +3095,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * pinned by vma->vm_file's reference.  We rely on unlock_page()'s
         * release semantics to prevent the compiler from undoing this copying.
         */
-       mapping = fault_page->mapping;
+       mapping = page_rmapping(fault_page);
        unlock_page(fault_page);
        if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
                /*
@@ -3191,6 +3197,12 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return 0;
        }
 
+       /* TODO: handle PTE-mapped THP */
+       if (PageCompound(page)) {
+               pte_unmap_unlock(ptep, ptl);
+               return 0;
+       }
+
        /*
         * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
         * much anyway since they can be in shared cache state. This misses
@@ -3363,17 +3375,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                int ret;
 
                barrier();
-               if (pmd_trans_huge(orig_pmd)) {
+               if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
                        unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
-                       /*
-                        * If the pmd is splitting, return and retry the
-                        * the fault.  Alternative: wait until the split
-                        * is done, and goto retry.
-                        */
-                       if (pmd_trans_splitting(orig_pmd))
-                               return 0;
-
                        if (pmd_protnone(orig_pmd))
                                return do_huge_pmd_numa_page(mm, vma, address,
                                                             orig_pmd, pmd);
@@ -3400,7 +3404,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
            unlikely(__pte_alloc(mm, vma, pmd, address)))
                return VM_FAULT_OOM;
        /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd)))
+       if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                return 0;
        /*
         * A regular pmd is established and it can't morph into a huge pmd
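
The pmd_trans_splitting() checks are gone because the reworked THP refcounting splits a huge PMD in one step under the pmd lock, so there is no transient "splitting" state left to wait for or retry on. The pmd_devmap() tests added alongside pmd_trans_huge() here and above route device-DAX huge mappings through the huge-PMD fault path; on architectures that support it this is roughly a software page-table bit, e.g. on x86 (sketch):

    /* _PAGE_DEVMAP is a software bit that pmd_mkdevmap() sets */
    static inline int pmd_devmap(pmd_t pmd)
    {
            return !!(pmd_val(pmd) & _PAGE_DEVMAP);
    }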