diff --git a/mm/memory.c b/mm/memory.c
index d4e4d37..ff17850 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/export.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
+#include <linux/pfn_t.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
@@ -566,7 +567,6 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        spinlock_t *ptl;
        pgtable_t new = pte_alloc_one(mm, address);
-       int wait_split_huge_page;
        if (!new)
                return -ENOMEM;
 
@@ -586,18 +586,14 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
        ptl = pmd_lock(mm, pmd);
-       wait_split_huge_page = 0;
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                atomic_long_inc(&mm->nr_ptes);
                pmd_populate(mm, pmd, new);
                new = NULL;
-       } else if (unlikely(pmd_trans_splitting(*pmd)))
-               wait_split_huge_page = 1;
+       }
        spin_unlock(ptl);
        if (new)
                pte_free(mm, new);
-       if (wait_split_huge_page)
-               wait_split_huge_page(vma->anon_vma, pmd);
        return 0;
 }
 
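The __pte_alloc() hunk above drops the wait_split_huge_page() step: with the reworked THP refcounting there is no transient "splitting" PMD state left to wait out, so the function reduces to the usual "allocate outside the lock, recheck under the lock, free on a lost race" pattern. Below is a minimal userspace sketch of that pattern only (plain C with pthreads; ensure_table(), table_lock and table are invented names, and the comments map them onto their kernel counterparts by analogy, not by API).

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static void *table;			/* stands in for the pmd entry */

/* Allocate outside the lock, publish under the lock, free on a lost race. */
static void *ensure_table(void *unused)
{
	void *new = calloc(1, 4096);		/* pte_alloc_one() analogue */

	if (!new)
		return NULL;			/* -ENOMEM in the kernel */

	pthread_mutex_lock(&table_lock);	/* pmd_lock() analogue */
	if (table == NULL) {			/* pmd_none(): nobody beat us to it */
		table = new;			/* pmd_populate() analogue */
		new = NULL;
	}
	pthread_mutex_unlock(&table_lock);

	if (new)				/* another thread won the race */
		free(new);			/* pte_free() analogue */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, ensure_table, NULL);
	pthread_create(&b, NULL, ensure_table, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("table %s\n", table ? "populated once" : "missing");
	return 0;
}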
@@ -613,8 +609,7 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
        if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
-       } else
-               VM_BUG_ON(pmd_trans_splitting(*pmd));
+       }
        spin_unlock(&init_mm.page_table_lock);
        if (new)
                pte_free_kernel(&init_mm, new);
@@ -870,7 +865,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        page = vm_normal_page(vma, addr, pte);
        if (page) {
                get_page(page);
-               page_dup_rmap(page);
+               page_dup_rmap(page, false);
                rss[mm_counter(page)]++;
        }
 
@@ -955,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
        src_pmd = pmd_offset(src_pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*src_pmd)) {
+               if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
                        int err;
                        VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
                        err = copy_huge_pmd(dst_mm, src_mm,
@@ -1118,7 +1113,7 @@ again:
                                        mark_page_accessed(page);
                        }
                        rss[mm_counter(page)]--;
-                       page_remove_rmap(page);
+                       page_remove_rmap(page, false);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        if (unlikely(!__tlb_remove_page(tlb, page))) {
@@ -1182,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (pmd_trans_huge(*pmd)) {
+               if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE) {
 #ifdef CONFIG_DEBUG_VM
                                if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -1193,7 +1188,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                                        BUG();
                                }
 #endif
-                               split_huge_page_pmd(vma, addr, pmd);
+                               split_huge_pmd(vma, pmd, addr);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
@@ -1506,7 +1501,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 EXPORT_SYMBOL(vm_insert_page);
 
 static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-                       unsigned long pfn, pgprot_t prot)
+                       pfn_t pfn, pgprot_t prot)
 {
        struct mm_struct *mm = vma->vm_mm;
        int retval;
@@ -1522,7 +1517,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                goto out_unlock;
 
        /* Ok, finally just insert the thing.. */
-       entry = pte_mkspecial(pfn_pte(pfn, prot));
+       if (pfn_t_devmap(pfn))
+               entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
+       else
+               entry = pte_mkspecial(pfn_t_pte(pfn, prot));
        set_pte_at(mm, addr, pte, entry);
        update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
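insert_pfn() now takes a pfn_t instead of a raw unsigned long so it can tell whether the frame belongs to a device mapping and emit a devmap PTE rather than a special one. The sketch below is a simplified, standalone model of the pfn_t idea (a page frame number with flag bits carried in the high bits of one word); the flag values and helper names only loosely mirror include/linux/pfn_t.h and are illustrative, not the kernel's exact definitions.

#include <stdint.h>
#include <stdio.h>

/* Simplified model: low bits hold the page frame number, high bits hold flags. */
typedef struct { uint64_t val; } pfn_t;

#define PFN_FLAG_DEV	(1ULL << 62)	/* frame is device memory (illustrative value) */
#define PFN_FLAG_MAP	(1ULL << 63)	/* frame has a page descriptor (illustrative value) */
#define PFN_FLAGS_MASK	(PFN_FLAG_DEV | PFN_FLAG_MAP)

static inline pfn_t make_pfn_t(uint64_t pfn, uint64_t flags)	/* __pfn_to_pfn_t() analogue */
{
	return (pfn_t){ .val = pfn | flags };
}

static inline int pfn_t_is_devmap(pfn_t p)			/* pfn_t_devmap() analogue */
{
	return (p.val & PFN_FLAGS_MASK) == (PFN_FLAG_DEV | PFN_FLAG_MAP);
}

static inline uint64_t pfn_t_raw(pfn_t p)			/* pfn_t_to_pfn() analogue */
{
	return p.val & ~PFN_FLAGS_MASK;
}

int main(void)
{
	pfn_t p = make_pfn_t(0x1234, PFN_FLAG_DEV | PFN_FLAG_MAP);

	printf("pfn=%#llx devmap=%d\n",
	       (unsigned long long)pfn_t_raw(p), pfn_t_is_devmap(p));
	return 0;
}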
@@ -1569,17 +1567,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
        if (addr < vma->vm_start || addr >= vma->vm_end)
                return -EFAULT;
-       if (track_pfn_insert(vma, &pgprot, pfn))
+       if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
                return -EINVAL;
 
-       ret = insert_pfn(vma, addr, pfn, pgprot);
+       ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
 
        return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-                       unsigned long pfn)
+                       pfn_t pfn)
 {
        BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
 
@@ -1593,10 +1591,10 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
         * without pte special, it would there be refcounted as a normal page.
         */
-       if (!HAVE_PTE_SPECIAL && pfn_valid(pfn)) {
+       if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
                struct page *page;
 
-               page = pfn_to_page(pfn);
+               page = pfn_t_to_page(pfn);
                return insert_page(vma, addr, page, vma->vm_page_prot);
        }
        return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
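vm_insert_pfn() keeps its unsigned long interface and wraps the value itself with __pfn_to_pfn_t(pfn, PFN_DEV), while vm_insert_mixed() now expects the caller to pass a pfn_t. A hypothetical v4.5-era driver fault handler would build the pfn_t at the call site roughly as below; the driver name, MYDEV_BASE_PFN and the pfn arithmetic are invented for illustration, and only the vm_insert_mixed() call shape comes from the hunks above.

#include <linux/mm.h>
#include <linux/pfn_t.h>

#define MYDEV_BASE_PFN	0x100000UL	/* invented placeholder */

/* Hypothetical driver fault handler sketch (v4.5-era .fault prototype assumed). */
static int mydev_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long pfn = MYDEV_BASE_PFN + vmf->pgoff;	/* invented pfn math */
	int rc;

	/* Callers used to pass the raw pfn; it is now tagged as device memory. */
	rc = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
			     __pfn_to_pfn_t(pfn, PFN_DEV));
	if (rc == -ENOMEM)
		return VM_FAULT_OOM;
	if (rc < 0 && rc != -EBUSY)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}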
@@ -2087,7 +2085,7 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                cow_user_page(new_page, old_page, address, vma);
        }
 
-       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
                goto oom_free_new;
 
        __SetPageUptodate(new_page);
@@ -2118,8 +2116,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                 * thread doing COW.
                 */
                ptep_clear_flush_notify(vma, address, page_table);
-               page_add_new_anon_rmap(new_page, vma, address);
-               mem_cgroup_commit_charge(new_page, memcg, false);
+               page_add_new_anon_rmap(new_page, vma, address, false);
+               mem_cgroup_commit_charge(new_page, memcg, false, false);
                lru_cache_add_active_or_unevictable(new_page, vma);
                /*
                 * We call the notify macro here because, when using secondary
@@ -2151,14 +2149,14 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                         * mapcount is visible. So transitively, TLBs to
                         * old page will be flushed before it can be reused.
                         */
-                       page_remove_rmap(old_page);
+                       page_remove_rmap(old_page, false);
                }
 
                /* Free the old page.. */
                new_page = old_page;
                page_copied = 1;
        } else {
-               mem_cgroup_cancel_charge(new_page, memcg);
+               mem_cgroup_cancel_charge(new_page, memcg, false);
        }
 
        if (new_page)
@@ -2173,7 +2171,8 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
                 */
                if (page_copied && (vma->vm_flags & VM_LOCKED)) {
                        lock_page(old_page);    /* LRU manipulation */
-                       munlock_vma_page(old_page);
+                       if (PageMlocked(old_page))
+                               munlock_vma_page(old_page);
                        unlock_page(old_page);
                }
                page_cache_release(old_page);
@@ -2533,7 +2532,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_page;
        }
 
-       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false)) {
                ret = VM_FAULT_OOM;
                goto out_page;
        }
@@ -2567,7 +2566,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
-               exclusive = 1;
+               exclusive = RMAP_EXCLUSIVE;
        }
        flush_icache_page(vma, page);
        if (pte_swp_soft_dirty(orig_pte))
@@ -2575,10 +2574,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        set_pte_at(mm, address, page_table, pte);
        if (page == swapcache) {
                do_page_add_anon_rmap(page, vma, address, exclusive);
-               mem_cgroup_commit_charge(page, memcg, true);
+               mem_cgroup_commit_charge(page, memcg, true, false);
        } else { /* ksm created a completely new copy */
-               page_add_new_anon_rmap(page, vma, address);
-               mem_cgroup_commit_charge(page, memcg, false);
+               page_add_new_anon_rmap(page, vma, address, false);
+               mem_cgroup_commit_charge(page, memcg, false, false);
                lru_cache_add_active_or_unevictable(page, vma);
        }
 
@@ -2613,7 +2612,7 @@ unlock:
 out:
        return ret;
 out_nomap:
-       mem_cgroup_cancel_charge(page, memcg);
+       mem_cgroup_cancel_charge(page, memcg, false);
        pte_unmap_unlock(page_table, ptl);
 out_page:
        unlock_page(page);
@@ -2707,7 +2706,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!page)
                goto oom;
 
-       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+       if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg, false))
                goto oom_free_page;
 
        /*
@@ -2728,15 +2727,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {
                pte_unmap_unlock(page_table, ptl);
-               mem_cgroup_cancel_charge(page, memcg);
+               mem_cgroup_cancel_charge(page, memcg, false);
                page_cache_release(page);
                return handle_userfault(vma, address, flags,
                                        VM_UFFD_MISSING);
        }
 
        inc_mm_counter_fast(mm, MM_ANONPAGES);
-       page_add_new_anon_rmap(page, vma, address);
-       mem_cgroup_commit_charge(page, memcg, false);
+       page_add_new_anon_rmap(page, vma, address, false);
+       mem_cgroup_commit_charge(page, memcg, false, false);
        lru_cache_add_active_or_unevictable(page, vma);
 setpte:
        set_pte_at(mm, address, page_table, entry);
@@ -2747,7 +2746,7 @@ unlock:
        pte_unmap_unlock(page_table, ptl);
        return 0;
 release:
-       mem_cgroup_cancel_charge(page, memcg);
+       mem_cgroup_cancel_charge(page, memcg, false);
        page_cache_release(page);
        goto unlock;
 oom_free_page:
@@ -2824,7 +2823,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        if (anon) {
                inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-               page_add_new_anon_rmap(page, vma, address);
+               page_add_new_anon_rmap(page, vma, address, false);
        } else {
                inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
                page_add_file_rmap(page);
@@ -3000,7 +2999,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!new_page)
                return VM_FAULT_OOM;
 
-       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
+       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) {
                page_cache_release(new_page);
                return VM_FAULT_OOM;
        }
@@ -3029,7 +3028,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                goto uncharge_out;
        }
        do_set_pte(vma, address, new_page, pte, true, true);
-       mem_cgroup_commit_charge(new_page, memcg, false);
+       mem_cgroup_commit_charge(new_page, memcg, false, false);
        lru_cache_add_active_or_unevictable(new_page, vma);
        pte_unmap_unlock(pte, ptl);
        if (fault_page) {
@@ -3044,7 +3043,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        }
        return ret;
 uncharge_out:
-       mem_cgroup_cancel_charge(new_page, memcg);
+       mem_cgroup_cancel_charge(new_page, memcg, false);
        page_cache_release(new_page);
        return ret;
 }
@@ -3096,7 +3095,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * pinned by vma->vm_file's reference.  We rely on unlock_page()'s
         * release semantics to prevent the compiler from undoing this copying.
         */
-       mapping = fault_page->mapping;
+       mapping = page_rmapping(fault_page);
        unlock_page(fault_page);
        if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
                /*
@@ -3198,6 +3197,12 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                return 0;
        }
 
+       /* TODO: handle PTE-mapped THP */
+       if (PageCompound(page)) {
+               pte_unmap_unlock(ptep, ptl);
+               return 0;
+       }
+
        /*
         * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
         * much anyway since they can be in shared cache state. This misses
@@ -3370,17 +3375,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                int ret;
 
                barrier();
-               if (pmd_trans_huge(orig_pmd)) {
+               if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
                        unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
-                       /*
-                        * If the pmd is splitting, return and retry the
-                        * the fault.  Alternative: wait until the split
-                        * is done, and goto retry.
-                        */
-                       if (pmd_trans_splitting(orig_pmd))
-                               return 0;
-
                        if (pmd_protnone(orig_pmd))
                                return do_huge_pmd_numa_page(mm, vma, address,
                                                             orig_pmd, pmd);
@@ -3407,7 +3404,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
            unlikely(__pte_alloc(mm, vma, pmd, address)))
                return VM_FAULT_OOM;
        /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd)))
+       if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                return 0;
        /*
         * A regular pmd is established and it can't morph into a huge pmd