mm/hugetlb: dedup the file_region add path, break earlier in add_reservation_in_range(), and do early COW when the page is pinned on the src mm
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8fb42c6..5b1ab1f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
        }
 }
 
+static inline long
+hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
+                    long to, struct hstate *h, struct hugetlb_cgroup *cg,
+                    long *regions_needed)
+{
+       struct file_region *nrg;
+
+       if (!regions_needed) {
+               nrg = get_file_region_entry_from_cache(map, from, to);
+               record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
+               list_add(&nrg->link, rg->link.prev);
+               coalesce_file_region(map, nrg);
+       } else
+               *regions_needed += 1;
+
+       return to - from;
+}
+
 /*
  * Must be called with resv->lock held.
  *
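Note on the new helper: hugetlb_resv_map_add() folds together the two copies of "either insert a file_region and coalesce it, or just count that one would be needed" that the following hunks remove. coalesce_file_region(), whose tail is shown above, merges a freshly inserted region with neighbours that abut it and carry the same cgroup accounting info. A rough userspace sketch of the insert-plus-coalesce step, using an array instead of the kernel's linked list and ignoring the cgroup check (hypothetical code, not the kernel implementation):

    #include <stdio.h>

    #define MAX_REGIONS 16
    struct region { long from, to; };       /* sorted, disjoint [from, to) */

    static int nr;
    static struct region regions[MAX_REGIONS];

    /* Insert [from, to) at index pos, then merge touching neighbours. */
    static void insert_and_coalesce(int pos, long from, long to)
    {
            for (int i = nr; i > pos; i--)
                    regions[i] = regions[i - 1];
            regions[pos] = (struct region){ from, to };
            nr++;

            if (pos + 1 < nr && regions[pos].to == regions[pos + 1].from) {
                    regions[pos].to = regions[pos + 1].to;  /* eat successor */
                    for (int i = pos + 1; i < nr - 1; i++)
                            regions[i] = regions[i + 1];
                    nr--;
            }
            if (pos > 0 && regions[pos - 1].to == regions[pos].from) {
                    regions[pos - 1].to = regions[pos].to;  /* eat predecessor */
                    for (int i = pos; i < nr - 1; i++)
                            regions[i] = regions[i + 1];
                    nr--;
            }
    }

    int main(void)
    {
            insert_and_coalesce(0, 0, 2);
            insert_and_coalesce(1, 4, 6);
            insert_and_coalesce(1, 2, 4);   /* bridges both -> single [0, 6) */
            printf("%d region(s): [%ld, %ld)\n", nr, regions[0].from, regions[0].to);
            return 0;
    }
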
@@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
        long add = 0;
        struct list_head *head = &resv->regions;
        long last_accounted_offset = f;
-       struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+       struct file_region *rg = NULL, *trg = NULL;
 
        if (regions_needed)
                *regions_needed = 0;
@@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
                /* When we find a region that starts beyond our range, we've
                 * finished.
                 */
-               if (rg->from > t)
+               if (rg->from >= t)
                        break;
 
                /* Add an entry for last_accounted_offset -> rg->from, and
                 * update last_accounted_offset.
                 */
-               if (rg->from > last_accounted_offset) {
-                       add += rg->from - last_accounted_offset;
-                       if (!regions_needed) {
-                               nrg = get_file_region_entry_from_cache(
-                                       resv, last_accounted_offset, rg->from);
-                               record_hugetlb_cgroup_uncharge_info(h_cg, h,
-                                                                   resv, nrg);
-                               list_add(&nrg->link, rg->link.prev);
-                               coalesce_file_region(resv, nrg);
-                       } else
-                               *regions_needed += 1;
-               }
+               if (rg->from > last_accounted_offset)
+                       add += hugetlb_resv_map_add(resv, rg,
+                                                   last_accounted_offset,
+                                                   rg->from, h, h_cg,
+                                                   regions_needed);
 
                last_accounted_offset = rg->to;
        }
@@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
        /* Handle the case where our range extends beyond
         * last_accounted_offset.
         */
-       if (last_accounted_offset < t) {
-               add += t - last_accounted_offset;
-               if (!regions_needed) {
-                       nrg = get_file_region_entry_from_cache(
-                               resv, last_accounted_offset, t);
-                       record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
-                       list_add(&nrg->link, rg->link.prev);
-                       coalesce_file_region(resv, nrg);
-               } else
-                       *regions_needed += 1;
-       }
+       if (last_accounted_offset < t)
+               add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
+                                           t, h, h_cg, regions_needed);
 
        VM_BUG_ON(add < 0);
        return add;
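With the two hunks above, add_reservation_in_range() reduces to pure interval arithmetic: walk the sorted region list, charge every gap inside [f, t) via hugetlb_resv_map_add(), and let that helper either insert for real or merely bump *regions_needed. The `rg->from >= t` change is behavior-preserving: a region starting exactly at t contributes nothing inside [f, t), so breaking there saves iterations and lets the post-loop tail account [last_accounted_offset, t) instead. A minimal userspace model of the accounting walk, counting only (the dual insert-or-count mode is elided; hypothetical code, not the kernel API):

    #include <stdio.h>

    struct region { long from, to; };       /* sorted, disjoint [from, to) */

    /* Count pages of [f, t) not covered by any region. */
    static long uncovered(const struct region *rg, int nr, long f, long t)
    {
            long last = f, add = 0;

            for (int i = 0; i < nr; i++) {
                    if (rg[i].to <= last)   /* wholly before the window */
                            continue;
                    if (rg[i].from >= t)    /* the new, earlier break */
                            break;
                    if (rg[i].from > last)
                            add += rg[i].from - last;
                    last = rg[i].to;
            }
            if (last < t)                   /* tail beyond the last region */
                    add += t - last;
            return add;
    }

    int main(void)
    {
            struct region rg[] = { { 2, 4 }, { 6, 8 }, { 10, 12 } };

            /* Gaps in [0, 10): [0,2) + [4,6) + [8,10) = 6 pages; {10,12}
             * starts exactly at t and is skipped by the >= break. */
            printf("%ld\n", uncovered(rg, 3, 0, 10));
            return 0;
    }
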
@@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
                return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+                    struct page *new_page)
+{
+       __SetPageUptodate(new_page);
+       set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+       hugepage_add_new_anon_rmap(new_page, vma, addr);
+       hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+       ClearHPageRestoreReserve(new_page);
+       SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
 {
        pte_t *src_pte, *dst_pte, entry, dst_entry;
        struct page *ptepage;
        unsigned long addr;
-       int cow;
+       bool cow = is_cow_mapping(vma->vm_flags);
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
        int ret = 0;
 
-       cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-
        if (cow) {
                mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
                                        vma->vm_start,
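The open-coded VM_SHARED/VM_MAYWRITE test removed above is replaced by a named helper with identical semantics; as of this series the helper lives in include/linux/mm.h (quoted for reference) and reads:

    static inline bool is_cow_mapping(vm_flags_t flags)
    {
            return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
    }

i.e. a private mapping that could become writable, which is exactly the removed expression.
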
@@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                entry = huge_ptep_get(src_pte);
                dst_entry = huge_ptep_get(dst_pte);
+again:
                if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
                        /*
                         * Skip if src entry none.  Also, skip in the
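The next hunk keys off page_needs_cow_for_dma(), a helper introduced earlier in the same series; quoted here for reference (approximately, as of the v5.12 timeframe):

    static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
                                              struct page *page)
    {
            if (!is_cow_mapping(vma->vm_flags))
                    return false;
            if (!atomic_read(&vma->vm_mm->has_pinned))
                    return false;
            return page_maybe_dma_pinned(page);
    }

Only pages that are (a) in a COW mapping, (b) in an mm that has ever pinned pages, and (c) possibly DMA-pinned take the new early-copy path; everything else keeps the cheap share-and-write-protect behaviour.
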
@@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        }
                        set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
                } else {
+                       entry = huge_ptep_get(src_pte);
+                       ptepage = pte_page(entry);
+                       get_page(ptepage);
+
+                       /*
+                        * This is a rare case where we see pinned hugetlb
+                        * pages while they're prone to COW.  We need to do the
+                        * COW earlier during fork.
+                        *
+                        * When pre-allocating the page or copying data, we
+                        * need to be without the pgtable locks since we could
+                        * sleep during the process.
+                        */
+                       if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+                               pte_t src_pte_old = entry;
+                               struct page *new;
+
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               /* Do not use reserve as it's private owned */
+                               new = alloc_huge_page(vma, addr, 1);
+                               if (IS_ERR(new)) {
+                                       put_page(ptepage);
+                                       ret = PTR_ERR(new);
+                                       break;
+                               }
+                               copy_user_huge_page(new, ptepage, addr, vma,
+                                                   npages);
+                               put_page(ptepage);
+
+                               /* Install the new huge page if src pte stable */
+                               dst_ptl = huge_pte_lock(h, dst, dst_pte);
+                               src_ptl = huge_pte_lockptr(h, src, src_pte);
+                               spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+                               entry = huge_ptep_get(src_pte);
+                               if (!pte_same(src_pte_old, entry)) {
+                                       put_page(new);
+                                       /* dst_entry won't change as in child */
+                                       goto again;
+                               }
+                               hugetlb_install_page(vma, dst_pte, addr, new);
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               continue;
+                       }
+
                        if (cow) {
                                /*
                                 * No need to notify as we are downgrading page
@@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                                 */
                                huge_ptep_set_wrprotect(src, addr, src_pte);
                        }
-                       entry = huge_ptep_get(src_pte);
-                       ptepage = pte_page(entry);
-                       get_page(ptepage);
+
                        page_dup_rmap(ptepage, true);
                        set_huge_pte_at(dst, addr, dst_pte, entry);
-                       hugetlb_count_add(pages_per_huge_page(h), dst);
+                       hugetlb_count_add(npages, dst);
                }
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
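
The core of the early-COW path is the unlock/allocate/copy/relock/revalidate dance: both page-table locks are dropped so alloc_huge_page() and copy_user_huge_page() may sleep, and after relocking, pte_same() confirms the source PTE did not change underneath us; otherwise the new page is dropped and the copy retried via the `again:` label. A compact userspace analogue of the pattern (pthreads, hypothetical names; the kernel revalidates with pte_same() rather than a plain compare):

    #include <pthread.h>
    #include <stdlib.h>

    struct slot {
            pthread_mutex_t lock;
            long value;                     /* stands in for the source PTE */
    };

    /* Copy src->value into a fresh allocation without holding the lock
     * across the (possibly sleeping) allocation; retry if it changed. */
    static long *copy_slot(struct slot *src)
    {
            long snap, *copy;

            pthread_mutex_lock(&src->lock);
    again:
            snap = src->value;              /* like huge_ptep_get()           */
            pthread_mutex_unlock(&src->lock);

            copy = malloc(sizeof(*copy));   /* like alloc_huge_page(): sleeps */
            if (!copy)
                    return NULL;
            *copy = snap;                   /* like copy_user_huge_page()     */

            pthread_mutex_lock(&src->lock);
            if (src->value != snap) {       /* like !pte_same(): lost a race  */
                    free(copy);
                    goto again;
            }
            pthread_mutex_unlock(&src->lock);
            return copy;
    }

    int main(void)
    {
            struct slot s = { PTHREAD_MUTEX_INITIALIZER, 42 };
            long *c = copy_slot(&s);
            return c && *c == 42 ? 0 : 1;
    }

In the kernel version the destination lock is dropped and retaken too, in the proper nesting order, and the retry can trust the destination entry to still be none, since the child mm has no other users during fork; that is what the "dst_entry won't change as in child" comment refers to.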