mm/vmalloc: fallback to a single page allocator
[linux-2.6-microblaze.git] / mm / hugetlb.c
index 95918f4..5ba5a0d 100644 (file)
@@ -1588,15 +1588,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
        return NULL;
 }
 
-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
 {
        struct page *page_head = compound_head(page);
        pgoff_t index = page_index(page_head);
        unsigned long compound_idx;
 
-       if (!PageHuge(page_head))
-               return page_index(page);
-
        if (compound_order(page_head) >= MAX_ORDER)
                compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
        else
@@ -1793,7 +1790,7 @@ retry:
                        SetPageHWPoison(page);
                        ClearPageHWPoison(head);
                }
-               remove_hugetlb_page(h, page, false);
+               remove_hugetlb_page(h, head, false);
                h->max_huge_pages--;
                spin_unlock_irq(&hugetlb_lock);
                update_and_free_page(h, head);
@@ -2121,12 +2118,18 @@ out:
  * be restored when a newly allocated huge page must be freed.  It is
  * to be called after calling vma_needs_reservation to determine if a
  * reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed.  It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
  */
 enum vma_resv_mode {
        VMA_NEEDS_RESV,
        VMA_COMMIT_RESV,
        VMA_END_RESV,
        VMA_ADD_RESV,
+       VMA_DEL_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2173,21 @@ static long __vma_reservation_common(struct hstate *h,
                        ret = region_del(resv, idx, idx + 1);
                }
                break;
+       case VMA_DEL_RESV:
+               if (vma->vm_flags & VM_MAYSHARE) {
+                       region_abort(resv, idx, idx + 1, 1);
+                       ret = region_del(resv, idx, idx + 1);
+               } else {
+                       ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+                       /* region_add calls of range 1 should never fail. */
+                       VM_BUG_ON(ret < 0);
+               }
+               break;
        default:
                BUG();
        }
 
-       if (vma->vm_flags & VM_MAYSHARE)
+       if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
                return ret;
        /*
         * We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2235,39 @@ static long vma_add_reservation(struct hstate *h,
        return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
 }
 
+static long vma_del_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+{
+       return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
 /*
- * This routine is called to restore a reservation on error paths.  In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed.  If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page.  When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
- * reserve map.  Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ *    HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ *    not set.  However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count.  But, free_huge_page does not have enough context
+ * to adjust the reservation map.  This case deals primarily with private
+ * mappings.  Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page.  Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
  */
-static void restore_reserve_on_error(struct hstate *h,
-                       struct vm_area_struct *vma, unsigned long address,
-                       struct page *page)
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+                       unsigned long address, struct page *page)
 {
-       if (unlikely(HPageRestoreReserve(page))) {
-               long rc = vma_needs_reservation(h, vma, address);
+       long rc = vma_needs_reservation(h, vma, address);
 
-               if (unlikely(rc < 0)) {
+       if (HPageRestoreReserve(page)) {
+               if (unlikely(rc < 0))
                        /*
                         * Rare out of memory condition in reserve map
                         * manipulation.  Clear HPageRestoreReserve so that
@@ -2253,16 +2280,57 @@ static void restore_reserve_on_error(struct hstate *h,
                         * accounting of reserve counts.
                         */
                        ClearHPageRestoreReserve(page);
-               } else if (rc) {
-                       rc = vma_add_reservation(h, vma, address);
-                       if (unlikely(rc < 0))
+               else if (rc)
+                       (void)vma_add_reservation(h, vma, address);
+               else
+                       vma_end_reservation(h, vma, address);
+       } else {
+               if (!rc) {
+                       /*
+                        * This indicates there is an entry in the reserve map
+                        * added by alloc_huge_page.  We know it was added
+                        * before the alloc_huge_page call, otherwise
+                        * HPageRestoreReserve would be set on the page.
+                        * Remove the entry so that a subsequent allocation
+                        * does not consume a reservation.
+                        */
+                       rc = vma_del_reservation(h, vma, address);
+                       if (rc < 0)
                                /*
-                                * See above comment about rare out of
-                                * memory condition.
+                                * VERY rare out of memory condition.  Since
+                                * we can not delete the entry, set
+                                * HPageRestoreReserve so that the reserve
+                                * count will be incremented when the page
+                                * is freed.  This reserve will be consumed
+                                * on a subsequent allocation.
                                 */
-                               ClearHPageRestoreReserve(page);
+                               SetHPageRestoreReserve(page);
+               } else if (rc < 0) {
+                       /*
+                        * Rare out of memory condition from
+                        * vma_needs_reservation call.  Memory allocation is
+                        * only attempted if a new entry is needed.  Therefore,
+                        * this implies there is not an entry in the
+                        * reserve map.
+                        *
+                        * For shared mappings, no entry in the map indicates
+                        * no reservation.  We are done.
+                        */
+                       if (!(vma->vm_flags & VM_MAYSHARE))
+                               /*
+                                * For private mappings, no entry indicates
+                                * a reservation is present.  Since we can
+                                * not add an entry, set SetHPageRestoreReserve
+                                * on the page so reserve count will be
+                                * incremented when freed.  This reserve will
+                                * be consumed on a subsequent allocation.
+                                */
+                               SetHPageRestoreReserve(page);
                } else
-                       vma_end_reservation(h, vma, address);
+                       /*
+                        * No reservation present, do nothing
+                        */
+                        vma_end_reservation(h, vma, address);
        }
 }
 
@@ -4037,6 +4105,8 @@ again:
                                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                                entry = huge_ptep_get(src_pte);
                                if (!pte_same(src_pte_old, entry)) {
+                                       restore_reserve_on_error(h, vma, addr,
+                                                               new);
                                        put_page(new);
                                        /* dst_entry won't change as in child */
                                        goto again;
@@ -4889,10 +4959,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
                if (!page)
                        goto out;
        } else if (!*pagep) {
-               ret = -ENOMEM;
+               /* If a page already exists, then it's UFFDIO_COPY for
+                * a non-missing case. Return -EEXIST.
+                */
+               if (vm_shared &&
+                   hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+                       ret = -EEXIST;
+                       goto out;
+               }
+
                page = alloc_huge_page(dst_vma, dst_addr, 0);
-               if (IS_ERR(page))
+               if (IS_ERR(page)) {
+                       ret = -ENOMEM;
                        goto out;
+               }
 
                ret = copy_huge_page_from_user(page,
                                                (const void __user *) src_addr,
@@ -4996,6 +5076,7 @@ out_release_unlock:
        if (vm_shared || is_continue)
                unlock_page(page);
 out_release_nounlock:
+       restore_reserve_on_error(h, dst_vma, dst_addr, page);
        put_page(page);
        goto out;
 }
@@ -5847,6 +5928,21 @@ unlock:
        return ret;
 }
 
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+       int ret = 0;
+
+       *hugetlb = false;
+       spin_lock_irq(&hugetlb_lock);
+       if (PageHeadHuge(page)) {
+               *hugetlb = true;
+               if (HPageFreed(page) || HPageMigratable(page))
+                       ret = get_page_unless_zero(page);
+       }
+       spin_unlock_irq(&hugetlb_lock);
+       return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
        spin_lock_irq(&hugetlb_lock);