mm/hugetlb: add missing annotation for gather_surplus_pages()

[linux-2.6-microblaze.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index c7835e9..f5fb53f 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -220,31 +220,6 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
         return subpool_inode(file_inode(vma->vm_file));
  }
  
-/*
- * Region tracking -- allows tracking of reservations and instantiated pages
- *                    across the pages in a mapping.
- *
- * The region data structures are embedded into a resv_map and protected
- * by a resv_map's lock.  The set of regions within the resv_map represent
- * reservations for huge pages, or huge pages that have already been
- * instantiated within the map.  The from and to elements are huge page
- * indicies into the associated mapping.  from indicates the starting index
- * of the region.  to represents the first index past the end of  the region.
- *
- * For example, a file region structure with from == 0 and to == 4 represents
- * four huge pages in a mapping.  It is important to note that the to element
- * represents the first element past the end of the region. This is used in
- * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
- *
- * Interval notation of the form [from, to) will be used to indicate that
- * the endpoint from is inclusive and to is exclusive.
- */
-struct file_region {
-       struct list_head link;
-       long from;
-       long to;
-};
-
  /* Helper that removes a struct file_region from the resv_map cache and returns
   * it for use.
   */
@@ -266,6 +241,83 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to)
         return nrg;
  }
  
+static void copy_hugetlb_cgroup_uncharge_info(struct file_region *nrg,
+                                             struct file_region *rg)
+{      /* Copy rg's reservation_counter and css pointers into nrg. */
+#ifdef CONFIG_CGROUP_HUGETLB
+       nrg->reservation_counter = rg->reservation_counter;
+       nrg->css = rg->css;
+       if (rg->css)
+               css_get(rg->css);       /* only when rg actually has a css */
+#endif
+}      /* Empty body when CONFIG_CGROUP_HUGETLB is not defined. */
+
+/* Record h_cg's uncharge info in nrg, or clear it when h_cg is NULL. */
+static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
+                                               struct hstate *h,
+                                               struct resv_map *resv,
+                                               struct file_region *nrg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+       if (h_cg) {
+               nrg->reservation_counter =
+                       &h_cg->rsvd_hugepage[hstate_index(h)];
+               nrg->css = &h_cg->css;  /* NOTE(review): no css_get() here, unlike copy_hugetlb_cgroup_uncharge_info() - confirm */
+               if (!resv->pages_per_hpage)     /* set once, while still zero */
+                       resv->pages_per_hpage = pages_per_huge_page(h);
+               /* pages_per_hpage should be the same for all entries in
+                * a resv_map.
+                */
+               VM_BUG_ON(resv->pages_per_hpage != pages_per_huge_page(h));
+       } else {        /* no cgroup given: nrg carries no uncharge info */
+               nrg->reservation_counter = NULL;
+               nrg->css = NULL;
+       }
+#endif
+}
+
+static bool has_same_uncharge_info(struct file_region *rg,
+                                  struct file_region *org)
+{      /* True when rg and org carry identical cgroup uncharge info. */
+#ifdef CONFIG_CGROUP_HUGETLB
+       return rg && org &&     /* a NULL region never matches */
+              rg->reservation_counter == org->reservation_counter &&
+              rg->css == org->css;
+
+#else
+       return true;    /* nothing to compare without CONFIG_CGROUP_HUGETLB */
+#endif
+}
+
+static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
+{      /* Merge rg into an abutting list neighbour with the same uncharge info. */
+       struct file_region *nrg = NULL, *prg = NULL;
+
+       prg = list_prev_entry(rg, link);
+       if (&prg->link != &resv->regions && prg->to == rg->from &&
+           has_same_uncharge_info(prg, rg)) {
+               prg->to = rg->to;       /* predecessor now covers rg's range */
+
+               list_del(&rg->link);
+               kfree(rg);      /* rg is gone: callers must not use it afterwards */
+
+               coalesce_file_region(resv, prg);        /* retry from the merged entry */
+               return;
+       }
+
+       nrg = list_next_entry(rg, link);
+       if (&nrg->link != &resv->regions && nrg->from == rg->to &&
+           has_same_uncharge_info(nrg, rg)) {
+               nrg->from = rg->from;   /* successor now covers rg's range */
+
+               list_del(&rg->link);
+               kfree(rg);      /* rg is gone: callers must not use it afterwards */
+
+               coalesce_file_region(resv, nrg);        /* retry from the merged entry */
+               return;
+       }
+}
+
  /* Must be called with resv->lock held. Calling this with count_only == true
   * will count the number of pages to be added but will not modify the linked
   * list. If regions_needed != NULL and count_only == true, then regions_needed
@@ -273,7 +325,9 @@ get_file_region_entry_from_cache(struct resv_map *resv, long from, long to)
   * add the regions for this range.
   */
  static long add_reservation_in_range(struct resv_map *resv, long f, long t,
-                                    long *regions_needed, bool count_only)
+                                    struct hugetlb_cgroup *h_cg,
+                                    struct hstate *h, long *regions_needed,
+                                    bool count_only)
  {
         long add = 0;
         struct list_head *head = &resv->regions;
@@ -312,7 +366,10 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
                         if (!count_only) {
                                 nrg = get_file_region_entry_from_cache(
                                         resv, last_accounted_offset, rg->from);
+                               record_hugetlb_cgroup_uncharge_info(h_cg, h,
+                                                                   resv, nrg);
                                 list_add(&nrg->link, rg->link.prev);
+                               coalesce_file_region(resv, nrg);
                         } else if (regions_needed)
                                 *regions_needed += 1;
                 }
@@ -328,7 +385,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
                 if (!count_only) {
                         nrg = get_file_region_entry_from_cache(
                                 resv, last_accounted_offset, t);
+                       record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
                         list_add(&nrg->link, rg->link.prev);
+                       coalesce_file_region(resv, nrg);
                 } else if (regions_needed)
                         *regions_needed += 1;
         }
@@ -416,7 +475,8 @@ out_of_memory:
   * 1 page will only require at most 1 entry.
   */
  static long region_add(struct resv_map *resv, long f, long t,
-                      long in_regions_needed)
+                      long in_regions_needed, struct hstate *h,
+                      struct hugetlb_cgroup *h_cg)
  {
         long add = 0, actual_regions_needed = 0;
  
@@ -424,7 +484,8 @@ static long region_add(struct resv_map *resv, long f, long t,
  retry:
  
         /* Count how many regions are actually needed to execute this add. */
-       add_reservation_in_range(resv, f, t, &actual_regions_needed, true);
+       add_reservation_in_range(resv, f, t, NULL, NULL, &actual_regions_needed,
+                                true);
  
         /*
          * Check for sufficient descriptors in the cache to accommodate
@@ -452,7 +513,7 @@ retry:
                 goto retry;
         }
  
-       add = add_reservation_in_range(resv, f, t, NULL, false);
+       add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false);
  
         resv->adds_in_progress -= in_regions_needed;
  
@@ -489,7 +550,8 @@ static long region_chg(struct resv_map *resv, long f, long t,
         spin_lock(&resv->lock);
  
         /* Count how many hugepages in this range are NOT respresented. */
-       chg = add_reservation_in_range(resv, f, t, out_regions_needed, true);
+       chg = add_reservation_in_range(resv, f, t, NULL, NULL,
+                                      out_regions_needed, true);
  
         if (*out_regions_needed == 0)
                 *out_regions_needed = 1;
@@ -589,11 +651,17 @@ retry:
                         /* New entry for end of split region */
                         nrg->from = t;
                         nrg->to = rg->to;
+
+                       copy_hugetlb_cgroup_uncharge_info(nrg, rg);
+
                         INIT_LIST_HEAD(&nrg->link);
  
                         /* Original entry is trimmed */
                         rg->to = f;
  
+                       /* Uncharge only the hole [f, t) cut out of rg, not the kept tail. */
+                       hugetlb_cgroup_uncharge_file_region(
+                               resv, rg, t - f);
                         list_add(&nrg->link, &rg->link);
                         nrg = NULL;
                         break;
@@ -601,6 +669,8 @@ retry:
  
                 if (f <= rg->from && t >= rg->to) { /* Remove entire region */
                         del += rg->to - rg->from;
+                       hugetlb_cgroup_uncharge_file_region(resv, rg,
+                                                           rg->to - rg->from);
                         list_del(&rg->link);
                         kfree(rg);
                         continue;
@@ -609,9 +679,15 @@ retry:
                 if (f <= rg->from) {    /* Trim beginning of region */
                         del += t - rg->from;
                         rg->from = t;
+
+                       hugetlb_cgroup_uncharge_file_region(resv, rg,
+                                                           t - rg->from);
                 } else {                /* Trim end of region */
                         del += rg->to - f;
                         rg->to = f;
+
+                       hugetlb_cgroup_uncharge_file_region(resv, rg,
+                                                           rg->to - f);
                 }
         }
  
@@ -1313,6 +1389,8 @@ static void __free_huge_page(struct page *page)
         clear_page_huge_active(page);
         hugetlb_cgroup_uncharge_page(hstate_index(h),
                                      pages_per_huge_page(h), page);
+       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
+                                         pages_per_huge_page(h), page);
         if (restore_reserve)
                 h->resv_huge_pages++;
  
@@ -1450,7 +1528,7 @@ int PageHeadHuge(struct page *page_head)
         if (!PageHead(page_head))
                 return 0;
  
-       return get_compound_page_dtor(page_head) == free_huge_page;
+       return page_head[1].compound_dtor == HUGETLB_PAGE_DTOR;
  }
  
  /*
@@ -1932,6 +2010,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
   * of size 'delta'.
   */
  static int gather_surplus_pages(struct hstate *h, int delta)
+       __must_hold(&hugetlb_lock)
  {
         struct list_head surplus_list;
         struct page *page, *tmp;
@@ -2124,7 +2203,7 @@ static long __vma_reservation_common(struct hstate *h,
                 VM_BUG_ON(dummy_out_regions_needed != 1);
                 break;
         case VMA_COMMIT_RESV:
-               ret = region_add(resv, idx, idx + 1, 1);
+               ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
                 /* region_add calls of range 1 should never fail. */
                 VM_BUG_ON(ret < 0);
                 break;
@@ -2134,7 +2213,7 @@ static long __vma_reservation_common(struct hstate *h,
                 break;
         case VMA_ADD_RESV:
                 if (vma->vm_flags & VM_MAYSHARE) {
-                       ret = region_add(resv, idx, idx + 1, 1);
+                       ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
                         /* region_add calls of range 1 should never fail. */
                         VM_BUG_ON(ret < 0);
                 } else {
@@ -2249,6 +2328,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
         long gbl_chg;
         int ret, idx;
         struct hugetlb_cgroup *h_cg;
+       bool deferred_reserve;
  
         idx = hstate_index(h);
         /*
@@ -2286,9 +2366,19 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                         gbl_chg = 1;
         }
  
+       /* If this allocation is not consuming a reservation, charge it now.
+        */
+       deferred_reserve = map_chg || avoid_reserve || !vma_resv_map(vma);
+       if (deferred_reserve) {
+               ret = hugetlb_cgroup_charge_cgroup_rsvd(
+                       idx, pages_per_huge_page(h), &h_cg);
+               if (ret)
+                       goto out_subpool_put;
+       }
+
         ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
         if (ret)
-               goto out_subpool_put;
+               goto out_uncharge_cgroup_reservation;
  
         spin_lock(&hugetlb_lock);
         /*
@@ -2311,6 +2401,14 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                 /* Fall through */
         }
         hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
+       /* If allocation is not consuming a reservation, also store the
+        * hugetlb_cgroup pointer on the page.
+        */
+       if (deferred_reserve) {
+               hugetlb_cgroup_commit_charge_rsvd(idx, pages_per_huge_page(h),
+                                                 h_cg, page);
+       }
+
         spin_unlock(&hugetlb_lock);
  
         set_page_private(page, (unsigned long)spool);
@@ -2335,6 +2433,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
  
  out_uncharge_cgroup:
         hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
+out_uncharge_cgroup_reservation:
+       if (deferred_reserve)
+               hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h),
+                                                   h_cg);
  out_subpool_put:
         if (map_chg || avoid_reserve)
                 hugepage_subpool_put_pages(spool, 1);
@@ -4830,7 +4932,7 @@ int hugetlb_reserve_pages(struct inode *inode,
         struct hstate *h = hstate_inode(inode);
         struct hugepage_subpool *spool = subpool_inode(inode);
         struct resv_map *resv_map;
-       struct hugetlb_cgroup *h_cg;
+       struct hugetlb_cgroup *h_cg = NULL;
         long gbl_reserve, regions_needed = 0;
  
         /* This should never happen */
@@ -4871,19 +4973,6 @@ int hugetlb_reserve_pages(struct inode *inode,
  
                 chg = to - from;
  
-               if (hugetlb_cgroup_charge_cgroup_rsvd(
-                           hstate_index(h), chg * pages_per_huge_page(h),
-                           &h_cg)) {
-                       kref_put(&resv_map->refs, resv_map_release);
-                       return -ENOMEM;
-               }
-
-               /*
-                * Since this branch handles private mappings, we attach the
-                * counter to uncharge for this reservation off resv_map.
-                */
-               resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
-
                 set_vma_resv_map(vma, resv_map);
                 set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
         }
@@ -4893,6 +4982,21 @@ int hugetlb_reserve_pages(struct inode *inode,
                 goto out_err;
         }
  
+       ret = hugetlb_cgroup_charge_cgroup_rsvd(
+               hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
+
+       if (ret < 0) {
+               ret = -ENOMEM;
+               goto out_err;
+       }
+
+       if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
+               /* For private mappings, the hugetlb_cgroup uncharge info hangs
+                * off the resv_map.
+                */
+               resv_map_set_hugetlb_cgroup_uncharge_info(resv_map, h_cg, h);
+       }
+
         /*
          * There must be enough pages in the subpool for the mapping. If
          * the subpool has a minimum size, there may be some global
@@ -4901,7 +5005,7 @@ int hugetlb_reserve_pages(struct inode *inode,
         gbl_reserve = hugepage_subpool_get_pages(spool, chg);
         if (gbl_reserve < 0) {
                 ret = -ENOSPC;
-               goto out_err;
+               goto out_uncharge_cgroup;
         }
  
         /*
@@ -4910,9 +5014,7 @@ int hugetlb_reserve_pages(struct inode *inode,
          */
         ret = hugetlb_acct_memory(h, gbl_reserve);
         if (ret < 0) {
-               /* put back original number of pages, chg */
-               (void)hugepage_subpool_put_pages(spool, chg);
-               goto out_err;
+               goto out_put_pages;
         }
  
         /*
@@ -4927,13 +5029,11 @@ int hugetlb_reserve_pages(struct inode *inode,
          * else has to be done for private mappings here
          */
         if (!vma || vma->vm_flags & VM_MAYSHARE) {
-               add = region_add(resv_map, from, to, regions_needed);
+               add = region_add(resv_map, from, to, regions_needed, h, h_cg);
  
                 if (unlikely(add < 0)) {
                         hugetlb_acct_memory(h, -gbl_reserve);
-                       /* put back original number of pages, chg */
-                       (void)hugepage_subpool_put_pages(spool, chg);
-                       goto out_err;
+                       goto out_put_pages;
                 } else if (unlikely(chg > add)) {
                         /*
                          * pages in this range were added to the reserve
@@ -4944,12 +5044,22 @@ int hugetlb_reserve_pages(struct inode *inode,
                          */
                         long rsv_adjust;
  
+                       hugetlb_cgroup_uncharge_cgroup_rsvd(
+                               hstate_index(h),
+                               (chg - add) * pages_per_huge_page(h), h_cg);
+
                         rsv_adjust = hugepage_subpool_put_pages(spool,
                                                                 chg - add);
                         hugetlb_acct_memory(h, -rsv_adjust);
                 }
         }
         return 0;
+out_put_pages:
+       /* put back original number of pages, chg */
+       (void)hugepage_subpool_put_pages(spool, chg);
+out_uncharge_cgroup:
+       hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
+                                           chg * pages_per_huge_page(h), h_cg);
  out_err:
         if (!vma || vma->vm_flags & VM_MAYSHARE)
                 /* Only call region_abort if the region_chg succeeded but the
@@ -5047,7 +5157,7 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
  void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
                                 unsigned long *start, unsigned long *end)
  {
-       unsigned long check_addr = *start;
+       unsigned long check_addr;
  
         if (!(vma->vm_flags & VM_MAYSHARE))
                 return;