mm/hugetlb: add mempolicy check in the reservation routine

[linux-2.6-microblaze.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 590111e..dffafb5 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,6 +31,7 @@
  #include <linux/cma.h>
  
  #include <asm/page.h>
+#include <asm/pgalloc.h>
  #include <asm/tlb.h>
  
  #include <linux/io.h>
@@ -3457,13 +3458,21 @@ static int __init default_hugepagesz_setup(char *s)
  }
  __setup("default_hugepagesz=", default_hugepagesz_setup);
  
-static unsigned int cpuset_mems_nr(unsigned int *array)
+static unsigned int allowed_mems_nr(struct hstate *h)
  {
         int node;
         unsigned int nr = 0;
+       nodemask_t *mpol_allowed;
+       unsigned int *array = h->free_huge_pages_node;
+       gfp_t gfp_mask = htlb_alloc_mask(h);
+
+       mpol_allowed = policy_nodemask_current(gfp_mask);
  
-       for_each_node_mask(node, cpuset_current_mems_allowed)
-               nr += array[node];
+       for_each_node_mask(node, cpuset_current_mems_allowed) {
+               if (!mpol_allowed ||
+                   (mpol_allowed && node_isset(node, *mpol_allowed)))
+                       nr += array[node];
+       }
  
         return nr;
  }
@@ -3642,12 +3651,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
          * we fall back to check against current free page availability as
          * a best attempt and hopefully to minimize the impact of changing
          * semantics that cpuset has.
+        *
+        * Apart from cpuset, the memory policy mechanism also determines
+        * from which nodes the kernel will allocate memory on a NUMA
+        * system. So, as with cpuset, we should also take the memory policy
+        * of the current task into account; the same caveats described
+        * above apply.
          */
         if (delta > 0) {
                 if (gather_surplus_pages(h, delta) < 0)
                         goto out;
  
-               if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+               if (delta > allowed_mems_nr(h)) {
                         return_unused_surplus_pages(h, delta);
                         goto out;
                 }
@@ -5313,25 +5328,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
  void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
                                 unsigned long *start, unsigned long *end)
  {
-       unsigned long check_addr;
+       unsigned long a_start, a_end;
  
         if (!(vma->vm_flags & VM_MAYSHARE))
                 return;
  
-       for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
-               unsigned long a_start = check_addr & PUD_MASK;
-               unsigned long a_end = a_start + PUD_SIZE;
+       /* Extend the range to be PUD aligned for a worst case scenario */
+       a_start = ALIGN_DOWN(*start, PUD_SIZE);
+       a_end = ALIGN(*end, PUD_SIZE);
  
-               /*
-                * If sharing is possible, adjust start/end if necessary.
-                */
-               if (range_in_vma(vma, a_start, a_end)) {
-                       if (a_start < *start)
-                               *start = a_start;
-                       if (a_end > *end)
-                               *end = a_end;
-               }
-       }
+       /*
+        * Intersect the range with the vma range, since pmd sharing never
+        * extends across vma boundaries.
+        */
+       *start = max(vma->vm_start, a_start);
+       *end = min(vma->vm_end, a_end);
  }
  
  /*