mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY
authorBen Widawsky <ben.widawsky@intel.com>
Thu, 2 Sep 2021 22:00:13 +0000 (15:00 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 16:58:17 +0000 (09:58 -0700)
Implement the missing huge page allocation functionality while obeying the
preferred node semantics.  This is similar to the implementation for
general page allocation, as it uses a fallback mechanism to try multiple
preferred nodes first, and then all other nodes.

To avoid adding too many "#ifdef CONFIG_NUMA" check, add a helper function
in mempolicy.h to check whether a mempolicy is MPOL_PREFERRED_MANY.

[akpm@linux-foundation.org: fix compiling issue when merging with other hugetlb patch]
[Thanks to 0day bot for catching the !CONFIG_NUMA compiling issue]
[mhocko@suse.com: suggest to remove the #ifdef CONFIG_NUMA check]
[ben.widawsky@intel.com: add helpers to avoid ifdefs]
Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@intel.com
Link: https://lkml.kernel.org/r/20210809024430.GA46432@shbuild999.sh.intel.com
[nathan@kernel.org: initialize page to NULL in alloc_buddy_huge_page_with_mpol()]
Link: https://lkml.kernel.org/r/20210810200632.3812797-1-nathan@kernel.org
Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@intel.com
Link: https://lkml.kernel.org/r/20210809024430.GA46432@shbuild999.sh.intel.com
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Co-developed-by: Feng Tang <feng.tang@intel.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mempolicy.h
mm/hugetlb.c

index 4ca025e..4091692 100644 (file)
@@ -186,6 +186,12 @@ extern void mpol_put_task_policy(struct task_struct *);
 
 extern bool numa_demotion_enabled;
 
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+       return  (pol->mode == MPOL_PREFERRED_MANY);
+}
+
+
 #else
 
 struct mempolicy {};
@@ -296,5 +302,11 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
 }
 
 #define numa_demotion_enabled  false
+
+static inline bool mpol_is_preferred_many(struct mempolicy *pol)
+{
+       return  false;
+}
+
 #endif /* CONFIG_NUMA */
 #endif
index 41a1778..95dc7b8 100644 (file)
@@ -1145,7 +1145,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
                                unsigned long address, int avoid_reserve,
                                long chg)
 {
-       struct page *page;
+       struct page *page = NULL;
        struct mempolicy *mpol;
        gfp_t gfp_mask;
        nodemask_t *nodemask;
@@ -1166,7 +1166,17 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
        gfp_mask = htlb_alloc_mask(h);
        nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-       page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+       if (mpol_is_preferred_many(mpol)) {
+               page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+               /* Fallback to all nodes if page==NULL */
+               nodemask = NULL;
+       }
+
+       if (!page)
+               page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
        if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
                SetHPageRestoreReserve(page);
                h->resv_huge_pages--;
@@ -2142,16 +2152,26 @@ static
 struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
                struct vm_area_struct *vma, unsigned long addr)
 {
-       struct page *page;
+       struct page *page = NULL;
        struct mempolicy *mpol;
        gfp_t gfp_mask = htlb_alloc_mask(h);
        int nid;
        nodemask_t *nodemask;
 
        nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-       page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
-       mpol_cond_put(mpol);
+       if (mpol_is_preferred_many(mpol)) {
+               gfp_t gfp = gfp_mask | __GFP_NOWARN;
+
+               gfp &=  ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+               page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false);
 
+               /* Fallback to all nodes if page==NULL */
+               nodemask = NULL;
+       }
+
+       if (!page)
+               page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false);
+       mpol_cond_put(mpol);
        return page;
 }