net: hns3: add query basic info support for VF

[linux-2.6-microblaze.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index e2f19bf..aaa1655 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,7 +72,6 @@
  #include <linux/padata.h>
  #include <linux/khugepaged.h>
  #include <linux/buffer_head.h>
-
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
@@ -108,6 +107,17 @@ typedef int __bitwise fpi_t;
   */
  #define FPI_TO_TAIL            ((__force fpi_t)BIT(1))
  
+/*
+ * Don't poison memory with KASAN (only for the tag-based modes).
+ * During boot, all non-reserved memblock memory is exposed to page_alloc.
+ * Poisoning all that memory lengthens boot time, especially on systems with
+ * a large amount of RAM. This flag is used to skip that poisoning.
+ * This is only done for the tag-based KASAN modes, as those are able to
+ * detect memory corruptions with the memory tags assigned by default.
+ * All memory allocated normally after boot gets poisoned as usual.
+ */
+#define FPI_SKIP_KASAN_POISON  ((__force fpi_t)BIT(2))
+
  /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
  static DEFINE_MUTEX(pcp_batch_high_lock);
  #define MIN_PERCPU_PAGELIST_FRACTION   (8)
@@ -384,10 +394,15 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
   * on-demand allocation and then freed again before the deferred pages
   * initialization is done, but this is not likely to happen.
   */
-static inline void kasan_free_nondeferred_pages(struct page *page, int order)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+                                               bool init, fpi_t fpi_flags)
  {
-       if (!static_branch_unlikely(&deferred_pages))
-               kasan_free_pages(page, order);
+       if (static_branch_unlikely(&deferred_pages))
+               return;
+       if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+                       (fpi_flags & FPI_SKIP_KASAN_POISON))
+               return;
+       kasan_free_pages(page, order, init);
  }
  
  /* Returns true if the struct page for the pfn is uninitialised */
@@ -438,7 +453,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
         return false;
  }
  #else
-#define kasan_free_nondeferred_pages(p, o)     kasan_free_pages(p, o)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+                                               bool init, fpi_t fpi_flags)
+{
+       if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+                       (fpi_flags & FPI_SKIP_KASAN_POISON))
+               return;
+       kasan_free_pages(page, order, init);
+}
  
  static inline bool early_page_uninitialised(unsigned long pfn)
  {
@@ -764,32 +786,36 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
   */
  void init_mem_debugging_and_hardening(void)
  {
+       bool page_poisoning_requested = false;
+
+#ifdef CONFIG_PAGE_POISONING
+       /*
+        * Page poisoning is debug page alloc for some arches. If
+        * either of those options are enabled, enable poisoning.
+        */
+       if (page_poisoning_enabled() ||
+            (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
+             debug_pagealloc_enabled())) {
+               static_branch_enable(&_page_poisoning_enabled);
+               page_poisoning_requested = true;
+       }
+#endif
+
         if (_init_on_alloc_enabled_early) {
-               if (page_poisoning_enabled())
+               if (page_poisoning_requested)
                         pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
                                 "will take precedence over init_on_alloc\n");
                 else
                         static_branch_enable(&init_on_alloc);
         }
         if (_init_on_free_enabled_early) {
-               if (page_poisoning_enabled())
+               if (page_poisoning_requested)
                         pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
                                 "will take precedence over init_on_free\n");
                 else
                         static_branch_enable(&init_on_free);
         }
  
-#ifdef CONFIG_PAGE_POISONING
-       /*
-        * Page poisoning is debug page alloc for some arches. If
-        * either of those options are enabled, enable poisoning.
-        */
-       if (page_poisoning_enabled() ||
-            (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
-             debug_pagealloc_enabled()))
-               static_branch_enable(&_page_poisoning_enabled);
-#endif
-
  #ifdef CONFIG_DEBUG_PAGEALLOC
         if (!debug_pagealloc_enabled())
                 return;
@@ -867,7 +893,7 @@ compaction_capture(struct capture_control *capc, struct page *page,
                 return false;
  
         /*
-        * Do not let lower order allocations polluate a movable pageblock.
+        * Do not let lower order allocations pollute a movable pageblock.
          * This might let an unmovable request use a reclaimable pageblock
          * and vice-versa but no more than normal fallback logic which can
          * have trouble finding a high-order free page.
@@ -1103,7 +1129,7 @@ static inline bool page_expected_state(struct page *page,
         if (unlikely((unsigned long)page->mapping |
                         page_ref_count(page) |
  #ifdef CONFIG_MEMCG
-                       (unsigned long)page_memcg(page) |
+                       page->memcg_data |
  #endif
                         (page->flags & check_flags)))
                 return false;
@@ -1128,7 +1154,7 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
                         bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
         }
  #ifdef CONFIG_MEMCG
-       if (unlikely(page_memcg(page)))
+       if (unlikely(page->memcg_data))
                 bad_reason = "page still charged to cgroup";
  #endif
         return bad_reason;
@@ -1216,9 +1242,10 @@ static void kernel_init_free_pages(struct page *page, int numpages)
  }
  
  static __always_inline bool free_pages_prepare(struct page *page,
-                                       unsigned int order, bool check_free)
+                       unsigned int order, bool check_free, fpi_t fpi_flags)
  {
         int bad = 0;
+       bool init;
  
         VM_BUG_ON_PAGE(PageTail(page), page);
  
@@ -1276,16 +1303,21 @@ static __always_inline bool free_pages_prepare(struct page *page,
                 debug_check_no_obj_freed(page_address(page),
                                            PAGE_SIZE << order);
         }
-       if (want_init_on_free())
-               kernel_init_free_pages(page, 1 << order);
  
         kernel_poison_pages(page, 1 << order);
  
         /*
+        * As memory initialization might be integrated into KASAN,
+        * kasan_free_pages and kernel_init_free_pages must be
+        * kept together to avoid discrepancies in behavior.
+        *
          * With hardware tag-based KASAN, memory tags must be set before the
          * page becomes unavailable via debug_pagealloc or arch_free_page.
          */
-       kasan_free_nondeferred_pages(page, order);
+       init = want_init_on_free();
+       if (init && !kasan_has_integrated_init())
+               kernel_init_free_pages(page, 1 << order);
+       kasan_free_nondeferred_pages(page, order, init, fpi_flags);
  
         /*
          * arch_free_page() can make the page's contents inaccessible.  s390
@@ -1307,7 +1339,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
   */
  static bool free_pcp_prepare(struct page *page)
  {
-       return free_pages_prepare(page, 0, true);
+       return free_pages_prepare(page, 0, true, FPI_NONE);
  }
  
  static bool bulkfree_pcp_prepare(struct page *page)
@@ -1327,9 +1359,9 @@ static bool bulkfree_pcp_prepare(struct page *page)
  static bool free_pcp_prepare(struct page *page)
  {
         if (debug_pagealloc_enabled_static())
-               return free_pages_prepare(page, 0, true);
+               return free_pages_prepare(page, 0, true, FPI_NONE);
         else
-               return free_pages_prepare(page, 0, false);
+               return free_pages_prepare(page, 0, false, FPI_NONE);
  }
  
  static bool bulkfree_pcp_prepare(struct page *page)
@@ -1537,7 +1569,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,
         int migratetype;
         unsigned long pfn = page_to_pfn(page);
  
-       if (!free_pages_prepare(page, order, true))
+       if (!free_pages_prepare(page, order, true, fpi_flags))
                 return;
  
         migratetype = get_pfnblock_migratetype(page, pfn);
@@ -1574,7 +1606,7 @@ void __free_pages_core(struct page *page, unsigned int order)
          * Bypass PCP and place fresh pages right to the tail, primarily
          * relevant for memory onlining.
          */
-       __free_pages_ok(page, order, FPI_TO_TAIL);
+       __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
  }
  
  #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2292,17 +2324,32 @@ static bool check_new_pages(struct page *page, unsigned int order)
  inline void post_alloc_hook(struct page *page, unsigned int order,
                                 gfp_t gfp_flags)
  {
+       bool init;
+
         set_page_private(page, 0);
         set_page_refcounted(page);
  
         arch_alloc_page(page, order);
         debug_pagealloc_map_pages(page, 1 << order);
-       kasan_alloc_pages(page, order);
+
+       /*
+        * Page unpoisoning must happen before memory initialization.
+        * Otherwise, the poison pattern will be overwritten for __GFP_ZERO
+        * allocations and the page unpoisoning code will complain.
+        */
         kernel_unpoison_pages(page, 1 << order);
-       set_page_owner(page, order, gfp_flags);
  
-       if (!want_init_on_free() && want_init_on_alloc(gfp_flags))
+       /*
+        * As memory initialization might be integrated into KASAN,
+        * kasan_alloc_pages and kernel_init_free_pages must be
+        * kept together to avoid discrepancies in behavior.
+        */
+       init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
+       kasan_alloc_pages(page, order, init);
+       if (init && !kasan_has_integrated_init())
                 kernel_init_free_pages(page, 1 << order);
+
+       set_page_owner(page, order, gfp_flags);
  }
  
  static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -2386,19 +2433,21 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
   * boundary. If alignment is required, use move_freepages_block()
   */
  static int move_freepages(struct zone *zone,
-                         struct page *start_page, struct page *end_page,
+                         unsigned long start_pfn, unsigned long end_pfn,
                           int migratetype, int *num_movable)
  {
         struct page *page;
+       unsigned long pfn;
         unsigned int order;
         int pages_moved = 0;
  
-       for (page = start_page; page <= end_page;) {
-               if (!pfn_valid_within(page_to_pfn(page))) {
-                       page++;
+       for (pfn = start_pfn; pfn <= end_pfn;) {
+               if (!pfn_valid_within(pfn)) {
+                       pfn++;
                         continue;
                 }
  
+               page = pfn_to_page(pfn);
                 if (!PageBuddy(page)) {
                         /*
                          * We assume that pages that could be isolated for
@@ -2408,8 +2457,7 @@ static int move_freepages(struct zone *zone,
                         if (num_movable &&
                                         (PageLRU(page) || __PageMovable(page)))
                                 (*num_movable)++;
-
-                       page++;
+                       pfn++;
                         continue;
                 }
  
@@ -2419,7 +2467,7 @@ static int move_freepages(struct zone *zone,
  
                 order = buddy_order(page);
                 move_to_free_list(page, zone, order, migratetype);
-               page += 1 << order;
+               pfn += 1 << order;
                 pages_moved += 1 << order;
         }
  
@@ -2429,25 +2477,22 @@ static int move_freepages(struct zone *zone,
  int move_freepages_block(struct zone *zone, struct page *page,
                                 int migratetype, int *num_movable)
  {
-       unsigned long start_pfn, end_pfn;
-       struct page *start_page, *end_page;
+       unsigned long start_pfn, end_pfn, pfn;
  
         if (num_movable)
                 *num_movable = 0;
  
-       start_pfn = page_to_pfn(page);
-       start_pfn = start_pfn & ~(pageblock_nr_pages-1);
-       start_page = pfn_to_page(start_pfn);
-       end_page = start_page + pageblock_nr_pages - 1;
+       pfn = page_to_pfn(page);
+       start_pfn = pfn & ~(pageblock_nr_pages - 1);
         end_pfn = start_pfn + pageblock_nr_pages - 1;
  
         /* Do not cross zone boundaries */
         if (!zone_spans_pfn(zone, start_pfn))
-               start_page = page;
+               start_pfn = pfn;
         if (!zone_spans_pfn(zone, end_pfn))
                 return 0;
  
-       return move_freepages(zone, start_page, end_page, migratetype,
+       return move_freepages(zone, start_pfn, end_pfn, migratetype,
                                                                 num_movable);
  }
  
@@ -2731,7 +2776,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                         /*
                          * In page freeing path, migratetype change is racy so
                          * we can counter several free pages in a pageblock
-                        * in this loop althoug we changed the pageblock type
+                        * in this loop although we changed the pageblock type
                          * from highatomic to ac->migratetype. So we should
                          * adjust the count once.
                          */
@@ -2908,7 +2953,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list,
                         int migratetype, unsigned int alloc_flags)
  {
-       int i, alloced = 0;
+       int i, allocated = 0;
  
         spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
@@ -2931,7 +2976,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                  * pages are ordered properly.
                  */
                 list_add_tail(&page->lru, list);
-               alloced++;
+               allocated++;
                 if (is_migrate_cma(get_pcppage_migratetype(page)))
                         __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
                                               -(1 << order));
@@ -2940,12 +2985,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
         /*
          * i pages were removed from the buddy list even if some leak due
          * to check_pcp_refill failing so adjust NR_FREE_PAGES based
-        * on i. Do not confuse with 'alloced' which is the number of
+        * on i. Do not confuse with 'allocated' which is the number of
          * pages added to the pcp list.
          */
         __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
         spin_unlock(&zone->lock);
-       return alloced;
+       return allocated;
  }
  
  #ifdef CONFIG_NUMA
@@ -3035,7 +3080,7 @@ static void drain_local_pages_wq(struct work_struct *work)
          * drain_all_pages doesn't use proper cpu hotplug protection so
          * we can race with cpu offline when the WQ can move this from
          * a cpu pinned worker to an unbound one. We can operate on a different
-        * cpu which is allright but we also have to make sure to not move to
+        * cpu which is alright but we also have to make sure to not move to
          * a different one.
          */
         preempt_disable();
@@ -3415,7 +3460,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
  }
  
  /* Remove page from the per-cpu list, caller must protect the list */
-static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+static inline
+struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
                         unsigned int alloc_flags,
                         struct per_cpu_pages *pcp,
                         struct list_head *list)
@@ -3813,16 +3859,13 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
         return alloc_flags;
  }
  
-static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
-                                       unsigned int alloc_flags)
+/* Must be called after current_gfp_context() which can change gfp_mask */
+static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
+                                                 unsigned int alloc_flags)
  {
  #ifdef CONFIG_CMA
-       unsigned int pflags = current->flags;
-
-       if (!(pflags & PF_MEMALLOC_NOCMA) &&
-                       gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+       if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
-
  #endif
         return alloc_flags;
  }
@@ -3922,7 +3965,7 @@ retry:
                         if (alloc_flags & ALLOC_NO_WATERMARKS)
                                 goto try_this_zone;
  
-                       if (node_reclaim_mode == 0 ||
+                       if (!node_reclaim_enabled() ||
                             !zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
                                 continue;
  
@@ -4130,7 +4173,7 @@ out:
  }
  
  /*
- * Maximum number of compaction retries wit a progress before OOM
+ * Maximum number of compaction retries with a progress before OOM
   * killer is consider as the only way to move forward.
   */
  #define MAX_COMPACT_RETRIES 16
@@ -4158,6 +4201,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         memalloc_noreclaim_restore(noreclaim_flag);
         psi_memstall_leave(&pflags);
  
+       if (*compact_result == COMPACT_SKIPPED)
+               return NULL;
         /*
          * At least in one zone compaction wasn't deferred or skipped, so let's
          * count a compaction stall
@@ -4478,7 +4523,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         } else if (unlikely(rt_task(current)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
  
-       alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
+       alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
  
         return alloc_flags;
  }
@@ -4780,7 +4825,7 @@ retry:
  
         reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
         if (reserve_flags)
-               alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
+               alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
  
         /*
          * Reset the nodemask and zonelist iterators if memory policies can be
@@ -4921,7 +4966,7 @@ got_pg:
  
  static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
                 int preferred_nid, nodemask_t *nodemask,
-               struct alloc_context *ac, gfp_t *alloc_mask,
+               struct alloc_context *ac, gfp_t *alloc_gfp,
                 unsigned int *alloc_flags)
  {
         ac->highest_zoneidx = gfp_zone(gfp_mask);
@@ -4930,7 +4975,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
         ac->migratetype = gfp_migratetype(gfp_mask);
  
         if (cpusets_enabled()) {
-               *alloc_mask |= __GFP_HARDWALL;
+               *alloc_gfp |= __GFP_HARDWALL;
                 /*
                  * When we are in the interrupt context, it is irrelevant
                  * to the current task context. It means that any node ok.
@@ -4949,7 +4994,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
         if (should_fail_alloc_page(gfp_mask, order))
                 return false;
  
-       *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
+       *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
  
         /* Dirty zone balancing only done in the fast path */
         ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -4965,16 +5010,161 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
         return true;
  }
  
+/*
+ * __alloc_pages_bulk - Allocate a number of order-0 pages to a list or array
+ * @gfp: GFP flags for the allocation
+ * @preferred_nid: The preferred NUMA node ID to allocate from
+ * @nodemask: Set of nodes to allocate from, may be NULL
+ * @nr_pages: The number of pages desired on the list or array
+ * @page_list: Optional list to store the allocated pages
+ * @page_array: Optional array to store the pages
+ *
+ * This is a batched version of the page allocator that attempts to
+ * allocate nr_pages quickly. Pages are added to page_list if page_list
+ * is not NULL, otherwise it is assumed that the page_array is valid.
+ *
+ * For lists, nr_pages is the number of pages that should be allocated.
+ *
+ * For arrays, only NULL elements are populated with pages and nr_pages
+ * is the maximum number of pages that will be stored in the array.
+ *
+ * Returns the number of pages on the list or array (pre-populated included).
+ */
+unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+                       nodemask_t *nodemask, int nr_pages,
+                       struct list_head *page_list,
+                       struct page **page_array)
+{
+       struct page *page;
+       unsigned long flags;
+       struct zone *zone;
+       struct zoneref *z;
+       struct per_cpu_pages *pcp;
+       struct list_head *pcp_list;
+       struct alloc_context ac;
+       gfp_t alloc_gfp;
+       unsigned int alloc_flags = ALLOC_WMARK_LOW;
+       int nr_populated = 0;
+
+       if (unlikely(nr_pages <= 0))
+               return 0;
+
+       /*
+        * Skip populated array elements to determine if any pages need to be
+        * allocated before disabling IRQs. Bound-check before reading a slot.
+        */
+       while (page_array && nr_populated < nr_pages && page_array[nr_populated])
+               nr_populated++;
+
+       /* Already populated array? Nothing to allocate and no free slot. */
+       if (unlikely(nr_populated == nr_pages))
+               return nr_populated;
+
+       /* Use the single page allocator for one page. */
+       if (nr_pages - nr_populated == 1)
+               goto failed;
+
+       /* May set ALLOC_NOFRAGMENT, fragmentation will return 1 page. */
+       gfp &= gfp_allowed_mask;
+       alloc_gfp = gfp;
+       if (!prepare_alloc_pages(gfp, 0, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags))
+               return nr_populated;
+       gfp = alloc_gfp;
+
+       /* Find an allowed local zone that meets the low watermark. */
+       for_each_zone_zonelist_nodemask(zone, z, ac.zonelist, ac.highest_zoneidx, ac.nodemask) {
+               unsigned long mark;
+
+               if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) &&
+                   !__cpuset_zone_allowed(zone, gfp))
+                       continue;
+
+               if (nr_online_nodes > 1 && zone != ac.preferred_zoneref->zone &&
+                   zone_to_nid(zone) != zone_to_nid(ac.preferred_zoneref->zone))
+                       goto failed;
+
+               mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
+               if (zone_watermark_fast(zone, 0,  mark,
+                               zonelist_zone_idx(ac.preferred_zoneref),
+                               alloc_flags, gfp))
+                       break;
+       }
+
+       /*
+        * If there are no allowed local zones that meet the watermarks then
+        * try to allocate a single page and reclaim if necessary.
+        */
+       if (unlikely(!zone))
+               goto failed;
+
+       /* Attempt the batch allocation */
+       local_irq_save(flags);
+       pcp = &this_cpu_ptr(zone->pageset)->pcp;
+       pcp_list = &pcp->lists[ac.migratetype];
+
+       while (nr_populated < nr_pages) {
+               /* Skip existing pages */
+               if (page_array && page_array[nr_populated]) {
+                       nr_populated++;
+                       continue;
+               }
+
+               page = __rmqueue_pcplist(zone, ac.migratetype, alloc_flags,
+                                                               pcp, pcp_list);
+               if (unlikely(!page)) {
+                       /* Try and get at least one page */
+                       if (!nr_populated)
+                               goto failed_irq;
+                       break;
+               }
+
+               /*
+                * Ideally this would be batched but the best way to do
+                * that cheaply is to first convert zone_statistics to
+                * be inaccurate per-cpu counter like vm_events to avoid
+                * a RMW cycle then do the accounting with IRQs enabled.
+                */
+               __count_zid_vm_events(PGALLOC, zone_idx(zone), 1);
+               zone_statistics(ac.preferred_zoneref->zone, zone);
+
+               prep_new_page(page, 0, gfp, 0);
+               if (page_list)
+                       list_add(&page->lru, page_list);
+               else
+                       page_array[nr_populated] = page;
+               nr_populated++;
+       }
+
+       local_irq_restore(flags);
+
+       return nr_populated;
+
+failed_irq:
+       local_irq_restore(flags);
+
+failed:
+       page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
+       if (page) {
+               if (page_list)
+                       list_add(&page->lru, page_list);
+               else
+                       page_array[nr_populated] = page;
+               nr_populated++;
+       }
+
+       return nr_populated;
+}
+EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
+
  /*
   * This is the 'heart' of the zoned buddy allocator.
   */
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
                                                         nodemask_t *nodemask)
  {
         struct page *page;
         unsigned int alloc_flags = ALLOC_WMARK_LOW;
-       gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
+       gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = { };
  
         /*
@@ -4982,33 +5172,36 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
          * so bail out early if the request is out of bound.
          */
         if (unlikely(order >= MAX_ORDER)) {
-               WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+               WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
                 return NULL;
         }
  
-       gfp_mask &= gfp_allowed_mask;
-       alloc_mask = gfp_mask;
-       if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
+       gfp &= gfp_allowed_mask;
+       /*
+        * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+        * resp. GFP_NOIO which has to be inherited for all allocation requests
+        * from a particular context which has been marked by
+        * memalloc_no{fs,io}_{save,restore}. And PF_MEMALLOC_PIN which ensures
+        * movable zones are not used during allocation.
+        */
+       gfp = current_gfp_context(gfp);
+       alloc_gfp = gfp;
+       if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
+                       &alloc_gfp, &alloc_flags))
                 return NULL;
  
         /*
          * Forbid the first pass from falling back to types that fragment
          * memory until all local zones are considered.
          */
-       alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
+       alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp);
  
         /* First allocation attempt */
-       page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
+       page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
         if (likely(page))
                 goto out;
  
-       /*
-        * Apply scoped allocation constraints. This is mainly about GFP_NOFS
-        * resp. GFP_NOIO which has to be inherited for all allocation requests
-        * from a particular context which has been marked by
-        * memalloc_no{fs,io}_{save,restore}.
-        */
-       alloc_mask = current_gfp_context(gfp_mask);
+       alloc_gfp = gfp;
         ac.spread_dirty_pages = false;
  
         /*
@@ -5017,20 +5210,20 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
          */
         ac.nodemask = nodemask;
  
-       page = __alloc_pages_slowpath(alloc_mask, order, &ac);
+       page = __alloc_pages_slowpath(alloc_gfp, order, &ac);
  
  out:
-       if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
-           unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
+       if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT) && page &&
+           unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
                 __free_pages(page, order);
                 page = NULL;
         }
  
-       trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
+       trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
  
         return page;
  }
-EXPORT_SYMBOL(__alloc_pages_nodemask);
+EXPORT_SYMBOL(__alloc_pages);
  
  /*
   * Common helper functions. Never use with __GFP_HIGHMEM because the returned
@@ -5736,7 +5929,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
  static int __parse_numa_zonelist_order(char *s)
  {
         /*
-        * We used to support different zonlists modes but they turned
+        * We used to support different zonelists modes but they turned
          * out to be just not useful. Let's keep the warning in place
          * if somebody still use the cmd line parameter so that we do
          * not fail it silently
@@ -7477,7 +7670,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
  }
  
  /*
- * Some architecturs, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
+ * Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
   * such cases we allow max_zone_pfn sorted in the descending order
   */
  bool __weak arch_has_descending_max_zone_pfns(void)
@@ -7689,7 +7882,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
         return pages;
  }
  
-void __init mem_init_print_info(const char *str)
+void __init mem_init_print_info(void)
  {
         unsigned long physpages, codesize, datasize, rosize, bss_size;
         unsigned long init_code_size, init_data_size;
@@ -7728,17 +7921,17 @@ void __init mem_init_print_info(const char *str)
  #ifdef CONFIG_HIGHMEM
                 ", %luK highmem"
  #endif
-               "%s%s)\n",
+               ")\n",
                 nr_free_pages() << (PAGE_SHIFT - 10),
                 physpages << (PAGE_SHIFT - 10),
                 codesize >> 10, datasize >> 10, rosize >> 10,
                 (init_data_size + init_code_size) >> 10, bss_size >> 10,
                 (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
-               totalcma_pages << (PAGE_SHIFT - 10),
+               totalcma_pages << (PAGE_SHIFT - 10)
  #ifdef CONFIG_HIGHMEM
-               totalhigh_pages() << (PAGE_SHIFT - 10),
+               , totalhigh_pages() << (PAGE_SHIFT - 10)
  #endif
-               str ? ", " : "", str ? str : "");
+               );
  }
  
  /**
@@ -8222,6 +8415,7 @@ void *__init alloc_large_system_hash(const char *tablename,
         void *table = NULL;
         gfp_t gfp_flags;
         bool virt;
+       bool huge;
  
         /* allow the kernel cmdline to have a say */
         if (!numentries) {
@@ -8289,6 +8483,7 @@ void *__init alloc_large_system_hash(const char *tablename,
                 } else if (get_order(size) >= MAX_ORDER || hashdist) {
                         table = __vmalloc(size, gfp_flags);
                         virt = true;
+                       huge = is_vm_area_hugepages(table);
                 } else {
                         /*
                          * If bucketsize is not a power-of-two, we may free
@@ -8305,7 +8500,7 @@ void *__init alloc_large_system_hash(const char *tablename,
  
         pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
                 tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
-               virt ? "vmalloc" : "linear");
+               virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
  
         if (_hash_shift)
                 *_hash_shift = log2qty;
@@ -8450,6 +8645,27 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
                                 pageblock_nr_pages));
  }
  
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+       (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+/* Dump the stack and each page on @page_list; usage: admin-guide/dynamic-debug-howto.rst */
+static void alloc_contig_dump_pages(struct list_head *page_list)
+{
+       DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure");
+
+       if (DYNAMIC_DEBUG_BRANCH(descriptor)) {
+               struct page *page;
+
+               dump_stack();
+               list_for_each_entry(page, page_list, lru)
+                       dump_page(page, "migration failure");
+       }
+}
+#else /* neither dynamic debug configuration applies: empty stub */
+static inline void alloc_contig_dump_pages(struct list_head *page_list)
+{
+}
+#endif /* CONFIG_DYNAMIC_DEBUG || (CONFIG_DYNAMIC_DEBUG_CORE && DYNAMIC_DEBUG_MODULE) */
+
  /* [start, end) must belong to a single zone. */
  static int __alloc_contig_migrate_range(struct compact_control *cc,
                                         unsigned long start, unsigned long end)
@@ -8464,7 +8680,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                 .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
         };
  
-       migrate_prep();
+       lru_cache_disable();
  
         while (pfn < end || !list_empty(&cc->migratepages)) {
                 if (fatal_signal_pending(current)) {
@@ -8474,14 +8690,13 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  
                 if (list_empty(&cc->migratepages)) {
                         cc->nr_migratepages = 0;
-                       pfn = isolate_migratepages_range(cc, pfn, end);
-                       if (!pfn) {
-                               ret = -EINTR;
+                       ret = isolate_migratepages_range(cc, pfn, end);
+                       if (ret && ret != -EAGAIN)
                                 break;
-                       }
+                       pfn = cc->migrate_pfn;
                         tries = 0;
                 } else if (++tries == 5) {
-                       ret = ret < 0 ? ret : -EBUSY;
+                       ret = -EBUSY;
                         break;
                 }
  
@@ -8491,8 +8706,18 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  
                 ret = migrate_pages(&cc->migratepages, alloc_migration_target,
                                 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
+
+               /*
+                * On -ENOMEM, migrate_pages() bails out right away. It is pointless
+                * to retry again over this error, so do the same here.
+                */
+               if (ret == -ENOMEM)
+                       break;
         }
+
+       lru_cache_enable();
         if (ret < 0) {
+               alloc_contig_dump_pages(&cc->migratepages);
                 putback_movable_pages(&cc->migratepages);
                 return ret;
         }
@@ -8503,7 +8728,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
   * alloc_contig_range() -- tries to allocate given range of pages
   * @start:     start PFN to allocate
   * @end:       one-past-the-last PFN to allocate
- * @migratetype:       migratetype of the underlaying pageblocks (either
+ * @migratetype:       migratetype of the underlying pageblocks (either
   *                     #MIGRATE_MOVABLE or #MIGRATE_CMA).  All pageblocks
   *                     in range must have the same migratetype and it must
   *                     be either of the two.
@@ -8583,7 +8808,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
         ret = __alloc_contig_migrate_range(&cc, start, end);
         if (ret && ret != -EBUSY)
                 goto done;
-       ret =0;
+       ret = 0;
  
         /*
          * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
@@ -8602,8 +8827,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
          * isolated thus they won't get removed from buddy.
          */
  
-       lru_add_drain_all();
-
         order = 0;
         outer_start = start;
         while (!PageBuddy(pfn_to_page(outer_start))) {
@@ -8629,8 +8852,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
  
         /* Make sure the range is really isolated. */
         if (test_pages_isolated(outer_start, end, 0)) {
-               pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
-                       __func__, outer_start, end);
                 ret = -EBUSY;
                 goto done;
         }
@@ -8680,12 +8901,6 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
  
                 if (PageReserved(page))
                         return false;
-
-               if (page_count(page) > 0)
-                       return false;
-
-               if (PageHuge(page))
-                       return false;
         }
         return true;
  }
@@ -8757,9 +8972,9 @@ struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
  }
  #endif /* CONFIG_CONTIG_ALLOC */
  
-void free_contig_range(unsigned long pfn, unsigned int nr_pages)
+void free_contig_range(unsigned long pfn, unsigned long nr_pages)
  {
-       unsigned int count = 0;
+       unsigned long count = 0;
  
         for (; nr_pages--; pfn++) {
                 struct page *page = pfn_to_page(pfn);
@@ -8767,13 +8982,13 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
                 count += page_count(page) != 1;
                 __free_page(page);
         }
-       WARN(count != 0, "%d pages are still in use!\n", count);
+       WARN(count != 0, "%lu pages are still in use!\n", count);
  }
  EXPORT_SYMBOL(free_contig_range);
  
  /*
   * The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalulated.
+ * page high values need to be recalculated.
   */
  void __meminit zone_pcp_update(struct zone *zone)
  {
@@ -8805,12 +9020,9 @@ void zone_pcp_enable(struct zone *zone)
  
  void zone_pcp_reset(struct zone *zone)
  {
-       unsigned long flags;
         int cpu;
         struct per_cpu_pageset *pset;
  
-       /* avoid races with drain_pages()  */
-       local_irq_save(flags);
         if (zone->pageset != &boot_pageset) {
                 for_each_online_cpu(cpu) {
                         pset = per_cpu_ptr(zone->pageset, cpu);
@@ -8819,7 +9031,6 @@ void zone_pcp_reset(struct zone *zone)
                 free_percpu(zone->pageset);
                 zone->pageset = &boot_pageset;
         }
-       local_irq_restore(flags);
  }
  
  #ifdef CONFIG_MEMORY_HOTREMOVE