mm/page_alloc.c: skip setting nodemask when we are in interrupt

[linux-2.6-microblaze.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index e028b87..9f9e15a 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -459,25 +459,23 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  {
  #ifdef CONFIG_SPARSEMEM
         pfn &= (PAGES_PER_SECTION-1);
-       return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
  #else
         pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages);
-       return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
  #endif /* CONFIG_SPARSEMEM */
+       return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
  }
  
  /**
   * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
   * @page: The page within the block of interest
   * @pfn: The target page frame number
- * @end_bitidx: The last bit of interest to retrieve
   * @mask: mask of bits that the caller is interested in
   *
   * Return: pageblock_bits flags
   */
-static __always_inline unsigned long __get_pfnblock_flags_mask(struct page *page,
+static __always_inline
+unsigned long __get_pfnblock_flags_mask(struct page *page,
                                         unsigned long pfn,
-                                       unsigned long end_bitidx,
                                         unsigned long mask)
  {
         unsigned long *bitmap;
@@ -490,20 +488,18 @@ static __always_inline unsigned long __get_pfnblock_flags_mask(struct page *page
         bitidx &= (BITS_PER_LONG-1);
  
         word = bitmap[word_bitidx];
-       bitidx += end_bitidx;
-       return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
+       return (word >> bitidx) & mask;
  }
  
  unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
-                                       unsigned long end_bitidx,
                                         unsigned long mask)
  {
-       return __get_pfnblock_flags_mask(page, pfn, end_bitidx, mask);
+       return __get_pfnblock_flags_mask(page, pfn, mask);
  }
  
  static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
  {
-       return __get_pfnblock_flags_mask(page, pfn, PB_migrate_end, MIGRATETYPE_MASK);
+       return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
  }
  
  /**
@@ -511,12 +507,10 @@ static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned
   * @page: The page within the block of interest
   * @flags: The flags to set
   * @pfn: The target page frame number
- * @end_bitidx: The last bit of interest
   * @mask: mask of bits that the caller is interested in
   */
  void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
                                         unsigned long pfn,
-                                       unsigned long end_bitidx,
                                         unsigned long mask)
  {
         unsigned long *bitmap;
@@ -533,9 +527,8 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
  
         VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
  
-       bitidx += end_bitidx;
-       mask <<= (BITS_PER_LONG - bitidx - 1);
-       flags <<= (BITS_PER_LONG - bitidx - 1);
+       mask <<= bitidx;
+       flags <<= bitidx;
  
         word = READ_ONCE(bitmap[word_bitidx]);
         for (;;) {
@@ -552,8 +545,8 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
                      migratetype < MIGRATE_PCPTYPES))
                 migratetype = MIGRATE_UNMOVABLE;
  
-       set_pageblock_flags_group(page, (unsigned long)migratetype,
-                                       PB_migrate, PB_migrate_end);
+       set_pfnblock_flags_mask(page, (unsigned long)migratetype,
+                               page_to_pfn(page), MIGRATETYPE_MASK);
  }
  
  #ifdef CONFIG_DEBUG_VM
@@ -813,11 +806,10 @@ static inline struct capture_control *task_capc(struct zone *zone)
  {
         struct capture_control *capc = current->capture_control;
  
-       return capc &&
+       return unlikely(capc) &&
                 !(current->flags & PF_KTHREAD) &&
                 !capc->page &&
-               capc->cc->zone == zone &&
-               capc->cc->direct_compaction ? capc : NULL;
+               capc->cc->zone == zone ? capc : NULL;
  }
  
  static inline bool
@@ -961,7 +953,7 @@ static inline void __free_one_page(struct page *page,
                 int migratetype, bool report)
  {
         struct capture_control *capc = task_capc(zone);
-       unsigned long uninitialized_var(buddy_pfn);
+       unsigned long buddy_pfn;
         unsigned long combined_pfn;
         unsigned int max_order;
         struct page *buddy;
@@ -1164,8 +1156,11 @@ static void kernel_init_free_pages(struct page *page, int numpages)
  {
         int i;
  
+       /* s390's use of memset() could override KASAN redzones. */
+       kasan_disable_current();
         for (i = 0; i < numpages; i++)
                 clear_highpage(page + i);
+       kasan_enable_current();
  }
  
  static __always_inline bool free_pages_prepare(struct page *page,
@@ -2273,7 +2268,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
   * This array describes the order lists are fallen back to when
   * the free lists for the desirable migrate type are depleted
   */
-static int fallbacks[MIGRATE_TYPES][4] = {
+static int fallbacks[MIGRATE_TYPES][3] = {
         [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
         [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
         [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
@@ -3487,6 +3482,29 @@ static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
  }
  ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
  
+static inline long __zone_watermark_unusable_free(struct zone *z,
+                               unsigned int order, unsigned int alloc_flags)
+{
+       const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+       long unusable_free = (1 << order) - 1;
+
+       /*
+        * If the caller does not have rights to ALLOC_HARDER then subtract
+        * the high-atomic reserves. This will over-estimate the size of the
+        * atomic reserve but it avoids a search.
+        */
+       if (likely(!alloc_harder))
+               unusable_free += z->nr_reserved_highatomic;
+
+#ifdef CONFIG_CMA
+       /* If allocation can't use CMA areas don't use free CMA pages */
+       if (!(alloc_flags & ALLOC_CMA))
+               unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
+
+       return unusable_free;
+}
+
  /*
   * Return true if free base pages are above 'mark'. For high-order checks it
   * will return true of the order-0 watermark is reached and there is at least
@@ -3502,19 +3520,12 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
         const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
  
         /* free_pages may go negative - that's OK */
-       free_pages -= (1 << order) - 1;
+       free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
  
         if (alloc_flags & ALLOC_HIGH)
                 min -= min / 2;
  
-       /*
-        * If the caller does not have rights to ALLOC_HARDER then subtract
-        * the high-atomic reserves. This will over-estimate the size of the
-        * atomic reserve but it avoids a search.
-        */
-       if (likely(!alloc_harder)) {
-               free_pages -= z->nr_reserved_highatomic;
-       } else {
+       if (unlikely(alloc_harder)) {
                 /*
                  * OOM victims can try even harder than normal ALLOC_HARDER
                  * users on the grounds that it's definitely going to be in
@@ -3527,13 +3538,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                         min -= min / 4;
         }
  
-
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
         /*
          * Check watermarks for an order-0 allocation request. If these
          * are not met, then a high-order request also cannot go ahead
@@ -3580,30 +3584,42 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
  
  static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
                                 unsigned long mark, int highest_zoneidx,
-                               unsigned int alloc_flags)
+                               unsigned int alloc_flags, gfp_t gfp_mask)
  {
-       long free_pages = zone_page_state(z, NR_FREE_PAGES);
-       long cma_pages = 0;
+       long free_pages;
  
-#ifdef CONFIG_CMA
-       /* If allocation can't use CMA areas don't use free CMA pages */
-       if (!(alloc_flags & ALLOC_CMA))
-               cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
+       free_pages = zone_page_state(z, NR_FREE_PAGES);
  
         /*
          * Fast check for order-0 only. If this fails then the reserves
-        * need to be calculated. There is a corner case where the check
-        * passes but only the high-order atomic reserve are free. If
-        * the caller is !atomic then it'll uselessly search the free
-        * list. That corner case is then slower but it is harmless.
+        * need to be calculated.
          */
-       if (!order && (free_pages - cma_pages) >
-                               mark + z->lowmem_reserve[highest_zoneidx])
+       if (!order) {
+               long fast_free;
+
+               fast_free = free_pages;
+               fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
+               if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
+                       return true;
+       }
+
+       if (__zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
+                                       free_pages))
                 return true;
+       /*
+        * Ignore watermark boosting for GFP_ATOMIC order-0 allocations
+        * when checking the min watermark. The min watermark is the
+        * point where boosting is ignored so that kswapd is woken up
+        * when below the low watermark.
+        */
+       if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost
+               && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) {
+               mark = z->_watermark[WMARK_MIN];
+               return __zone_watermark_ok(z, order, mark, highest_zoneidx,
+                                       alloc_flags, free_pages);
+       }
  
-       return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
-                                       free_pages);
+       return false;
  }
  
  bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
@@ -3747,7 +3763,8 @@ retry:
  
                 mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
                 if (!zone_watermark_fast(zone, order, mark,
-                                      ac->highest_zoneidx, alloc_flags)) {
+                                      ac->highest_zoneidx, alloc_flags,
+                                      gfp_mask)) {
                         int ret;
  
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -4771,7 +4788,11 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
  
         if (cpusets_enabled()) {
                 *alloc_mask |= __GFP_HARDWALL;
-               if (!ac->nodemask)
+               /*
+                * In interrupt context the allocation is unrelated to the
+                * current task context, which means that any node is ok.
+                */
+               if (!in_interrupt() && !ac->nodemask)
                         ac->nodemask = &cpuset_current_mems_allowed;
                 else
                         *alloc_flags |= ALLOC_CPUSET;
@@ -5165,19 +5186,6 @@ unsigned long nr_free_buffer_pages(void)
  }
  EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
  
-/**
- * nr_free_pagecache_pages - count number of pages beyond high watermark
- *
- * nr_free_pagecache_pages() counts the number of pages which are beyond the
- * high watermark within all zones.
- *
- * Return: number of pages beyond high watermark within all zones.
- */
-unsigned long nr_free_pagecache_pages(void)
-{
-       return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
-}
-
  static inline void show_node(struct zone *zone)
  {
         if (IS_ENABLED(CONFIG_NUMA))
@@ -5220,8 +5228,8 @@ long si_mem_available(void)
          * items that are in use, and cannot be freed. Cap this estimate at the
          * low watermark.
          */
-       reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) +
-                       global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+       reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+               global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
         available += reclaimable - min(reclaimable / 2, wmark_low);
  
         if (available < 0)
@@ -5364,8 +5372,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                 global_node_page_state(NR_UNEVICTABLE),
                 global_node_page_state(NR_FILE_DIRTY),
                 global_node_page_state(NR_WRITEBACK),
-               global_node_page_state(NR_SLAB_RECLAIMABLE),
-               global_node_page_state(NR_SLAB_UNRECLAIMABLE),
+               global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
+               global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
                 global_node_page_state(NR_FILE_MAPPED),
                 global_node_page_state(NR_SHMEM),
                 global_zone_page_state(NR_PAGETABLE),
@@ -5396,6 +5404,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         " anon_thp: %lukB"
  #endif
                         " writeback_tmp:%lukB"
+                       " kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       " shadow_call_stack:%lukB"
+#endif
                         " all_unreclaimable? %s"
                         "\n",
                         pgdat->node_id,
@@ -5417,6 +5429,10 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
  #endif
                         K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+                       node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+                       node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
                         pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
                                 "yes" : "no");
         }
@@ -5448,10 +5464,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         " present:%lukB"
                         " managed:%lukB"
                         " mlocked:%lukB"
-                       " kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
-                       " shadow_call_stack:%lukB"
-#endif
                         " pagetables:%lukB"
                         " bounce:%lukB"
                         " free_pcp:%lukB"
@@ -5473,10 +5485,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                         K(zone->present_pages),
                         K(zone_managed_pages(zone)),
                         K(zone_page_state(zone, NR_MLOCK)),
-                       zone_page_state(zone, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
-                       zone_page_state(zone, NR_KERNEL_SCS_KB),
-#endif
                         K(zone_page_state(zone, NR_PAGETABLE)),
                         K(zone_page_state(zone, NR_BOUNCE)),
                         K(free_pcp),
@@ -5891,13 +5899,16 @@ build_all_zonelists_init(void)
   */
  void __ref build_all_zonelists(pg_data_t *pgdat)
  {
+       unsigned long vm_total_pages;
+
         if (system_state == SYSTEM_BOOTING) {
                 build_all_zonelists_init();
         } else {
                 __build_all_zonelists(pgdat);
                 /* cpuset refresh routine should be here */
         }
-       vm_total_pages = nr_free_pagecache_pages();
+       /* Get the number of free pages beyond high watermark in all zones. */
+       vm_total_pages = nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
         /*
          * Disable grouping by mobility if the number of pages in the
          * system is too low to allow the mechanism to work. It would be
@@ -6324,22 +6335,6 @@ void __meminit init_currently_empty_zone(struct zone *zone,
         zone->initialized = 1;
  }
  
-/**
- * sparse_memory_present_with_active_regions - Call memory_present for each active range
- * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
- *
- * If an architecture guarantees that all ranges registered contain no holes and may
- * be freed, this function may be used instead of calling memory_present() manually.
- */
-void __init sparse_memory_present_with_active_regions(int nid)
-{
-       unsigned long start_pfn, end_pfn;
-       int i, this_nid;
-
-       for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
-               memory_present(this_nid, start_pfn, end_pfn);
-}
-
  /**
   * get_pfn_range_for_nid - Return the start and end page frames for a node
   * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.