Merge tag 'pci-v3.20-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaa...
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4aead0b..a47f0b2 100644
@@ -25,6 +25,7 @@
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/kmemcheck.h>
+#include <linux/kasan.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
 #include <linux/pagevec.h>
@@ -172,7 +173,7 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  *     1G machine -> (16M dma, 784M normal, 224M high)
  *     NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
  *     HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
- *     HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ *     HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA
  *
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
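
For concreteness, the ratios in the comment above work out as below. This is only a back-of-the-envelope userspace sketch, not part of the patch, and it assumes the default lowmem_reserve_ratio values of 256 (DMA) and 32 (NORMAL) that the comment's example implies.

#include <stdio.h>

int main(void)
{
        /* the 1G example machine from the comment: 16M dma, 784M normal, 224M high */
        const double normal_mb = 784, high_mb = 224;

        printf("NORMAL  alloc reserves %.1f MB in ZONE_DMA\n", normal_mb / 256);
        printf("HIGHMEM alloc reserves %.1f MB in ZONE_NORMAL\n", high_mb / 32);
        printf("HIGHMEM alloc reserves %.1f MB in ZONE_DMA\n",
               (high_mb + normal_mb) / 256);
        return 0;
}

That prints roughly 3.1 MB, 7.0 MB and 3.9 MB, matching the three reservation lines in the comment.
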
@@ -232,27 +233,6 @@ EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
 
-/*
- * Structure for holding the mostly immutable allocation parameters passed
- * between alloc_pages* family of functions.
- *
- * nodemask, migratetype and high_zoneidx are initialized only once in
- * __alloc_pages_nodemask() and then never change.
- *
- * zonelist, preferred_zone and classzone_idx are set first in
- * __alloc_pages_nodemask() for the fast path, and might be later changed
- * in __alloc_pages_slowpath(). All other functions pass the whole strucure
- * by a const pointer.
- */
-struct alloc_context {
-       struct zonelist *zonelist;
-       nodemask_t *nodemask;
-       struct zone *preferred_zone;
-       int classzone_idx;
-       int migratetype;
-       enum zone_type high_zoneidx;
-};
-
 int page_group_by_mobility_disabled __read_mostly;
 
 void set_pageblock_migratetype(struct page *page, int migratetype)
@@ -265,8 +245,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
                                        PB_migrate, PB_migrate_end);
 }
 
-bool oom_killer_disabled __read_mostly;
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -402,36 +380,6 @@ void prep_compound_page(struct page *page, unsigned long order)
        }
 }
 
-/* update __split_huge_page_refcount if you change this function */
-static int destroy_compound_page(struct page *page, unsigned long order)
-{
-       int i;
-       int nr_pages = 1 << order;
-       int bad = 0;
-
-       if (unlikely(compound_order(page) != order)) {
-               bad_page(page, "wrong compound order", 0);
-               bad++;
-       }
-
-       __ClearPageHead(page);
-
-       for (i = 1; i < nr_pages; i++) {
-               struct page *p = page + i;
-
-               if (unlikely(!PageTail(p))) {
-                       bad_page(page, "PageTail not set", 0);
-                       bad++;
-               } else if (unlikely(p->first_page != page)) {
-                       bad_page(page, "first_page not consistent", 0);
-                       bad++;
-               }
-               __ClearPageTail(p);
-       }
-
-       return bad;
-}
-
 static inline void prep_zero_page(struct page *page, unsigned int order,
                                                        gfp_t gfp_flags)
 {
@@ -634,10 +582,7 @@ static inline void __free_one_page(struct page *page,
        int max_order = MAX_ORDER;
 
        VM_BUG_ON(!zone_is_initialized(zone));
-
-       if (unlikely(PageCompound(page)))
-               if (unlikely(destroy_compound_page(page, order)))
-                       return;
+       VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 
        VM_BUG_ON(migratetype == -1);
        if (is_migrate_isolate(migratetype)) {
@@ -818,21 +763,41 @@ static void free_one_page(struct zone *zone,
        spin_unlock(&zone->lock);
 }
 
+static int free_tail_pages_check(struct page *head_page, struct page *page)
+{
+       if (!IS_ENABLED(CONFIG_DEBUG_VM))
+               return 0;
+       if (unlikely(!PageTail(page))) {
+               bad_page(page, "PageTail not set", 0);
+               return 1;
+       }
+       if (unlikely(page->first_page != head_page)) {
+               bad_page(page, "first_page not consistent", 0);
+               return 1;
+       }
+       return 0;
+}
+
 static bool free_pages_prepare(struct page *page, unsigned int order)
 {
-       int i;
-       int bad = 0;
+       bool compound = PageCompound(page);
+       int i, bad = 0;
 
        VM_BUG_ON_PAGE(PageTail(page), page);
-       VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page);
+       VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
 
        trace_mm_page_free(page, order);
        kmemcheck_free_shadow(page, order);
+       kasan_free_pages(page, order);
 
        if (PageAnon(page))
                page->mapping = NULL;
-       for (i = 0; i < (1 << order); i++)
+       bad += free_pages_check(page);
+       for (i = 1; i < (1 << order); i++) {
+               if (compound)
+                       bad += free_tail_pages_check(page, page + i);
                bad += free_pages_check(page + i);
+       }
        if (bad)
                return false;
 
@@ -1007,6 +972,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
 
        arch_alloc_page(page, order);
        kernel_map_pages(page, 1 << order, 1);
+       kasan_alloc_pages(page, order);
 
        if (gfp_flags & __GFP_ZERO)
                prep_zero_page(page, order, gfp_flags);
@@ -1160,39 +1126,34 @@ static void change_pageblock_range(struct page *pageblock_page,
 }
 
 /*
- * If breaking a large block of pages, move all free pages to the preferred
- * allocation list. If falling back for a reclaimable kernel allocation, be
- * more aggressive about taking ownership of free pages.
+ * When we are falling back to another migratetype during allocation, try to
+ * steal extra free pages from the same pageblocks to satisfy further
+ * allocations, instead of polluting multiple pageblocks.
  *
- * On the other hand, never change migration type of MIGRATE_CMA pageblocks
- * nor move CMA pages to different free lists. We don't want unmovable pages
- * to be allocated from MIGRATE_CMA areas.
+ * If we are stealing a relatively large buddy page, it is likely there will
+ * be more free pages in the pageblock, so try to steal them all. For
+ * reclaimable and unmovable allocations, we steal regardless of page size,
+ * as fragmentation caused by those allocations polluting movable pageblocks
+ * is worse than movable allocations stealing from unmovable and reclaimable
+ * pageblocks.
  *
- * Returns the new migratetype of the pageblock (or the same old migratetype
- * if it was unchanged).
+ * If we claim more than half of the pageblock, change pageblock's migratetype
+ * as well.
  */
-static int try_to_steal_freepages(struct zone *zone, struct page *page,
+static void try_to_steal_freepages(struct zone *zone, struct page *page,
                                  int start_type, int fallback_type)
 {
        int current_order = page_order(page);
 
-       /*
-        * When borrowing from MIGRATE_CMA, we need to release the excess
-        * buddy pages to CMA itself. We also ensure the freepage_migratetype
-        * is set to CMA so it is returned to the correct freelist in case
-        * the page ends up being not actually allocated from the pcp lists.
-        */
-       if (is_migrate_cma(fallback_type))
-               return fallback_type;
-
        /* Take ownership for orders >= pageblock_order */
        if (current_order >= pageblock_order) {
                change_pageblock_range(page, current_order, start_type);
-               return start_type;
+               return;
        }
 
        if (current_order >= pageblock_order / 2 ||
            start_type == MIGRATE_RECLAIMABLE ||
+           start_type == MIGRATE_UNMOVABLE ||
            page_group_by_mobility_disabled) {
                int pages;
 
@@ -1200,15 +1161,9 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
 
                /* Claim the whole block if over half of it is free */
                if (pages >= (1 << (pageblock_order-1)) ||
-                               page_group_by_mobility_disabled) {
-
+                               page_group_by_mobility_disabled)
                        set_pageblock_migratetype(page, start_type);
-                       return start_type;
-               }
-
        }
-
-       return fallback_type;
 }
 
 /* Remove an element from the buddy allocator from the fallback list */
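
A quick illustration of the "over half" test in try_to_steal_freepages() above: with, say, pageblock_order == 9 (a 2MB pageblock of 512 4K pages, as on x86_64 with hugepages), the pageblock's migratetype is only rewritten once at least 1 << 8 == 256 free pages have been moved. A hypothetical helper expressing just that predicate, not part of the patch:

static bool would_claim_pageblock(int pages_moved)
{
        /* same condition as the "Claim the whole block" check above */
        return pages_moved >= (1 << (pageblock_order - 1)) ||
               page_group_by_mobility_disabled;
}
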
@@ -1218,14 +1173,15 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
        struct free_area *area;
        unsigned int current_order;
        struct page *page;
-       int migratetype, new_type, i;
 
        /* Find the largest possible block of pages in the other list */
        for (current_order = MAX_ORDER-1;
                                current_order >= order && current_order <= MAX_ORDER-1;
                                --current_order) {
+               int i;
                for (i = 0;; i++) {
-                       migratetype = fallbacks[start_migratetype][i];
+                       int migratetype = fallbacks[start_migratetype][i];
+                       int buddy_type = start_migratetype;
 
                        /* MIGRATE_RESERVE handled later if necessary */
                        if (migratetype == MIGRATE_RESERVE)
@@ -1239,25 +1195,39 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
                                        struct page, lru);
                        area->nr_free--;
 
-                       new_type = try_to_steal_freepages(zone, page,
-                                                         start_migratetype,
-                                                         migratetype);
+                       if (!is_migrate_cma(migratetype)) {
+                               try_to_steal_freepages(zone, page,
+                                                       start_migratetype,
+                                                       migratetype);
+                       } else {
+                               /*
+                                * When borrowing from MIGRATE_CMA, we need to
+                                * release the excess buddy pages to CMA
+                                * itself, and we do not try to steal extra
+                                * free pages.
+                                */
+                               buddy_type = migratetype;
+                       }
 
                        /* Remove the page from the freelists */
                        list_del(&page->lru);
                        rmv_page_order(page);
 
                        expand(zone, page, order, current_order, area,
-                              new_type);
-                       /* The freepage_migratetype may differ from pageblock's
+                                       buddy_type);
+
+                       /*
+                        * The freepage_migratetype may differ from pageblock's
                         * migratetype depending on the decisions in
-                        * try_to_steal_freepages. This is OK as long as it does
-                        * not differ for MIGRATE_CMA type.
+                        * try_to_steal_freepages(). This is OK as long as it
+                        * does not differ for MIGRATE_CMA pageblocks. For CMA
+                        * we need to make sure unallocated pages flushed from
+                        * pcp lists are returned to the correct freelist.
                         */
-                       set_freepage_migratetype(page, new_type);
+                       set_freepage_migratetype(page, buddy_type);
 
                        trace_mm_page_alloc_extfrag(page, order, current_order,
-                               start_migratetype, migratetype, new_type);
+                               start_migratetype, migratetype);
 
                        return page;
                }
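
Condensed, the per-fallback decision this hunk implements reads roughly as follows (a simplified restatement of the hunk above, not the verbatim file):

        if (!is_migrate_cma(migratetype)) {
                /* normal fallback: possibly steal more of the pageblock */
                try_to_steal_freepages(zone, page, start_migratetype,
                                       migratetype);
        } else {
                /*
                 * CMA fallback: never convert the pageblock; tag the buddy
                 * as MIGRATE_CMA so unallocated pages flushed from the pcp
                 * lists return to the CMA freelist.
                 */
                buddy_type = migratetype;
        }

        expand(zone, page, order, current_order, area, buddy_type);
        set_freepage_migratetype(page, buddy_type);
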
@@ -2352,9 +2322,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 
        *did_some_progress = 0;
 
-       if (oom_killer_disabled)
-               return NULL;
-
        /*
         * Acquire the per-zone oom lock for each zone.  If that
         * fails, somebody else is making progress for us.
@@ -2365,14 +2332,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                return NULL;
        }
 
-       /*
-        * PM-freezer should be notified that there might be an OOM killer on
-        * its way to kill and wake somebody up. This is too early and we might
-        * end up not killing anything but false positives are acceptable.
-        * See freeze_processes.
-        */
-       note_oom_kill();
-
        /*
         * Go through the zonelist yet one more time, keep very high watermark
         * here, this is only to catch a parallel oom killing, we must fail if
@@ -2407,8 +2366,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                        goto out;
        }
        /* Exhausted what can be done so it's blamo time */
-       out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false);
-       *did_some_progress = 1;
+       if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+               *did_some_progress = 1;
 out:
        oom_zonelist_unlock(ac->zonelist, gfp_mask);
        return page;
@@ -2429,10 +2388,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                return NULL;
 
        current->flags |= PF_MEMALLOC;
-       compact_result = try_to_compact_pages(ac->zonelist, order, gfp_mask,
-                                               ac->nodemask, mode,
-                                               contended_compaction,
-                                               alloc_flags, ac->classzone_idx);
+       compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
+                                               mode, contended_compaction);
        current->flags &= ~PF_MEMALLOC;
 
        switch (compact_result) {
@@ -3917,18 +3874,29 @@ static int __build_all_zonelists(void *data)
        return 0;
 }
 
+static noinline void __init
+build_all_zonelists_init(void)
+{
+       __build_all_zonelists(NULL);
+       mminit_verify_zonelist();
+       cpuset_init_current_mems_allowed();
+}
+
 /*
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
+ *
+ * __ref due to (1) call of __meminit annotated setup_zone_pageset
+ * [we're only called with non-NULL zone through __meminit paths] and
+ * (2) call of __init annotated helper build_all_zonelists_init
+ * [protected by SYSTEM_BOOTING].
  */
 void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 {
        set_zonelist_order();
 
        if (system_state == SYSTEM_BOOTING) {
-               __build_all_zonelists(NULL);
-               mminit_verify_zonelist();
-               cpuset_init_current_mems_allowed();
+               build_all_zonelists_init();
        } else {
 #ifdef CONFIG_MEMORY_HOTPLUG
                if (zone)
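
The __ref comment above is the usual section-annotation pattern: an __init (or __meminit) helper may only be reached from paths that are provably boot-time (or memory-hotplug-time), and the caller carries __ref so modpost does not warn about the cross-section reference. A minimal sketch of that pattern, with made-up function names, not taken from the patch:

static void __init boot_only_setup(void)
{
        /* this function's text is discarded once boot finishes */
}

void __ref example_caller(void)
{
        if (system_state == SYSTEM_BOOTING)
                boot_only_setup();      /* safe: only reachable while booting */
}
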
@@ -5031,8 +4999,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-       printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
-                       (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
+       pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
+               (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1);
 #endif
        calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                                  zones_size, zholes_size);
@@ -5404,9 +5372,10 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                                arch_zone_highest_possible_pfn[i])
                        pr_cont("empty\n");
                else
-                       pr_cont("[mem %0#10lx-%0#10lx]\n",
-                               arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
-                               (arch_zone_highest_possible_pfn[i]
+                       pr_cont("[mem %#018Lx-%#018Lx]\n",
+                               (u64)arch_zone_lowest_possible_pfn[i]
+                                       << PAGE_SHIFT,
+                               ((u64)arch_zone_highest_possible_pfn[i]
                                        << PAGE_SHIFT) - 1);
        }
 
@@ -5414,15 +5383,16 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
        pr_info("Movable zone start for each node\n");
        for (i = 0; i < MAX_NUMNODES; i++) {
                if (zone_movable_pfn[i])
-                       pr_info("  Node %d: %#010lx\n", i,
-                              zone_movable_pfn[i] << PAGE_SHIFT);
+                       pr_info("  Node %d: %#018Lx\n", i,
+                              (u64)zone_movable_pfn[i] << PAGE_SHIFT);
        }
 
        /* Print out the early node map */
        pr_info("Early memory node ranges\n");
        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
-               pr_info("  node %3d: [mem %#010lx-%#010lx]\n", nid,
-                      start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+               pr_info("  node %3d: [mem %#018Lx-%#018Lx]\n", nid,
+                       (u64)start_pfn << PAGE_SHIFT,
+                       ((u64)end_pfn << PAGE_SHIFT) - 1);
 
        /* Initialise every node */
        mminit_verify_pageflags_layout();
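
One note on the pr_info() changes in the last few hunks: the reason for casting the pfn to u64 before the shift is that pfns are unsigned long, so on a 32-bit kernel end_pfn << PAGE_SHIFT can wrap before it is widened for printing. A tiny userspace sketch of the difference, using uint32_t to stand in for a 32-bit unsigned long and a made-up pfn value:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
        /* pfn of a page just past 4GB: fits in 32 bits, its byte address does not */
        uint32_t end_pfn = 0x100100;

        printf("shift, then widen: %#llx\n",
               (unsigned long long)(end_pfn << PAGE_SHIFT));   /* wraps to 0x100000 */
        printf("widen, then shift: %#llx\n",
               (unsigned long long)end_pfn << PAGE_SHIFT);     /* 0x100100000 */
        return 0;
}
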