mm/page_isolation.c: make start_isolate_page_range() fail if already isolated
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1741dd2..0b97b8e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -265,17 +265,19 @@ int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
 int watermark_scale_factor = 10;
 
-static unsigned long __meminitdata nr_kernel_pages;
-static unsigned long __meminitdata nr_all_pages;
-static unsigned long __meminitdata dma_reserve;
+static unsigned long nr_kernel_pages __meminitdata;
+static unsigned long nr_all_pages __meminitdata;
+static unsigned long dma_reserve __meminitdata;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-static unsigned long __initdata required_kernelcore;
-static unsigned long __initdata required_movablecore;
-static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-static bool mirrored_kernelcore;
+static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata;
+static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata;
+static unsigned long required_kernelcore __initdata;
+static unsigned long required_kernelcore_percent __initdata;
+static unsigned long required_movablecore __initdata;
+static unsigned long required_movablecore_percent __initdata;
+static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static bool mirrored_kernelcore __meminitdata;
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -292,40 +294,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 int page_group_by_mobility_disabled __read_mostly;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-
-/*
- * Determine how many pages need to be initialized during early boot
- * (non-deferred initialization).
- * The value of first_deferred_pfn will be set later, once non-deferred pages
- * are initialized, but for now set it ULONG_MAX.
- */
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-       phys_addr_t start_addr, end_addr;
-       unsigned long max_pgcnt;
-       unsigned long reserved;
-
-       /*
-        * Initialise at least 2G of a node but also take into account that
-        * two large system hashes that can take up 1GB for 0.25TB/node.
-        */
-       max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
-                       (pgdat->node_spanned_pages >> 8));
-
-       /*
-        * Compensate the all the memblock reservations (e.g. crash kernel)
-        * from the initial estimation to make sure we will initialize enough
-        * memory to boot.
-        */
-       start_addr = PFN_PHYS(pgdat->node_start_pfn);
-       end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
-       reserved = memblock_reserved_memory_within(start_addr, end_addr);
-       max_pgcnt += PHYS_PFN(reserved);
-
-       pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
-       pgdat->first_deferred_pfn = ULONG_MAX;
-}
-
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
@@ -361,10 +329,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
        return true;
 }
 #else
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-}
-
 static inline bool early_page_uninitialised(unsigned long pfn)
 {
        return false;
@@ -1099,6 +1063,15 @@ static bool bulkfree_pcp_prepare(struct page *page)
 }
 #endif /* CONFIG_DEBUG_VM */
 
+static inline void prefetch_buddy(struct page *page)
+{
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0);
+       struct page *buddy = page + (buddy_pfn - pfn);
+
+       prefetch(buddy);
+}
+
 /*
  * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
@@ -1115,13 +1088,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
        int migratetype = 0;
        int batch_free = 0;
+       int prefetch_nr = 0;
        bool isolated_pageblocks;
-
-       spin_lock(&zone->lock);
-       isolated_pageblocks = has_isolate_pageblock(zone);
+       struct page *page, *tmp;
+       LIST_HEAD(head);
 
        while (count) {
-               struct page *page;
                struct list_head *list;
 
                /*
@@ -1143,26 +1115,48 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        batch_free = count;
 
                do {
-                       int mt; /* migratetype of the to-be-freed page */
-
                        page = list_last_entry(list, struct page, lru);
-                       /* must delete as __free_one_page list manipulates */
+                       /* must delete to avoid corrupting pcp list */
                        list_del(&page->lru);
-
-                       mt = get_pcppage_migratetype(page);
-                       /* MIGRATE_ISOLATE page should not go to pcplists */
-                       VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-                       /* Pageblock could have been isolated meanwhile */
-                       if (unlikely(isolated_pageblocks))
-                               mt = get_pageblock_migratetype(page);
+                       pcp->count--;
 
                        if (bulkfree_pcp_prepare(page))
                                continue;
 
-                       __free_one_page(page, page_to_pfn(page), zone, 0, mt);
-                       trace_mm_page_pcpu_drain(page, 0, mt);
+                       list_add_tail(&page->lru, &head);
+
+                       /*
+                        * We are going to put the page back to the global
+                        * pool, prefetch its buddy to speed up later access
+                        * under zone->lock. It is believed the overhead of
+                        * an additional test and calculating buddy_pfn here
+                        * can be offset by reduced memory latency later. To
+                        * avoid excessive prefetching due to large count, only
+                        * prefetch buddy for the first pcp->batch nr of pages.
+                        */
+                       if (prefetch_nr++ < pcp->batch)
+                               prefetch_buddy(page);
                } while (--count && --batch_free && !list_empty(list));
        }
+
+       spin_lock(&zone->lock);
+       isolated_pageblocks = has_isolate_pageblock(zone);
+
+       /*
+        * Use safe version since after __free_one_page(),
+        * page->lru.next will not point to original list.
+        */
+       list_for_each_entry_safe(page, tmp, &head, lru) {
+               int mt = get_pcppage_migratetype(page);
+               /* MIGRATE_ISOLATE page should not go to pcplists */
+               VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+               /* Pageblock could have been isolated meanwhile */
+               if (unlikely(isolated_pageblocks))
+                       mt = get_pageblock_migratetype(page);
+
+               __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+               trace_mm_page_pcpu_drain(page, 0, mt);
+       }
        spin_unlock(&zone->lock);
 }
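
The hunk above restructures free_pcppages_bulk() so that pages are detached from the pcp lists and staged on a local list first, and zone->lock is taken only once for the whole batch; the buddy prefetch tries to hide the memory latency of the coalescing done later under the lock. Below is a minimal userspace sketch of that two-phase pattern, not kernel code: all names are hypothetical and a pthread mutex stands in for zone->lock.

/*
 * Userspace sketch (not kernel code) of the two-phase pattern above:
 * stage items on a private list with no shared lock held, then take the
 * lock once and merge the whole batch.  pool_lock stands in for
 * zone->lock, struct item for struct page; all names are hypothetical.
 */
#include <pthread.h>
#include <stddef.h>

struct item {
        struct item *next;
};

static struct item *shared_pool;        /* protected by pool_lock */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static void bulk_release(struct item **percpu_list, int count, int batch)
{
        struct item *staged = NULL, *it;
        int prefetch_nr = 0;

        /* Phase 1: no shared lock, only the caller's private list. */
        while (count-- > 0 && *percpu_list) {
                it = *percpu_list;
                *percpu_list = it->next;

                it->next = staged;
                staged = it;

                /*
                 * Bound the prefetching to the first 'batch' items, as the
                 * kernel bounds prefetch_buddy() to pcp->batch pages.  (In
                 * the kernel the prefetched object is the page's buddy,
                 * which is first touched later, under zone->lock.)
                 */
                if (prefetch_nr++ < batch)
                        __builtin_prefetch(it);
        }

        /* Phase 2: one lock acquisition for the whole batch. */
        pthread_mutex_lock(&pool_lock);
        while (staged) {
                it = staged;
                staged = it->next;
                it->next = shared_pool;
                shared_pool = it;
        }
        pthread_mutex_unlock(&pool_lock);
}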
 
@@ -1181,10 +1175,9 @@ static void free_one_page(struct zone *zone,
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
-                               unsigned long zone, int nid, bool zero)
+                               unsigned long zone, int nid)
 {
-       if (zero)
-               mm_zero_struct_page(page);
+       mm_zero_struct_page(page);
        set_page_links(page, zone, nid, pfn);
        init_page_count(page);
        page_mapcount_reset(page);
@@ -1198,12 +1191,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 #endif
 }
 
-static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
-                                       int nid, bool zero)
-{
-       return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero);
-}
-
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __meminit init_reserved_page(unsigned long pfn)
 {
@@ -1222,7 +1209,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
                if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
                        break;
        }
-       __init_single_pfn(pfn, zid, nid, true);
+       __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
 }
 #else
 static inline void init_reserved_page(unsigned long pfn)
@@ -1506,7 +1493,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
                } else if (!(pfn & nr_pgmask)) {
                        deferred_free_range(pfn - nr_free, nr_free);
                        nr_free = 1;
-                       cond_resched();
+                       touch_nmi_watchdog();
                } else {
                        nr_free++;
                }
@@ -1535,11 +1522,11 @@ static unsigned long  __init deferred_init_pages(int nid, int zid,
                        continue;
                } else if (!page || !(pfn & nr_pgmask)) {
                        page = pfn_to_page(pfn);
-                       cond_resched();
+                       touch_nmi_watchdog();
                } else {
                        page++;
                }
-               __init_single_page(page, pfn, zid, nid, true);
+               __init_single_page(page, pfn, zid, nid);
                nr_pages++;
        }
        return (nr_pages);
@@ -1552,23 +1539,25 @@ static int __init deferred_init_memmap(void *data)
        int nid = pgdat->node_id;
        unsigned long start = jiffies;
        unsigned long nr_pages = 0;
-       unsigned long spfn, epfn;
+       unsigned long spfn, epfn, first_init_pfn, flags;
        phys_addr_t spa, epa;
        int zid;
        struct zone *zone;
-       unsigned long first_init_pfn = pgdat->first_deferred_pfn;
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
        u64 i;
 
+       /* Bind memory initialisation thread to a local node if possible */
+       if (!cpumask_empty(cpumask))
+               set_cpus_allowed_ptr(current, cpumask);
+
+       pgdat_resize_lock(pgdat, &flags);
+       first_init_pfn = pgdat->first_deferred_pfn;
        if (first_init_pfn == ULONG_MAX) {
+               pgdat_resize_unlock(pgdat, &flags);
                pgdat_init_report_one_done();
                return 0;
        }
 
-       /* Bind memory initialisation thread to a local node if possible */
-       if (!cpumask_empty(cpumask))
-               set_cpus_allowed_ptr(current, cpumask);
-
        /* Sanity check boundaries */
        BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
        BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1587,7 @@ static int __init deferred_init_memmap(void *data)
                epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
                deferred_free_pages(nid, zid, spfn, epfn);
        }
+       pgdat_resize_unlock(pgdat, &flags);
 
        /* Sanity check that the next zone really is unpopulated */
        WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
@@ -1608,6 +1598,117 @@ static int __init deferred_init_memmap(void *data)
        pgdat_init_report_one_done();
        return 0;
 }
+
+/*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
+ */
+static DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * If this zone has deferred pages, try to grow it by initializing enough
+ * deferred pages to satisfy the allocation specified by order, rounded up to
+ * the nearest PAGES_PER_SECTION boundary.  So we're adding memory in increments
+ * of SECTION_SIZE bytes by initializing struct pages in increments of
+ * PAGES_PER_SECTION * sizeof(struct page) bytes.
+ *
+ * Return true when zone was grown, otherwise return false. We return true even
+ * when we grow less than requested, to let the caller decide if there are
+ * enough pages to satisfy the allocation.
+ *
+ * Note: We use noinline because this function is needed only during boot, and
+ * it is called from a __ref function _deferred_grow_zone. This way we are
+ * making sure that it is not inlined into permanent text section.
+ */
+static noinline bool __init
+deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+       int zid = zone_idx(zone);
+       int nid = zone_to_nid(zone);
+       pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+       unsigned long nr_pages = 0;
+       unsigned long first_init_pfn, spfn, epfn, t, flags;
+       unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
+       phys_addr_t spa, epa;
+       u64 i;
+
+       /* Only the last zone may have deferred pages */
+       if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
+               return false;
+
+       pgdat_resize_lock(pgdat, &flags);
+
+       /*
+        * If deferred pages have been initialized while we were waiting for
+        * the lock, return true, as the zone was grown.  The caller will retry
+        * this zone.  We won't return to this function since the caller also
+        * has this static branch.
+        */
+       if (!static_branch_unlikely(&deferred_pages)) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return true;
+       }
+
+       /*
+        * If someone grew this zone while we were waiting for spinlock, return
+        * true, as there might be enough pages already.
+        */
+       if (first_deferred_pfn != pgdat->first_deferred_pfn) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return true;
+       }
+
+       first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
+
+       if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return false;
+       }
+
+       for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+               spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+               epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+
+               while (spfn < epfn && nr_pages < nr_pages_needed) {
+                       t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
+                       first_deferred_pfn = min(t, epfn);
+                       nr_pages += deferred_init_pages(nid, zid, spfn,
+                                                       first_deferred_pfn);
+                       spfn = first_deferred_pfn;
+               }
+
+               if (nr_pages >= nr_pages_needed)
+                       break;
+       }
+
+       for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+               spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+               epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+               deferred_free_pages(nid, zid, spfn, epfn);
+
+               if (first_deferred_pfn == epfn)
+                       break;
+       }
+       pgdat->first_deferred_pfn = first_deferred_pfn;
+       pgdat_resize_unlock(pgdat, &flags);
+
+       return nr_pages > 0;
+}
+
+/*
+ * deferred_grow_zone() is __init, but it is called from
+ * get_page_from_freelist() during early boot until deferred_pages permanently
+ * disables this call. This is why we have refdata wrapper to avoid warning,
+ * and to ensure that the function body gets unloaded.
+ */
+static bool __ref
+_deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+       return deferred_grow_zone(zone, order);
+}
+
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
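
The block above gates on-demand growth of zones with deferred pages behind a static key that starts enabled and, as the later page_alloc_init_late() hunk shows, is disabled permanently once all deferred pages are initialized. A condensed kernel-style sketch of that lifecycle follows; the jump_label calls are the real API, while boot_grow_path(), maybe_grow() and late_init() are hypothetical stand-ins for _deferred_grow_zone(), the get_page_from_freelist() hook and page_alloc_init_late().

/*
 * Condensed kernel-style sketch of the static-key lifecycle added above.
 * DEFINE_STATIC_KEY_TRUE/static_branch_unlikely/static_branch_disable are
 * the real jump_label API; the functions are hypothetical stand-ins.
 */
#include <linux/jump_label.h>
#include <linux/types.h>

static DEFINE_STATIC_KEY_TRUE(boot_grow_enabled);

static bool boot_grow_path(void)
{
        /* ...initialize another section's worth of struct pages... */
        return true;
}

static bool maybe_grow(void)
{
        /*
         * Hot-path check: once the key is disabled the branch is patched
         * out, so the post-boot cost is (nearly) zero.
         */
        if (static_branch_unlikely(&boot_grow_enabled))
                return boot_grow_path();
        return false;
}

static void late_init(void)
{
        /* All deferred pages are initialized; turn the branch off for good. */
        static_branch_disable(&boot_grow_enabled);
}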
@@ -1626,6 +1727,12 @@ void __init page_alloc_init_late(void)
        /* Block until all are initialised */
        wait_for_completion(&pgdat_init_all_done_comp);
 
+       /*
+        * We initialized the rest of the deferred pages.  Permanently disable
+        * on-demand struct page initialization.
+        */
+       static_branch_disable(&deferred_pages);
+
        /* Reinit limits that are based on free pages after the kernel is up */
        files_maxfiles_init();
 #endif
@@ -2418,10 +2525,8 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
        local_irq_save(flags);
        batch = READ_ONCE(pcp->batch);
        to_drain = min(pcp->count, batch);
-       if (to_drain > 0) {
+       if (to_drain > 0)
                free_pcppages_bulk(zone, to_drain, pcp);
-               pcp->count -= to_drain;
-       }
        local_irq_restore(flags);
 }
 #endif
@@ -2443,10 +2548,8 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
        pset = per_cpu_ptr(zone->pageset, cpu);
 
        pcp = &pset->pcp;
-       if (pcp->count) {
+       if (pcp->count)
                free_pcppages_bulk(zone, pcp->count, pcp);
-               pcp->count = 0;
-       }
        local_irq_restore(flags);
 }
 
@@ -2670,7 +2773,6 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
        if (pcp->count >= pcp->high) {
                unsigned long batch = READ_ONCE(pcp->batch);
                free_pcppages_bulk(zone, batch, pcp);
-               pcp->count -= batch;
        }
 }
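
The hunks above (together with the drain_zone_pages() and drain_pages_zone() ones before them) drop the caller-side pcp->count adjustments: free_pcppages_bulk() now decrements the counter itself as it detaches each page, as added earlier in this diff. A small sketch of that ownership change, with hypothetical names:

/*
 * Hypothetical simplification of the pcp accounting change: the counter
 * is owned by the bulk-free routine, which decrements it per detached
 * entry, so callers no longer adjust (or zero) it afterwards.
 */
struct cache {
        int count;                      /* number of cached entries */
        /* ...per-migratetype lists... */
};

static void release_entry(struct cache *c)
{
        (void)c;                        /* detach one entry, hand it back */
}

static void bulk_release(struct cache *c, int todo)
{
        while (todo-- > 0 && c->count > 0) {
                release_entry(c);
                c->count--;             /* accounting lives here now */
        }
}

static void drain(struct cache *c)
{
        if (c->count)
                bulk_release(c, c->count);      /* no "c->count = 0" needed */
}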
 
@@ -3205,6 +3307,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                                       ac_classzone_idx(ac), alloc_flags)) {
                        int ret;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                       /*
+                        * Watermark failed for this zone, but see if we can
+                        * grow this zone if it contains deferred pages.
+                        */
+                       if (static_branch_unlikely(&deferred_pages)) {
+                               if (_deferred_grow_zone(zone, order))
+                                       goto try_this_zone;
+                       }
+#endif
                        /* Checked here to keep the fast path fast */
                        BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                        if (alloc_flags & ALLOC_NO_WATERMARKS)
@@ -3246,6 +3358,14 @@ try_this_zone:
                                reserve_highatomic_pageblock(page, zone, order);
 
                        return page;
+               } else {
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                       /* Try again if zone has deferred pages */
+                       if (static_branch_unlikely(&deferred_pages)) {
+                               if (_deferred_grow_zone(zone, order))
+                                       goto try_this_zone;
+                       }
+#endif
                }
        }
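
The two hooks above let the allocator grow a zone and retry it: when the watermark check fails, or when no page could be taken from the freelists, and the zone still has deferred pages, _deferred_grow_zone() is called and control jumps back to try_this_zone. A self-contained model of that flow, with hypothetical names and made-up numbers:

/*
 * Self-contained model of the grow-and-retry flow; all names and numbers
 * are made up, grow_zone() stands in for _deferred_grow_zone().
 */
struct zone_state {
        int deferred_sections;          /* sections not yet initialized */
        long free_pages;
};

static int watermark_ok(struct zone_state *z)
{
        return z->free_pages > 32;      /* toy watermark */
}

static int grow_zone(struct zone_state *z)
{
        if (!z->deferred_sections)
                return 0;               /* nothing left to initialize */
        z->deferred_sections--;
        z->free_pages += 512;           /* one more "section" is now usable */
        return 1;
}

static long alloc_from_zone(struct zone_state *z)
{
try_this_zone:
        if (!watermark_ok(z)) {
                if (grow_zone(z))
                        goto try_this_zone;     /* grew: re-check the watermark */
                return -1;                      /* move on to the next zone */
        }

        if (z->free_pages > 0)
                return --z->free_pages;         /* "allocated" a page */

        if (grow_zone(z))
                goto try_this_zone;             /* freelists empty: try again */
        return -1;
}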
 
@@ -3685,16 +3805,18 @@ retry:
        return page;
 }
 
-static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac)
+static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+                            const struct alloc_context *ac)
 {
        struct zoneref *z;
        struct zone *zone;
        pg_data_t *last_pgdat = NULL;
+       enum zone_type high_zoneidx = ac->high_zoneidx;
 
-       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
-                                       ac->high_zoneidx, ac->nodemask) {
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx,
+                                       ac->nodemask) {
                if (last_pgdat != zone->zone_pgdat)
-                       wakeup_kswapd(zone, order, ac->high_zoneidx);
+                       wakeup_kswapd(zone, gfp_mask, order, high_zoneidx);
                last_pgdat = zone->zone_pgdat;
        }
 }
@@ -3973,7 +4095,7 @@ retry_cpuset:
                goto nopage;
 
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-               wake_all_kswapds(order, ac);
+               wake_all_kswapds(order, gfp_mask, ac);
 
        /*
         * The adjusted alloc_flags might result in immediate success, so try
@@ -4031,7 +4153,7 @@ retry_cpuset:
 retry:
        /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-               wake_all_kswapds(order, ac);
+               wake_all_kswapds(order, gfp_mask, ac);
 
        reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
        if (reserve_flags)
@@ -5334,6 +5456,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long pfn;
        unsigned long nr_initialised = 0;
+       struct page *page;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        struct memblock_region *r = NULL, *tmp;
 #endif
@@ -5386,6 +5509,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 #endif
 
 not_early:
+               page = pfn_to_page(pfn);
+               __init_single_page(page, pfn, zone, nid);
+               if (context == MEMMAP_HOTPLUG)
+                       SetPageReserved(page);
+
                /*
                 * Mark the block movable so that blocks are reserved for
                 * movable at startup. This will force kernel allocations
@@ -5402,15 +5530,8 @@ not_early:
                 * because this is done early in sparse_add_one_section
                 */
                if (!(pfn & (pageblock_nr_pages - 1))) {
-                       struct page *page = pfn_to_page(pfn);
-
-                       __init_single_page(page, pfn, zone, nid,
-                                       context != MEMMAP_HOTPLUG);
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                        cond_resched();
-               } else {
-                       __init_single_pfn(pfn, zone, nid,
-                                       context != MEMMAP_HOTPLUG);
                }
        }
 }
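
In the rewritten loop above, __init_single_page() now runs for every pfn (and, per the earlier hunk, always zeroes the struct page), pages in hot-added ranges are additionally marked reserved, and only pageblock-aligned pfns still get the migratetype handling. A compact sketch of that shape, using hypothetical simplified types:

/*
 * Sketch of the restructured loop, with hypothetical simplified types:
 * every entry is initialized unconditionally, hot-added entries are also
 * marked reserved, and only block-aligned entries get per-block handling.
 */
#include <string.h>

#define BLOCK_MASK      511u            /* stand-in for pageblock_nr_pages - 1 */

enum init_context { CTX_EARLY, CTX_HOTPLUG };

struct mini_page { unsigned int flags; };
#define PG_RESERVED     0x1u
#define PG_MOVABLE_BLK  0x2u

static void init_range(struct mini_page *map, unsigned long start_pfn,
                       unsigned long count, enum init_context context)
{
        for (unsigned long pfn = start_pfn; pfn < start_pfn + count; pfn++) {
                struct mini_page *p = &map[pfn - start_pfn];

                memset(p, 0, sizeof(*p));               /* ~__init_single_page() */
                if (context == CTX_HOTPLUG)
                        p->flags |= PG_RESERVED;        /* ~SetPageReserved() */

                if (!(pfn & BLOCK_MASK))                /* block-aligned pfn */
                        p->flags |= PG_MOVABLE_BLK;     /* ~set_pageblock_migratetype() */
        }
}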
@@ -6192,10 +6313,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
                end = pgdat_end_pfn(pgdat);
                end = ALIGN(end, MAX_ORDER_NR_PAGES);
                size =  (end - start) * sizeof(struct page);
-               map = alloc_remap(pgdat->node_id, size);
-               if (!map)
-                       map = memblock_virt_alloc_node_nopanic(size,
-                                                              pgdat->node_id);
+               map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
                pgdat->node_mem_map = map + offset;
        }
        pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
@@ -6244,7 +6362,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
        alloc_node_mem_map(pgdat);
 
-       reset_deferred_meminit(pgdat);
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+       /*
+        * We start only with one section of pages, more pages are added as
+        * needed until the rest of deferred pages are initialized.
+        */
+       pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+                                        pgdat->node_spanned_pages);
+       pgdat->first_deferred_pfn = ULONG_MAX;
+#endif
        free_area_init_core(pgdat);
 }
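
The replacement for reset_deferred_meminit() above seeds each node with at most one section of statically initialized struct pages; later allocations grow the zone on demand. For scale, assuming typical x86-64 values (not stated in the diff), one section is 128 MiB, i.e. 32768 pages of 4 KiB:

/*
 * Illustrative arithmetic only, assuming typical x86-64 constants:
 * SECTION_SIZE_BITS = 27 and PAGE_SHIFT = 12, so one section covers
 * 128 MiB and PAGES_PER_SECTION = 1 << (27 - 12) = 32768 pages.
 */
#define SECTION_SIZE_BITS       27
#define PAGE_SHIFT              12
#define PAGES_PER_SECTION       (1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))

static unsigned long static_init_pgcnt(unsigned long node_spanned_pages)
{
        /* A small node is covered entirely; a large one starts with one section. */
        return node_spanned_pages < PAGES_PER_SECTION ?
               node_spanned_pages : PAGES_PER_SECTION;
}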
 
@@ -6474,7 +6600,18 @@ static void __init find_zone_movable_pfns_for_nodes(void)
        }
 
        /*
-        * If movablecore=nn[KMG] was specified, calculate what size of
+        * If kernelcore=nn% or movablecore=nn% was specified, calculate the
+        * amount of necessary memory.
+        */
+       if (required_kernelcore_percent)
+               required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
+                                      10000UL;
+       if (required_movablecore_percent)
+               required_movablecore = (totalpages * 100 * required_movablecore_percent) /
+                                       10000UL;
+
+       /*
+        * If movablecore= was specified, calculate what size of
         * kernelcore that corresponds so that memory usable for
         * any allocation type is evenly spread. If both kernelcore
         * and movablecore are specified, then the value of kernelcore
@@ -6714,18 +6851,30 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
        zero_resv_unavail();
 }
 
-static int __init cmdline_parse_core(char *p, unsigned long *core)
+static int __init cmdline_parse_core(char *p, unsigned long *core,
+                                    unsigned long *percent)
 {
        unsigned long long coremem;
+       char *endptr;
+
        if (!p)
                return -EINVAL;
 
-       coremem = memparse(p, &p);
-       *core = coremem >> PAGE_SHIFT;
+       /* Value may be a percentage of total memory, otherwise bytes */
+       coremem = simple_strtoull(p, &endptr, 0);
+       if (*endptr == '%') {
+               /* Paranoid check for percent values greater than 100 */
+               WARN_ON(coremem > 100);
 
-       /* Paranoid check that UL is enough for the coremem value */
-       WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
+               *percent = coremem;
+       } else {
+               coremem = memparse(p, &p);
+               /* Paranoid check that UL is enough for the coremem value */
+               WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
 
+               *core = coremem >> PAGE_SHIFT;
+               *percent = 0UL;
+       }
        return 0;
 }
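
cmdline_parse_core() above now accepts either a byte size via memparse(), as before, or a trailing '%', and the earlier find_zone_movable_pfns_for_nodes() hunk converts the stored percentage into pages. A worked example with assumed numbers (totalpages = 1048576, i.e. 4 GiB of 4 KiB pages):

#include <stdio.h>

int main(void)
{
        /*
         * "kernelcore=30%" stores required_kernelcore_percent = 30; the
         * conversion used above is (totalpages * 100 * percent) / 10000.
         * "kernelcore=512M" instead goes through memparse() and stores
         * 512 MiB >> PAGE_SHIFT = 131072 pages directly.
         */
        unsigned long totalpages = 1UL << 20;   /* assumed: 4 GiB / 4 KiB */
        unsigned long percent = 30;
        unsigned long pages = (totalpages * 100 * percent) / 10000UL;

        /* Prints 314572 pages, ~1228 MiB (256 pages per MiB with 4 KiB pages). */
        printf("kernelcore=%lu%% -> %lu pages (~%lu MiB)\n",
               percent, pages, pages >> 8);
        return 0;
}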
 
@@ -6741,7 +6890,8 @@ static int __init cmdline_parse_kernelcore(char *p)
                return 0;
        }
 
-       return cmdline_parse_core(p, &required_kernelcore);
+       return cmdline_parse_core(p, &required_kernelcore,
+                                 &required_kernelcore_percent);
 }
 
 /*
@@ -6750,7 +6900,8 @@ static int __init cmdline_parse_kernelcore(char *p)
  */
 static int __init cmdline_parse_movablecore(char *p)
 {
-       return cmdline_parse_core(p, &required_movablecore);
+       return cmdline_parse_core(p, &required_movablecore,
+                                 &required_movablecore_percent);
 }
 
 early_param("kernelcore", cmdline_parse_kernelcore);
@@ -7594,7 +7745,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                cc->nr_migratepages -= nr_reclaimed;
 
                ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-                                   NULL, 0, cc->mode, MR_CMA);
+                                   NULL, 0, cc->mode, MR_CONTIG_RANGE);
        }
        if (ret < 0) {
                putback_movable_pages(&cc->migratepages);
@@ -7614,11 +7765,11 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  * @gfp_mask:  GFP mask to use during compaction
  *
  * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
- * aligned, however it's the caller's responsibility to guarantee that
- * we are the only thread that changes migrate type of pageblocks the
- * pages fall in.
+ * aligned.  The PFN range must belong to a single zone.
  *
- * The PFN range must belong to a single zone.
+ * The first thing this routine does is attempt to MIGRATE_ISOLATE all
+ * pageblocks in the range.  Once isolated, the pageblocks should not
+ * be modified by others.
  *
  * Returns zero on success or negative error code.  On success all
  * pages which PFN is in [start, end) are allocated for the caller and