mm/page_isolation.c: make start_isolate_page_range() fail if already isolated
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1741dd2..0b97b8e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -265,17 +265,19 @@ int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
 int watermark_scale_factor = 10;
 
-static unsigned long __meminitdata nr_kernel_pages;
-static unsigned long __meminitdata nr_all_pages;
-static unsigned long __meminitdata dma_reserve;
+static unsigned long nr_kernel_pages __meminitdata;
+static unsigned long nr_all_pages __meminitdata;
+static unsigned long dma_reserve __meminitdata;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-static unsigned long __initdata required_kernelcore;
-static unsigned long __initdata required_movablecore;
-static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-static bool mirrored_kernelcore;
+static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata;
+static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata;
+static unsigned long required_kernelcore __initdata;
+static unsigned long required_kernelcore_percent __initdata;
+static unsigned long required_movablecore __initdata;
+static unsigned long required_movablecore_percent __initdata;
+static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata;
+static bool mirrored_kernelcore __meminitdata;
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -292,40 +294,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 int page_group_by_mobility_disabled __read_mostly;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-
-/*
- * Determine how many pages need to be initialized during early boot
- * (non-deferred initialization).
- * The value of first_deferred_pfn will be set later, once non-deferred pages
- * are initialized, but for now set it ULONG_MAX.
- */
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-       phys_addr_t start_addr, end_addr;
-       unsigned long max_pgcnt;
-       unsigned long reserved;
-
-       /*
-        * Initialise at least 2G of a node but also take into account that
-        * two large system hashes that can take up 1GB for 0.25TB/node.
-        */
-       max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
-                       (pgdat->node_spanned_pages >> 8));
-
-       /*
-        * Compensate the all the memblock reservations (e.g. crash kernel)
-        * from the initial estimation to make sure we will initialize enough
-        * memory to boot.
-        */
-       start_addr = PFN_PHYS(pgdat->node_start_pfn);
-       end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
-       reserved = memblock_reserved_memory_within(start_addr, end_addr);
-       max_pgcnt += PHYS_PFN(reserved);
-
-       pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
-       pgdat->first_deferred_pfn = ULONG_MAX;
-}
-
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
@@ -361,10 +329,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
        return true;
 }
 #else
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-}
-
 static inline bool early_page_uninitialised(unsigned long pfn)
 {
        return false;
@@ -1099,6 +1063,15 @@ static bool bulkfree_pcp_prepare(struct page *page)
 }
 #endif /* CONFIG_DEBUG_VM */
 
+static inline void prefetch_buddy(struct page *page)
+{
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long buddy_pfn = __find_buddy_pfn(pfn, 0);
+       struct page *buddy = page + (buddy_pfn - pfn);
+
+       prefetch(buddy);
+}
+
 /*
  * Frees a number of pages from the PCP lists
  * Assumes all pages on list are in same zone, and of same order.
@@ -1115,13 +1088,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
        int migratetype = 0;
        int batch_free = 0;
+       int prefetch_nr = 0;
        bool isolated_pageblocks;
-
-       spin_lock(&zone->lock);
-       isolated_pageblocks = has_isolate_pageblock(zone);
+       struct page *page, *tmp;
+       LIST_HEAD(head);
 
        while (count) {
-               struct page *page;
                struct list_head *list;
 
                /*
@@ -1143,26 +1115,48 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        batch_free = count;
 
                do {
-                       int mt; /* migratetype of the to-be-freed page */
-
                        page = list_last_entry(list, struct page, lru);
-                       /* must delete as __free_one_page list manipulates */
+                       /* must delete to avoid corrupting pcp list */
                        list_del(&page->lru);
-
-                       mt = get_pcppage_migratetype(page);
-                       /* MIGRATE_ISOLATE page should not go to pcplists */
-                       VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-                       /* Pageblock could have been isolated meanwhile */
-                       if (unlikely(isolated_pageblocks))
-                               mt = get_pageblock_migratetype(page);
+                       pcp->count--;
 
                        if (bulkfree_pcp_prepare(page))
                                continue;
 
-                       __free_one_page(page, page_to_pfn(page), zone, 0, mt);
-                       trace_mm_page_pcpu_drain(page, 0, mt);
+                       list_add_tail(&page->lru, &head);
+
+                       /*
+                        * We are going to put the page back to the global
+                        * pool, prefetch its buddy to speed up later access
+                        * under zone->lock. It is believed the overhead of
+                        * an additional test and calculating buddy_pfn here
+                        * can be offset by reduced memory latency later. To
+                        * avoid excessive prefetching due to large count, only
+                        * prefetch buddy for the first pcp->batch nr of pages.
+                        */
+                       if (prefetch_nr++ < pcp->batch)
+                               prefetch_buddy(page);
                } while (--count && --batch_free && !list_empty(list));
        }
+
+       spin_lock(&zone->lock);
+       isolated_pageblocks = has_isolate_pageblock(zone);
+
+       /*
+        * Use safe version since after __free_one_page(),
+        * page->lru.next will not point to original list.
+        */
+       list_for_each_entry_safe(page, tmp, &head, lru) {
+               int mt = get_pcppage_migratetype(page);
+               /* MIGRATE_ISOLATE page should not go to pcplists */
+               VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+               /* Pageblock could have been isolated meanwhile */
+               if (unlikely(isolated_pageblocks))
+                       mt = get_pageblock_migratetype(page);
+
+               __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+               trace_mm_page_pcpu_drain(page, 0, mt);
+       }
        spin_unlock(&zone->lock);
 }
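
The hunk above restructures free_pcppages_bulk() so that pages are detached from the pcp lists and staged on a local list first, and zone->lock is taken only once for the whole batch; the buddy prefetch tries to hide the memory latency of the coalescing done later under the lock. Below is a minimal userspace sketch of that two-phase pattern, not kernel code: all names are hypothetical and a pthread mutex stands in for zone->lock.

/*
 * Userspace sketch (not kernel code) of the two-phase pattern above:
 * stage items on a private list with no shared lock held, then take the
 * lock once and merge the whole batch.  pool_lock stands in for
 * zone->lock, struct item for struct page; all names are hypothetical.
 */
#include <pthread.h>
#include <stddef.h>

struct item {
        struct item *next;
};

static struct item *shared_pool;        /* protected by pool_lock */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static void bulk_release(struct item **percpu_list, int count, int batch)
{
        struct item *staged = NULL, *it;
        int prefetch_nr = 0;

        /* Phase 1: no shared lock, only the caller's private list. */
        while (count-- > 0 && *percpu_list) {
                it = *percpu_list;
                *percpu_list = it->next;

                it->next = staged;
                staged = it;

                /*
                 * Bound the prefetching to the first 'batch' items, as the
                 * kernel bounds prefetch_buddy() to pcp->batch pages.  (In
                 * the kernel the prefetched object is the page's buddy,
                 * which is first touched later, under zone->lock.)
                 */
                if (prefetch_nr++ < batch)
                        __builtin_prefetch(it);
        }

        /* Phase 2: one lock acquisition for the whole batch. */
        pthread_mutex_lock(&pool_lock);
        while (staged) {
                it = staged;
                staged = it->next;
                it->next = shared_pool;
                shared_pool = it;
        }
        pthread_mutex_unlock(&pool_lock);
}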
 
@@ -1181,10 +1175,9 @@ static void free_one_page(struct zone *zone,
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
-                               unsigned long zone, int nid, bool zero)
+                               unsigned long zone, int nid)
 {
-       if (zero)
-               mm_zero_struct_page(page);
+       mm_zero_struct_page(page);
        set_page_links(page, zone, nid, pfn);
        init_page_count(page);
        page_mapcount_reset(page);
@@ -1198,12 +1191,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 #endif
 }
 
-static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
-                                       int nid, bool zero)
-{
-       return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero);
-}
-
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __meminit init_reserved_page(unsigned long pfn)
 {
@@ -1222,7 +1209,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
                if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
                        break;
        }
-       __init_single_pfn(pfn, zid, nid, true);
+       __init_single_page(pfn_to_page(pfn), pfn, zid, nid);
 }
 #else
 static inline void init_reserved_page(unsigned long pfn)
@@ -1506,7 +1493,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
                } else if (!(pfn & nr_pgmask)) {
                        deferred_free_range(pfn - nr_free, nr_free);
                        nr_free = 1;
-                       cond_resched();
+                       touch_nmi_watchdog();
                } else {
                        nr_free++;
                }
@@ -1535,11 +1522,11 @@ static unsigned long  __init deferred_init_pages(int nid, int zid,
                        continue;
                } else if (!page || !(pfn & nr_pgmask)) {
                        page = pfn_to_page(pfn);
-                       cond_resched();
+                       touch_nmi_watchdog();
                } else {
                        page++;
                }
-               __init_single_page(page, pfn, zid, nid, true);
+               __init_single_page(page, pfn, zid, nid);
                nr_pages++;
        }
        return (nr_pages);
@@ -1552,23 +1539,25 @@ static int __init deferred_init_memmap(void *data)
        int nid = pgdat->node_id;
        unsigned long start = jiffies;
        unsigned long nr_pages = 0;
-       unsigned long spfn, epfn;
+       unsigned long spfn, epfn, first_init_pfn, flags;
        phys_addr_t spa, epa;
        int zid;
        struct zone *zone;
-       unsigned long first_init_pfn = pgdat->first_deferred_pfn;
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
        u64 i;
 
+       /* Bind memory initialisation thread to a local node if possible */
+       if (!cpumask_empty(cpumask))
+               set_cpus_allowed_ptr(current, cpumask);
+
+       pgdat_resize_lock(pgdat, &flags);
+       first_init_pfn = pgdat->first_deferred_pfn;
        if (first_init_pfn == ULONG_MAX) {
+               pgdat_resize_unlock(pgdat, &flags);
                pgdat_init_report_one_done();
                return 0;
        }
 
-       /* Bind memory initialisation thread to a local node if possible */
-       if (!cpumask_empty(cpumask))
-               set_cpus_allowed_ptr(current, cpumask);
-
        /* Sanity check boundaries */
        BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
        BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1587,7 @@ static int __init deferred_init_memmap(void *data)
                epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
                deferred_free_pages(nid, zid, spfn, epfn);
        }
+       pgdat_resize_unlock(pgdat, &flags);
 
        /* Sanity check that the next zone really is unpopulated */
        WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
@@ -1608,6 +1598,117 @@ static int __init deferred_init_memmap(void *data)
        pgdat_init_report_one_done();
        return 0;
 }
+
+/*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
+ */
+static DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * If this zone has deferred pages, try to grow it by initializing enough
+ * deferred pages to satisfy the allocation specified by order, rounded up to
+ * the nearest PAGES_PER_SECTION boundary.  So we're adding memory in increments
+ * of SECTION_SIZE bytes by initializing struct pages in increments of
+ * PAGES_PER_SECTION * sizeof(struct page) bytes.
+ *
+ * Return true when zone was grown, otherwise return false. We return true even
+ * when we grow less than requested, to let the caller decide if there are
+ * enough pages to satisfy the allocation.
+ *
+ * Note: We use noinline because this function is needed only during boot, and
+ * it is called from a __ref function _deferred_grow_zone. This way we are
+ * making sure that it is not inlined into permanent text section.
+ */
+static noinline bool __init
+deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+       int zid = zone_idx(zone);
+       int nid = zone_to_nid(zone);
+       pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+       unsigned long nr_pages = 0;
+       unsigned long first_init_pfn, spfn, epfn, t, flags;
+       unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
+       phys_addr_t spa, epa;
+       u64 i;
+
+       /* Only the last zone may have deferred pages */
+       if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
+               return false;
+
+       pgdat_resize_lock(pgdat, &flags);
+
+       /*
+        * If deferred pages have been initialized while we were waiting for
+        * the lock, return true, as the zone was grown.  The caller will retry
+        * this zone.  We won't return to this function since the caller also
+        * has this static branch.
+        */
+       if (!static_branch_unlikely(&deferred_pages)) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return true;
+       }
+
+       /*
+        * If someone grew this zone while we were waiting for spinlock, return
+        * true, as there might be enough pages already.
+        */
+       if (first_deferred_pfn != pgdat->first_deferred_pfn) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return true;
+       }
+
+       first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
+
+       if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
+               pgdat_resize_unlock(pgdat, &flags);
+               return false;
+       }
+
+       for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+               spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+               epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+
+               while (spfn < epfn && nr_pages < nr_pages_needed) {
+                       t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
+                       first_deferred_pfn = min(t, epfn);
+                       nr_pages += deferred_init_pages(nid, zid, spfn,
+                                                       first_deferred_pfn);
+                       spfn = first_deferred_pfn;
+               }
+
+               if (nr_pages >= nr_pages_needed)
+                       break;
+       }
+
+       for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+               spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+               epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+               deferred_free_pages(nid, zid, spfn, epfn);
+
+               if (first_deferred_pfn == epfn)
+                       break;
+       }
+       pgdat->first_deferred_pfn = first_deferred_pfn;
+       pgdat_resize_unlock(pgdat, &flags);
+
+       return nr_pages > 0;
+}
+
+/*
+ * deferred_grow_zone() is __init, but it is called from
+ * get_page_from_freelist() during early boot until deferred_pages permanently
+ * disables this call. This is why we have refdata wrapper to avoid warning,
+ * and to ensure that the function body gets unloaded.
+ */
+static bool __ref
+_deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+       return deferred_grow_zone(zone, order);
+}
+
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
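
The block above gates on-demand growth of zones with deferred pages behind a static key that starts enabled and, as the later page_alloc_init_late() hunk shows, is disabled permanently once all deferred pages are initialized. A condensed kernel-style sketch of that lifecycle follows; the jump_label calls are the real API, while boot_grow_path(), maybe_grow() and late_init() are hypothetical stand-ins for _deferred_grow_zone(), the get_page_from_freelist() hook and page_alloc_init_late().

/*
 * Condensed kernel-style sketch of the static-key lifecycle added above.
 * DEFINE_STATIC_KEY_TRUE/static_branch_unlikely/static_branch_disable are
 * the real jump_label API; the functions are hypothetical stand-ins.
 */
#include <linux/jump_label.h>
#include <linux/types.h>

static DEFINE_STATIC_KEY_TRUE(boot_grow_enabled);

static bool boot_grow_path(void)
{
        /* ...initialize another section's worth of struct pages... */
        return true;
}

static bool maybe_grow(void)
{
        /*
         * Hot-path check: once the key is disabled the branch is patched
         * out, so the post-boot cost is (nearly) zero.
         */
        if (static_branch_unlikely(&boot_grow_enabled))
                return boot_grow_path();
        return false;
}

static void late_init(void)
{
        /* All deferred pages are initialized; turn the branch off for good. */
        static_branch_disable(&boot_grow_enabled);
}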
@@ -1626,6 +1727,12 @@ void __init page_alloc_init_late(void)
        /* Block until all are initialised */
        wait_for_completion(&pgdat_init_all_done_comp);
 
+       /*
+        * We initialized the rest of the deferred pages.  Permanently disable
+        * on-demand struct page initialization.
+        */
+       static_branch_disable(&deferred_pages);
+
        /* Reinit limits that are based on free pages after the kernel is up */
        files_maxfiles_init();
 #endif
@@ -2418,10 +2525,8 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
        local_irq_save(flags);
        batch = READ_ONCE(pcp->batch);
        to_drain = min(pcp->count, batch);
-       if (to_drain > 0) {
+       if (to_drain > 0)
                free_pcppages_bulk(zone, to_drain, pcp);
-               pcp->count -= to_drain;
-       }
        local_irq_restore(flags);
 }
 #endif
@@ -2443,10 +2548,8 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
        pset = per_cpu_ptr(zone->pageset, cpu);
 
        pcp = &pset->pcp;
-       if (pcp->count) {
+       if (pcp->count)
                free_pcppages_bulk(zone, pcp->count, pcp);
-               pcp->count = 0;
-       }
        local_irq_restore(flags);
 }
 
@@ -2670,7 +2773,6 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
        if (pcp->count >= pcp->high) {
                unsigned long batch = READ_ONCE(pcp->batch);
                free_pcppages_bulk(zone, batch, pcp);
-               pcp->count -= batch;
        }
 }
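
The hunks above (together with the drain_zone_pages() and drain_pages_zone() ones before them) drop the caller-side pcp->count adjustments: free_pcppages_bulk() now decrements the counter itself as it detaches each page, as added earlier in this diff. A small sketch of that ownership change, with hypothetical names:

/*
 * Hypothetical simplification of the pcp accounting change: the counter
 * is owned by the bulk-free routine, which decrements it per detached
 * entry, so callers no longer adjust (or zero) it afterwards.
 */
struct cache {
        int count;                      /* number of cached entries */
        /* ...per-migratetype lists... */
};

static void release_entry(struct cache *c)
{
        (void)c;                        /* detach one entry, hand it back */
}

static void bulk_release(struct cache *c, int todo)
{
        while (todo-- > 0 && c->count > 0) {
                release_entry(c);
                c->count--;             /* accounting lives here now */
        }
}

static void drain(struct cache *c)
{
        if (c->count)
                bulk_release(c, c->count);      /* no "c->count = 0" needed */
}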
 
@@ -3205,6 +3307,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                                       ac_classzone_idx(ac), alloc_flags)) {
                        int ret;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                       /*
+                        * Watermark failed for this zone, but see if we can
+                        * grow this zone if it contains deferred pages.
+                        */
+                       if (static_branch_unlikely(&deferred_pages)) {
+                               if (_deferred_grow_zone(zone, order))
+                                       goto try_this_zone;
+                       }
+#endif
                        /* Checked here to keep the fast path fast */
                        BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                        if (alloc_flags & ALLOC_NO_WATERMARKS)
@@ -3246,6 +3358,14 @@ try_this_zone:
                                reserve_highatomic_pageblock(page, zone, order);
 
                        return page;
+               } else {
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                       /* Try again if zone has deferred pages */
+                       if (static_branch_unlikely(&deferred_pages)) {
+                               if (_deferred_grow_zone(zone, order))
+                                       goto try_this_zone;
+                       }
+#endif
                }
        }
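
The two hooks above let the allocator grow a zone and retry it: when the watermark check fails, or when no page could be taken from the freelists, and the zone still has deferred pages, _deferred_grow_zone() is called and control jumps back to try_this_zone. A self-contained model of that flow, with hypothetical names and made-up numbers:

/*
 * Self-contained model of the grow-and-retry flow; all names and numbers
 * are made up, grow_zone() stands in for _deferred_grow_zone().
 */
struct zone_state {
        int deferred_sections;          /* sections not yet initialized */
        long free_pages;
};

static int watermark_ok(struct zone_state *z)
{
        return z->free_pages > 32;      /* toy watermark */
}

static int grow_zone(struct zone_state *z)
{
        if (!z->deferred_sections)
                return 0;               /* nothing left to initialize */
        z->deferred_sections--;
        z->free_pages += 512;           /* one more "section" is now usable */
        return 1;
}

static long alloc_from_zone(struct zone_state *z)
{
try_this_zone:
        if (!watermark_ok(z)) {
                if (grow_zone(z))
                        goto try_this_zone;     /* grew: re-check the watermark */
                return -1;                      /* move on to the next zone */
        }

        if (z->free_pages > 0)
                return --z->free_pages;         /* "allocated" a page */

        if (grow_zone(z))
                goto try_this_zone;             /* freelists empty: try again */
        return -1;
}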
 
@@ -3685,16 +3805,18 @@ retry:
        return page;
 }
 
-static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac)
+static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+                            const struct alloc_context *ac)
 {
        struct zoneref *z;
        struct zone *zone;
        pg_data_t *last_pgdat = NULL;
+       enum zone_type high_zoneidx = ac->high_zoneidx;
 
-       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
-                                       ac->high_zoneidx, ac->nodemask) {
+       for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, high_zoneidx,
+                                       ac->nodemask) {
                if (last_pgdat != zone->zone_pgdat)
-                       wakeup_kswapd(zone, order, ac->high_zoneidx);
+                       wakeup_kswapd(zone, gfp_mask, order, high_zoneidx);
                last_pgdat = zone->zone_pgdat;
        }
 }
@@ -3973,7 +4095,7 @@ retry_cpuset:
                goto nopage;
 
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-               wake_all_kswapds(order, ac);
+               wake_all_kswapds(order, gfp_mask, ac);
 
        /*
         * The adjusted alloc_flags might result in immediate success, so try
@@ -4031,7 +4153,7 @@ retry_cpuset:
 retry:
        /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
        if (gfp_mask & __GFP_KSWAPD_RECLAIM)
-               wake_all_kswapds(order, ac);
+               wake_all_kswapds(order, gfp_mask, ac);
 
        reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
        if (reserve_flags)
@@ -5334,6 +5456,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long pfn;
        unsigned long nr_initialised = 0;
+       struct page *page;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        struct memblock_region *r = NULL, *tmp;
 #endif
@@ -5386,6 +5509,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 #endif
 
 not_early:
+               page = pfn_to_page(pfn);
+               __init_single_page(page, pfn, zone, nid);
+               if (context == MEMMAP_HOTPLUG)
+                       SetPageReserved(page);
+
                /*
                 * Mark the block movable so that blocks are reserved for
                 * movable at startup. This will force kernel allocations
@@ -5402,15 +5530,8 @@ not_early:
                 * because this is done early in sparse_add_one_section
                 */
                if (!(pfn & (pageblock_nr_pages - 1))) {
-                       struct page *page = pfn_to_page(pfn);
-
-                       __init_single_page(page, pfn, zone, nid,
-                                       context != MEMMAP_HOTPLUG);
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                        cond_resched();
-               } else {
-                       __init_single_pfn(pfn, zone, nid,
-                                       context != MEMMAP_HOTPLUG);
                }
        }
 }
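
In the rewritten loop above, __init_single_page() now runs for every pfn (and, per the earlier hunk, always zeroes the struct page), pages in hot-added ranges are additionally marked reserved, and only pageblock-aligned pfns still get the migratetype handling. A compact sketch of that shape, using hypothetical simplified types:

/*
 * Sketch of the restructured loop, with hypothetical simplified types:
 * every entry is initialized unconditionally, hot-added entries are also
 * marked reserved, and only block-aligned entries get per-block handling.
 */
#include <string.h>

#define BLOCK_MASK      511u            /* stand-in for pageblock_nr_pages - 1 */

enum init_context { CTX_EARLY, CTX_HOTPLUG };

struct mini_page { unsigned int flags; };
#define PG_RESERVED     0x1u
#define PG_MOVABLE_BLK  0x2u

static void init_range(struct mini_page *map, unsigned long start_pfn,
                       unsigned long count, enum init_context context)
{
        for (unsigned long pfn = start_pfn; pfn < start_pfn + count; pfn++) {
                struct mini_page *p = &map[pfn - start_pfn];

                memset(p, 0, sizeof(*p));               /* ~__init_single_page() */
                if (context == CTX_HOTPLUG)
                        p->flags |= PG_RESERVED;        /* ~SetPageReserved() */

                if (!(pfn & BLOCK_MASK))                /* block-aligned pfn */
                        p->flags |= PG_MOVABLE_BLK;     /* ~set_pageblock_migratetype() */
        }
}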
@@ -6192,10 +6313,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
                end = pgdat_end_pfn(pgdat);
                end = ALIGN(end, MAX_ORDER_NR_PAGES);
                size =  (end - start) * sizeof(struct page);
-               map = alloc_remap(pgdat->node_id, size);
-               if (!map)
-                       map = memblock_virt_alloc_node_nopanic(size,
-                                                              pgdat->node_id);
+               map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
                pgdat->node_mem_map = map + offset;
        }
        pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
@@ -6244,7 +6362,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
        alloc_node_mem_map(pgdat);
 
-       reset_deferred_meminit(pgdat);
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+       /*
+        * We start only with one section of pages, more pages are added as
+        * needed until the rest of deferred pages are initialized.
+        */
+       pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+                                        pgdat->node_spanned_pages);
+       pgdat->first_deferred_pfn = ULONG_MAX;
+#endif
        free_area_init_core(pgdat);
 }
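
The replacement for reset_deferred_meminit() above seeds each node with at most one section of statically initialized struct pages; later allocations grow the zone on demand. For scale, assuming typical x86-64 values (not stated in the diff), one section is 128 MiB, i.e. 32768 pages of 4 KiB:

/*
 * Illustrative arithmetic only, assuming typical x86-64 constants:
 * SECTION_SIZE_BITS = 27 and PAGE_SHIFT = 12, so one section covers
 * 128 MiB and PAGES_PER_SECTION = 1 << (27 - 12) = 32768 pages.
 */
#define SECTION_SIZE_BITS       27
#define PAGE_SHIFT              12
#define PAGES_PER_SECTION       (1UL << (SECTION_SIZE_BITS - PAGE_SHIFT))

static unsigned long static_init_pgcnt(unsigned long node_spanned_pages)
{
        /* A small node is covered entirely; a large one starts with one section. */
        return node_spanned_pages < PAGES_PER_SECTION ?
               node_spanned_pages : PAGES_PER_SECTION;
}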
 
@@ -6474,7 +6600,18 @@ static void __init find_zone_movable_pfns_for_nodes(void)
        }
 
        /*
-        * If movablecore=nn[KMG] was specified, calculate what size of
+        * If kernelcore=nn% or movablecore=nn% was specified, calculate the
+        * amount of necessary memory.
+        */
+       if (required_kernelcore_percent)
+               required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
+                                      10000UL;
+       if (required_movablecore_percent)
+               required_movablecore = (totalpages * 100 * required_movablecore_percent) /
+                                       10000UL;
+
+       /*
+        * If movablecore= was specified, calculate what size of
         * kernelcore that corresponds so that memory usable for
         * any allocation type is evenly spread. If both kernelcore
         * and movablecore are specified, then the value of kernelcore
@@ -6714,18 +6851,30 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
        zero_resv_unavail();
 }
 
-static int __init cmdline_parse_core(char *p, unsigned long *core)
+static int __init cmdline_parse_core(char *p, unsigned long *core,
+                                    unsigned long *percent)
 {
        unsigned long long coremem;
+       char *endptr;
+
        if (!p)
                return -EINVAL;
 
-       coremem = memparse(p, &p);
-       *core = coremem >> PAGE_SHIFT;
+       /* Value may be a percentage of total memory, otherwise bytes */
+       coremem = simple_strtoull(p, &endptr, 0);
+       if (*endptr == '%') {
+               /* Paranoid check for percent values greater than 100 */
+               WARN_ON(coremem > 100);
 
-       /* Paranoid check that UL is enough for the coremem value */
-       WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
+               *percent = coremem;
+       } else {
+               coremem = memparse(p, &p);
+               /* Paranoid check that UL is enough for the coremem value */
+               WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
 
+               *core = coremem >> PAGE_SHIFT;
+               *percent = 0UL;
+       }
        return 0;
 }
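
cmdline_parse_core() above now accepts either a byte size via memparse(), as before, or a trailing '%', and the earlier find_zone_movable_pfns_for_nodes() hunk converts the stored percentage into pages. A worked example with assumed numbers (totalpages = 1048576, i.e. 4 GiB of 4 KiB pages):

#include <stdio.h>

int main(void)
{
        /*
         * "kernelcore=30%" stores required_kernelcore_percent = 30; the
         * conversion used above is (totalpages * 100 * percent) / 10000.
         * "kernelcore=512M" instead goes through memparse() and stores
         * 512 MiB >> PAGE_SHIFT = 131072 pages directly.
         */
        unsigned long totalpages = 1UL << 20;   /* assumed: 4 GiB / 4 KiB */
        unsigned long percent = 30;
        unsigned long pages = (totalpages * 100 * percent) / 10000UL;

        /* Prints 314572 pages, ~1228 MiB (256 pages per MiB with 4 KiB pages). */
        printf("kernelcore=%lu%% -> %lu pages (~%lu MiB)\n",
               percent, pages, pages >> 8);
        return 0;
}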
 
@@ -6741,7 +6890,8 @@ static int __init cmdline_parse_kernelcore(char *p)
                return 0;
        }
 
-       return cmdline_parse_core(p, &required_kernelcore);
+       return cmdline_parse_core(p, &required_kernelcore,
+                                 &required_kernelcore_percent);
 }
 
 /*
@@ -6750,7 +6900,8 @@ static int __init cmdline_parse_kernelcore(char *p)
  */
 static int __init cmdline_parse_movablecore(char *p)
 {
-       return cmdline_parse_core(p, &required_movablecore);
+       return cmdline_parse_core(p, &required_movablecore,
+                                 &required_movablecore_percent);
 }
 
 early_param("kernelcore", cmdline_parse_kernelcore);
@@ -7594,7 +7745,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                cc->nr_migratepages -= nr_reclaimed;
 
                ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-                                   NULL, 0, cc->mode, MR_CMA);
+                                   NULL, 0, cc->mode, MR_CONTIG_RANGE);
        }
        if (ret < 0) {
                putback_movable_pages(&cc->migratepages);
@@ -7614,11 +7765,11 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
  * @gfp_mask:  GFP mask to use during compaction
  *
  * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
- * aligned, however it's the caller's responsibility to guarantee that
- * we are the only thread that changes migrate type of pageblocks the
- * pages fall in.
+ * aligned.  The PFN range must belong to a single zone.
  *
- * The PFN range must belong to a single zone.
+ * The first thing this routine does is attempt to MIGRATE_ISOLATE all
+ * pageblocks in the range.  Once isolated, the pageblocks should not
+ * be modified by others.
  *
  * Returns zero on success or negative error code.  On success all
  * pages which PFN is in [start, end) are allocated for the caller and