#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
+#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
* 1G machine -> (16M dma, 784M normal, 224M high)
* NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
* HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
- * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ * HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA
*
* TBD: should special case ZONE_DMA32 machines here - in those we normally
* don't need any ZONE_NORMAL reservation
EXPORT_SYMBOL(nr_online_nodes);
#endif
-/*
- * Structure for holding the mostly immutable allocation parameters passed
- * between alloc_pages* family of functions.
- *
- * nodemask, migratetype and high_zoneidx are initialized only once in
- * __alloc_pages_nodemask() and then never change.
- *
- * zonelist, preferred_zone and classzone_idx are set first in
- * __alloc_pages_nodemask() for the fast path, and might be later changed
- * in __alloc_pages_slowpath(). All other functions pass the whole strucure
- * by a const pointer.
- */
-struct alloc_context {
- struct zonelist *zonelist;
- nodemask_t *nodemask;
- struct zone *preferred_zone;
- int classzone_idx;
- int migratetype;
- enum zone_type high_zoneidx;
-};
-
int page_group_by_mobility_disabled __read_mostly;
void set_pageblock_migratetype(struct page *page, int migratetype)
PB_migrate, PB_migrate_end);
}
-bool oom_killer_disabled __read_mostly;
-
#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
}
}
-/* update __split_huge_page_refcount if you change this function */
-static int destroy_compound_page(struct page *page, unsigned long order)
-{
- int i;
- int nr_pages = 1 << order;
- int bad = 0;
-
- if (unlikely(compound_order(page) != order)) {
- bad_page(page, "wrong compound order", 0);
- bad++;
- }
-
- __ClearPageHead(page);
-
- for (i = 1; i < nr_pages; i++) {
- struct page *p = page + i;
-
- if (unlikely(!PageTail(p))) {
- bad_page(page, "PageTail not set", 0);
- bad++;
- } else if (unlikely(p->first_page != page)) {
- bad_page(page, "first_page not consistent", 0);
- bad++;
- }
- __ClearPageTail(p);
- }
-
- return bad;
-}
-
static inline void prep_zero_page(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
int max_order = MAX_ORDER;
VM_BUG_ON(!zone_is_initialized(zone));
-
- if (unlikely(PageCompound(page)))
- if (unlikely(destroy_compound_page(page, order)))
- return;
+ VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
VM_BUG_ON(migratetype == -1);
if (is_migrate_isolate(migratetype)) {
spin_unlock(&zone->lock);
}
+static int free_tail_pages_check(struct page *head_page, struct page *page)
+{
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return 0;
+ if (unlikely(!PageTail(page))) {
+ bad_page(page, "PageTail not set", 0);
+ return 1;
+ }
+ if (unlikely(page->first_page != head_page)) {
+ bad_page(page, "first_page not consistent", 0);
+ return 1;
+ }
+ return 0;
+}
+
static bool free_pages_prepare(struct page *page, unsigned int order)
{
- int i;
- int bad = 0;
+ bool compound = PageCompound(page);
+ int i, bad = 0;
VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page);
+ VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
+ kasan_free_pages(page, order);
if (PageAnon(page))
page->mapping = NULL;
- for (i = 0; i < (1 << order); i++)
+ bad += free_pages_check(page);
+ for (i = 1; i < (1 << order); i++) {
+ if (compound)
+ bad += free_tail_pages_check(page, page + i);
bad += free_pages_check(page + i);
+ }
if (bad)
return false;
arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
+ kasan_alloc_pages(page, order);
if (gfp_flags & __GFP_ZERO)
prep_zero_page(page, order, gfp_flags);
}
/*
- * If breaking a large block of pages, move all free pages to the preferred
- * allocation list. If falling back for a reclaimable kernel allocation, be
- * more aggressive about taking ownership of free pages.
+ * When we are falling back to another migratetype during allocation, try to
+ * steal extra free pages from the same pageblocks to satisfy further
+ * allocations, instead of polluting multiple pageblocks.
*
- * On the other hand, never change migration type of MIGRATE_CMA pageblocks
- * nor move CMA pages to different free lists. We don't want unmovable pages
- * to be allocated from MIGRATE_CMA areas.
+ * If we are stealing a relatively large buddy page, it is likely there will
+ * be more free pages in the pageblock, so try to steal them all. For
+ * reclaimable and unmovable allocations, we steal regardless of page size,
+ * as fragmentation caused by those allocations polluting movable pageblocks
+ * is worse than movable allocations stealing from unmovable and reclaimable
+ * pageblocks.
*
- * Returns the new migratetype of the pageblock (or the same old migratetype
- * if it was unchanged).
+ * If we claim more than half of the pageblock, change pageblock's migratetype
+ * as well.
*/
-static int try_to_steal_freepages(struct zone *zone, struct page *page,
+static void try_to_steal_freepages(struct zone *zone, struct page *page,
int start_type, int fallback_type)
{
int current_order = page_order(page);
- /*
- * When borrowing from MIGRATE_CMA, we need to release the excess
- * buddy pages to CMA itself. We also ensure the freepage_migratetype
- * is set to CMA so it is returned to the correct freelist in case
- * the page ends up being not actually allocated from the pcp lists.
- */
- if (is_migrate_cma(fallback_type))
- return fallback_type;
-
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
change_pageblock_range(page, current_order, start_type);
- return start_type;
+ return;
}
if (current_order >= pageblock_order / 2 ||
start_type == MIGRATE_RECLAIMABLE ||
+ start_type == MIGRATE_UNMOVABLE ||
page_group_by_mobility_disabled) {
int pages;
/* Claim the whole block if over half of it is free */
if (pages >= (1 << (pageblock_order-1)) ||
- page_group_by_mobility_disabled) {
-
+ page_group_by_mobility_disabled)
set_pageblock_migratetype(page, start_type);
- return start_type;
- }
-
}
-
- return fallback_type;
}
/* Remove an element from the buddy allocator from the fallback list */
struct free_area *area;
unsigned int current_order;
struct page *page;
- int migratetype, new_type, i;
/* Find the largest possible block of pages in the other list */
for (current_order = MAX_ORDER-1;
current_order >= order && current_order <= MAX_ORDER-1;
--current_order) {
+ int i;
for (i = 0;; i++) {
- migratetype = fallbacks[start_migratetype][i];
+ int migratetype = fallbacks[start_migratetype][i];
+ int buddy_type = start_migratetype;
/* MIGRATE_RESERVE handled later if necessary */
if (migratetype == MIGRATE_RESERVE)
struct page, lru);
area->nr_free--;
- new_type = try_to_steal_freepages(zone, page,
- start_migratetype,
- migratetype);
+ if (!is_migrate_cma(migratetype)) {
+ try_to_steal_freepages(zone, page,
+ start_migratetype,
+ migratetype);
+ } else {
+ /*
+ * When borrowing from MIGRATE_CMA, we need to
+ * release the excess buddy pages to CMA
+ * itself, and we do not try to steal extra
+ * free pages.
+ */
+ buddy_type = migratetype;
+ }
/* Remove the page from the freelists */
list_del(&page->lru);
rmv_page_order(page);
expand(zone, page, order, current_order, area,
- new_type);
- /* The freepage_migratetype may differ from pageblock's
+ buddy_type);
+
+ /*
+ * The freepage_migratetype may differ from pageblock's
* migratetype depending on the decisions in
- * try_to_steal_freepages. This is OK as long as it does
- * not differ for MIGRATE_CMA type.
+ * try_to_steal_freepages(). This is OK as long as it
+ * does not differ for MIGRATE_CMA pageblocks. For CMA
+ * we need to make sure unallocated pages flushed from
+ * pcp lists are returned to the correct freelist.
*/
- set_freepage_migratetype(page, new_type);
+ set_freepage_migratetype(page, buddy_type);
trace_mm_page_alloc_extfrag(page, order, current_order,
- start_migratetype, migratetype, new_type);
+ start_migratetype, migratetype);
return page;
}
*did_some_progress = 0;
- if (oom_killer_disabled)
- return NULL;
-
/*
* Acquire the per-zone oom lock for each zone. If that
* fails, somebody else is making progress for us.
return NULL;
}
- /*
- * PM-freezer should be notified that there might be an OOM killer on
- * its way to kill and wake somebody up. This is too early and we might
- * end up not killing anything but false positives are acceptable.
- * See freeze_processes.
- */
- note_oom_kill();
-
/*
* Go through the zonelist yet one more time, keep very high watermark
* here, this is only to catch a parallel oom killing, we must fail if
goto out;
}
/* Exhausted what can be done so it's blamo time */
- out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false);
- *did_some_progress = 1;
+ if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+ *did_some_progress = 1;
out:
oom_zonelist_unlock(ac->zonelist, gfp_mask);
return page;
return NULL;
current->flags |= PF_MEMALLOC;
- compact_result = try_to_compact_pages(ac->zonelist, order, gfp_mask,
- ac->nodemask, mode,
- contended_compaction,
- alloc_flags, ac->classzone_idx);
+ compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
+ mode, contended_compaction);
current->flags &= ~PF_MEMALLOC;
switch (compact_result) {
return 0;
}
+static noinline void __init
+build_all_zonelists_init(void)
+{
+ __build_all_zonelists(NULL);
+ mminit_verify_zonelist();
+ cpuset_init_current_mems_allowed();
+}
+
/*
* Called with zonelists_mutex held always
* unless system_state == SYSTEM_BOOTING.
+ *
+ * __ref due to (1) call of __meminit annotated setup_zone_pageset
+ * [we're only called with non-NULL zone through __meminit paths] and
+ * (2) call of __init annotated helper build_all_zonelists_init
+ * [protected by SYSTEM_BOOTING].
*/
void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
{
set_zonelist_order();
if (system_state == SYSTEM_BOOTING) {
- __build_all_zonelists(NULL);
- mminit_verify_zonelist();
- cpuset_init_current_mems_allowed();
+ build_all_zonelists_init();
} else {
#ifdef CONFIG_MEMORY_HOTPLUG
if (zone)
pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
- printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
- (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
+ pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
+ (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1);
#endif
calculate_node_totalpages(pgdat, start_pfn, end_pfn,
zones_size, zholes_size);
arch_zone_highest_possible_pfn[i])
pr_cont("empty\n");
else
- pr_cont("[mem %0#10lx-%0#10lx]\n",
- arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
- (arch_zone_highest_possible_pfn[i]
+ pr_cont("[mem %#018Lx-%#018Lx]\n",
+ (u64)arch_zone_lowest_possible_pfn[i]
+ << PAGE_SHIFT,
+ ((u64)arch_zone_highest_possible_pfn[i]
<< PAGE_SHIFT) - 1);
}
pr_info("Movable zone start for each node\n");
for (i = 0; i < MAX_NUMNODES; i++) {
if (zone_movable_pfn[i])
- pr_info(" Node %d: %#010lx\n", i,
- zone_movable_pfn[i] << PAGE_SHIFT);
+ pr_info(" Node %d: %#018Lx\n", i,
+ (u64)zone_movable_pfn[i] << PAGE_SHIFT);
}
/* Print out the early node map */
pr_info("Early memory node ranges\n");
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
- pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid,
- start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+ pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid,
+ (u64)start_pfn << PAGE_SHIFT,
+ ((u64)end_pfn << PAGE_SHIFT) - 1);
/* Initialise every node */
mminit_verify_pageflags_layout();