X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=a47f0b229a1aca202b15195c88ab93d52e65064f;hb=cad3ab5883b35b044200820bf2e1fbabe742740a;hp=4aead0bd8d444374f39238cae5ca8be5668c6f18;hpb=a9263751e11a07af40a98dba88021821cd430cfd;p=linux-2.6-microblaze.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4aead0bd8d44..a47f0b229a1a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -172,7 +173,7 @@ static void __free_pages_ok(struct page *page, unsigned int order); * 1G machine -> (16M dma, 784M normal, 224M high) * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL - * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA + * HIGHMEM allocation will leave (224M+784M)/256 of ram reserved in ZONE_DMA * * TBD: should special case ZONE_DMA32 machines here - in those we normally * don't need any ZONE_NORMAL reservation @@ -232,27 +233,6 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif -/* - * Structure for holding the mostly immutable allocation parameters passed - * between alloc_pages* family of functions. - * - * nodemask, migratetype and high_zoneidx are initialized only once in - * __alloc_pages_nodemask() and then never change. - * - * zonelist, preferred_zone and classzone_idx are set first in - * __alloc_pages_nodemask() for the fast path, and might be later changed - * in __alloc_pages_slowpath(). All other functions pass the whole strucure - * by a const pointer. - */ -struct alloc_context { - struct zonelist *zonelist; - nodemask_t *nodemask; - struct zone *preferred_zone; - int classzone_idx; - int migratetype; - enum zone_type high_zoneidx; -}; - int page_group_by_mobility_disabled __read_mostly; void set_pageblock_migratetype(struct page *page, int migratetype) @@ -265,8 +245,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype) PB_migrate, PB_migrate_end); } -bool oom_killer_disabled __read_mostly; - #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -402,36 +380,6 @@ void prep_compound_page(struct page *page, unsigned long order) } } -/* update __split_huge_page_refcount if you change this function */ -static int destroy_compound_page(struct page *page, unsigned long order) -{ - int i; - int nr_pages = 1 << order; - int bad = 0; - - if (unlikely(compound_order(page) != order)) { - bad_page(page, "wrong compound order", 0); - bad++; - } - - __ClearPageHead(page); - - for (i = 1; i < nr_pages; i++) { - struct page *p = page + i; - - if (unlikely(!PageTail(p))) { - bad_page(page, "PageTail not set", 0); - bad++; - } else if (unlikely(p->first_page != page)) { - bad_page(page, "first_page not consistent", 0); - bad++; - } - __ClearPageTail(p); - } - - return bad; -} - static inline void prep_zero_page(struct page *page, unsigned int order, gfp_t gfp_flags) { @@ -634,10 +582,7 @@ static inline void __free_one_page(struct page *page, int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); - - if (unlikely(PageCompound(page))) - if (unlikely(destroy_compound_page(page, order))) - return; + VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); if (is_migrate_isolate(migratetype)) { @@ -818,21 +763,41 @@ static void free_one_page(struct zone *zone, spin_unlock(&zone->lock); } +static int free_tail_pages_check(struct page *head_page, struct page *page) +{ + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return 0; + if (unlikely(!PageTail(page))) { + bad_page(page, "PageTail not set", 0); + return 1; + } + if (unlikely(page->first_page != head_page)) { + bad_page(page, "first_page not consistent", 0); + return 1; + } + return 0; +} + static bool free_pages_prepare(struct page *page, unsigned int order) { - int i; - int bad = 0; + bool compound = PageCompound(page); + int i, bad = 0; VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page); + VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); trace_mm_page_free(page, order); kmemcheck_free_shadow(page, order); + kasan_free_pages(page, order); if (PageAnon(page)) page->mapping = NULL; - for (i = 0; i < (1 << order); i++) + bad += free_pages_check(page); + for (i = 1; i < (1 << order); i++) { + if (compound) + bad += free_tail_pages_check(page, page + i); bad += free_pages_check(page + i); + } if (bad) return false; @@ -1007,6 +972,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); + kasan_alloc_pages(page, order); if (gfp_flags & __GFP_ZERO) prep_zero_page(page, order, gfp_flags); @@ -1160,39 +1126,34 @@ static void change_pageblock_range(struct page *pageblock_page, } /* - * If breaking a large block of pages, move all free pages to the preferred - * allocation list. If falling back for a reclaimable kernel allocation, be - * more aggressive about taking ownership of free pages. + * When we are falling back to another migratetype during allocation, try to + * steal extra free pages from the same pageblocks to satisfy further + * allocations, instead of polluting multiple pageblocks. * - * On the other hand, never change migration type of MIGRATE_CMA pageblocks - * nor move CMA pages to different free lists. We don't want unmovable pages - * to be allocated from MIGRATE_CMA areas. + * If we are stealing a relatively large buddy page, it is likely there will + * be more free pages in the pageblock, so try to steal them all. For + * reclaimable and unmovable allocations, we steal regardless of page size, + * as fragmentation caused by those allocations polluting movable pageblocks + * is worse than movable allocations stealing from unmovable and reclaimable + * pageblocks. * - * Returns the new migratetype of the pageblock (or the same old migratetype - * if it was unchanged). + * If we claim more than half of the pageblock, change pageblock's migratetype + * as well. */ -static int try_to_steal_freepages(struct zone *zone, struct page *page, +static void try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) { int current_order = page_order(page); - /* - * When borrowing from MIGRATE_CMA, we need to release the excess - * buddy pages to CMA itself. We also ensure the freepage_migratetype - * is set to CMA so it is returned to the correct freelist in case - * the page ends up being not actually allocated from the pcp lists. - */ - if (is_migrate_cma(fallback_type)) - return fallback_type; - /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { change_pageblock_range(page, current_order, start_type); - return start_type; + return; } if (current_order >= pageblock_order / 2 || start_type == MIGRATE_RECLAIMABLE || + start_type == MIGRATE_UNMOVABLE || page_group_by_mobility_disabled) { int pages; @@ -1200,15 +1161,9 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, /* Claim the whole block if over half of it is free */ if (pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) { - + page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); - return start_type; - } - } - - return fallback_type; } /* Remove an element from the buddy allocator from the fallback list */ @@ -1218,14 +1173,15 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct free_area *area; unsigned int current_order; struct page *page; - int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order && current_order <= MAX_ORDER-1; --current_order) { + int i; for (i = 0;; i++) { - migratetype = fallbacks[start_migratetype][i]; + int migratetype = fallbacks[start_migratetype][i]; + int buddy_type = start_migratetype; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) @@ -1239,25 +1195,39 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct page, lru); area->nr_free--; - new_type = try_to_steal_freepages(zone, page, - start_migratetype, - migratetype); + if (!is_migrate_cma(migratetype)) { + try_to_steal_freepages(zone, page, + start_migratetype, + migratetype); + } else { + /* + * When borrowing from MIGRATE_CMA, we need to + * release the excess buddy pages to CMA + * itself, and we do not try to steal extra + * free pages. + */ + buddy_type = migratetype; + } /* Remove the page from the freelists */ list_del(&page->lru); rmv_page_order(page); expand(zone, page, order, current_order, area, - new_type); - /* The freepage_migratetype may differ from pageblock's + buddy_type); + + /* + * The freepage_migratetype may differ from pageblock's * migratetype depending on the decisions in - * try_to_steal_freepages. This is OK as long as it does - * not differ for MIGRATE_CMA type. + * try_to_steal_freepages(). This is OK as long as it + * does not differ for MIGRATE_CMA pageblocks. For CMA + * we need to make sure unallocated pages flushed from + * pcp lists are returned to the correct freelist. */ - set_freepage_migratetype(page, new_type); + set_freepage_migratetype(page, buddy_type); trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, migratetype, new_type); + start_migratetype, migratetype); return page; } @@ -2352,9 +2322,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, *did_some_progress = 0; - if (oom_killer_disabled) - return NULL; - /* * Acquire the per-zone oom lock for each zone. If that * fails, somebody else is making progress for us. @@ -2365,14 +2332,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, return NULL; } - /* - * PM-freezer should be notified that there might be an OOM killer on - * its way to kill and wake somebody up. This is too early and we might - * end up not killing anything but false positives are acceptable. - * See freeze_processes. - */ - note_oom_kill(); - /* * Go through the zonelist yet one more time, keep very high watermark * here, this is only to catch a parallel oom killing, we must fail if @@ -2407,8 +2366,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false); - *did_some_progress = 1; + if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)) + *did_some_progress = 1; out: oom_zonelist_unlock(ac->zonelist, gfp_mask); return page; @@ -2429,10 +2388,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; current->flags |= PF_MEMALLOC; - compact_result = try_to_compact_pages(ac->zonelist, order, gfp_mask, - ac->nodemask, mode, - contended_compaction, - alloc_flags, ac->classzone_idx); + compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, + mode, contended_compaction); current->flags &= ~PF_MEMALLOC; switch (compact_result) { @@ -3917,18 +3874,29 @@ static int __build_all_zonelists(void *data) return 0; } +static noinline void __init +build_all_zonelists_init(void) +{ + __build_all_zonelists(NULL); + mminit_verify_zonelist(); + cpuset_init_current_mems_allowed(); +} + /* * Called with zonelists_mutex held always * unless system_state == SYSTEM_BOOTING. + * + * __ref due to (1) call of __meminit annotated setup_zone_pageset + * [we're only called with non-NULL zone through __meminit paths] and + * (2) call of __init annotated helper build_all_zonelists_init + * [protected by SYSTEM_BOOTING]. */ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) { set_zonelist_order(); if (system_state == SYSTEM_BOOTING) { - __build_all_zonelists(NULL); - mminit_verify_zonelist(); - cpuset_init_current_mems_allowed(); + build_all_zonelists_init(); } else { #ifdef CONFIG_MEMORY_HOTPLUG if (zone) @@ -5031,8 +4999,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_start_pfn = node_start_pfn; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, - (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1); + pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1); #endif calculate_node_totalpages(pgdat, start_pfn, end_pfn, zones_size, zholes_size); @@ -5404,9 +5372,10 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) arch_zone_highest_possible_pfn[i]) pr_cont("empty\n"); else - pr_cont("[mem %0#10lx-%0#10lx]\n", - arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT, - (arch_zone_highest_possible_pfn[i] + pr_cont("[mem %#018Lx-%#018Lx]\n", + (u64)arch_zone_lowest_possible_pfn[i] + << PAGE_SHIFT, + ((u64)arch_zone_highest_possible_pfn[i] << PAGE_SHIFT) - 1); } @@ -5414,15 +5383,16 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) pr_info("Movable zone start for each node\n"); for (i = 0; i < MAX_NUMNODES; i++) { if (zone_movable_pfn[i]) - pr_info(" Node %d: %#010lx\n", i, - zone_movable_pfn[i] << PAGE_SHIFT); + pr_info(" Node %d: %#018Lx\n", i, + (u64)zone_movable_pfn[i] << PAGE_SHIFT); } /* Print out the early node map */ pr_info("Early memory node ranges\n"); for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) - pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid, - start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, + ((u64)end_pfn << PAGE_SHIFT) - 1); /* Initialise every node */ mminit_verify_pageflags_layout();