#include "shuffle.h"
#include "page_reporting.h"
+/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
+typedef int __bitwise fpi_t;
+
+/* No special request */
+#define FPI_NONE ((__force fpi_t)0)
+
+/*
+ * Skip free page reporting notification for the (possibly merged) page.
+ * This does not hinder free page reporting from grabbing the page,
+ * reporting it and marking it "reported" - it only skips notifying
+ * the free page reporting infrastructure about a newly freed page. For
+ * example, used when temporarily pulling a page from a freelist and
+ * putting it back unmodified.
+ */
+#define FPI_SKIP_REPORT_NOTIFY ((__force fpi_t)BIT(0))
+
+/*
+ * Place the (possibly merged) page to the tail of the freelist. Will ignore
+ * page shuffling (relevant code - e.g., memory onlining - is expected to
+ * shuffle the whole zone).
+ *
+ * Note: No code should rely on this flag for correctness - it's purely
+ * to allow for optimizations when handing back either fresh pages
+ * (memory onlining) or untouched pages (page isolation, free page
+ * reporting).
+ */
+#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
unsigned int pageblock_order __read_mostly;
#endif
-static void __free_pages_ok(struct page *page, unsigned int order);
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fpi_t fpi_flags);
/*
* results with 256, 32 in the lowmem_reserve sysctl:
void free_compound_page(struct page *page)
{
mem_cgroup_uncharge(page);
- __free_pages_ok(page, compound_order(page));
+ __free_pages_ok(page, compound_order(page), FPI_NONE);
}
void prep_compound_page(struct page *page, unsigned int order)
area->nr_free++;
}
-/* Used for pages which are on another list */
+/*
+ * Used for pages which are on another list. Move the pages to the tail
+ * of the list - so the moved pages won't immediately be considered for
+ * allocation again (e.g., optimization for memory onlining).
+ */
static inline void move_to_free_list(struct page *page, struct zone *zone,
unsigned int order, int migratetype)
{
struct free_area *area = &zone->free_area[order];
- list_move(&page->lru, &area->free_list[migratetype]);
+ list_move_tail(&page->lru, &area->free_list[migratetype]);
}
static inline void del_page_from_free_list(struct page *page, struct zone *zone,
static inline void __free_one_page(struct page *page,
unsigned long pfn,
struct zone *zone, unsigned int order,
- int migratetype, bool report)
+ int migratetype, fpi_t fpi_flags)
{
struct capture_control *capc = task_capc(zone);
unsigned long buddy_pfn;
done_merging:
set_page_order(page, order);
- if (is_shuffle_order(order))
+ if (fpi_flags & FPI_TO_TAIL)
+ to_tail = true;
+ else if (is_shuffle_order(order))
to_tail = shuffle_pick_tail();
else
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
add_to_free_list(page, zone, order, migratetype);
/* Notify page reporting subsystem of freed page */
- if (report)
+ if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
page_reporting_notify_free(order);
}
trace_mm_page_free(page, order);
+ if (unlikely(PageHWPoison(page)) && !order) {
+ /*
+ * Do not let hwpoison pages hit pcplists/buddy
+ * Untie memcg state and reset page's owner
+ */
+ if (memcg_kmem_enabled() && PageKmemcg(page))
+ __memcg_kmem_uncharge_page(page, order);
+ reset_page_owner(page, order);
+ return false;
+ }
+
/*
* Check tail pages before head page information is cleared to
* avoid checking PageCompound for order-0 pages.
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt, true);
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, 0, mt);
}
spin_unlock(&zone->lock);
static void free_one_page(struct zone *zone,
struct page *page, unsigned long pfn,
unsigned int order,
- int migratetype)
+ int migratetype, fpi_t fpi_flags)
{
spin_lock(&zone->lock);
if (unlikely(has_isolate_pageblock(zone) ||
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
- __free_one_page(page, pfn, zone, order, migratetype, true);
+ __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
spin_unlock(&zone->lock);
}
}
}
-static void __free_pages_ok(struct page *page, unsigned int order)
+static void __free_pages_ok(struct page *page, unsigned int order,
+ fpi_t fpi_flags)
{
unsigned long flags;
int migratetype;
migratetype = get_pfnblock_migratetype(page, pfn);
local_irq_save(flags);
__count_vm_events(PGFREE, 1 << order);
- free_one_page(page_zone(page), page, pfn, order, migratetype);
+ free_one_page(page_zone(page), page, pfn, order, migratetype,
+ fpi_flags);
local_irq_restore(flags);
}
struct page *p = page;
unsigned int loop;
+ /*
+ * When initializing the memmap, __init_single_page() sets the refcount
+ * of all pages to 1 ("allocated"/"not free"). We have to set the
+ * refcount of all involved pages to 0.
+ */
prefetchw(p);
for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
prefetchw(p + 1);
set_page_count(p, 0);
atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
- set_page_refcounted(page);
- __free_pages(page, order);
+
+ /*
+ * Bypass PCP and place fresh pages right to the tail, primarily
+ * relevant for memory onlining.
+ */
+ __free_pages_ok(page, order, FPI_TO_TAIL);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
#endif
/*
- * Move the free pages in a range to the free lists of the requested type.
+ * Move the free pages in a range to the freelist tail of the requested type.
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
*/
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(zone, page, pfn, 0, migratetype);
+ free_one_page(zone, page, pfn, 0, migratetype,
+ FPI_NONE);
return;
}
migratetype = MIGRATE_MOVABLE;
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
- split_page_owner(page, order);
+ split_page_owner(page, 1 << order);
}
EXPORT_SYMBOL_GPL(split_page);
lockdep_assert_held(&zone->lock);
/* Return isolated page to tail of freelist. */
- __free_one_page(page, page_to_pfn(page), zone, order, mt, false);
+ __free_one_page(page, page_to_pfn(page), zone, order, mt,
+ FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL);
}
/*
if (order == 0) /* Via pcp? */
free_unref_page(page);
else
- __free_pages_ok(page, order);
+ __free_pages_ok(page, order, FPI_NONE);
}
void __free_pages(struct page *page, unsigned int order)
* Initially all pages are reserved - free ones are freed
* up by memblock_free_all() once the early boot process is
* done. Non-atomic initialization, single-pass.
+ *
+ * All aligned pageblocks are initialized to the specified migratetype
+ * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
+ * zone stats (e.g., nr_isolate_pageblock) are touched.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn, enum meminit_context context,
- struct vmem_altmap *altmap)
+ unsigned long start_pfn,
+ enum meminit_context context,
+ struct vmem_altmap *altmap, int migratetype)
{
unsigned long pfn, end_pfn = start_pfn + size;
struct page *page;
__SetPageReserved(page);
/*
- * Mark the block movable so that blocks are reserved for
- * movable at startup. This will force kernel allocations
- * to reserve their blocks rather than leaking throughout
- * the address space during boot when many long-lived
- * kernel allocations are made.
- *
- * bitmap is created for zone's valid pfn range. but memmap
- * can be created for invalid pages (for alignment)
- * check here not to call set_pageblock_migratetype() against
- * pfn out of zone.
+ * Usually, we want to mark the pageblock MIGRATE_MOVABLE,
+ * such that unmovable allocations won't be scattered all
+ * over the place during system boot.
*/
- if (!(pfn & (pageblock_nr_pages - 1))) {
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
+ set_pageblock_migratetype(page, migratetype);
cond_resched();
}
pfn++;
* the address space during boot when many long-lived
* kernel allocations are made.
*
- * bitmap is created for zone's valid pfn range. but memmap
- * can be created for invalid pages (for alignment)
- * check here not to call set_pageblock_migratetype() against
- * pfn out of zone.
- *
* Please note that MEMINIT_HOTPLUG path doesn't clear memmap
* because this is done early in section_activate()
*/
- if (!(pfn & (pageblock_nr_pages - 1))) {
+ if (IS_ALIGNED(pfn, pageblock_nr_pages)) {
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
if (end_pfn > start_pfn) {
size = end_pfn - start_pfn;
memmap_init_zone(size, nid, zone, start_pfn,
- MEMINIT_EARLY, NULL);
+ MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
}
}
}
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype, 0);
- if (ret < 0)
+ if (ret)
return ret;
/*
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
- * All pages in the range must be in a single zone and isolated
- * before calling this.
+ * All pages in the range must be in a single zone, must not contain holes,
+ * must span full sections, and must be isolated before calling this function.
*/
-unsigned long
-__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
{
+ unsigned long pfn = start_pfn;
struct page *page;
struct zone *zone;
unsigned int order;
- unsigned long pfn;
unsigned long flags;
- unsigned long offlined_pages = 0;
-
- /* find the first valid pfn */
- for (pfn = start_pfn; pfn < end_pfn; pfn++)
- if (pfn_valid(pfn))
- break;
- if (pfn == end_pfn)
- return offlined_pages;
offline_mem_sections(pfn, end_pfn);
zone = page_zone(pfn_to_page(pfn));
spin_lock_irqsave(&zone->lock, flags);
- pfn = start_pfn;
while (pfn < end_pfn) {
- if (!pfn_valid(pfn)) {
- pfn++;
- continue;
- }
page = pfn_to_page(pfn);
/*
* The HWPoisoned page may be not in buddy system, and
*/
if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
pfn++;
- offlined_pages++;
continue;
}
/*
BUG_ON(page_count(page));
BUG_ON(PageBuddy(page));
pfn++;
- offlined_pages++;
continue;
}
BUG_ON(page_count(page));
BUG_ON(!PageBuddy(page));
order = page_order(page);
- offlined_pages += 1 << order;
del_page_from_free_list(page, zone, order);
pfn += (1 << order);
}
spin_unlock_irqrestore(&zone->lock, flags);
-
- return offlined_pages;
}
#endif
#ifdef CONFIG_MEMORY_FAILURE
/*
- * Set PG_hwpoison flag if a given page is confirmed to be a free page. This
- * test is performed under the zone lock to prevent a race against page
- * allocation.
+ * Break down a higher-order page in sub-pages, and keep our target out of
+ * buddy allocator.
*/
-bool set_hwpoison_free_buddy_page(struct page *page)
+static void break_down_buddy_pages(struct zone *zone, struct page *page,
+ struct page *target, int low, int high,
+ int migratetype)
+{
+ unsigned long size = 1 << high;
+ struct page *current_buddy, *next_page;
+
+ while (high > low) {
+ high--;
+ size >>= 1;
+
+ if (target >= &page[size]) {
+ next_page = page + size;
+ current_buddy = page;
+ } else {
+ next_page = page;
+ current_buddy = page + size;
+ }
+
+ if (set_page_guard(zone, current_buddy, high, migratetype))
+ continue;
+
+ if (current_buddy != target) {
+ add_to_free_list(current_buddy, zone, high, migratetype);
+ set_page_order(current_buddy, high);
+ page = next_page;
+ }
+ }
+}
+
+/*
+ * Take a page that will be marked as poisoned off the buddy allocator.
+ */
+bool take_page_off_buddy(struct page *page)
{
struct zone *zone = page_zone(page);
unsigned long pfn = page_to_pfn(page);
unsigned long flags;
unsigned int order;
- bool hwpoisoned = false;
+ bool ret = false;
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct page *page_head = page - (pfn & ((1 << order) - 1));
+ int buddy_order = page_order(page_head);
- if (PageBuddy(page_head) && page_order(page_head) >= order) {
- if (!TestSetPageHWPoison(page))
- hwpoisoned = true;
+ if (PageBuddy(page_head) && buddy_order >= order) {
+ unsigned long pfn_head = page_to_pfn(page_head);
+ int migratetype = get_pfnblock_migratetype(page_head,
+ pfn_head);
+
+ del_page_from_free_list(page_head, zone, buddy_order);
+ break_down_buddy_pages(zone, page_head, page, 0,
+ buddy_order, migratetype);
+ ret = true;
break;
}
+ if (page_count(page_head) > 0)
+ break;
}
spin_unlock_irqrestore(&zone->lock, flags);
-
- return hwpoisoned;
+ return ret;
}
#endif