#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
+#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
*/
#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
+/*
+ * Don't poison memory with KASAN (only for the tag-based modes).
+ * During boot, all non-reserved memblock memory is exposed to page_alloc.
+ * Poisoning all that memory lengthens boot time, especially on systems with
+ * a large amount of RAM. This flag is used to skip that poisoning.
+ * This is only done for the tag-based KASAN modes, as those are able to
+ * detect memory corruptions with the memory tags assigned by default.
+ * All memory allocated normally after boot gets poisoned as usual.
+ */
+#define FPI_SKIP_KASAN_POISON ((__force fpi_t)BIT(2))
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
* on-demand allocation and then freed again before the deferred pages
* initialization is done, but this is not likely to happen.
*/
-static inline void kasan_free_nondeferred_pages(struct page *page, int order)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ bool init, fpi_t fpi_flags)
{
- if (!static_branch_unlikely(&deferred_pages))
- kasan_free_pages(page, order);
+ if (static_branch_unlikely(&deferred_pages)) /* deferred_pages key set: skip KASAN */
+ return;
+ if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+ (fpi_flags & FPI_SKIP_KASAN_POISON)) /* skip requested; ignored for generic KASAN */
+ return;
+ kasan_free_pages(page, order, init); /* init is forwarded unchanged */
}
/* Returns true if the struct page for the pfn is uninitialised */
return false;
}
#else
-#define kasan_free_nondeferred_pages(p, o) kasan_free_pages(p, o)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ bool init, fpi_t fpi_flags)
+{
+ if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+ (fpi_flags & FPI_SKIP_KASAN_POISON)) /* skip requested; ignored for generic KASAN */
+ return;
+ kasan_free_pages(page, order, init); /* init is forwarded unchanged */
+}
static inline bool early_page_uninitialised(unsigned long pfn)
{
if (unlikely((unsigned long)page->mapping |
page_ref_count(page) |
#ifdef CONFIG_MEMCG
- (unsigned long)page_memcg(page) |
+ page->memcg_data |
#endif
(page->flags & check_flags)))
return false;
bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
}
#ifdef CONFIG_MEMCG
- if (unlikely(page_memcg(page)))
+ if (unlikely(page->memcg_data))
bad_reason = "page still charged to cgroup";
#endif
return bad_reason;
}
static __always_inline bool free_pages_prepare(struct page *page,
- unsigned int order, bool check_free)
+ unsigned int order, bool check_free, fpi_t fpi_flags)
{
int bad = 0;
+ bool init;
VM_BUG_ON_PAGE(PageTail(page), page);
debug_check_no_obj_freed(page_address(page),
PAGE_SIZE << order);
}
- if (want_init_on_free())
- kernel_init_free_pages(page, 1 << order);
kernel_poison_pages(page, 1 << order);
/*
+ * As memory initialization might be integrated into KASAN,
+ * kasan_free_pages and kernel_init_free_pages must be
+ * kept together to avoid discrepancies in behavior.
+ *
* With hardware tag-based KASAN, memory tags must be set before the
* page becomes unavailable via debug_pagealloc or arch_free_page.
*/
- kasan_free_nondeferred_pages(page, order);
+ init = want_init_on_free();
+ if (init && !kasan_has_integrated_init())
+ kernel_init_free_pages(page, 1 << order);
+ kasan_free_nondeferred_pages(page, order, init, fpi_flags);
/*
* arch_free_page() can make the page's contents inaccessible. s390
*/
static bool free_pcp_prepare(struct page *page)
{
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE); /* order 0, check_free = true, fpi_flags = FPI_NONE */
}
static bool bulkfree_pcp_prepare(struct page *page)
static bool free_pcp_prepare(struct page *page)
{
if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE); /* check_free = true, fpi_flags = FPI_NONE */
else
- return free_pages_prepare(page, 0, false);
+ return free_pages_prepare(page, 0, false, FPI_NONE); /* check_free = false, fpi_flags = FPI_NONE */
}
static bool bulkfree_pcp_prepare(struct page *page)
int migratetype;
unsigned long pfn = page_to_pfn(page);
- if (!free_pages_prepare(page, order, true))
+ if (!free_pages_prepare(page, order, true, fpi_flags))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
* Bypass PCP and place fresh pages right to the tail, primarily
* relevant for memory onlining.
*/
- __free_pages_ok(page, order, FPI_TO_TAIL);
+ __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
inline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
+ bool init; /* whether this allocation needs its memory initialized here */
+
set_page_private(page, 0);
set_page_refcounted(page);
arch_alloc_page(page, order);
debug_pagealloc_map_pages(page, 1 << order);
- kasan_alloc_pages(page, order);
+
+ /*
+ * Page unpoisoning must happen before memory initialization.
+ * Otherwise, the poison pattern will be overwritten for __GFP_ZERO
+ * allocations and the page unpoisoning code will complain.
+ */
kernel_unpoison_pages(page, 1 << order);
- set_page_owner(page, order, gfp_flags);
- if (!want_init_on_free() && want_init_on_alloc(gfp_flags))
+ /*
+ * As memory initialization might be integrated into KASAN,
+ * kasan_alloc_pages and kernel_init_free_pages must be
+ * kept together to avoid discrepancies in behavior.
+ *
+ * init is set only when want_init_on_free() is false and
+ * want_init_on_alloc(gfp_flags) is true. It is always passed to
+ * kasan_alloc_pages(); the explicit kernel_init_free_pages() call
+ * is made only when kasan_has_integrated_init() is false.
+ */
+ init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
+ kasan_alloc_pages(page, order, init);
+ if (init && !kasan_has_integrated_init())
kernel_init_free_pages(page, 1 << order);
+
+ set_page_owner(page, order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask,
- struct alloc_context *ac, gfp_t *alloc_mask,
+ struct alloc_context *ac, gfp_t *alloc_gfp,
unsigned int *alloc_flags)
{
ac->highest_zoneidx = gfp_zone(gfp_mask);
ac->migratetype = gfp_migratetype(gfp_mask);
if (cpusets_enabled()) {
- *alloc_mask |= __GFP_HARDWALL;
+ *alloc_gfp |= __GFP_HARDWALL;
/*
* When we are in the interrupt context, it is irrelevant
* to the current task context. It means that any node ok.
* This is the 'heart' of the zoned buddy allocator.
*/
struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+__alloc_pages_nodemask(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask)
{
struct page *page;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
- gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
+ gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
/*
* so bail out early if the request is out of bound.
*/
if (unlikely(order >= MAX_ORDER)) {
- WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+ WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
return NULL;
}
- gfp_mask &= gfp_allowed_mask;
- alloc_mask = gfp_mask;
- if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
+ gfp &= gfp_allowed_mask;
+ alloc_gfp = gfp;
+ if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
+ &alloc_gfp, &alloc_flags))
return NULL;
/*
* Forbid the first pass from falling back to types that fragment
* memory until all local zones are considered.
*/
- alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
+ alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp);
/* First allocation attempt */
- page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
+ page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
if (likely(page))
goto out;
* from a particular context which has been marked by
* memalloc_no{fs,io}_{save,restore}.
*/
- alloc_mask = current_gfp_context(gfp_mask);
+ alloc_gfp = current_gfp_context(gfp);
ac.spread_dirty_pages = false;
/*
*/
ac.nodemask = nodemask;
- page = __alloc_pages_slowpath(alloc_mask, order, &ac);
+ page = __alloc_pages_slowpath(alloc_gfp, order, &ac);
out:
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
- unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
+ if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT) && page &&
+ unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
__free_pages(page, order);
page = NULL;
}
- trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
+ trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
return page;
}
return pages;
}
-void __init mem_init_print_info(const char *str)
+void __init mem_init_print_info(void)
{
unsigned long physpages, codesize, datasize, rosize, bss_size;
unsigned long init_code_size, init_data_size;
#ifdef CONFIG_HIGHMEM
", %luK highmem"
#endif
- "%s%s)\n",
+ ")\n",
nr_free_pages() << (PAGE_SHIFT - 10),
physpages << (PAGE_SHIFT - 10),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
(physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
- totalcma_pages << (PAGE_SHIFT - 10),
+ totalcma_pages << (PAGE_SHIFT - 10)
#ifdef CONFIG_HIGHMEM
- totalhigh_pages() << (PAGE_SHIFT - 10),
+ , totalhigh_pages() << (PAGE_SHIFT - 10)
#endif
- str ? ", " : "", str ? str : "");
+ );
}
/**
void *table = NULL;
gfp_t gfp_flags;
bool virt;
+ bool huge;
/* allow the kernel cmdline to have a say */
if (!numentries) {
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
+ huge = is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
- virt ? "vmalloc" : "linear");
+ virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
* isolated thus they won't get removed from buddy.
*/
- lru_add_drain_all();
-
order = 0;
outer_start = start;
while (!PageBuddy(pfn_to_page(outer_start))) {
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, 0)) {
- pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
- __func__, outer_start, end);
ret = -EBUSY;
goto done;
}