#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>
+#include <linux/memory.h>
#include <asm/tlbflush.h>
int force, enum migrate_mode mode)
{
int rc = -EAGAIN;
- int page_was_mapped = 0;
+ bool page_was_mapped = false;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(page);
}
/*
- * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+ * By try_to_migrate(), page->mapcount goes down to 0 here. In this case,
* we cannot notice that anon_vma is freed while we migrates a page.
* This get_anon_vma() delays freeing anon_vma pointer until the end
* of migration. File cache pages are no problem because of page_lock()
VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
page);
try_to_migrate(page, 0);
- page_was_mapped = 1;
+ page_was_mapped = true;
}
if (!page_mapped(page))
* next_demotion_node() - Get the next node in the demotion path
* @node: The starting node to lookup the next node
*
- * @returns: node id for next memory node in the demotion path hierarchy
+ * Return: node id for next memory node in the demotion path hierarchy
* from @node; NUMA_NO_NODE if @node is terminal. This does not keep
* @node online or guarantee that it *continues* to be the next demotion
* target.
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
* @reason: The reason for page migration.
+ * @ret_succeeded: Set to the number of pages migrated successfully if
+ * the caller passes a non-NULL pointer.
*
* The function returns after 10 attempts or if no pages are movable any more
* because the list has become empty or no retryable pages exist any more.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
free_page_t put_new_page, unsigned long private,
- enum migrate_mode mode, int reason)
+ enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
{
int retry = 1;
int thp_retry = 1;
if (!swapwrite)
current->flags &= ~PF_SWAPWRITE;
+ if (ret_succeeded)
+ *ret_succeeded = nr_succeeded;
+
return rc;
}
};
err = migrate_pages(pagelist, alloc_migration_target, NULL,
- (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
if (err)
putback_movable_pages(pagelist);
return err;
mmap_read_unlock(mm);
}
+static int get_compat_pages_array(const void __user *chunk_pages[],
+ const void __user * __user *pages,
+ unsigned long chunk_nr)
+{
+ compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
+ compat_uptr_t p;
+ int i;
+
+ for (i = 0; i < chunk_nr; i++) {
+ if (get_user(p, pages32 + i))
+ return -EFAULT;
+ chunk_pages[i] = compat_ptr(p);
+ }
+
+ return 0;
+}
+
/*
* Determine the nodes of a user array of pages and store it in
* a user array of status.
if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
chunk_nr = DO_PAGES_STAT_CHUNK_NR;
- if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
- break;
+ if (in_compat_syscall()) {
+ if (get_compat_pages_array(chunk_pages, pages,
+ chunk_nr))
+ break;
+ } else {
+ if (copy_from_user(chunk_pages, pages,
+ chunk_nr * sizeof(*chunk_pages)))
+ break;
+ }
do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
- compat_uptr_t __user *, pages32,
- const int __user *, nodes,
- int __user *, status,
- int, flags)
-{
- const void __user * __user *pages;
- int i;
-
- pages = compat_alloc_user_space(nr_pages * sizeof(void *));
- for (i = 0; i < nr_pages; i++) {
- compat_uptr_t p;
-
- if (get_user(p, pages32 + i) ||
- put_user(compat_ptr(p), pages + i))
- return -EFAULT;
- }
- return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
-}
-#endif /* CONFIG_COMPAT */
-
#ifdef CONFIG_NUMA_BALANCING
/*
* Returns true if this is a safe migration target node for misplaced NUMA
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
+ int nr_pages = thp_nr_pages(page);
VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
return 0;
/* Avoid migrating to a node that is nearly full */
- if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
+ if (!migrate_balanced_pgdat(pgdat, nr_pages))
return 0;
if (isolate_lru_page(page))
page_lru = page_is_file_lru(page);
mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
- thp_nr_pages(page));
+ nr_pages);
/*
* Isolating the page has taken another reference, so the
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
- MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+ MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
list_del(&page->lru);
EXPORT_SYMBOL(migrate_vma_finalize);
#endif /* CONFIG_DEVICE_PRIVATE */
+#if defined(CONFIG_MEMORY_HOTPLUG)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
/*
* For callers that do not hold get_online_mems() already.
*/
-__maybe_unused // <- temporay to prevent warnings during bisects
static void set_migration_target_nodes(void)
{
get_online_mems();
__set_migration_target_nodes();
put_online_mems();
}
+
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs. That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+ set_migration_target_nodes();
+ return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+ set_migration_target_nodes();
+ return 0;
+}
+
+/*
+ * This leaves migrate-on-reclaim transiently disabled between
+ * the MEM_GOING_OFFLINE and MEM_OFFLINE events. This runs
+ * whether reclaim-based migration is enabled or not, which
+ * ensures that the user can turn reclaim-based migration at
+ * any time without needing to recalculate migration targets.
+ *
+ * These callbacks already hold get_online_mems(). That is why
+ * __set_migration_target_nodes() can be used as opposed to
+ * set_migration_target_nodes().
+ */
+static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ switch (action) {
+ case MEM_GOING_OFFLINE:
+ /*
+ * Make sure there are not transient states where
+ * an offline node is a migration target. This
+ * will leave migration disabled until the offline
+ * completes and the MEM_OFFLINE case below runs.
+ */
+ disable_all_migrate_targets();
+ break;
+ case MEM_OFFLINE:
+ case MEM_ONLINE:
+ /*
+ * Recalculate the target nodes once the node
+ * reaches its final state (online or offline).
+ */
+ __set_migration_target_nodes();
+ break;
+ case MEM_CANCEL_OFFLINE:
+ /*
+ * MEM_GOING_OFFLINE disabled all the migration
+ * targets. Reenable them.
+ */
+ __set_migration_target_nodes();
+ break;
+ case MEM_GOING_ONLINE:
+ case MEM_CANCEL_ONLINE:
+ break;
+ }
+
+ return notifier_from_errno(0);
+}
+
+static int __init migrate_on_reclaim_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
+ migration_online_cpu,
+ migration_offline_cpu);
+ /*
+ * In the unlikely case that this fails, the automatic
+ * migration targets may become suboptimal for nodes
+ * where N_CPU changes. With such a small impact in a
+ * rare case, do not bother trying to do anything special.
+ */
+ WARN_ON(ret < 0);
+
+ hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+ return 0;
+}
+late_initcall(migrate_on_reclaim_init);
+#endif /* CONFIG_MEMORY_HOTPLUG */