DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) {
+ rmap_t rmap_flags = RMAP_NONE;
pte_t pte;
swp_entry_t entry;
struct page *new;
entry = pte_to_swp_entry(*pvmw.pte);
if (is_writable_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(*pvmw.pte))
pte = pte_mkuffd_wp(pte);
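+ /*
+ * Non-readable migration entries are only created for anon pages
+ * that were mapped exclusively, so remap the page with
+ * RMAP_EXCLUSIVE to restore PG_anon_exclusive.
+ */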
+ if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
+ rmap_flags |= RMAP_EXCLUSIVE;
+
if (unlikely(is_device_private_page(new))) {
if (pte_write(pte))
entry = make_writable_device_private_entry(page_to_pfn(new));
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (folio_test_anon(folio))
- hugepage_add_anon_rmap(new, vma, pvmw.address);
+ hugepage_add_anon_rmap(new, vma, pvmw.address,
+ rmap_flags);
else
- page_dup_rmap(new, true);
+ page_dup_file_rmap(new, true);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
{
if (folio_test_anon(folio))
- page_add_anon_rmap(new, vma, pvmw.address, false);
+ page_add_anon_rmap(new, vma, pvmw.address,
+ rmap_flags);
else
page_add_file_rmap(new, vma, false);
set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
xas_lock_irq(&xas);
expected_count = 2 + page_has_private(page);
- if (page_count(page) != expected_count || xas_load(&xas) != page) {
- xas_unlock_irq(&xas);
- return -EAGAIN;
- }
-
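+ /*
+ * No separate page_count()/xas_load() recheck is needed here: the
+ * page is locked, so it cannot be removed from the mapping, and
+ * page_ref_freeze() fails unless the refcount matches exactly.
+ */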
if (!page_ref_freeze(page, expected_count)) {
xas_unlock_irq(&xas);
return -EAGAIN;
folio_set_workingset(newfolio);
if (folio_test_checked(folio))
folio_set_checked(newfolio);
+ /*
+ * PG_anon_exclusive (-> PG_mappedtodisk) is always migrated via
+ * migration entries. We can still have PG_anon_exclusive set on the
+ * effectively unmapped and unreferenced first sub-page of an
+ * anonymous THP: we can simply copy it here via PG_mappedtodisk.
+ */
if (folio_test_mappedtodisk(folio))
folio_set_mappedtodisk(newfolio);
* < 0 - error code
* MIGRATEPAGE_SUCCESS - success
*/
-static int move_to_new_page(struct page *newpage, struct page *page,
+static int move_to_new_folio(struct folio *dst, struct folio *src,
enum migrate_mode mode)
{
struct address_space *mapping;
int rc = -EAGAIN;
- bool is_lru = !__PageMovable(page);
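+ /* __PageMovable() has no folio form yet, so check the head page. */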
+ bool is_lru = !__PageMovable(&src->page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
+ VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);
- mapping = page_mapping(page);
+ mapping = folio_mapping(src);
if (likely(is_lru)) {
if (!mapping)
- rc = migrate_page(mapping, newpage, page, mode);
+ rc = migrate_page(mapping, &dst->page, &src->page, mode);
else if (mapping->a_ops->migratepage)
/*
* Most pages have a mapping and most filesystems
* provide a migratepage callback. This is the most
* common path for page migration.
*/
- rc = mapping->a_ops->migratepage(mapping, newpage,
- page, mode);
+ rc = mapping->a_ops->migratepage(mapping, &dst->page,
+ &src->page, mode);
else
- rc = fallback_migrate_page(mapping, newpage,
- page, mode);
+ rc = fallback_migrate_page(mapping, &dst->page,
+ &src->page, mode);
} else {
/*
* In case of non-lru page, it could be released after
* isolation step. In that case, we shouldn't try migration.
*/
- VM_BUG_ON_PAGE(!PageIsolated(page), page);
- if (!PageMovable(page)) {
+ VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
+ if (!folio_test_movable(src)) {
rc = MIGRATEPAGE_SUCCESS;
- ClearPageIsolated(page);
+ folio_clear_isolated(src);
goto out;
}
- rc = mapping->a_ops->migratepage(mapping, newpage,
- page, mode);
+ rc = mapping->a_ops->migratepage(mapping, &dst->page,
+ &src->page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
- !PageIsolated(page));
+ !folio_test_isolated(src));
}
/*
- * When successful, old pagecache page->mapping must be cleared before
- * page is freed; but stats require that PageAnon be left as PageAnon.
+ * When successful, old pagecache src->mapping must be cleared before
+ * src is freed; but stats require that PageAnon be left as PageAnon.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
- if (__PageMovable(page)) {
- VM_BUG_ON_PAGE(!PageIsolated(page), page);
+ if (__PageMovable(&src->page)) {
+ VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
/*
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
- ClearPageIsolated(page);
+ folio_clear_isolated(src);
}
/*
- * Anonymous and movable page->mapping will be cleared by
+ * Anonymous and movable src->mapping will be cleared by
* free_pages_prepare so don't reset it here for keeping
* the type to work PageAnon, for example.
*/
- if (!PageMappingFlags(page))
- page->mapping = NULL;
+ if (!folio_mapping_flags(src))
+ src->mapping = NULL;
- if (likely(!is_zone_device_page(newpage)))
- flush_dcache_folio(page_folio(newpage));
+ if (likely(!folio_is_zone_device(dst)))
+ flush_dcache_folio(dst);
}
out:
return rc;
goto out_unlock;
if (unlikely(!is_lru)) {
- rc = move_to_new_page(newpage, page, mode);
+ rc = move_to_new_folio(dst, folio, mode);
goto out_unlock_both;
}
if (!page->mapping) {
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
- try_to_free_buffers(page);
+ try_to_free_buffers(folio);
goto out_unlock_both;
}
} else if (page_mapped(page)) {
}
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, mode);
+ rc = move_to_new_folio(dst, folio, mode);
/*
* When successful, push newpage to LRU immediately: so that if it
if (!newpage)
return -ENOMEM;
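+ /* Make sure the new page does not carry stale ->private data. */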
+ newpage->private = 0;
rc = __unmap_and_move(page, newpage, force, mode);
if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason);
goto put_anon;
if (page_mapped(hpage)) {
- bool mapping_locked = false;
enum ttu_flags ttu = 0;
if (!PageAnon(hpage)) {
if (unlikely(!mapping))
goto unlock_put_anon;
- mapping_locked = true;
- ttu |= TTU_RMAP_LOCKED;
+ ttu = TTU_RMAP_LOCKED;
}
try_to_migrate(src, ttu);
page_was_mapped = 1;
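+ /*
+ * TTU_RMAP_LOCKED in ttu already records that i_mmap_rwsem is
+ * held, so a separate mapping_locked flag is unnecessary.
+ */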
- if (mapping_locked)
+ if (ttu & TTU_RMAP_LOCKED)
i_mmap_unlock_write(mapping);
}
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, mode);
+ rc = move_to_new_folio(dst, src, mode);
if (page_was_mapped)
remove_migration_ptes(src,
nr_thp_split++;
goto retry;
}
-
- nr_failed_pages += nr_subpages;
- break;
- }
-
/* Hugetlb migration is unsupported */
- if (!no_subpage_counting)
+ } else if (!no_subpage_counting) {
nr_failed++;
+ }
+
nr_failed_pages += nr_subpages;
break;
case -ENOMEM:
nr_thp_split++;
goto retry;
}
-
- nr_failed_pages += nr_subpages;
- goto out;
+ } else if (!no_subpage_counting) {
+ nr_failed++;
}
- if (!no_subpage_counting)
- nr_failed++;
nr_failed_pages += nr_subpages;
+ /*
+ * There might be some subpages of failed-to-migrate THPs
+ * left on the thp_split_pages list. Move them back to the
+ * migration list so that the caller can put them back on
+ * the right list; otherwise their page refcounts are leaked.
+ */
+ list_splice_init(&thp_split_pages, from);
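+ /*
+ * THPs queued for another retry pass will not get one now
+ * that we are bailing out, so account them as failures too.
+ */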
+ nr_thp_failed += thp_retry;
goto out;
case -EAGAIN:
- if (is_thp) {
+ if (is_thp)
thp_retry++;
- break;
- }
- retry++;
+ else
+ retry++;
break;
case MIGRATEPAGE_SUCCESS:
nr_succeeded += nr_subpages;
- if (is_thp) {
+ if (is_thp)
nr_thp_succeeded++;
- break;
- }
break;
default:
/*
* removed from migration page list and not
* retried in the next outer loop.
*/
- if (is_thp) {
+ if (is_thp)
nr_thp_failed++;
- nr_failed_pages += nr_subpages;
- break;
- }
-
- if (!no_subpage_counting)
+ else if (!no_subpage_counting)
nr_failed++;
+
nr_failed_pages += nr_subpages;
break;
}
mmap_read_lock(mm);
err = -EFAULT;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+ vma = vma_lookup(mm, addr);
+ if (!vma || !vma_migratable(vma))
goto out;
/* FOLL_DUMP to ignore special (like zero) pages */
goto set_status;
/* FOLL_DUMP to ignore special (like zero) pages */
- page = follow_page(vma, addr, FOLL_DUMP);
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
goto set_status;
- err = page ? page_to_nid(page) : -ENOENT;
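+ /*
+ * The reference taken via FOLL_GET keeps the page from being
+ * freed (e.g. by memory offlining) until its node id is read.
+ */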
+ if (page) {
+ err = page_to_nid(page);
+ put_page(page);
+ } else {
+ err = -ENOENT;
+ }
set_status:
*status = err;
const void __user * __user *pages,
int __user *status)
{
-#define DO_PAGES_STAT_CHUNK_NR 16
+#define DO_PAGES_STAT_CHUNK_NR 16UL
const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
int chunk_status[DO_PAGES_STAT_CHUNK_NR];
while (nr_pages) {
- unsigned long chunk_nr;
-
- chunk_nr = nr_pages;
- if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
- chunk_nr = DO_PAGES_STAT_CHUNK_NR;
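+ /* min() insists on matching types, hence the UL suffix on the constant. */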
+ unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR);
if (in_compat_syscall()) {
if (get_compat_pages_array(chunk_pages, pages,
#ifdef CONFIG_NUMA_BALANCING
/*
* Returns true if this is a safe migration target node for misplaced NUMA
- * pages. Currently it only checks the watermarks which crude
+ * pages. Currently it only checks the watermarks, which is crude.
*/
static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
unsigned long nr_migrate_pages)
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
struct zone *zone = pgdat->node_zones + z;
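+ /*
+ * A zone can be populated yet have all of its pages reserved;
+ * managed_zone() checks for pages actually handed to the buddy
+ * allocator, which is what the watermark test cares about.
+ */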
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
/* Avoid waking kswapd by allocating pages_to_migrate pages. */
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
- int page_lru;
int nr_pages = thp_nr_pages(page);
int order = compound_order(page);
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
return 0;
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
- if (populated_zone(pgdat->node_zones + z))
+ if (managed_zone(pgdat->node_zones + z))
break;
}
wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE);
if (isolate_lru_page(page))
return 0;
- page_lru = page_is_file_lru(page);
- mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
+ mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page),
nr_pages);
/*
return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_NUMA */
/*
* node_demotion[] example:
return target;
}
-#if defined(CONFIG_HOTPLUG_CPU)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
*/
static void __set_migration_target_nodes(void)
{
- nodemask_t next_pass = NODE_MASK_NONE;
- nodemask_t this_pass = NODE_MASK_NONE;
+ nodemask_t next_pass;
+ nodemask_t this_pass;
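+ /*
+ * No initializers needed: next_pass is set from node_states[N_CPU]
+ * before its first use, and this_pass is copied from next_pass at
+ * the top of each pass.
+ */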
nodemask_t used_targets = NODE_MASK_NONE;
int node, best_distance;
* __set_migration_target_nodes() can be used as opposed to
* set_migration_target_nodes().
*/
+#ifdef CONFIG_MEMORY_HOTPLUG
static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
unsigned long action, void *_arg)
{
return notifier_from_errno(0);
}
+#endif
void __init migrate_on_reclaim_init(void)
{
- node_demotion = kmalloc_array(nr_node_ids,
- sizeof(struct demotion_nodes),
- GFP_KERNEL);
+ node_demotion = kcalloc(nr_node_ids,
+ sizeof(struct demotion_nodes),
+ GFP_KERNEL);
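+ /*
+ * kcalloc() zeroes the array, so each node starts out with no
+ * demotion targets until set_migration_target_nodes() runs.
+ */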
WARN_ON(!node_demotion);
-
+#ifdef CONFIG_MEMORY_HOTPLUG
hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+#endif
/*
* At this point, all numa nodes with memory/CPUs have their state
* properly set, so we can build the demotion order now.
set_migration_target_nodes();
cpus_read_unlock();
}
-#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
- numa_demotion_enabled = true;
- else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
- numa_demotion_enabled = false;
- else
- return -EINVAL;
+ ssize_t ret;
+
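+ /*
+ * kstrtobool() accepts the usual boolean spellings, including
+ * "1"/"0", "y"/"n" and "on"/"off".
+ */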
+ ret = kstrtobool(buf, &numa_demotion_enabled);
+ if (ret)
+ return ret;
return count;
}
return err;
}
subsys_initcall(numa_init_sysfs);
-#endif
+#endif /* CONFIG_SYSFS */
+#endif /* CONFIG_NUMA */