Merge tag 'for-linus-5.14-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/mm/migrate.c b/mm/migrate.c
index 380ca57..23cbd9d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -210,13 +210,18 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
                 * Recheck VMA as permissions can change since migration started
                 */
                entry = pte_to_swp_entry(*pvmw.pte);
-               if (is_write_migration_entry(entry))
+               if (is_writable_migration_entry(entry))
                        pte = maybe_mkwrite(pte, vma);
                else if (pte_swp_uffd_wp(*pvmw.pte))
                        pte = pte_mkuffd_wp(pte);
 
                if (unlikely(is_device_private_page(new))) {
-                       entry = make_device_private_entry(new, pte_write(pte));
+                       if (pte_write(pte))
+                               entry = make_writable_device_private_entry(
+                                                       page_to_pfn(new));
+                       else
+                               entry = make_readable_device_private_entry(
+                                                       page_to_pfn(new));
                        pte = swp_entry_to_pte(entry);
                        if (pte_swp_soft_dirty(*pvmw.pte))
                                pte = pte_swp_mksoft_dirty(pte);
@@ -226,8 +231,10 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 
 #ifdef CONFIG_HUGETLB_PAGE
                if (PageHuge(new)) {
+                       unsigned int shift = huge_page_shift(hstate_vma(vma));
+
                        pte = pte_mkhuge(pte);
-                       pte = arch_make_huge_pte(pte, vma, new, 0);
+                       pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
                        set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
                        if (PageAnon(new))
                                hugepage_add_anon_rmap(new, vma, pvmw.address);
@@ -294,7 +301,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
        if (!is_migration_entry(entry))
                goto out;
 
-       page = migration_entry_to_page(entry);
+       page = pfn_swap_entry_to_page(entry);
        page = compound_head(page);
 
        /*
@@ -335,7 +342,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
        ptl = pmd_lock(mm, pmd);
        if (!is_pmd_migration_entry(*pmd))
                goto unlock;
-       page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
+       page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
        if (!get_page_unless_zero(page))
                goto unlock;
        spin_unlock(ptl);
@@ -551,7 +558,7 @@ static void __copy_gigantic_page(struct page *dst, struct page *src,
        }
 }
 
-static void copy_huge_page(struct page *dst, struct page *src)
+void copy_huge_page(struct page *dst, struct page *src)
 {
        int i;
        int nr_pages;
@@ -626,7 +633,10 @@ void migrate_page_states(struct page *newpage, struct page *page)
        if (PageSwapCache(page))
                ClearPageSwapCache(page);
        ClearPagePrivate(page);
-       set_page_private(page, 0);
+
+       /* page->private contains hugetlb specific flags */
+       if (!PageHuge(page))
+               set_page_private(page, 0);
 
        /*
         * If any waiters have accumulated on the new page then
@@ -1099,7 +1109,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                /* Establish migration ptes */
                VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
                                page);
-               try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
+               try_to_migrate(page, 0);
                page_was_mapped = 1;
        }
 
@@ -1288,7 +1298,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
         * page_mapping() set, hugetlbfs specific move page routine will not
         * be called and we could leak usage counts for subpools.
         */
-       if (page_private(hpage) && !page_mapping(hpage)) {
+       if (hugetlb_page_subpool(hpage) && !page_mapping(hpage)) {
                rc = -EBUSY;
                goto out_unlock;
        }
@@ -1301,7 +1311,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
        if (page_mapped(hpage)) {
                bool mapping_locked = false;
-               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
+               enum ttu_flags ttu = 0;
 
                if (!PageAnon(hpage)) {
                        /*
@@ -1318,7 +1328,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                        ttu |= TTU_RMAP_LOCKED;
                }
 
-               try_to_unmap(hpage, ttu);
+               try_to_migrate(hpage, ttu);
                page_was_mapped = 1;
 
                if (mapping_locked)
@@ -1418,6 +1428,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc, nr_subpages;
        LIST_HEAD(ret_pages);
+       bool nosplit = (reason == MR_NUMA_MISPLACED);
 
        trace_mm_migrate_pages_start(mode, reason);
 
@@ -1489,8 +1500,9 @@ retry:
                                /*
                                 * When memory is low, don't bother to try to migrate
                                 * other pages, just exit.
+                                * THP NUMA faulting doesn't split THP to retry.
                                 */
-                               if (is_thp) {
+                               if (is_thp && !nosplit) {
                                        if (!try_split_thp(page, &page2, from)) {
                                                nr_thp_split++;
                                                goto retry;
@@ -2043,12 +2055,33 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
        return newpage;
 }
 
+static struct page *alloc_misplaced_dst_page_thp(struct page *page,
+                                                unsigned long data)
+{
+       int nid = (int) data;
+       struct page *newpage;
+
+       newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
+                                  HPAGE_PMD_ORDER);
+       if (!newpage)
+               goto out;
+
+       prep_transhuge_page(newpage);
+
+out:
+       return newpage;
+}
+
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
        int page_lru;
 
        VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
+       /* Do not migrate THP mapped by multiple processes */
+       if (PageTransHuge(page) && total_mapcount(page) > 1)
+               return 0;
+
        /* Avoid migrating to a node that is nearly full */
        if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
                return 0;
@@ -2056,18 +2089,6 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        if (isolate_lru_page(page))
                return 0;
 
-       /*
-        * migrate_misplaced_transhuge_page() skips page migration's usual
-        * check on page_count(), so we must do it here, now that the page
-        * has been isolated: a GUP pin, or any other pin, prevents migration.
-        * The expected page count is 3: 1 for page's mapcount and 1 for the
-        * caller's pin and 1 for the reference taken by isolate_lru_page().
-        */
-       if (PageTransHuge(page) && page_count(page) != 3) {
-               putback_lru_page(page);
-               return 0;
-       }
-
        page_lru = page_is_file_lru(page);
        mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
                                thp_nr_pages(page));
@@ -2081,12 +2102,6 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        return 1;
 }
 
-bool pmd_trans_migrating(pmd_t pmd)
-{
-       struct page *page = pmd_page(pmd);
-       return PageLocked(page);
-}
-
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
@@ -2099,6 +2114,21 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        int isolated;
        int nr_remaining;
        LIST_HEAD(migratepages);
+       new_page_t *new;
+       bool compound;
+       unsigned int nr_pages = thp_nr_pages(page);
+
+       /*
+        * PTE mapped THP or HugeTLB page can't reach here so the page could
+        * be either base page or THP.  And it must be head page if it is
+        * THP.
+        */
+       compound = PageTransHuge(page);
+
+       if (compound)
+               new = alloc_misplaced_dst_page_thp;
+       else
+               new = alloc_misplaced_dst_page;
 
        /*
         * Don't migrate file pages that are mapped in multiple processes
@@ -2120,19 +2150,18 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
                goto out;
 
        list_add(&page->lru, &migratepages);
-       nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
-                                    NULL, node, MIGRATE_ASYNC,
-                                    MR_NUMA_MISPLACED);
+       nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
+                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED);
        if (nr_remaining) {
                if (!list_empty(&migratepages)) {
                        list_del(&page->lru);
-                       dec_node_page_state(page, NR_ISOLATED_ANON +
-                                       page_is_file_lru(page));
+                       mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
+                                       page_is_file_lru(page), -nr_pages);
                        putback_lru_page(page);
                }
                isolated = 0;
        } else
-               count_vm_numa_event(NUMA_PAGE_MIGRATE);
+               count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
        BUG_ON(!list_empty(&migratepages));
        return isolated;
 
@@ -2141,141 +2170,6 @@ out:
        return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
-
-#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-/*
- * Migrates a THP to a given target node. page must be locked and is unlocked
- * before returning.
- */
-int migrate_misplaced_transhuge_page(struct mm_struct *mm,
-                               struct vm_area_struct *vma,
-                               pmd_t *pmd, pmd_t entry,
-                               unsigned long address,
-                               struct page *page, int node)
-{
-       spinlock_t *ptl;
-       pg_data_t *pgdat = NODE_DATA(node);
-       int isolated = 0;
-       struct page *new_page = NULL;
-       int page_lru = page_is_file_lru(page);
-       unsigned long start = address & HPAGE_PMD_MASK;
-
-       new_page = alloc_pages_node(node,
-               (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
-               HPAGE_PMD_ORDER);
-       if (!new_page)
-               goto out_fail;
-       prep_transhuge_page(new_page);
-
-       isolated = numamigrate_isolate_page(pgdat, page);
-       if (!isolated) {
-               put_page(new_page);
-               goto out_fail;
-       }
-
-       /* Prepare a page as a migration target */
-       __SetPageLocked(new_page);
-       if (PageSwapBacked(page))
-               __SetPageSwapBacked(new_page);
-
-       /* anon mapping, we can simply copy page->mapping to the new page: */
-       new_page->mapping = page->mapping;
-       new_page->index = page->index;
-       /* flush the cache before copying using the kernel virtual address */
-       flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
-       migrate_page_copy(new_page, page);
-       WARN_ON(PageLRU(new_page));
-
-       /* Recheck the target PMD */
-       ptl = pmd_lock(mm, pmd);
-       if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
-               spin_unlock(ptl);
-
-               /* Reverse changes made by migrate_page_copy() */
-               if (TestClearPageActive(new_page))
-                       SetPageActive(page);
-               if (TestClearPageUnevictable(new_page))
-                       SetPageUnevictable(page);
-
-               unlock_page(new_page);
-               put_page(new_page);             /* Free it */
-
-               /* Retake the callers reference and putback on LRU */
-               get_page(page);
-               putback_lru_page(page);
-               mod_node_page_state(page_pgdat(page),
-                        NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
-
-               goto out_unlock;
-       }
-
-       entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
-
-       /*
-        * Overwrite the old entry under pagetable lock and establish
-        * the new PTE. Any parallel GUP will either observe the old
-        * page blocking on the page lock, block on the page table
-        * lock or observe the new page. The SetPageUptodate on the
-        * new page and page_add_new_anon_rmap guarantee the copy is
-        * visible before the pagetable update.
-        */
-       page_add_anon_rmap(new_page, vma, start, true);
-       /*
-        * At this point the pmd is numa/protnone (i.e. non present) and the TLB
-        * has already been flushed globally.  So no TLB can be currently
-        * caching this non present pmd mapping.  There's no need to clear the
-        * pmd before doing set_pmd_at(), nor to flush the TLB after
-        * set_pmd_at().  Clearing the pmd here would introduce a race
-        * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
-        * mmap_lock for reading.  If the pmd is set to NULL at any given time,
-        * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
-        * pmd.
-        */
-       set_pmd_at(mm, start, pmd, entry);
-       update_mmu_cache_pmd(vma, address, &entry);
-
-       page_ref_unfreeze(page, 2);
-       mlock_migrate_page(new_page, page);
-       page_remove_rmap(page, true);
-       set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
-
-       spin_unlock(ptl);
-
-       /* Take an "isolate" reference and put new page on the LRU. */
-       get_page(new_page);
-       putback_lru_page(new_page);
-
-       unlock_page(new_page);
-       unlock_page(page);
-       put_page(page);                 /* Drop the rmap reference */
-       put_page(page);                 /* Drop the LRU isolation reference */
-
-       count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
-       count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
-
-       mod_node_page_state(page_pgdat(page),
-                       NR_ISOLATED_ANON + page_lru,
-                       -HPAGE_PMD_NR);
-       return isolated;
-
-out_fail:
-       count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-       ptl = pmd_lock(mm, pmd);
-       if (pmd_same(*pmd, entry)) {
-               entry = pmd_modify(entry, vma->vm_page_prot);
-               set_pmd_at(mm, start, pmd, entry);
-               update_mmu_cache_pmd(vma, address, &entry);
-       }
-       spin_unlock(ptl);
-
-out_unlock:
-       unlock_page(page);
-       put_page(page);
-       return 0;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DEVICE_PRIVATE
@@ -2400,7 +2294,7 @@ again:
                        if (!is_device_private_entry(entry))
                                goto next;
 
-                       page = device_private_entry_to_page(entry);
+                       page = pfn_swap_entry_to_page(entry);
                        if (!(migrate->flags &
                                MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
                            page->pgmap->owner != migrate->pgmap_owner)
@@ -2408,7 +2302,7 @@ again:
 
                        mpfn = migrate_pfn(page_to_pfn(page)) |
                                        MIGRATE_PFN_MIGRATE;
-                       if (is_write_device_private_entry(entry))
+                       if (is_writable_device_private_entry(entry))
                                mpfn |= MIGRATE_PFN_WRITE;
                } else {
                        if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
@@ -2454,8 +2348,12 @@ again:
                        ptep_get_and_clear(mm, addr, ptep);
 
                        /* Setup special migration page table entry */
-                       entry = make_migration_entry(page, mpfn &
-                                                    MIGRATE_PFN_WRITE);
+                       if (mpfn & MIGRATE_PFN_WRITE)
+                               entry = make_writable_migration_entry(
+                                                       page_to_pfn(page));
+                       else
+                               entry = make_readable_migration_entry(
+                                                       page_to_pfn(page));
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_present(pte)) {
                                if (pte_soft_dirty(pte))
@@ -2518,8 +2416,8 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
         * that the registered device driver can skip invalidating device
         * private page mappings that won't be migrated.
         */
-       mmu_notifier_range_init_migrate(&range, 0, migrate->vma,
-               migrate->vma->vm_mm, migrate->start, migrate->end,
+       mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
+               migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
                migrate->pgmap_owner);
        mmu_notifier_invalidate_range_start(&range);
 
@@ -2704,7 +2602,6 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
        const unsigned long npages = migrate->npages;
        const unsigned long start = migrate->start;
        unsigned long addr, i, restore = 0;
@@ -2716,7 +2613,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
                        continue;
 
                if (page_mapped(page)) {
-                       try_to_unmap(page, flags);
+                       try_to_migrate(page, 0);
                        if (page_mapped(page))
                                goto restore;
                }
@@ -2928,7 +2825,12 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
                if (is_device_private_page(page)) {
                        swp_entry_t swp_entry;
 
-                       swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
+                       if (vma->vm_flags & VM_WRITE)
+                               swp_entry = make_writable_device_private_entry(
+                                                       page_to_pfn(page));
+                       else
+                               swp_entry = make_readable_device_private_entry(
+                                                       page_to_pfn(page));
                        entry = swp_entry_to_pte(swp_entry);
                } else {
                        /*
@@ -3025,9 +2927,9 @@ void migrate_vma_pages(struct migrate_vma *migrate)
                        if (!notified) {
                                notified = true;
 
-                               mmu_notifier_range_init_migrate(&range, 0,
-                                       migrate->vma, migrate->vma->vm_mm,
-                                       addr, migrate->end,
+                               mmu_notifier_range_init_owner(&range,
+                                       MMU_NOTIFY_MIGRATE, 0, migrate->vma,
+                                       migrate->vma->vm_mm, addr, migrate->end,
                                        migrate->pgmap_owner);
                                mmu_notifier_invalidate_range_start(&range);
                        }