DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
while (page_vma_mapped_walk(&pvmw)) {
+ rmap_t rmap_flags = RMAP_NONE;
pte_t pte;
swp_entry_t entry;
struct page *new;
entry = pte_to_swp_entry(*pvmw.pte);
if (is_writable_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(*pvmw.pte))
pte = pte_mkuffd_wp(pte);
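+ /*
+ * Non-readable migration entries are only created for anon pages
+ * that were mapped exclusively, so remap the page with
+ * RMAP_EXCLUSIVE to restore PG_anon_exclusive.
+ */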
+ if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
+ rmap_flags |= RMAP_EXCLUSIVE;
+
if (unlikely(is_device_private_page(new))) {
if (pte_write(pte))
entry = make_writable_device_private_entry(page_to_pfn(new));
pte = pte_mkhuge(pte);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (folio_test_anon(folio))
- hugepage_add_anon_rmap(new, vma, pvmw.address);
+ hugepage_add_anon_rmap(new, vma, pvmw.address,
+ rmap_flags);
else
- page_dup_rmap(new, true);
+ page_dup_file_rmap(new, true);
set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
} else
#endif
{
if (folio_test_anon(folio))
- page_add_anon_rmap(new, vma, pvmw.address, false);
+ page_add_anon_rmap(new, vma, pvmw.address,
+ rmap_flags);
else
page_add_file_rmap(new, vma, false);
set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
xas_lock_irq(&xas);
expected_count = 2 + page_has_private(page);
- if (page_count(page) != expected_count || xas_load(&xas) != page) {
- xas_unlock_irq(&xas);
- return -EAGAIN;
- }
-
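+ /*
+ * No separate page_count()/xas_load() recheck is needed here: the
+ * page is locked, so it cannot be removed from the mapping, and
+ * page_ref_freeze() fails unless the refcount matches exactly.
+ */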
if (!page_ref_freeze(page, expected_count)) {
xas_unlock_irq(&xas);
return -EAGAIN;
folio_set_workingset(newfolio);
if (folio_test_checked(folio))
folio_set_checked(newfolio);
+ /*
+ * PG_anon_exclusive (-> PG_mappedtodisk) is always migrated via
+ * migration entries. We can still have PG_anon_exclusive set on the
+ * effectively unmapped and unreferenced first sub-page of an
+ * anonymous THP: we can simply copy it here via PG_mappedtodisk.
+ */
if (folio_test_mappedtodisk(folio))
folio_set_mappedtodisk(newfolio);
* < 0 - error code
* MIGRATEPAGE_SUCCESS - success
*/
-static int move_to_new_page(struct page *newpage, struct page *page,
+static int move_to_new_folio(struct folio *dst, struct folio *src,
enum migrate_mode mode)
{
struct address_space *mapping;
int rc = -EAGAIN;
- bool is_lru = !__PageMovable(page);
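+ /* __PageMovable() has no folio form yet, so check the head page. */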
+ bool is_lru = !__PageMovable(&src->page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+ VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
+ VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);
- mapping = page_mapping(page);
+ mapping = folio_mapping(src);
if (likely(is_lru)) {
if (!mapping)
- rc = migrate_page(mapping, newpage, page, mode);
+ rc = migrate_page(mapping, &dst->page, &src->page, mode);
else if (mapping->a_ops->migratepage)
/*
* Most pages have a mapping and most filesystems
* provide a migratepage callback. This is the most
* common path for page migration.
*/
- rc = mapping->a_ops->migratepage(mapping, newpage,
- page, mode);
+ rc = mapping->a_ops->migratepage(mapping, &dst->page,
+ &src->page, mode);
else
- rc = fallback_migrate_page(mapping, newpage,
- page, mode);
+ rc = fallback_migrate_page(mapping, &dst->page,
+ &src->page, mode);
} else {
/*
* In case of non-lru page, it could be released after
* isolation step. In that case, we shouldn't try migration.
*/
- VM_BUG_ON_PAGE(!PageIsolated(page), page);
- if (!PageMovable(page)) {
+ VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
+ if (!folio_test_movable(src)) {
rc = MIGRATEPAGE_SUCCESS;
- ClearPageIsolated(page);
+ folio_clear_isolated(src);
goto out;
}
- rc = mapping->a_ops->migratepage(mapping, newpage,
- page, mode);
+ rc = mapping->a_ops->migratepage(mapping, &dst->page,
+ &src->page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
- !PageIsolated(page));
+ !folio_test_isolated(src));
}
/*
- * When successful, old pagecache page->mapping must be cleared before
- * page is freed; but stats require that PageAnon be left as PageAnon.
+ * When successful, old pagecache src->mapping must be cleared before
+ * src is freed; but stats require that PageAnon be left as PageAnon.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
- if (__PageMovable(page)) {
- VM_BUG_ON_PAGE(!PageIsolated(page), page);
+ if (__PageMovable(&src->page)) {
+ VM_BUG_ON_FOLIO(!folio_test_isolated(src), src);
/*
* We clear PG_movable under page_lock so any compactor
* cannot try to migrate this page.
*/
- ClearPageIsolated(page);
+ folio_clear_isolated(src);
}
/*
- * Anonymous and movable page->mapping will be cleared by
+ * Anonymous and movable src->mapping will be cleared by
* free_pages_prepare so don't reset it here for keeping
* the type to work PageAnon, for example.
*/
- if (!PageMappingFlags(page))
- page->mapping = NULL;
+ if (!folio_mapping_flags(src))
+ src->mapping = NULL;
- if (likely(!is_zone_device_page(newpage)))
- flush_dcache_folio(page_folio(newpage));
+ if (likely(!folio_is_zone_device(dst)))
+ flush_dcache_folio(dst);
}
out:
return rc;
goto out_unlock;
if (unlikely(!is_lru)) {
- rc = move_to_new_page(newpage, page, mode);
+ rc = move_to_new_folio(dst, folio, mode);
goto out_unlock_both;
}
if (!page->mapping) {
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
- try_to_free_buffers(page);
+ try_to_free_buffers(folio);
goto out_unlock_both;
}
} else if (page_mapped(page)) {
}
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, mode);
+ rc = move_to_new_folio(dst, folio, mode);
/*
* When successful, push newpage to LRU immediately: so that if it
if (!newpage)
return -ENOMEM;
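+ /* Make sure the new page does not carry stale ->private data. */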
+ newpage->private = 0;
rc = __unmap_and_move(page, newpage, force, mode);
if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason);
goto put_anon;
if (page_mapped(hpage)) {
- bool mapping_locked = false;
enum ttu_flags ttu = 0;
if (!PageAnon(hpage)) {
if (unlikely(!mapping))
goto unlock_put_anon;
- mapping_locked = true;
- ttu |= TTU_RMAP_LOCKED;
+ ttu = TTU_RMAP_LOCKED;
}
try_to_migrate(src, ttu);
page_was_mapped = 1;
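+ /*
+ * TTU_RMAP_LOCKED in ttu already records that i_mmap_rwsem is
+ * held, so a separate mapping_locked flag is unnecessary.
+ */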
- if (mapping_locked)
+ if (ttu & TTU_RMAP_LOCKED)
i_mmap_unlock_write(mapping);
}
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, mode);
+ rc = move_to_new_folio(dst, src, mode);
if (page_was_mapped)
remove_migration_ptes(src,
nr_thp_split++;
goto retry;
}
-
- nr_failed_pages += nr_subpages;
- break;
- }
-
/* Hugetlb migration is unsupported */
- if (!no_subpage_counting)
+ } else if (!no_subpage_counting) {
nr_failed++;
+ }
+
nr_failed_pages += nr_subpages;
break;
case -ENOMEM:
nr_thp_split++;
goto retry;
}
-
- nr_failed_pages += nr_subpages;
- goto out;
+ } else if (!no_subpage_counting) {
+ nr_failed++;
}
- if (!no_subpage_counting)
- nr_failed++;
nr_failed_pages += nr_subpages;
+ /*
+ * There might be some subpages of failed-to-migrate THPs
+ * left on the thp_split_pages list. Move them back to the
+ * migration list so that the caller can put them back on
+ * the right list; otherwise their page refcounts are leaked.
+ */
+ list_splice_init(&thp_split_pages, from);
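+ /*
+ * THPs queued for another retry pass will not get one now
+ * that we are bailing out, so account them as failures too.
+ */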
+ nr_thp_failed += thp_retry;
goto out;
case -EAGAIN:
- if (is_thp) {
+ if (is_thp)
thp_retry++;
- break;
- }
- retry++;
+ else
+ retry++;
break;
case MIGRATEPAGE_SUCCESS:
nr_succeeded += nr_subpages;
- if (is_thp) {
+ if (is_thp)
nr_thp_succeeded++;
- break;
- }
break;
default:
/*
* removed from migration page list and not
* retried in the next outer loop.
*/
- if (is_thp) {
+ if (is_thp)
nr_thp_failed++;
- nr_failed_pages += nr_subpages;
- break;
- }
-
- if (!no_subpage_counting)
+ else if (!no_subpage_counting)
nr_failed++;
+
nr_failed_pages += nr_subpages;
break;
}
mmap_read_lock(mm);
err = -EFAULT;
- vma = find_vma(mm, addr);
- if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+ vma = vma_lookup(mm, addr);
+ if (!vma || !vma_migratable(vma))
goto out;
/* FOLL_DUMP to ignore special (like zero) pages */
goto set_status;
/* FOLL_DUMP to ignore special (like zero) pages */
- page = follow_page(vma, addr, FOLL_DUMP);
+ page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
goto set_status;
- err = page ? page_to_nid(page) : -ENOENT;
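+ /*
+ * The reference taken via FOLL_GET keeps the page from being
+ * freed (e.g. by memory offlining) until its node id is read.
+ */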
+ if (page) {
+ err = page_to_nid(page);
+ put_page(page);
+ } else {
+ err = -ENOENT;
+ }
set_status:
*status = err;
const void __user * __user *pages,
int __user *status)
{
-#define DO_PAGES_STAT_CHUNK_NR 16
+#define DO_PAGES_STAT_CHUNK_NR 16UL
const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
int chunk_status[DO_PAGES_STAT_CHUNK_NR];
while (nr_pages) {
- unsigned long chunk_nr;
-
- chunk_nr = nr_pages;
- if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
- chunk_nr = DO_PAGES_STAT_CHUNK_NR;
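+ /* min() insists on matching types, hence the UL suffix on the constant. */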
+ unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR);
if (in_compat_syscall()) {
if (get_compat_pages_array(chunk_pages, pages,
#ifdef CONFIG_NUMA_BALANCING
/*
* Returns true if this is a safe migration target node for misplaced NUMA
- * pages. Currently it only checks the watermarks which crude
+ * pages. Currently it only checks the watermarks, which is crude.
*/
static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
unsigned long nr_migrate_pages)
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
struct zone *zone = pgdat->node_zones + z;
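+ /*
+ * A zone can be populated yet have all of its pages reserved;
+ * managed_zone() checks for pages actually handed to the buddy
+ * allocator, which is what the watermark test cares about.
+ */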
- if (!populated_zone(zone))
+ if (!managed_zone(zone))
continue;
/* Avoid waking kswapd by allocating pages_to_migrate pages. */
static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
- int page_lru;
int nr_pages = thp_nr_pages(page);
int order = compound_order(page);
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
return 0;
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
- if (populated_zone(pgdat->node_zones + z))
+ if (managed_zone(pgdat->node_zones + z))
break;
}
wakeup_kswapd(pgdat->node_zones + z, 0, order, ZONE_MOVABLE);
if (isolate_lru_page(page))
return 0;
- page_lru = page_is_file_lru(page);
- mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
+ mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page),
nr_pages);
/*
return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_NUMA */
/*
* node_demotion[] example:
return target;
}
-#if defined(CONFIG_HOTPLUG_CPU)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
*/
static void __set_migration_target_nodes(void)
{
- nodemask_t next_pass = NODE_MASK_NONE;
- nodemask_t this_pass = NODE_MASK_NONE;
+ nodemask_t next_pass;
+ nodemask_t this_pass;
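+ /*
+ * No initializers needed: next_pass is set from node_states[N_CPU]
+ * before its first use, and this_pass is copied from next_pass at
+ * the top of each pass.
+ */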
nodemask_t used_targets = NODE_MASK_NONE;
int node, best_distance;
* __set_migration_target_nodes() can be used as opposed to
* set_migration_target_nodes().
*/
+#ifdef CONFIG_MEMORY_HOTPLUG
static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
unsigned long action, void *_arg)
{
return notifier_from_errno(0);
}
+#endif
void __init migrate_on_reclaim_init(void)
{
- node_demotion = kmalloc_array(nr_node_ids,
- sizeof(struct demotion_nodes),
- GFP_KERNEL);
+ node_demotion = kcalloc(nr_node_ids,
+ sizeof(struct demotion_nodes),
+ GFP_KERNEL);
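+ /*
+ * kcalloc() zeroes the array, so each node starts out with no
+ * demotion targets until set_migration_target_nodes() runs.
+ */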
WARN_ON(!node_demotion);
-
+#ifdef CONFIG_MEMORY_HOTPLUG
hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+#endif
/*
* At this point, all numa nodes with memory/CPUs have their state
* properly set, so we can build the demotion order now.
set_migration_target_nodes();
cpus_read_unlock();
}
-#endif /* CONFIG_HOTPLUG_CPU */
bool numa_demotion_enabled = false;
struct kobj_attribute *attr,
const char *buf, size_t count)
{
- if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
- numa_demotion_enabled = true;
- else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
- numa_demotion_enabled = false;
- else
- return -EINVAL;
+ ssize_t ret;
+
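+ /*
+ * kstrtobool() accepts the usual boolean spellings, including
+ * "1"/"0", "y"/"n" and "on"/"off".
+ */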
+ ret = kstrtobool(buf, &numa_demotion_enabled);
+ if (ret)
+ return ret;
return count;
}
return err;
}
subsys_initcall(numa_init_sysfs);
-#endif
+#endif /* CONFIG_SYSFS */
+#endif /* CONFIG_NUMA */