Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block

[linux-2.6-microblaze.git] / mm / migrate.c
diff --git a/mm/migrate.c b/mm/migrate.c

index 57aeb9b..a6a7743 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -49,6 +49,7 @@
  #include <linux/sched/mm.h>
  #include <linux/ptrace.h>
  #include <linux/oom.h>
+#include <linux/memory.h>
  
  #include <asm/tlbflush.h>
  
@@ -959,7 +960,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                                 int force, enum migrate_mode mode)
  {
         int rc = -EAGAIN;
-       int page_was_mapped = 0;
+       bool page_was_mapped = false;
         struct anon_vma *anon_vma = NULL;
         bool is_lru = !__PageMovable(page);
  
@@ -1007,7 +1008,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         }
  
         /*
-        * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+        * By try_to_migrate(), page->mapcount goes down to 0 here. In this case,
          * we cannot notice that anon_vma is freed while we migrates a page.
          * This get_anon_vma() delays freeing anon_vma pointer until the end
          * of migration. File cache pages are no problem because of page_lock()
@@ -1062,7 +1063,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                 VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
                                 page);
                 try_to_migrate(page, 0);
-               page_was_mapped = 1;
+               page_was_mapped = true;
         }
  
         if (!page_mapped(page))
@@ -1148,7 +1149,7 @@ static int node_demotion[MAX_NUMNODES] __read_mostly =
   * next_demotion_node() - Get the next node in the demotion path
   * @node: The starting node to lookup the next node
   *
- * @returns: node id for next memory node in the demotion path hierarchy
+ * Return: node id for next memory node in the demotion path hierarchy
   * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
   * @node online or guarantee that it *continues* to be the next demotion
   * target.
@@ -1428,6 +1429,8 @@ static inline int try_split_thp(struct page *page, struct page **page2,
   * @mode:              The migration mode that specifies the constraints for
   *                     page migration, if any.
   * @reason:            The reason for page migration.
+ * @ret_succeeded:     Set to the number of pages migrated successfully if
+ *                     the caller passes a non-NULL pointer.
   *
   * The function returns after 10 attempts or if no pages are movable any more
   * because the list has become empty or no retryable pages exist any more.
@@ -1438,7 +1441,7 @@ static inline int try_split_thp(struct page *page, struct page **page2,
   */
  int migrate_pages(struct list_head *from, new_page_t get_new_page,
                 free_page_t put_new_page, unsigned long private,
-               enum migrate_mode mode, int reason)
+               enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
  {
         int retry = 1;
         int thp_retry = 1;
@@ -1593,6 +1596,9 @@ out:
         if (!swapwrite)
                 current->flags &= ~PF_SWAPWRITE;
  
+       if (ret_succeeded)
+               *ret_succeeded = nr_succeeded;
+
         return rc;
  }
  
@@ -1662,7 +1668,7 @@ static int do_move_pages_to_node(struct mm_struct *mm,
         };
  
         err = migrate_pages(pagelist, alloc_migration_target, NULL,
-                       (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
+               (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
         if (err)
                 putback_movable_pages(pagelist);
         return err;
@@ -1894,6 +1900,23 @@ set_status:
         mmap_read_unlock(mm);
  }
  
+static int get_compat_pages_array(const void __user *chunk_pages[],
+                                 const void __user * __user *pages,
+                                 unsigned long chunk_nr)
+{
+       compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
+       compat_uptr_t p;
+       int i;
+
+       for (i = 0; i < chunk_nr; i++) {
+               if (get_user(p, pages32 + i))
+                       return -EFAULT;
+               chunk_pages[i] = compat_ptr(p);
+       }
+
+       return 0;
+}
+
  /*
   * Determine the nodes of a user array of pages and store it in
   * a user array of status.
@@ -1913,8 +1936,15 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
                 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
                         chunk_nr = DO_PAGES_STAT_CHUNK_NR;
  
-               if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
-                       break;
+               if (in_compat_syscall()) {
+                       if (get_compat_pages_array(chunk_pages, pages,
+                                                  chunk_nr))
+                               break;
+               } else {
+                       if (copy_from_user(chunk_pages, pages,
+                                     chunk_nr * sizeof(*chunk_pages)))
+                               break;
+               }
  
                 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
  
@@ -2017,28 +2047,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
         return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
  }
  
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
-                      compat_uptr_t __user *, pages32,
-                      const int __user *, nodes,
-                      int __user *, status,
-                      int, flags)
-{
-       const void __user * __user *pages;
-       int i;
-
-       pages = compat_alloc_user_space(nr_pages * sizeof(void *));
-       for (i = 0; i < nr_pages; i++) {
-               compat_uptr_t p;
-
-               if (get_user(p, pages32 + i) ||
-                       put_user(compat_ptr(p), pages + i))
-                       return -EFAULT;
-       }
-       return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
-}
-#endif /* CONFIG_COMPAT */
-
  #ifdef CONFIG_NUMA_BALANCING
  /*
   * Returns true if this is a safe migration target node for misplaced NUMA
@@ -2101,6 +2109,7 @@ out:
  static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  {
         int page_lru;
+       int nr_pages = thp_nr_pages(page);
  
         VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
  
@@ -2109,7 +2118,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
                 return 0;
  
         /* Avoid migrating to a node that is nearly full */
-       if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
+       if (!migrate_balanced_pgdat(pgdat, nr_pages))
                 return 0;
  
         if (isolate_lru_page(page))
@@ -2117,7 +2126,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  
         page_lru = page_is_file_lru(page);
         mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
-                               thp_nr_pages(page));
+                           nr_pages);
  
         /*
          * Isolating the page has taken another reference, so the
@@ -2177,7 +2186,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
  
         list_add(&page->lru, &migratepages);
         nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
-                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+                                    MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
         if (nr_remaining) {
                 if (!list_empty(&migratepages)) {
                         list_del(&page->lru);
@@ -3057,6 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
  EXPORT_SYMBOL(migrate_vma_finalize);
  #endif /* CONFIG_DEVICE_PRIVATE */
  
+#if defined(CONFIG_MEMORY_HOTPLUG)
  /* Disable reclaim-based migration. */
  static void __disable_all_migrate_targets(void)
  {
@@ -3191,10 +3201,96 @@ again:
  /*
   * For callers that do not hold get_online_mems() already.
   */
-__maybe_unused // <- temporay to prevent warnings during bisects
  static void set_migration_target_nodes(void)
  {
         get_online_mems();
         __set_migration_target_nodes();
         put_online_mems();
  }
+
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs.  That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+       set_migration_target_nodes();
+       return 0;
+}
+
+/*
+ * This leaves migrate-on-reclaim transiently disabled between
+ * the MEM_GOING_OFFLINE and MEM_OFFLINE events.  This runs
+ * whether reclaim-based migration is enabled or not, which
+ * ensures that the user can turn reclaim-based migration at
+ * any time without needing to recalculate migration targets.
+ *
+ * These callbacks already hold get_online_mems().  That is why
+ * __set_migration_target_nodes() can be used as opposed to
+ * set_migration_target_nodes().
+ */
+static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
+                                                unsigned long action, void *arg)
+{
+       switch (action) {
+       case MEM_GOING_OFFLINE:
+               /*
+                * Make sure there are not transient states where
+                * an offline node is a migration target.  This
+                * will leave migration disabled until the offline
+                * completes and the MEM_OFFLINE case below runs.
+                */
+               disable_all_migrate_targets();
+               break;
+       case MEM_OFFLINE:
+       case MEM_ONLINE:
+               /*
+                * Recalculate the target nodes once the node
+                * reaches its final state (online or offline).
+                */
+               __set_migration_target_nodes();
+               break;
+       case MEM_CANCEL_OFFLINE:
+               /*
+                * MEM_GOING_OFFLINE disabled all the migration
+                * targets.  Reenable them.
+                */
+               __set_migration_target_nodes();
+               break;
+       case MEM_GOING_ONLINE:
+       case MEM_CANCEL_ONLINE:
+               break;
+       }
+
+       return notifier_from_errno(0);
+}
+
+static int __init migrate_on_reclaim_init(void)
+{
+       int ret;
+
+       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
+                               migration_online_cpu,
+                               migration_offline_cpu);
+       /*
+        * In the unlikely case that this fails, the automatic
+        * migration targets may become suboptimal for nodes
+        * where N_CPU changes.  With such a small impact in a
+        * rare case, do not bother trying to do anything special.
+        */
+       WARN_ON(ret < 0);
+
+       hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
+       return 0;
+}
+late_initcall(migrate_on_reclaim_init);
+#endif /* CONFIG_MEMORY_HOTPLUG */