Merge branch 'pm-cpufreq'
[linux-2.6-microblaze.git] / mm / migrate.c
index 04a98bb..ee802cb 100644
@@ -62,7 +62,7 @@
  * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
  * undesirable, use migrate_prep_local()
  */
-int migrate_prep(void)
+void migrate_prep(void)
 {
        /*
         * Clear the LRU lists so pages can be isolated.
@@ -71,16 +71,12 @@ int migrate_prep(void)
         * pages that may be busy.
         */
        lru_add_drain_all();
-
-       return 0;
 }
 
 /* Do the necessary work of migrate_prep but not if it involves other CPUs */
-int migrate_prep_local(void)
+void migrate_prep_local(void)
 {
        lru_add_drain();
-
-       return 0;
 }
 
 int isolate_movable_page(struct page *page, isolate_mode_t mode)
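Since both helpers above could only ever return 0, dropping the return type lets call sites shed dead error handling. A minimal sketch, assuming a hypothetical caller rather than the in-tree hunks in mm/mempolicy.c and mm/migrate.c:

```c
#include <linux/migrate.h>

/*
 * Hypothetical call site (not a verbatim in-tree caller).  Before this
 * change it read:
 *
 *	err = migrate_prep();
 *	if (err)
 *		goto out;
 *
 * but the error branch was unreachable because the function always
 * returned 0.  With the void prototype the call collapses to a plain
 * statement.
 */
static void prepare_for_migration(void)
{
	migrate_prep();		/* drain per-CPU LRU caches on all CPUs */
}
```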
@@ -381,7 +377,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
        int expected_count = 1;
 
        /*
-        * Device public or private pages have an extra refcount as they are
+        * Device private pages have an extra refcount as they are
         * ZONE_DEVICE pages.
         */
        expected_count += is_device_private_page(page);
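The reworded comment reflects that only device-private memory still reaches this path; the MEMORY_DEVICE_PUBLIC page type was removed from the kernel in an earlier release. For orientation, a sketch of the helper the extra refcount leans on, paraphrased from memory rather than copied from this tree (see include/linux/mm.h for the authoritative definition):

```c
/*
 * Approximate shape of is_device_private_page() in this era of the
 * kernel.  It evaluates to 0 or 1, so adding it to expected_count
 * accounts for the one extra reference a device-private ZONE_DEVICE
 * page holds.
 */
static inline bool is_device_private_page(const struct page *page)
{
	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
		IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
		is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
```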
@@ -503,7 +499,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
                        __dec_lruvec_state(old_lruvec, NR_SHMEM);
                        __inc_lruvec_state(new_lruvec, NR_SHMEM);
                }
-               if (dirty && mapping_cap_account_dirty(mapping)) {
+               if (dirty && mapping_can_writeback(mapping)) {
                        __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
                        __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
                        __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
@@ -1106,7 +1102,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         * and treated as swapcache but it has no rmap yet.
         * Calling try_to_unmap() against a page->mapping==NULL page will
         * trigger a BUG.  So handle it here.
-        * 2. An orphaned page (see truncate_complete_page) might have
+        * 2. An orphaned page (see truncate_cleanup_page) might have
         * fs-private metadata. The page can be picked up due to memory
         * offlining.  Everywhere else except page reclaim, the page is
         * invisible to the vm, so the page can not be migrated.  So try to
@@ -1122,8 +1118,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                /* Establish migration ptes */
                VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
                                page);
-               try_to_unmap(page,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+               try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
                page_was_mapped = 1;
        }
 
@@ -1169,13 +1164,14 @@ static int unmap_and_move(new_page_t get_new_page,
                                   free_page_t put_new_page,
                                   unsigned long private, struct page *page,
                                   int force, enum migrate_mode mode,
-                                  enum migrate_reason reason)
+                                  enum migrate_reason reason,
+                                  struct list_head *ret)
 {
        int rc = MIGRATEPAGE_SUCCESS;
        struct page *newpage = NULL;
 
        if (!thp_migration_supported() && PageTransHuge(page))
-               return -ENOMEM;
+               return -ENOSYS;
 
        if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
@@ -1206,7 +1202,14 @@ out:
                 * migrated will have kept its references and be restored.
                 */
                list_del(&page->lru);
+       }
 
+       /*
+        * If migration is successful, release the reference grabbed during
+        * isolation. Otherwise, restore the page to the right list unless
+        * we want to retry.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
                /*
                 * Compaction can migrate also non-LRU pages which are
                 * not accounted to NR_ISOLATED_*. They can be recognized
@@ -1215,40 +1218,16 @@ out:
                if (likely(!__PageMovable(page)))
                        mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
                                        page_is_file_lru(page), -thp_nr_pages(page));
-       }
 
-       /*
-        * If migration is successful, releases reference grabbed during
-        * isolation. Otherwise, restore the page to right list unless
-        * we want to retry.
-        */
-       if (rc == MIGRATEPAGE_SUCCESS) {
-               put_page(page);
-               if (reason == MR_MEMORY_FAILURE) {
+               if (reason != MR_MEMORY_FAILURE)
                        /*
-                        * Set PG_HWPoison on just freed page
-                        * intentionally. Although it's rather weird,
-                        * it's how HWPoison flag works at the moment.
+                        * We release the page in page_handle_poison.
                         */
-                       if (set_hwpoison_free_buddy_page(page))
-                               num_poisoned_pages_inc();
-               }
+                       put_page(page);
        } else {
-               if (rc != -EAGAIN) {
-                       if (likely(!__PageMovable(page))) {
-                               putback_lru_page(page);
-                               goto put_new;
-                       }
+               if (rc != -EAGAIN)
+                       list_add_tail(&page->lru, ret);
 
-                       lock_page(page);
-                       if (PageMovable(page))
-                               putback_movable_page(page);
-                       else
-                               __ClearPageIsolated(page);
-                       unlock_page(page);
-                       put_page(page);
-               }
-put_new:
                if (put_new_page)
                        put_new_page(newpage, private);
                else
@@ -1279,7 +1258,8 @@ put_new:
 static int unmap_and_move_huge_page(new_page_t get_new_page,
                                free_page_t put_new_page, unsigned long private,
                                struct page *hpage, int force,
-                               enum migrate_mode mode, int reason)
+                               enum migrate_mode mode, int reason,
+                               struct list_head *ret)
 {
        int rc = -EAGAIN;
        int page_was_mapped = 0;
@@ -1295,7 +1275,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
         * kicking migration.
         */
        if (!hugepage_migration_supported(page_hstate(hpage))) {
-               putback_active_hugepage(hpage);
+               list_move_tail(&hpage->lru, ret);
                return -ENOSYS;
        }
 
@@ -1333,34 +1313,37 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                goto put_anon;
 
        if (page_mapped(hpage)) {
-               /*
-                * try_to_unmap could potentially call huge_pmd_unshare.
-                * Because of this, take semaphore in write mode here and
-                * set TTU_RMAP_LOCKED to let lower levels know we have
-                * taken the lock.
-                */
-               mapping = hugetlb_page_mapping_lock_write(hpage);
-               if (unlikely(!mapping))
-                       goto unlock_put_anon;
+               bool mapping_locked = false;
+               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
 
-               try_to_unmap(hpage,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
-                       TTU_RMAP_LOCKED);
+               if (!PageAnon(hpage)) {
+                       /*
+                        * In shared mappings, try_to_unmap could potentially
+                        * call huge_pmd_unshare.  Because of this, take
+                        * semaphore in write mode here and set TTU_RMAP_LOCKED
+                        * to let lower levels know we have taken the lock.
+                        */
+                       mapping = hugetlb_page_mapping_lock_write(hpage);
+                       if (unlikely(!mapping))
+                               goto unlock_put_anon;
+
+                       mapping_locked = true;
+                       ttu |= TTU_RMAP_LOCKED;
+               }
+
+               try_to_unmap(hpage, ttu);
                page_was_mapped = 1;
-               /*
-                * Leave mapping locked until after subsequent call to
-                * remove_migration_ptes()
-                */
+
+               if (mapping_locked)
+                       i_mmap_unlock_write(mapping);
        }
 
        if (!page_mapped(hpage))
                rc = move_to_new_page(new_hpage, hpage, mode);
 
-       if (page_was_mapped) {
+       if (page_was_mapped)
                remove_migration_ptes(hpage,
-                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true);
-               i_mmap_unlock_write(mapping);
-       }
+                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
 
 unlock_put_anon:
        unlock_page(new_hpage);
@@ -1377,8 +1360,10 @@ put_anon:
 out_unlock:
        unlock_page(hpage);
 out:
-       if (rc != -EAGAIN)
+       if (rc == MIGRATEPAGE_SUCCESS)
                putback_active_hugepage(hpage);
+       else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
+               list_move_tail(&hpage->lru, ret);
 
        /*
         * If migration was not successful and there's a freeing callback, use
@@ -1393,6 +1378,20 @@ out:
        return rc;
 }
 
+static inline int try_split_thp(struct page *page, struct page **page2,
+                               struct list_head *from)
+{
+       int rc = 0;
+
+       lock_page(page);
+       rc = split_huge_page_to_list(page, from);
+       unlock_page(page);
+       if (!rc)
+               list_safe_reset_next(page, *page2, lru);
+
+       return rc;
+}
+
 /*
  * migrate_pages - migrate the pages specified in a list, to the free pages
  *                supplied as the target for the page migration
@@ -1409,8 +1408,8 @@ out:
  *
  * The function returns after 10 attempts or if no pages are movable any more
  * because the list has become empty or no retryable pages exist any more.
- * The caller should call putback_movable_pages() to return pages to the LRU
- * or free list only if ret != 0.
+ * It is the caller's responsibility to call putback_movable_pages() to return
+ * pages to the LRU or free list only if ret != 0.
  *
  * Returns the number of pages that were not migrated, or an error code.
  */
@@ -1431,6 +1430,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
        struct page *page2;
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc, nr_subpages;
+       LIST_HEAD(ret_pages);
 
        if (!swapwrite)
                current->flags |= PF_SWAPWRITE;
@@ -1453,31 +1453,56 @@ retry:
                        if (PageHuge(page))
                                rc = unmap_and_move_huge_page(get_new_page,
                                                put_new_page, private, page,
-                                               pass > 2, mode, reason);
+                                               pass > 2, mode, reason,
+                                               &ret_pages);
                        else
                                rc = unmap_and_move(get_new_page, put_new_page,
                                                private, page, pass > 2, mode,
-                                               reason);
-
+                                               reason, &ret_pages);
+                       /*
+                        * The rules are:
+                        *      Success: non hugetlb page will be freed, hugetlb
+                        *               page will be put back
+                        *      -EAGAIN: stay on the from list
+                        *      -ENOMEM: stay on the from list
+                        *      Other errno: put on ret_pages list then splice to
+                        *                   from list
+                        */
                        switch(rc) {
+                       /*
+                        * THP migration might be unsupported or the
+                        * allocation could've failed so we should
+                        * retry on the same page with the THP split
+                        * to base pages.
+                        *
+                        * Head page is retried immediately and tail
+                        * pages are added to the tail of the list so
+                        * we encounter them after the rest of the list
+                        * is processed.
+                        */
+                       case -ENOSYS:
+                               /* THP migration is unsupported */
+                               if (is_thp) {
+                                       if (!try_split_thp(page, &page2, from)) {
+                                               nr_thp_split++;
+                                               goto retry;
+                                       }
+
+                                       nr_thp_failed++;
+                                       nr_failed += nr_subpages;
+                                       break;
+                               }
+
+                               /* Hugetlb migration is unsupported */
+                               nr_failed++;
+                               break;
                        case -ENOMEM:
                                /*
-                                * THP migration might be unsupported or the
-                                * allocation could've failed so we should
-                                * retry on the same page with the THP split
-                                * to base pages.
-                                *
-                                * Head page is retried immediately and tail
-                                * pages are added to the tail of the list so
-                                * we encounter them after the rest of the list
-                                * is processed.
+                                * When memory is low, don't bother to try to migrate
+                                * other pages, just exit.
                                 */
                                if (is_thp) {
-                                       lock_page(page);
-                                       rc = split_huge_page_to_list(page, from);
-                                       unlock_page(page);
-                                       if (!rc) {
-                                               list_safe_reset_next(page, page2, lru);
+                                       if (!try_split_thp(page, &page2, from)) {
                                                nr_thp_split++;
                                                goto retry;
                                        }
@@ -1505,7 +1530,7 @@ retry:
                                break;
                        default:
                                /*
-                                * Permanent failure (-EBUSY, -ENOSYS, etc.):
+                                * Permanent failure (-EBUSY, etc.):
                                 * unlike -EAGAIN case, the failed page is
                                 * removed from migration page list and not
                                 * retried in the next outer loop.
@@ -1524,6 +1549,12 @@ retry:
        nr_thp_failed += thp_retry;
        rc = nr_failed;
 out:
+       /*
+        * Put the permanently failed pages back on the migration list; they
+        * will be put back to the right list by the caller.
+        */
+       list_splice(&ret_pages, from);
+
        count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
        count_vm_events(PGMIGRATE_FAIL, nr_failed);
        count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
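The list_splice() above is what returns permanently failed pages to the caller. That hand-off is safe because the existing putback_movable_pages() helper already distinguishes every page type it may meet. An abridged, from-memory sketch of that helper (the real body in mm/migrate.c carries extra assertions and comments):

```c
/*
 * Abridged sketch of putback_movable_pages(): hugetlb pages go back to
 * the active hugepage list, non-LRU movable pages are handed back to
 * their driver, and ordinary pages return to the LRU with the isolated
 * counters corrected.
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page, *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		if (unlikely(__PageMovable(page))) {
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
```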
@@ -1699,7 +1730,7 @@ static int move_pages_and_store_status(struct mm_struct *mm, int node,
                 * Positive err means the number of failed
                 * pages to migrate.  Since we are going to
                 * abort and return the number of non-migrated
-                * pages, so need to incude the rest of the
+                * pages, so need to include the rest of the
                 * nr_pages that have not been attempted as
                 * well.
                 */
@@ -1869,33 +1900,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
        return nr_pages ? -EFAULT : 0;
 }
 
-/*
- * Move a list of pages in the address space of the currently executing
- * process.
- */
-static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
-                            const void __user * __user *pages,
-                            const int __user *nodes,
-                            int __user *status, int flags)
+static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
 {
        struct task_struct *task;
        struct mm_struct *mm;
-       int err;
-       nodemask_t task_nodes;
-
-       /* Check flags */
-       if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
-               return -EINVAL;
 
-       if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
-               return -EPERM;
+       /*
+        * There is no need to check if the current process has the right to
+        * modify the specified process when they are the same.
+        */
+       if (!pid) {
+               mmget(current->mm);
+               *mem_nodes = cpuset_mems_allowed(current);
+               return current->mm;
+       }
 
        /* Find the mm_struct */
        rcu_read_lock();
-       task = pid ? find_task_by_vpid(pid) : current;
+       task = find_task_by_vpid(pid);
        if (!task) {
                rcu_read_unlock();
-               return -ESRCH;
+               return ERR_PTR(-ESRCH);
        }
        get_task_struct(task);
 
@@ -1905,22 +1930,47 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
         */
        if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
                rcu_read_unlock();
-               err = -EPERM;
+               mm = ERR_PTR(-EPERM);
                goto out;
        }
        rcu_read_unlock();
 
-       err = security_task_movememory(task);
-       if (err)
+       mm = ERR_PTR(security_task_movememory(task));
+       if (IS_ERR(mm))
                goto out;
-
-       task_nodes = cpuset_mems_allowed(task);
+       *mem_nodes = cpuset_mems_allowed(task);
        mm = get_task_mm(task);
+out:
        put_task_struct(task);
-
        if (!mm)
+               mm = ERR_PTR(-EINVAL);
+       return mm;
+}
+
+/*
+ * Move a list of pages in the address space of the currently executing
+ * process.
+ */
+static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
+                            const void __user * __user *pages,
+                            const int __user *nodes,
+                            int __user *status, int flags)
+{
+       struct mm_struct *mm;
+       int err;
+       nodemask_t task_nodes;
+
+       /* Check flags */
+       if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
                return -EINVAL;
 
+       if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+               return -EPERM;
+
+       mm = find_mm_struct(pid, &task_nodes);
+       if (IS_ERR(mm))
+               return PTR_ERR(mm);
+
        if (nodes)
                err = do_pages_move(mm, task_nodes, nr_pages, pages,
                                    nodes, status, flags);
@@ -1929,10 +1979,6 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
 
        mmput(mm);
        return err;
-
-out:
-       put_task_struct(task);
-       return err;
 }
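Splitting the task lookup into find_mm_struct() moves the error reporting into the returned pointer: failures come back as ERR_PTR(-errno), success returns an mm_struct with a reference the caller must drop. A small hedged sketch of the resulting idiom; the consuming function is hypothetical (find_mm_struct() itself is static to this file):

```c
#include <linux/err.h>
#include <linux/nodemask.h>
#include <linux/sched/mm.h>

/* Hypothetical consumer showing the ERR_PTR / IS_ERR / PTR_ERR round
 * trip that replaces the old out:-label cleanup in kernel_move_pages(). */
static int act_on_mm_of(pid_t pid)
{
	nodemask_t nodes;
	struct mm_struct *mm;

	mm = find_mm_struct(pid, &nodes);
	if (IS_ERR(mm))
		return PTR_ERR(mm);	/* -ESRCH, -EPERM, -EINVAL, ... */

	/* ... operate on mm, constrained to "nodes" ... */

	mmput(mm);	/* drop the reference find_mm_struct() took */
	return 0;
}
```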
 
 SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
@@ -2051,6 +2097,17 @@ bool pmd_trans_migrating(pmd_t pmd)
        return PageLocked(page);
 }
 
+static inline bool is_shared_exec_page(struct vm_area_struct *vma,
+                                      struct page *page)
+{
+       if (page_mapcount(page) != 1 &&
+           (page_is_file_lru(page) || vma_is_shmem(vma)) &&
+           (vma->vm_flags & VM_EXEC))
+               return true;
+
+       return false;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
@@ -2068,8 +2125,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
         * Don't migrate file pages that are mapped in multiple processes
         * with execute permissions as they are probably shared libraries.
         */
-       if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
-           (vma->vm_flags & VM_EXEC))
+       if (is_shared_exec_page(vma, page))
                goto out;
 
        /*
@@ -2124,6 +2180,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        int page_lru = page_is_file_lru(page);
        unsigned long start = address & HPAGE_PMD_MASK;
 
+       if (is_shared_exec_page(vma, page))
+               goto out;
+
        new_page = alloc_pages_node(node,
                (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
                HPAGE_PMD_ORDER);
@@ -2235,6 +2294,7 @@ out_fail:
 
 out_unlock:
        unlock_page(page);
+out:
        put_page(page);
        return 0;
 }
@@ -2674,7 +2734,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
        const unsigned long npages = migrate->npages;
        const unsigned long start = migrate->start;
        unsigned long addr, i, restore = 0;
@@ -2834,8 +2894,7 @@ EXPORT_SYMBOL(migrate_vma_setup);
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
                                    unsigned long addr,
                                    struct page *page,
-                                   unsigned long *src,
-                                   unsigned long *dst)
+                                   unsigned long *src)
 {
        struct vm_area_struct *vma = migrate->vma;
        struct mm_struct *mm = vma->vm_mm;
@@ -2989,16 +3048,14 @@ void migrate_vma_pages(struct migrate_vma *migrate)
                        if (!notified) {
                                notified = true;
 
-                               mmu_notifier_range_init(&range,
-                                                       MMU_NOTIFY_CLEAR, 0,
-                                                       NULL,
-                                                       migrate->vma->vm_mm,
-                                                       addr, migrate->end);
+                               mmu_notifier_range_init_migrate(&range, 0,
+                                       migrate->vma, migrate->vma->vm_mm,
+                                       addr, migrate->end,
+                                       migrate->pgmap_owner);
                                mmu_notifier_invalidate_range_start(&range);
                        }
                        migrate_vma_insert_page(migrate, addr, newpage,
-                                               &migrate->src[i],
-                                               &migrate->dst[i]);
+                                               &migrate->src[i]);
                        continue;
                }
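Using mmu_notifier_range_init_migrate() tags the invalidation with the owner cookie from migrate->pgmap_owner, so a device driver's interval notifier can recognize invalidations caused by its own device-private migration and avoid waiting on them. A hedged driver-side sketch, assuming the v5.10-era field name migrate_pgmap_owner (drivers such as nouveau follow roughly this pattern):

```c
#include <linux/mmu_notifier.h>

/* Hypothetical owner cookie; the same pointer the driver passes as
 * migrate->pgmap_owner when it sets up the migration. */
static void *my_dev;

static bool my_range_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	/*
	 * Skip invalidations our own migration triggered; the migration
	 * code already tears down the device mappings it is replacing.
	 * The migrate_pgmap_owner field name is an assumption here.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->migrate_pgmap_owner == my_dev)
		return true;

	mmu_interval_set_seq(mni, cur_seq);
	/* ... invalidate device mappings for [range->start, range->end) ... */
	return true;
}
```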
 
@@ -3077,7 +3134,6 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
 
                remove_migration_ptes(page, newpage, false);
                unlock_page(page);
-               migrate->cpages--;
 
                if (is_zone_device_page(page))
                        put_page(page);