Merge branch 'pm-cpufreq'
[linux-2.6-microblaze.git] / mm / migrate.c
index 04a98bb..ee802cb 100644
@@ -62,7 +62,7 @@
  * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
  * undesirable, use migrate_prep_local()
  */
-int migrate_prep(void)
+void migrate_prep(void)
 {
        /*
         * Clear the LRU lists so pages can be isolated.
@@ -71,16 +71,12 @@ int migrate_prep(void)
         * pages that may be busy.
         */
        lru_add_drain_all();
-
-       return 0;
 }
 
 /* Do the necessary work of migrate_prep but not if it involves other CPUs */
-int migrate_prep_local(void)
+void migrate_prep_local(void)
 {
        lru_add_drain();
-
-       return 0;
 }
 
 int isolate_movable_page(struct page *page, isolate_mode_t mode)
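Since both helpers above could only ever return 0, dropping the return type lets call sites shed dead error handling. A minimal sketch, assuming a hypothetical caller rather than the in-tree hunks in mm/mempolicy.c and mm/migrate.c:

```c
#include <linux/migrate.h>

/*
 * Hypothetical call site (not a verbatim in-tree caller).  Before this
 * change it read:
 *
 *	err = migrate_prep();
 *	if (err)
 *		goto out;
 *
 * but the error branch was unreachable because the function always
 * returned 0.  With the void prototype the call collapses to a plain
 * statement.
 */
static void prepare_for_migration(void)
{
	migrate_prep();		/* drain per-CPU LRU caches on all CPUs */
}
```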
@@ -381,7 +377,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
        int expected_count = 1;
 
        /*
-        * Device public or private pages have an extra refcount as they are
+        * Device private pages have an extra refcount as they are
         * ZONE_DEVICE pages.
         */
        expected_count += is_device_private_page(page);
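The reworded comment reflects that only device-private memory still reaches this path; the MEMORY_DEVICE_PUBLIC page type was removed from the kernel in an earlier release. For orientation, a sketch of the helper the extra refcount leans on, paraphrased from memory rather than copied from this tree (see include/linux/mm.h for the authoritative definition):

```c
/*
 * Approximate shape of is_device_private_page() in this era of the
 * kernel.  It evaluates to 0 or 1, so adding it to expected_count
 * accounts for the one extra reference a device-private ZONE_DEVICE
 * page holds.
 */
static inline bool is_device_private_page(const struct page *page)
{
	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
		IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
		is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}
```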
@@ -503,7 +499,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
                        __dec_lruvec_state(old_lruvec, NR_SHMEM);
                        __inc_lruvec_state(new_lruvec, NR_SHMEM);
                }
-               if (dirty && mapping_cap_account_dirty(mapping)) {
+               if (dirty && mapping_can_writeback(mapping)) {
                        __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
                        __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
                        __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
@@ -1106,7 +1102,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
         * and treated as swapcache but it has no rmap yet.
         * Calling try_to_unmap() against a page->mapping==NULL page will
         * trigger a BUG.  So handle it here.
-        * 2. An orphaned page (see truncate_complete_page) might have
+        * 2. An orphaned page (see truncate_cleanup_page) might have
         * fs-private metadata. The page can be picked up due to memory
         * offlining.  Everywhere else except page reclaim, the page is
         * invisible to the vm, so the page can not be migrated.  So try to
@@ -1122,8 +1118,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
                /* Establish migration ptes */
                VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
                                page);
-               try_to_unmap(page,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+               try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
                page_was_mapped = 1;
        }
 
@@ -1169,13 +1164,14 @@ static int unmap_and_move(new_page_t get_new_page,
                                   free_page_t put_new_page,
                                   unsigned long private, struct page *page,
                                   int force, enum migrate_mode mode,
-                                  enum migrate_reason reason)
+                                  enum migrate_reason reason,
+                                  struct list_head *ret)
 {
        int rc = MIGRATEPAGE_SUCCESS;
        struct page *newpage = NULL;
 
        if (!thp_migration_supported() && PageTransHuge(page))
-               return -ENOMEM;
+               return -ENOSYS;
 
        if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
@@ -1206,7 +1202,14 @@ out:
                 * migrated will have kept its references and be restored.
                 */
                list_del(&page->lru);
+       }
 
+       /*
+        * If migration is successful, release the reference grabbed during
+        * isolation. Otherwise, restore the page to the right list unless
+        * we want to retry.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
                /*
                 * Compaction can migrate also non-LRU pages which are
                 * not accounted to NR_ISOLATED_*. They can be recognized
@@ -1215,40 +1218,16 @@ out:
                if (likely(!__PageMovable(page)))
                        mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
                                        page_is_file_lru(page), -thp_nr_pages(page));
-       }
 
-       /*
-        * If migration is successful, releases reference grabbed during
-        * isolation. Otherwise, restore the page to right list unless
-        * we want to retry.
-        */
-       if (rc == MIGRATEPAGE_SUCCESS) {
-               put_page(page);
-               if (reason == MR_MEMORY_FAILURE) {
+               if (reason != MR_MEMORY_FAILURE)
                        /*
-                        * Set PG_HWPoison on just freed page
-                        * intentionally. Although it's rather weird,
-                        * it's how HWPoison flag works at the moment.
+                        * We release the page in page_handle_poison.
                         */
-                       if (set_hwpoison_free_buddy_page(page))
-                               num_poisoned_pages_inc();
-               }
+                       put_page(page);
        } else {
-               if (rc != -EAGAIN) {
-                       if (likely(!__PageMovable(page))) {
-                               putback_lru_page(page);
-                               goto put_new;
-                       }
+               if (rc != -EAGAIN)
+                       list_add_tail(&page->lru, ret);
 
-                       lock_page(page);
-                       if (PageMovable(page))
-                               putback_movable_page(page);
-                       else
-                               __ClearPageIsolated(page);
-                       unlock_page(page);
-                       put_page(page);
-               }
-put_new:
                if (put_new_page)
                        put_new_page(newpage, private);
                else
@@ -1279,7 +1258,8 @@ put_new:
 static int unmap_and_move_huge_page(new_page_t get_new_page,
                                free_page_t put_new_page, unsigned long private,
                                struct page *hpage, int force,
-                               enum migrate_mode mode, int reason)
+                               enum migrate_mode mode, int reason,
+                               struct list_head *ret)
 {
        int rc = -EAGAIN;
        int page_was_mapped = 0;
@@ -1295,7 +1275,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
         * kicking migration.
         */
        if (!hugepage_migration_supported(page_hstate(hpage))) {
-               putback_active_hugepage(hpage);
+               list_move_tail(&hpage->lru, ret);
                return -ENOSYS;
        }
 
@@ -1333,34 +1313,37 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                goto put_anon;
 
        if (page_mapped(hpage)) {
-               /*
-                * try_to_unmap could potentially call huge_pmd_unshare.
-                * Because of this, take semaphore in write mode here and
-                * set TTU_RMAP_LOCKED to let lower levels know we have
-                * taken the lock.
-                */
-               mapping = hugetlb_page_mapping_lock_write(hpage);
-               if (unlikely(!mapping))
-                       goto unlock_put_anon;
+               bool mapping_locked = false;
+               enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
 
-               try_to_unmap(hpage,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
-                       TTU_RMAP_LOCKED);
+               if (!PageAnon(hpage)) {
+                       /*
+                        * In shared mappings, try_to_unmap could potentially
+                        * call huge_pmd_unshare.  Because of this, take
+                        * semaphore in write mode here and set TTU_RMAP_LOCKED
+                        * to let lower levels know we have taken the lock.
+                        */
+                       mapping = hugetlb_page_mapping_lock_write(hpage);
+                       if (unlikely(!mapping))
+                               goto unlock_put_anon;
+
+                       mapping_locked = true;
+                       ttu |= TTU_RMAP_LOCKED;
+               }
+
+               try_to_unmap(hpage, ttu);
                page_was_mapped = 1;
-               /*
-                * Leave mapping locked until after subsequent call to
-                * remove_migration_ptes()
-                */
+
+               if (mapping_locked)
+                       i_mmap_unlock_write(mapping);
        }
 
        if (!page_mapped(hpage))
                rc = move_to_new_page(new_hpage, hpage, mode);
 
-       if (page_was_mapped) {
+       if (page_was_mapped)
                remove_migration_ptes(hpage,
-                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true);
-               i_mmap_unlock_write(mapping);
-       }
+                       rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
 
 unlock_put_anon:
        unlock_page(new_hpage);
@@ -1377,8 +1360,10 @@ put_anon:
 out_unlock:
        unlock_page(hpage);
 out:
-       if (rc != -EAGAIN)
+       if (rc == MIGRATEPAGE_SUCCESS)
                putback_active_hugepage(hpage);
+       else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
+               list_move_tail(&hpage->lru, ret);
 
        /*
         * If migration was not successful and there's a freeing callback, use
@@ -1393,6 +1378,20 @@ out:
        return rc;
 }
 
+static inline int try_split_thp(struct page *page, struct page **page2,
+                               struct list_head *from)
+{
+       int rc = 0;
+
+       lock_page(page);
+       rc = split_huge_page_to_list(page, from);
+       unlock_page(page);
+       if (!rc)
+               list_safe_reset_next(page, *page2, lru);
+
+       return rc;
+}
+
 /*
  * migrate_pages - migrate the pages specified in a list, to the free pages
  *                supplied as the target for the page migration
@@ -1409,8 +1408,8 @@ out:
  *
  * The function returns after 10 attempts or if no pages are movable any more
  * because the list has become empty or no retryable pages exist any more.
- * The caller should call putback_movable_pages() to return pages to the LRU
- * or free list only if ret != 0.
+ * It is the caller's responsibility to call putback_movable_pages() to return
+ * pages to the LRU or free list only if ret != 0.
  *
  * Returns the number of pages that were not migrated, or an error code.
  */
@@ -1431,6 +1430,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
        struct page *page2;
        int swapwrite = current->flags & PF_SWAPWRITE;
        int rc, nr_subpages;
+       LIST_HEAD(ret_pages);
 
        if (!swapwrite)
                current->flags |= PF_SWAPWRITE;
@@ -1453,31 +1453,56 @@ retry:
                        if (PageHuge(page))
                                rc = unmap_and_move_huge_page(get_new_page,
                                                put_new_page, private, page,
-                                               pass > 2, mode, reason);
+                                               pass > 2, mode, reason,
+                                               &ret_pages);
                        else
                                rc = unmap_and_move(get_new_page, put_new_page,
                                                private, page, pass > 2, mode,
-                                               reason);
-
+                                               reason, &ret_pages);
+                       /*
+                        * The rules are:
+                        *      Success: non hugetlb page will be freed, hugetlb
+                        *               page will be put back
+                        *      -EAGAIN: stay on the from list
+                        *      -ENOMEM: stay on the from list
+                        *      Other errno: put on ret_pages list then splice to
+                        *                   from list
+                        */
                        switch(rc) {
+                       /*
+                        * THP migration might be unsupported or the
+                        * allocation could've failed so we should
+                        * retry on the same page with the THP split
+                        * to base pages.
+                        *
+                        * Head page is retried immediately and tail
+                        * pages are added to the tail of the list so
+                        * we encounter them after the rest of the list
+                        * is processed.
+                        */
+                       case -ENOSYS:
+                               /* THP migration is unsupported */
+                               if (is_thp) {
+                                       if (!try_split_thp(page, &page2, from)) {
+                                               nr_thp_split++;
+                                               goto retry;
+                                       }
+
+                                       nr_thp_failed++;
+                                       nr_failed += nr_subpages;
+                                       break;
+                               }
+
+                               /* Hugetlb migration is unsupported */
+                               nr_failed++;
+                               break;
                        case -ENOMEM:
                                /*
-                                * THP migration might be unsupported or the
-                                * allocation could've failed so we should
-                                * retry on the same page with the THP split
-                                * to base pages.
-                                *
-                                * Head page is retried immediately and tail
-                                * pages are added to the tail of the list so
-                                * we encounter them after the rest of the list
-                                * is processed.
+                                * When memory is low, don't bother to try to migrate
+                                * other pages, just exit.
                                 */
                                if (is_thp) {
-                                       lock_page(page);
-                                       rc = split_huge_page_to_list(page, from);
-                                       unlock_page(page);
-                                       if (!rc) {
-                                               list_safe_reset_next(page, page2, lru);
+                                       if (!try_split_thp(page, &page2, from)) {
                                                nr_thp_split++;
                                                goto retry;
                                        }
@@ -1505,7 +1530,7 @@ retry:
                                break;
                        default:
                                /*
-                                * Permanent failure (-EBUSY, -ENOSYS, etc.):
+                                * Permanent failure (-EBUSY, etc.):
                                 * unlike -EAGAIN case, the failed page is
                                 * removed from migration page list and not
                                 * retried in the next outer loop.
@@ -1524,6 +1549,12 @@ retry:
        nr_thp_failed += thp_retry;
        rc = nr_failed;
 out:
+       /*
+        * Put the permanently failed pages back on the migration list; they
+        * will be put back to the right list by the caller.
+        */
+       list_splice(&ret_pages, from);
+
        count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
        count_vm_events(PGMIGRATE_FAIL, nr_failed);
        count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
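The list_splice() above is what returns permanently failed pages to the caller. That hand-off is safe because the existing putback_movable_pages() helper already distinguishes every page type it may meet. An abridged, from-memory sketch of that helper (the real body in mm/migrate.c carries extra assertions and comments):

```c
/*
 * Abridged sketch of putback_movable_pages(): hugetlb pages go back to
 * the active hugepage list, non-LRU movable pages are handed back to
 * their driver, and ordinary pages return to the LRU with the isolated
 * counters corrected.
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page, *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		if (unlikely(__PageMovable(page))) {
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
```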
@@ -1699,7 +1730,7 @@ static int move_pages_and_store_status(struct mm_struct *mm, int node,
                 * Positive err means the number of failed
                 * pages to migrate.  Since we are going to
                 * abort and return the number of non-migrated
-                * pages, so need to incude the rest of the
+                * pages, so need to include the rest of the
                 * nr_pages that have not been attempted as
                 * well.
                 */
@@ -1869,33 +1900,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
        return nr_pages ? -EFAULT : 0;
 }
 
-/*
- * Move a list of pages in the address space of the currently executing
- * process.
- */
-static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
-                            const void __user * __user *pages,
-                            const int __user *nodes,
-                            int __user *status, int flags)
+static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
 {
        struct task_struct *task;
        struct mm_struct *mm;
-       int err;
-       nodemask_t task_nodes;
-
-       /* Check flags */
-       if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
-               return -EINVAL;
 
-       if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
-               return -EPERM;
+       /*
+        * There is no need to check if the current process has the right to
+        * modify the specified process when they are the same.
+        */
+       if (!pid) {
+               mmget(current->mm);
+               *mem_nodes = cpuset_mems_allowed(current);
+               return current->mm;
+       }
 
        /* Find the mm_struct */
        rcu_read_lock();
-       task = pid ? find_task_by_vpid(pid) : current;
+       task = find_task_by_vpid(pid);
        if (!task) {
                rcu_read_unlock();
-               return -ESRCH;
+               return ERR_PTR(-ESRCH);
        }
        get_task_struct(task);
 
@@ -1905,22 +1930,47 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
         */
        if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
                rcu_read_unlock();
-               err = -EPERM;
+               mm = ERR_PTR(-EPERM);
                goto out;
        }
        rcu_read_unlock();
 
-       err = security_task_movememory(task);
-       if (err)
+       mm = ERR_PTR(security_task_movememory(task));
+       if (IS_ERR(mm))
                goto out;
-
-       task_nodes = cpuset_mems_allowed(task);
+       *mem_nodes = cpuset_mems_allowed(task);
        mm = get_task_mm(task);
+out:
        put_task_struct(task);
-
        if (!mm)
+               mm = ERR_PTR(-EINVAL);
+       return mm;
+}
+
+/*
+ * Move a list of pages in the address space of the currently executing
+ * process.
+ */
+static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
+                            const void __user * __user *pages,
+                            const int __user *nodes,
+                            int __user *status, int flags)
+{
+       struct mm_struct *mm;
+       int err;
+       nodemask_t task_nodes;
+
+       /* Check flags */
+       if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
                return -EINVAL;
 
+       if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+               return -EPERM;
+
+       mm = find_mm_struct(pid, &task_nodes);
+       if (IS_ERR(mm))
+               return PTR_ERR(mm);
+
        if (nodes)
                err = do_pages_move(mm, task_nodes, nr_pages, pages,
                                    nodes, status, flags);
@@ -1929,10 +1979,6 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
 
        mmput(mm);
        return err;
-
-out:
-       put_task_struct(task);
-       return err;
 }
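Splitting the task lookup into find_mm_struct() moves the error reporting into the returned pointer: failures come back as ERR_PTR(-errno), success returns an mm_struct with a reference the caller must drop. A small hedged sketch of the resulting idiom; the consuming function is hypothetical (find_mm_struct() itself is static to this file):

```c
#include <linux/err.h>
#include <linux/nodemask.h>
#include <linux/sched/mm.h>

/* Hypothetical consumer showing the ERR_PTR / IS_ERR / PTR_ERR round
 * trip that replaces the old out:-label cleanup in kernel_move_pages(). */
static int act_on_mm_of(pid_t pid)
{
	nodemask_t nodes;
	struct mm_struct *mm;

	mm = find_mm_struct(pid, &nodes);
	if (IS_ERR(mm))
		return PTR_ERR(mm);	/* -ESRCH, -EPERM, -EINVAL, ... */

	/* ... operate on mm, constrained to "nodes" ... */

	mmput(mm);	/* drop the reference find_mm_struct() took */
	return 0;
}
```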
 
 SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
@@ -2051,6 +2097,17 @@ bool pmd_trans_migrating(pmd_t pmd)
        return PageLocked(page);
 }
 
+static inline bool is_shared_exec_page(struct vm_area_struct *vma,
+                                      struct page *page)
+{
+       if (page_mapcount(page) != 1 &&
+           (page_is_file_lru(page) || vma_is_shmem(vma)) &&
+           (vma->vm_flags & VM_EXEC))
+               return true;
+
+       return false;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
@@ -2068,8 +2125,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
         * Don't migrate file pages that are mapped in multiple processes
         * with execute permissions as they are probably shared libraries.
         */
-       if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
-           (vma->vm_flags & VM_EXEC))
+       if (is_shared_exec_page(vma, page))
                goto out;
 
        /*
@@ -2124,6 +2180,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
        int page_lru = page_is_file_lru(page);
        unsigned long start = address & HPAGE_PMD_MASK;
 
+       if (is_shared_exec_page(vma, page))
+               goto out;
+
        new_page = alloc_pages_node(node,
                (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
                HPAGE_PMD_ORDER);
@@ -2235,6 +2294,7 @@ out_fail:
 
 out_unlock:
        unlock_page(page);
+out:
        put_page(page);
        return 0;
 }
@@ -2674,7 +2734,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
  */
 static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
-       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
        const unsigned long npages = migrate->npages;
        const unsigned long start = migrate->start;
        unsigned long addr, i, restore = 0;
@@ -2834,8 +2894,7 @@ EXPORT_SYMBOL(migrate_vma_setup);
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
                                    unsigned long addr,
                                    struct page *page,
-                                   unsigned long *src,
-                                   unsigned long *dst)
+                                   unsigned long *src)
 {
        struct vm_area_struct *vma = migrate->vma;
        struct mm_struct *mm = vma->vm_mm;
@@ -2989,16 +3048,14 @@ void migrate_vma_pages(struct migrate_vma *migrate)
                        if (!notified) {
                                notified = true;
 
-                               mmu_notifier_range_init(&range,
-                                                       MMU_NOTIFY_CLEAR, 0,
-                                                       NULL,
-                                                       migrate->vma->vm_mm,
-                                                       addr, migrate->end);
+                               mmu_notifier_range_init_migrate(&range, 0,
+                                       migrate->vma, migrate->vma->vm_mm,
+                                       addr, migrate->end,
+                                       migrate->pgmap_owner);
                                mmu_notifier_invalidate_range_start(&range);
                        }
                        migrate_vma_insert_page(migrate, addr, newpage,
-                                               &migrate->src[i],
-                                               &migrate->dst[i]);
+                                               &migrate->src[i]);
                        continue;
                }
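Using mmu_notifier_range_init_migrate() tags the invalidation with the owner cookie from migrate->pgmap_owner, so a device driver's interval notifier can recognize invalidations caused by its own device-private migration and avoid waiting on them. A hedged driver-side sketch, assuming the v5.10-era field name migrate_pgmap_owner (drivers such as nouveau follow roughly this pattern):

```c
#include <linux/mmu_notifier.h>

/* Hypothetical owner cookie; the same pointer the driver passes as
 * migrate->pgmap_owner when it sets up the migration. */
static void *my_dev;

static bool my_range_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	/*
	 * Skip invalidations our own migration triggered; the migration
	 * code already tears down the device mappings it is replacing.
	 * The migrate_pgmap_owner field name is an assumption here.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->migrate_pgmap_owner == my_dev)
		return true;

	mmu_interval_set_seq(mni, cur_seq);
	/* ... invalidate device mappings for [range->start, range->end) ... */
	return true;
}
```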
 
@@ -3077,7 +3134,6 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
 
                remove_migration_ptes(page, newpage, false);
                unlock_page(page);
-               migrate->cpages--;
 
                if (is_zone_device_page(page))
                        put_page(page);