diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index fb0fdae..6c0185f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -442,18 +442,28 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
 static bool hugepage_vma_check(struct vm_area_struct *vma,
                               unsigned long vm_flags)
 {
-       if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-           (vm_flags & VM_NOHUGEPAGE) ||
+       /* Explicitly disabled through madvise. */
+       if ((vm_flags & VM_NOHUGEPAGE) ||
            test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                return false;
 
-       if (shmem_file(vma->vm_file) ||
-           (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
-            vma->vm_file &&
-            (vm_flags & VM_DENYWRITE))) {
+       /* Enabled via shmem mount options or sysfs settings. */
+       if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
                return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
                                HPAGE_PMD_NR);
        }
+
+       /* THP settings require madvise. */
+       if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+               return false;
+
+       /* Read-only file mappings need to be aligned for THP to work. */
+       if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
+           (vm_flags & VM_DENYWRITE)) {
+               return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+                               HPAGE_PMD_NR);
+       }
+
        if (!vma->anon_vma || vma->vm_ops)
                return false;
        if (vma_is_temporary_stack(vma))
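
Note the semantic change in this hunk: the shmem_file() test now runs before the VM_HUGEPAGE/khugepaged_always() gate, so a shmem mapping whose mount option or sysfs setting enables huge pages passes on to the alignment check even when the global THP policy is madvise-only. The IS_ALIGNED() test asks whether the virtual PFN of vm_start and the file page offset are congruent modulo HPAGE_PMD_NR, i.e. whether one PMD can map the range. A minimal standalone sketch of that predicate (userspace C; thp_aligned is a made-up name, constants assume x86-64 with 4K base pages):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define HPAGE_PMD_NR	512			/* 2M huge page / 4K base page */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

static bool thp_aligned(unsigned long vm_start, unsigned long vm_pgoff)
{
	/* virtual PFN of vm_start minus file offset, taken modulo 512 */
	return IS_ALIGNED((vm_start >> PAGE_SHIFT) - vm_pgoff, HPAGE_PMD_NR);
}

int main(void)
{
	printf("%d\n", thp_aligned(0x200000, 0));	/* 1: in phase */
	printf("%d\n", thp_aligned(0x200000, 1));	/* 0: out of phase */
	return 0;
}
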
@@ -471,7 +481,7 @@ int __khugepaged_enter(struct mm_struct *mm)
                return -ENOMEM;
 
        /* __khugepaged_exit() must not run from under us */
-       VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
+       VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
        if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
                free_mm_slot(mm_slot);
                return 0;
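
For reference, khugepaged_test_exit() at this revision is the same mm_users test the assertion used to open-code (its signature is visible in the first hunk header; body reproduced approximately from memory):

static inline int khugepaged_test_exit(struct mm_struct *mm)
{
	return atomic_read(&mm->mm_users) == 0;
}

The check is unchanged in effect, but now reads as "the mm must not already be exiting" and stays correct if the helper's definition evolves.
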
@@ -657,7 +667,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 *
                 * The page table that maps the page has been already unlinked
                 * from the page table tree and this process cannot get
-                * an additinal pin on the page.
+                * an additional pin on the page.
                 *
                 * New pins can come later if the page is shared across fork,
                 * but not from this process. The other process cannot write to
@@ -706,17 +716,17 @@ next:
                if (pte_write(pteval))
                        writable = true;
        }
-       if (likely(writable)) {
-               if (likely(referenced)) {
-                       result = SCAN_SUCCEED;
-                       trace_mm_collapse_huge_page_isolate(page, none_or_zero,
-                                                           referenced, writable, result);
-                       return 1;
-               }
-       } else {
+
+       if (unlikely(!writable)) {
                result = SCAN_PAGE_RO;
+       } else if (unlikely(!referenced)) {
+               result = SCAN_LACK_REFERENCED_PAGE;
+       } else {
+               result = SCAN_SUCCEED;
+               trace_mm_collapse_huge_page_isolate(page, none_or_zero,
+                                                   referenced, writable, result);
+               return 1;
        }
-
 out:
        release_pte_pages(pte, _pte, compound_pagelist);
        trace_mm_collapse_huge_page_isolate(page, none_or_zero,
@@ -799,7 +809,7 @@ static bool khugepaged_scan_abort(int nid)
         * If node_reclaim_mode is disabled, then no extra effort is made to
         * allocate memory locally.
         */
-       if (!node_reclaim_mode)
+       if (!node_reclaim_enabled())
                return false;
 
        /* If there is a count for this node already, it must be acceptable */
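
node_reclaim_enabled() wraps the raw node_reclaim_mode read; approximately (from include/linux/swap.h of this era, reproduced from memory):

static inline bool node_reclaim_enabled(void)
{
	/* Is any node_reclaim_mode bit set? */
	return node_reclaim_mode & (RECLAIM_ZONE | RECLAIM_WRITE | RECLAIM_UNMAP);
}
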
@@ -1118,10 +1128,10 @@ static void collapse_huge_page(struct mm_struct *mm,
        mmap_write_lock(mm);
        result = hugepage_vma_revalidate(mm, address, &vma);
        if (result)
-               goto out;
+               goto out_up_write;
        /* check if the pmd is still valid */
        if (mm_find_pmd(mm, address) != pmd)
-               goto out;
+               goto out_up_write;
 
        anon_vma_lock_write(vma->anon_vma);
 
@@ -1161,7 +1171,7 @@ static void collapse_huge_page(struct mm_struct *mm,
                spin_unlock(pmd_ptl);
                anon_vma_unlock_write(vma->anon_vma);
                result = SCAN_FAIL;
-               goto out;
+               goto out_up_write;
        }
 
        /*
@@ -1173,19 +1183,18 @@ static void collapse_huge_page(struct mm_struct *mm,
        __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
                        &compound_pagelist);
        pte_unmap(pte);
+       /*
+        * spin_lock() below is not the equivalent of smp_wmb(), but
+        * the smp_wmb() inside __SetPageUptodate() can be reused to
+        * avoid the copy_huge_page writes to become visible after
+        * the set_pmd_at() write.
+        */
        __SetPageUptodate(new_page);
        pgtable = pmd_pgtable(_pmd);
 
        _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
        _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
 
-       /*
-        * spin_lock() below is not the equivalent of smp_wmb(), so
-        * this is needed to avoid the copy_huge_page writes to become
-        * visible after the set_pmd_at() write.
-        */
-       smp_wmb();
-
        spin_lock(pmd_ptl);
        BUG_ON(!pmd_none(*pmd));
        page_add_new_anon_rmap(new_page, vma, address, true);
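
The deleted smp_wmb() is not lost: __SetPageUptodate() issues one before setting PG_uptodate, and since __collapse_huge_page_copy() runs before that call and set_pmd_at() runs after it, that barrier already keeps the copied data visible before the new PMD is. The helper, approximately (include/linux/page-flags.h of this era, reproduced from memory; the descriptive comment is added here):

static __always_inline void __SetPageUptodate(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	/* all preceding writes (the page-content copy) become visible first */
	smp_wmb();
	__set_bit(PG_uptodate, &page->flags);
}
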
@@ -1206,8 +1215,6 @@ out_nolock:
                mem_cgroup_uncharge(*hpage);
        trace_mm_collapse_huge_page(mm, isolated, result);
        return;
-out:
-       goto out_up_write;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -1264,10 +1271,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                                goto out_unmap;
                        }
                }
-               if (!pte_present(pteval)) {
-                       result = SCAN_PTE_NON_PRESENT;
-                       goto out_unmap;
-               }
                if (pte_uffd_wp(pteval)) {
                        /*
                         * Don't collapse the page if any of the small
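
The removed !pte_present() branch was dead code: earlier in this loop a swap PTE has already been handled via is_swap_pte() and an empty one via pte_none(), and a swap PTE is by definition exactly a non-none, non-present PTE, so any pteval reaching this point must be present. From include/linux/swapops.h:

static inline int is_swap_pte(pte_t pte)
{
	return !pte_none(pte) && !pte_present(pte);
}
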
@@ -1437,7 +1440,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
        int i;
 
        if (!vma || !vma->vm_file ||
-           vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+           !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE))
                return;
 
        /*
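
range_in_vma() expresses the same bounds check as the open-coded comparison it replaces: !(vm_start <= haddr && haddr + HPAGE_PMD_SIZE <= vm_end) is exactly vm_start > haddr || vm_end < haddr + HPAGE_PMD_SIZE. From include/linux/mm.h:

static inline bool range_in_vma(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	return (vma && vma->vm_start <= start && end <= vma->vm_end);
}

The helper tolerates a NULL vma on its own; the explicit !vma test still has to come first for the vma->vm_file dereference.
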
@@ -1523,16 +1526,16 @@ abort:
        goto drop_hpage;
 }
 
-static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 {
        struct mm_struct *mm = mm_slot->mm;
        int i;
 
        if (likely(mm_slot->nr_pte_mapped_thp == 0))
-               return 0;
+               return;
 
        if (!mmap_write_trylock(mm))
-               return -EBUSY;
+               return;
 
        if (unlikely(khugepaged_test_exit(mm)))
                goto out;
@@ -1543,7 +1546,6 @@ static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 out:
        mm_slot->nr_pte_mapped_thp = 0;
        mmap_write_unlock(mm);
-       return 0;
 }
 
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
@@ -1643,6 +1645,7 @@ static void collapse_file(struct mm_struct *mm,
        XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
        int nr_none = 0, result = SCAN_SUCCEED;
        bool is_shmem = shmem_file(file);
+       int nr;
 
        VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
        VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1854,11 +1857,12 @@ out_unlock:
                put_page(page);
                goto xa_unlocked;
        }
+       nr = thp_nr_pages(new_page);
 
        if (is_shmem)
-               __inc_lruvec_page_state(new_page, NR_SHMEM_THPS);
+               __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
        else {
-               __inc_lruvec_page_state(new_page, NR_FILE_THPS);
+               __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr);
                filemap_nr_thps_inc(mapping);
        }
 
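
NR_SHMEM_THPS and NR_FILE_THPS switch from being counted in huge pages to base pages, so the __inc calls become __mod with nr = thp_nr_pages(new_page), which is HPAGE_PMD_NR (512 with 4K base pages) for a PMD-sized THP. The helper, approximately (include/linux/huge_mm.h of this era, reproduced from memory):

static inline int thp_nr_pages(struct page *page)
{
	VM_BUG_ON_PGFLAGS(PageTail(page), page);
	if (PageHead(page))
		return HPAGE_PMD_NR;
	return 1;
}
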
@@ -2045,9 +2049,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
        BUILD_BUG();
 }
 
-static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 {
-       return 0;
 }
 #endif
 
@@ -2193,11 +2196,9 @@ static void khugepaged_do_scan(void)
 {
        struct page *hpage = NULL;
        unsigned int progress = 0, pass_through_head = 0;
-       unsigned int pages = khugepaged_pages_to_scan;
+       unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
        bool wait = true;
 
-       barrier(); /* write khugepaged_pages_to_scan to local stack */
-
        lru_add_drain_all();
 
        while (progress < pages) {
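
READ_ONCE() subsumes the barrier(): khugepaged_pages_to_scan can be rewritten through sysfs while khugepaged runs, and the scanner only needs one consistent snapshot per pass. The old code relied on a compiler barrier to force the value onto the local stack (per the removed comment); READ_ONCE() makes the single, tear-free load explicit. A userspace analogue of the pattern, as a minimal sketch (C11 relaxed atomics standing in for READ_ONCE/WRITE_ONCE; all names made up):

#include <stdatomic.h>

static _Atomic unsigned int pages_to_scan = 4096;	/* tuned at runtime */

/* writer side, e.g. a sysfs store handler */
void set_pages_to_scan(unsigned int v)
{
	atomic_store_explicit(&pages_to_scan, v, memory_order_relaxed);
}

/* scanner side: snapshot once, use it for the whole pass */
unsigned int snapshot_pages_to_scan(void)
{
	return atomic_load_explicit(&pages_to_scan, memory_order_relaxed);
}
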