mm/memory_hotplug: avoid calling zone_intersects() for ZONE_NORMAL
[linux-2.6-microblaze.git] / mm/huge_memory.c
index 406a3c2..88c83c8 100644
@@ -34,6 +34,7 @@
 #include <linux/oom.h>
 #include <linux/numa.h>
 #include <linux/page_owner.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -1766,17 +1767,28 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        }
 #endif
 
-       /*
-        * Avoid trapping faults against the zero page. The read-only
-        * data is likely to be read-cached on the local CPU and
-        * local/remote hits to the zero page are not interesting.
-        */
-       if (prot_numa && is_huge_zero_pmd(*pmd))
-               goto unlock;
+       if (prot_numa) {
+               struct page *page;
+               /*
+                * Avoid trapping faults against the zero page. The read-only
+                * data is likely to be read-cached on the local CPU and
+                * local/remote hits to the zero page are not interesting.
+                */
+               if (is_huge_zero_pmd(*pmd))
+                       goto unlock;
 
-       if (prot_numa && pmd_protnone(*pmd))
-               goto unlock;
+               if (pmd_protnone(*pmd))
+                       goto unlock;
 
+               page = pmd_page(*pmd);
+               /*
+                * Skip scanning top tier node if normal numa
+                * balancing is disabled
+                */
+               if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+                   node_is_toptier(page_to_nid(page)))
+                       goto unlock;
+       }
        /*
         * In case prot_numa, we are under mmap_read_lock(mm). It's critical
         * to not clear pmd intermittently to avoid race with MADV_DONTNEED
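
The branch added above only changes behavior when memory-tiering mode is enabled without normal NUMA balancing: pages already resident on a top-tier node are not made PROT_NONE, so no NUMA hinting faults are taken on them, while slow-tier pages still get scanned for promotion. A minimal user-space sketch of that decision, assuming the kernel's flag values (NUMA_BALANCING_NORMAL is 0x1, NUMA_BALANCING_MEMORY_TIERING is 0x2) and a stubbed node_is_toptier():

#include <stdbool.h>
#include <stdio.h>

#define NUMA_BALANCING_NORMAL         0x1
#define NUMA_BALANCING_MEMORY_TIERING 0x2

/* Stub for the sketch; in the kernel, top-tier roughly means a node
 * with local CPUs (fast DRAM) rather than a slower memory-only tier. */
static bool node_is_toptier(int nid)
{
	return nid == 0;
}

/* Mirrors the new check: skip protecting the PMD when only tiering-mode
 * balancing is on and the page already sits on a fast node. */
static bool skip_prot_numa(unsigned int mode, int nid)
{
	return !(mode & NUMA_BALANCING_NORMAL) && node_is_toptier(nid);
}

int main(void)
{
	printf("%d\n", skip_prot_numa(NUMA_BALANCING_MEMORY_TIERING, 0)); /* 1: skip */
	printf("%d\n", skip_prot_numa(NUMA_BALANCING_MEMORY_TIERING, 1)); /* 0: scan */
	printf("%d\n", skip_prot_numa(NUMA_BALANCING_NORMAL, 0));         /* 0: scan */
	return 0;
}
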
@@ -2055,9 +2067,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                young = pmd_young(old_pmd);
                soft_dirty = pmd_soft_dirty(old_pmd);
                uffd_wp = pmd_uffd_wp(old_pmd);
+               VM_BUG_ON_PAGE(!page_count(page), page);
+               page_ref_add(page, HPAGE_PMD_NR - 1);
        }
-       VM_BUG_ON_PAGE(!page_count(page), page);
-       page_ref_add(page, HPAGE_PMD_NR - 1);
 
        /*
         * Withdraw the table only after we mark the pmd entry invalid.
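
For context on the hunk above: splitting fans one PMD-level mapping out into HPAGE_PMD_NR PTE-level mappings, and each mapping pins its own page reference, hence the HPAGE_PMD_NR - 1 bump when a present PMD is split; the move scopes that bump to the non-migration branch. A toy sketch of the arithmetic, assuming HPAGE_PMD_NR = 512 (a 2 MiB PMD over 4 KiB base pages, as on x86-64):

#include <assert.h>

#define HPAGE_PMD_NR 512	/* 2 MiB PMD / 4 KiB base pages, x86-64 */

int main(void)
{
	int page_refs = 1;		/* one ref pinned by the PMD mapping */
	page_refs += HPAGE_PMD_NR - 1;	/* split: one ref per PTE mapping    */
	assert(page_refs == HPAGE_PMD_NR);
	return 0;
}
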
@@ -2953,7 +2965,6 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
         */
        for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
                struct vm_area_struct *vma = find_vma(mm, addr);
-               unsigned int follflags;
                struct page *page;
 
                if (!vma || addr < vma->vm_start)
@@ -2966,8 +2977,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
                }
 
                /* FOLL_DUMP to ignore special (like zero) pages */
-               follflags = FOLL_GET | FOLL_DUMP;
-               page = follow_page(vma, addr, follflags);
+               page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
 
                if (IS_ERR(page))
                        continue;
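
split_huge_pages_pid() implements the pid-based mode of the debugfs split interface. A hedged user-space sketch of driving the loop above, assuming the "<pid>,<vaddr_start>,<vaddr_end>" input format of /sys/kernel/debug/split_huge_pages with hex addresses; the pid and range below are placeholders:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/split_huge_pages", "w");

	if (!f)
		return 1;
	/* Ask the kernel to split any THPs mapped in this range of
	 * process 1234; the loop above then walks it PAGE_SIZE at a
	 * time, and follow_page(..., FOLL_GET | FOLL_DUMP) skips holes
	 * and special pages such as the zero page. */
	fprintf(f, "1234,0x700000000000,0x700000200000");
	fclose(f);
	return 0;
}
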
@@ -3197,7 +3207,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
        if (pmd_swp_uffd_wp(*pvmw->pmd))
                pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
 
-       flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
        if (PageAnon(new))
                page_add_anon_rmap(new, vma, mmun_start, true);
        else
@@ -3205,6 +3214,8 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
        set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
        if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
                mlock_vma_page(new);
+
+       /* No need to invalidate - it was non-present before */
        update_mmu_cache_pmd(vma, address, pvmw->pmd);
 }
 #endif
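
The dropped flush_cache_range() and the new comment record the same invariant: the PMD held a non-present migration entry, so no CPU could have cached a translation for that mapping, and after set_pmd_at() only a preload via update_mmu_cache_pmd() is needed, not an invalidation. A toy model of that reasoning, with a one-slot stand-in for the TLB:

#include <assert.h>
#include <stdbool.h>

/* Toy TLB: a translation can only be cached while an entry is present. */
struct tlb {
	bool has_stale_entry;
};

int main(void)
{
	struct tlb tlb = { .has_stale_entry = false };
	bool pmd_present = false;	/* migration entry: non-present */

	/* Faults on a migration entry wait for migration to finish, so
	 * nothing is ever cached for the old mapping. */
	assert(!pmd_present && !tlb.has_stale_entry);

	pmd_present = true;		/* set_pmd_at(): real entry installed */

	/* update_mmu_cache_pmd() may now preload; there is no stale
	 * translation to invalidate first. */
	assert(pmd_present && !tlb.has_stale_entry);
	return 0;
}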