diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ae907a9..63ed6b2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -7,6 +7,7 @@
 
 #include <linux/mm.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/sched/coredump.h>
 #include <linux/sched/numa_balancing.h>
 #include <linux/highmem.h>
@@ -77,18 +78,18 @@ bool transparent_hugepage_enabled(struct vm_area_struct *vma)
        return false;
 }
 
-static struct page *get_huge_zero_page(void)
+static bool get_huge_zero_page(void)
 {
        struct page *zero_page;
 retry:
        if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
-               return READ_ONCE(huge_zero_page);
+               return true;
 
        zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
                        HPAGE_PMD_ORDER);
        if (!zero_page) {
                count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
-               return NULL;
+               return false;
        }
        count_vm_event(THP_ZERO_PAGE_ALLOC);
        preempt_disable();
@@ -101,7 +102,7 @@ retry:
        /* We take additional reference here. It will be put back by shrinker */
        atomic_set(&huge_zero_refcount, 2);
        preempt_enable();
-       return READ_ONCE(huge_zero_page);
+       return true;
 }
 
 static void put_huge_zero_page(void)
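The switch to a bool return works because callers of this path no longer need the page pointer itself: they pin the shared huge zero page by bumping huge_zero_refcount, and can read huge_zero_page separately once pinned. Note the lifecycle: the allocation path publishes the page with a refcount of 2, one for the caller and one extra that the shrinker puts back later. Below is a minimal userspace sketch of the same inc-not-zero pin pattern; the names (get_shared_obj, obj_refcount, shared_obj) are hypothetical and this is an illustration, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

static _Atomic int obj_refcount;	/* 0 means "no object published" */
static void *_Atomic shared_obj;

static bool get_shared_obj(void)
{
	void *obj, *expected;
	int old;

retry:
	/* Fast path: pin the object via inc-not-zero on its refcount. */
	old = atomic_load(&obj_refcount);
	while (old > 0)
		if (atomic_compare_exchange_weak(&obj_refcount, &old, old + 1))
			return true;

	obj = calloc(1, 4096);
	if (!obj)
		return false;

	/* Publish; if we lose the race, free ours and pin the winner's. */
	expected = NULL;
	if (!atomic_compare_exchange_strong(&shared_obj, &expected, obj)) {
		free(obj);
		goto retry;
	}
	/* One reference for the caller, one extra for a reaper to drop. */
	atomic_store(&obj_refcount, 2);
	return true;
}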
@@ -624,14 +625,12 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
                /* Deliver the page fault to userland */
                if (userfaultfd_missing(vma)) {
-                       vm_fault_t ret2;
-
                        spin_unlock(vmf->ptl);
                        put_page(page);
                        pte_free(vma->vm_mm, pgtable);
-                       ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
-                       VM_BUG_ON(ret2 & VM_FAULT_FALLBACK);
-                       return ret2;
+                       ret = handle_userfault(vmf, VM_UFFD_MISSING);
+                       VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+                       return ret;
                }
 
                entry = mk_huge_pmd(page, vma->vm_page_prot);
@@ -1293,7 +1292,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
        }
 
        page = pmd_page(orig_pmd);
-       VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+       VM_BUG_ON_PAGE(!PageHead(page), page);
 
        /* Lock page for reuse_swap_page() */
        if (!trylock_page(page)) {
@@ -1464,12 +1463,6 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
         */
        page_locked = trylock_page(page);
        target_nid = mpol_misplaced(page, vma, haddr);
-       if (target_nid == NUMA_NO_NODE) {
-               /* If the page was locked, there are no parallel migrations */
-               if (page_locked)
-                       goto clear_pmdnuma;
-       }
-
        /* Migration could have started since the pmd_trans_migrating check */
        if (!page_locked) {
                page_nid = NUMA_NO_NODE;
@@ -1478,6 +1471,11 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                spin_unlock(vmf->ptl);
                put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
                goto out;
+       } else if (target_nid == NUMA_NO_NODE) {
+               /* There are no parallel migrations and the page is in the
+                * right node. Clear the numa hinting info in this pmd.
+                */
+               goto clear_pmdnuma;
        }
 
        /*
@@ -1696,7 +1694,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
                        VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
                        entry = pmd_to_swp_entry(orig_pmd);
-                       page = pfn_to_page(swp_offset(entry));
+                       page = migration_entry_to_page(entry);
                        flush_needed = 0;
                } else
                        WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
@@ -1794,8 +1792,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 /*
  * Returns
  *  - 0 if PMD could not be locked
- *  - 1 if PMD was locked but protections unchange and TLB flush unnecessary
- *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
+ *  - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
+ *  - HPAGE_PMD_NR if protections changed and TLB flush necessary
  */
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, pgprot_t newprot, unsigned long cp_flags)
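For context on how the three return values are consumed: a caller such as mprotect's change_pmd_range() treats any nonzero value as "the huge pmd was fully handled" and accounts a TLB-relevant change only for HPAGE_PMD_NR, while 0 means the pmd was not (or no longer) huge and must be processed at the pte level. A paraphrased sketch, not the verbatim mm/mprotect.c code; pages, next and the next_pmd label come from the surrounding loop:

	/* Inside a change_pmd_range()-style loop over pmds. */
	if (pmd_trans_huge(*pmd) && next - addr == HPAGE_PMD_SIZE) {
		int nr_ptes = change_huge_pmd(vma, pmd, addr, newprot,
					      cp_flags);

		if (nr_ptes) {
			if (nr_ptes == HPAGE_PMD_NR)
				pages += HPAGE_PMD_NR;	/* changed: TLB flush due */

			/* 1: locked but unchanged, nothing to flush */
			goto next_pmd;	/* huge pmd fully handled */
		}
		/*
		 * 0: the pmd could not be locked (it is no longer huge);
		 * fall through and process it at the pte level.
		 */
	}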
@@ -2104,7 +2102,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                swp_entry_t entry;
 
                entry = pmd_to_swp_entry(old_pmd);
-               page = pfn_to_page(swp_offset(entry));
+               page = migration_entry_to_page(entry);
                write = is_write_migration_entry(entry);
                young = false;
                soft_dirty = pmd_swp_soft_dirty(old_pmd);
@@ -2303,44 +2301,38 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
        __split_huge_pmd(vma, pmd, address, freeze, page);
 }
 
+static inline void split_huge_pmd_if_needed(struct vm_area_struct *vma, unsigned long address)
+{
+       /*
+        * If the new address isn't hpage aligned and it could previously
+        * contain a hugepage: check if we need to split a huge pmd.
+        */
+       if (!IS_ALIGNED(address, HPAGE_PMD_SIZE) &&
+           range_in_vma(vma, ALIGN_DOWN(address, HPAGE_PMD_SIZE),
+                        ALIGN(address, HPAGE_PMD_SIZE)))
+               split_huge_pmd_address(vma, address, false, NULL);
+}
+
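With a 2 MiB HPAGE_PMD_SIZE, the helper's arithmetic pans out as in the small standalone program below: a huge pmd can only span the surrounding aligned 2 MiB window, so a split is attempted only when that whole window lies inside the VMA (the range_in_vma() check). Userspace C, with macro definitions assumed to match the kernel's and an example address:

#include <stdio.h>

#define HPAGE_PMD_SIZE		(2UL << 20)	/* 2 MiB, e.g. x86-64 THP */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long addr = 0x201000UL;	/* not 2 MiB aligned */

	/* Prints: aligned=0 window=[0x200000, 0x400000) */
	printf("aligned=%d window=[0x%lx, 0x%lx)\n",
	       (int)IS_ALIGNED(addr, HPAGE_PMD_SIZE),
	       ALIGN_DOWN(addr, HPAGE_PMD_SIZE),
	       ALIGN(addr, HPAGE_PMD_SIZE));
	return 0;
}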
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
                             unsigned long start,
                             unsigned long end,
                             long adjust_next)
 {
-       /*
-        * If the new start address isn't hpage aligned and it could
-        * previously contain an hugepage: check if we need to split
-        * an huge pmd.
-        */
-       if (start & ~HPAGE_PMD_MASK &&
-           (start & HPAGE_PMD_MASK) >= vma->vm_start &&
-           (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-               split_huge_pmd_address(vma, start, false, NULL);
+       /* Check if we need to split start first. */
+       split_huge_pmd_if_needed(vma, start);
 
-       /*
-        * If the new end address isn't hpage aligned and it could
-        * previously contain an hugepage: check if we need to split
-        * an huge pmd.
-        */
-       if (end & ~HPAGE_PMD_MASK &&
-           (end & HPAGE_PMD_MASK) >= vma->vm_start &&
-           (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-               split_huge_pmd_address(vma, end, false, NULL);
+       /* Check if we need to split end next. */
+       split_huge_pmd_if_needed(vma, end);
 
        /*
-        * If we're also updating the vma->vm_next->vm_start, if the new
-        * vm_next->vm_start isn't hpage aligned and it could previously
-        * contain an hugepage: check if we need to split an huge pmd.
+        * If we're also updating the vma->vm_next->vm_start,
+        * check if we need to split it.
         */
        if (adjust_next > 0) {
                struct vm_area_struct *next = vma->vm_next;
                unsigned long nstart = next->vm_start;
                nstart += adjust_next;
-               if (nstart & ~HPAGE_PMD_MASK &&
-                   (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
-                   (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-                       split_huge_pmd_address(next, nstart, false, NULL);
+               split_huge_pmd_if_needed(next, nstart);
        }
 }
 
@@ -2477,7 +2469,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                xa_lock(&swap_cache->i_pages);
        }
 
-       /* lock lru list/PageCompound, ref freezed by page_ref_freeze */
+       /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
        lruvec = lock_page_lruvec(head);
 
        for (i = nr - 1; i >= 1; i--) {
@@ -2838,8 +2830,8 @@ void deferred_split_huge_page(struct page *page)
                ds_queue->split_queue_len++;
 #ifdef CONFIG_MEMCG
                if (memcg)
-                       memcg_set_shrinker_bit(memcg, page_to_nid(page),
-                                              deferred_split_shrinker.id);
+                       set_shrinker_bit(memcg, page_to_nid(page),
+                                        deferred_split_shrinker.id);
 #endif
        }
        spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
@@ -2924,16 +2916,14 @@ static struct shrinker deferred_split_shrinker = {
 };
 
 #ifdef CONFIG_DEBUG_FS
-static int split_huge_pages_set(void *data, u64 val)
+static void split_huge_pages_all(void)
 {
        struct zone *zone;
        struct page *page;
        unsigned long pfn, max_zone_pfn;
        unsigned long total = 0, split = 0;
 
-       if (val != 1)
-               return -EINVAL;
-
+       pr_debug("Split all THPs\n");
        for_each_populated_zone(zone) {
                max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
@@ -2957,15 +2947,243 @@ static int split_huge_pages_set(void *data, u64 val)
                        unlock_page(page);
 next:
                        put_page(page);
+                       cond_resched();
                }
        }
 
-       pr_info("%lu of %lu THP split\n", split, total);
+       pr_debug("%lu of %lu THP split\n", split, total);
+}
 
-       return 0;
+static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
+{
+       return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) ||
+                   is_vm_hugetlb_page(vma);
+}
+
+static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
+                               unsigned long vaddr_end)
+{
+       int ret = 0;
+       struct task_struct *task;
+       struct mm_struct *mm;
+       unsigned long total = 0, split = 0;
+       unsigned long addr;
+
+       vaddr_start &= PAGE_MASK;
+       vaddr_end &= PAGE_MASK;
+
+       /* Find the task_struct from pid */
+       rcu_read_lock();
+       task = find_task_by_vpid(pid);
+       if (!task) {
+               rcu_read_unlock();
+               ret = -ESRCH;
+               goto out;
+       }
+       get_task_struct(task);
+       rcu_read_unlock();
+
+       /* Find the mm_struct */
+       mm = get_task_mm(task);
+       put_task_struct(task);
+
+       if (!mm) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n",
+                pid, vaddr_start, vaddr_end);
+
+       mmap_read_lock(mm);
+       /*
+        * always increase addr by PAGE_SIZE, since we could have a PTE page
+        * table filled with PTE-mapped THPs, each of which is distinct.
+        */
+       for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
+               struct vm_area_struct *vma = find_vma(mm, addr);
+               unsigned int follflags;
+               struct page *page;
+
+               if (!vma || addr < vma->vm_start)
+                       break;
+
+               /* skip special VMA and hugetlb VMA */
+               if (vma_not_suitable_for_thp_split(vma)) {
+                       addr = vma->vm_end;
+                       continue;
+               }
+
+               /* FOLL_DUMP to ignore special (like zero) pages */
+               follflags = FOLL_GET | FOLL_DUMP;
+               page = follow_page(vma, addr, follflags);
+
+               if (IS_ERR(page))
+                       continue;
+               if (!page)
+                       continue;
+
+               if (!is_transparent_hugepage(page))
+                       goto next;
+
+               total++;
+               if (!can_split_huge_page(compound_head(page), NULL))
+                       goto next;
+
+               if (!trylock_page(page))
+                       goto next;
+
+               if (!split_huge_page(page))
+                       split++;
+
+               unlock_page(page);
+next:
+               put_page(page);
+               cond_resched();
+       }
+       mmap_read_unlock(mm);
+       mmput(mm);
+
+       pr_debug("%lu of %lu THP split\n", split, total);
+
+out:
+       return ret;
+}
+
+static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
+                               pgoff_t off_end)
+{
+       struct filename *file;
+       struct file *candidate;
+       struct address_space *mapping;
+       int ret = -EINVAL;
+       pgoff_t index;
+       int nr_pages = 1;
+       unsigned long total = 0, split = 0;
+
+       file = getname_kernel(file_path);
+       if (IS_ERR(file))
+               return ret;
+
+       candidate = file_open_name(file, O_RDONLY, 0);
+       if (IS_ERR(candidate))
+               goto out;
+
+       pr_debug("split file-backed THPs in file: %s, page offset: [0x%lx - 0x%lx]\n",
+                file_path, off_start, off_end);
+
+       mapping = candidate->f_mapping;
+
+       for (index = off_start; index < off_end; index += nr_pages) {
+               struct page *fpage = pagecache_get_page(mapping, index,
+                                               FGP_ENTRY | FGP_HEAD, 0);
+
+               nr_pages = 1;
+               if (xa_is_value(fpage) || !fpage)
+                       continue;
+
+               if (!is_transparent_hugepage(fpage))
+                       goto next;
+
+               total++;
+               nr_pages = thp_nr_pages(fpage);
+
+               if (!trylock_page(fpage))
+                       goto next;
+
+               if (!split_huge_page(fpage))
+                       split++;
+
+               unlock_page(fpage);
+next:
+               put_page(fpage);
+               cond_resched();
+       }
+
+       filp_close(candidate, NULL);
+       ret = 0;
+
+       pr_debug("%lu of %lu file-backed THP split\n", split, total);
+out:
+       putname(file);
+       return ret;
 }
-DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set,
-               "%llu\n");
+
+#define MAX_INPUT_BUF_SZ 255
+
+static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
+                               size_t count, loff_t *ppos)
+{
+       static DEFINE_MUTEX(split_debug_mutex);
+       ssize_t ret;
+       /* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+       char input_buf[MAX_INPUT_BUF_SZ];
+       int pid;
+       unsigned long vaddr_start, vaddr_end;
+
+       ret = mutex_lock_interruptible(&split_debug_mutex);
+       if (ret)
+               return ret;
+
+       ret = -EFAULT;
+
+       memset(input_buf, 0, MAX_INPUT_BUF_SZ);
+       if (copy_from_user(input_buf, buf, min_t(size_t, count, MAX_INPUT_BUF_SZ)))
+               goto out;
+
+       input_buf[MAX_INPUT_BUF_SZ - 1] = '\0';
+
+       if (input_buf[0] == '/') {
+               char *tok;
+               char *buf = input_buf;
+               char file_path[MAX_INPUT_BUF_SZ];
+               pgoff_t off_start = 0, off_end = 0;
+               size_t input_len = strlen(input_buf);
+
+               tok = strsep(&buf, ",");
+               if (tok) {
+                       strncpy(file_path, tok, MAX_INPUT_BUF_SZ);
+               } else {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
+               if (ret != 2) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               ret = split_huge_pages_in_file(file_path, off_start, off_end);
+               if (!ret)
+                       ret = input_len;
+
+               goto out;
+       }
+
+       ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+       if (ret == 1 && pid == 1) {
+               split_huge_pages_all();
+               ret = strlen(input_buf);
+               goto out;
+       } else if (ret != 3) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+       if (!ret)
+               ret = strlen(input_buf);
+out:
+       mutex_unlock(&split_debug_mutex);
+       return ret;
+}
+
+static const struct file_operations split_huge_pages_fops = {
+       .owner   = THIS_MODULE,
+       .write   = split_huge_pages_write,
+       .llseek  = no_llseek,
+};
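Putting the parser together, the new debugfs file accepts three input forms: a bare "1" to split every THP system-wide, "<pid>,<vaddr_start>,<vaddr_end>" with 0x-prefixed addresses for an anonymous range, and "/absolute/path,<off_start>,<off_end>" with 0x-prefixed page offsets for a file-backed range. A minimal userspace exerciser follows; the pid, path and ranges are made-up examples, and the file is created as split_huge_pages under the debugfs mount, typically /sys/kernel/debug:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical pid, 2 MiB anonymous range, and file offsets. */
	const char *cmds[] = {
		"1",					/* split all THPs */
		"1234,0x7f0000000000,0x7f0000200000",	/* pid, vaddr range */
		"/mnt/data/bigfile,0x0,0x200",		/* path, page offsets */
	};
	int fd = open("/sys/kernel/debug/split_huge_pages", O_WRONLY);

	if (fd < 0)
		return 1;
	for (unsigned int i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++)
		if (write(fd, cmds[i], strlen(cmds[i])) < 0)
			perror("split_huge_pages");
	close(fd);
	return 0;
}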
 
 static int __init split_huge_pages_debugfs(void)
 {