Merge tag 'xtensa-20220416' of https://github.com/jcmvbkbc/linux-xtensa
[linux-2.6-microblaze.git] / mm / memory-failure.c
index 97a9ed8..dcb6bb9 100644 (file)
@@ -130,12 +130,6 @@ static int hwpoison_filter_dev(struct page *p)
            hwpoison_filter_dev_minor == ~0U)
                return 0;
 
-       /*
-        * page_mapping() does not accept slab pages.
-        */
-       if (PageSlab(p))
-               return -EINVAL;
-
        mapping = page_mapping(p);
        if (mapping == NULL || mapping->host == NULL)
                return -EINVAL;
@@ -258,16 +252,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
        pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
                        pfn, t->comm, t->pid);
 
-       if (flags & MF_ACTION_REQUIRED) {
-               if (t == current)
-                       ret = force_sig_mceerr(BUS_MCEERR_AR,
-                                        (void __user *)tk->addr, addr_lsb);
-               else
-                       /* Signal other processes sharing the page if they have PF_MCE_EARLY set. */
-                       ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
-                               addr_lsb, t);
-       } else {
+       if ((flags & MF_ACTION_REQUIRED) && (t == current))
+               ret = force_sig_mceerr(BUS_MCEERR_AR,
+                                (void __user *)tk->addr, addr_lsb);
+       else
                /*
+                * Signal other processes sharing the page if they have
+                * PF_MCE_EARLY set.
                 * Don't use force here, it's convenient if the signal
                 * can be temporarily blocked.
                 * This could cause a loop when the user sets SIGBUS
@@ -275,7 +266,6 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
                 */
                ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
                                      addr_lsb, t);  /* synchronous? */
-       }
        if (ret < 0)
                pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
                        t->comm, t->pid, ret);
@@ -315,6 +305,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
        pmd_t *pmd;
        pte_t *pte;
 
+       VM_BUG_ON_VMA(address == -EFAULT, vma);
        pgd = pgd_offset(vma->vm_mm, address);
        if (!pgd_present(*pgd))
                return 0;
@@ -487,12 +478,13 @@ static struct task_struct *task_early_kill(struct task_struct *tsk,
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
                                int force_early)
 {
+       struct folio *folio = page_folio(page);
        struct vm_area_struct *vma;
        struct task_struct *tsk;
        struct anon_vma *av;
        pgoff_t pgoff;
 
-       av = page_lock_anon_vma_read(page);
+       av = folio_lock_anon_vma_read(folio);
        if (av == NULL) /* Not actually mapped anymore */
                return;
 
@@ -707,8 +699,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
                              (void *)&priv);
        if (ret == 1 && priv.tk.addr)
                kill_proc(&priv.tk, pfn, flags);
+       else
+               ret = 0;
        mmap_read_unlock(p->mm);
-       return ret ? -EFAULT : -EHWPOISON;
+       return ret > 0 ? -EHWPOISON : -EFAULT;
 }
 
 static const char *action_name[] = {
@@ -739,6 +733,7 @@ static const char * const action_page_types[] = {
        [MF_MSG_BUDDY]                  = "free buddy page",
        [MF_MSG_DAX]                    = "dax page",
        [MF_MSG_UNSPLIT_THP]            = "unsplit thp",
+       [MF_MSG_DIFFERENT_PAGE_SIZE]    = "different page size",
        [MF_MSG_UNKNOWN]                = "unknown page",
 };
 
@@ -1182,12 +1177,18 @@ void ClearPageHWPoisonTakenOff(struct page *page)
  * does not return true for hugetlb or device memory pages, so it's assumed
  * to be called only in the context where we never have such pages.
  */
-static inline bool HWPoisonHandlable(struct page *page)
+static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
 {
-       return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
+       bool movable = false;
+
+       /* Soft offline could migrate non-LRU movable pages */
+       if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+               movable = true;
+
+       return movable || PageLRU(page) || is_free_buddy_page(page);
 }
 
-static int __get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page, unsigned long flags)
 {
        struct page *head = compound_head(page);
        int ret = 0;
@@ -1202,7 +1203,7 @@ static int __get_hwpoison_page(struct page *page)
         * for any unsupported type of page in order to reduce the risk of
         * unexpected races caused by taking a page refcount.
         */
-       if (!HWPoisonHandlable(head))
+       if (!HWPoisonHandlable(head, flags))
                return -EBUSY;
 
        if (get_page_unless_zero(head)) {
@@ -1227,7 +1228,7 @@ static int get_any_page(struct page *p, unsigned long flags)
 
 try_again:
        if (!count_increased) {
-               ret = __get_hwpoison_page(p);
+               ret = __get_hwpoison_page(p, flags);
                if (!ret) {
                        if (page_count(p)) {
                                /* We raced with an allocation, retry. */
@@ -1255,7 +1256,7 @@ try_again:
                }
        }
 
-       if (PageHuge(p) || HWPoisonHandlable(p)) {
+       if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
                ret = 1;
        } else {
                /*
@@ -1347,6 +1348,7 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                                  int flags, struct page *hpage)
 {
+       struct folio *folio = page_folio(hpage);
        enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
        struct address_space *mapping;
        LIST_HEAD(tokill);
@@ -1411,26 +1413,22 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
        if (kill)
                collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
 
-       if (!PageHuge(hpage)) {
-               try_to_unmap(hpage, ttu);
+       if (PageHuge(hpage) && !PageAnon(hpage)) {
+               /*
+                * For hugetlb pages in shared mappings, try_to_unmap
+                * could potentially call huge_pmd_unshare.  Because of
+                * this, take semaphore in write mode here and set
+                * TTU_RMAP_LOCKED to indicate we have taken the lock
+                * at this higher level.
+                */
+               mapping = hugetlb_page_mapping_lock_write(hpage);
+               if (mapping) {
+                       try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
+                       i_mmap_unlock_write(mapping);
+               } else
+                       pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
        } else {
-               if (!PageAnon(hpage)) {
-                       /*
-                        * For hugetlb pages in shared mappings, try_to_unmap
-                        * could potentially call huge_pmd_unshare.  Because of
-                        * this, take semaphore in write mode here and set
-                        * TTU_RMAP_LOCKED to indicate we have taken the lock
-                        * at this higher level.
-                        */
-                       mapping = hugetlb_page_mapping_lock_write(hpage);
-                       if (mapping) {
-                               try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
-                               i_mmap_unlock_write(mapping);
-                       } else
-                               pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
-               } else {
-                       try_to_unmap(hpage, ttu);
-               }
+               try_to_unmap(folio, ttu);
        }
 
        unmap_success = !page_mapped(hpage);
@@ -1526,7 +1524,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
                                if (TestClearPageHWPoison(head))
                                        num_poisoned_pages_dec();
                                unlock_page(head);
-                               return 0;
+                               return -EOPNOTSUPP;
                        }
                        unlock_page(head);
                        res = MF_FAILED;
@@ -1543,8 +1541,27 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
        }
 
        lock_page(head);
+
+       /*
+        * The page could have changed compound pages due to race window.
+        * If this happens just bail out.
+        */
+       if (!PageHuge(p) || compound_head(p) != head) {
+               action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+               res = -EBUSY;
+               goto out;
+       }
+
        page_flags = head->flags;
 
+       if (hwpoison_filter(p)) {
+               if (TestClearPageHWPoison(head))
+                       num_poisoned_pages_dec();
+               put_page(p);
+               res = -EOPNOTSUPP;
+               goto out;
+       }
+
        /*
         * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
         * simply disable it. In order to make it work properly, we need
@@ -1613,7 +1630,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
                goto out;
 
        if (hwpoison_filter(page)) {
-               rc = 0;
+               rc = -EOPNOTSUPP;
                goto unlock;
        }
 
@@ -1638,7 +1655,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
         * SIGBUS (i.e. MF_MUST_KILL)
         */
        flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
-       collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+       collect_procs(page, &tokill, true);
 
        list_for_each_entry(tk, &tokill, nd)
                if (tk->size_shift)
@@ -1653,7 +1670,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
                start = (page->index << PAGE_SHIFT) & ~(size - 1);
                unmap_mapping_range(page->mapping, start, size, 0);
        }
-       kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
+       kill_procs(&tokill, true, false, pfn, flags);
        rc = 0;
 unlock:
        dax_unlock_page(page, cookie);
@@ -1682,12 +1699,15 @@ static DEFINE_MUTEX(mf_mutex);
  *
  * Must run in process context (e.g. a work queue) with interrupts
  * enabled and no spinlocks hold.
+ *
+ * Return: 0 for successfully handled the memory error,
+ *         -EOPNOTSUPP for hwpoison_filter() filtered the error event,
+ *         < 0(except -EOPNOTSUPP) on failure.
  */
 int memory_failure(unsigned long pfn, int flags)
 {
        struct page *p;
        struct page *hpage;
-       struct page *orig_head;
        struct dev_pagemap *pgmap;
        int res = 0;
        unsigned long page_flags;
@@ -1733,7 +1753,7 @@ try_again:
                goto unlock_mutex;
        }
 
-       orig_head = hpage = compound_head(p);
+       hpage = compound_head(p);
        num_poisoned_pages_inc();
 
        /*
@@ -1814,10 +1834,21 @@ try_again:
        lock_page(p);
 
        /*
-        * The page could have changed compound pages during the locking.
-        * If this happens just bail out.
+        * We're only intended to deal with the non-Compound page here.
+        * However, the page could have changed compound pages due to
+        * race window. If this happens, we could try again to hopefully
+        * handle the page next round.
         */
-       if (PageCompound(p) && compound_head(p) != orig_head) {
+       if (PageCompound(p)) {
+               if (retry) {
+                       if (TestClearPageHWPoison(p))
+                               num_poisoned_pages_dec();
+                       unlock_page(p);
+                       put_page(p);
+                       flags &= ~MF_COUNT_INCREASED;
+                       retry = false;
+                       goto try_again;
+               }
                action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
                res = -EBUSY;
                goto unlock_page;
@@ -1837,6 +1868,7 @@ try_again:
                        num_poisoned_pages_dec();
                unlock_page(p);
                put_page(p);
+               res = -EOPNOTSUPP;
                goto unlock_mutex;
        }
 
@@ -1845,7 +1877,7 @@ try_again:
         * page_lock. We need wait writeback completion for this page or it
         * may trigger vfs BUG while evict inode.
         */
-       if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
+       if (!PageLRU(p) && !PageWriteback(p))
                goto identify_page_state;
 
        /*
@@ -2139,7 +2171,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
  */
 static int __soft_offline_page(struct page *page)
 {
-       int ret = 0;
+       long ret = 0;
        unsigned long pfn = page_to_pfn(page);
        struct page *hpage = compound_head(page);
        char const *msg_page[] = {"page", "hugepage"};
@@ -2150,12 +2182,6 @@ static int __soft_offline_page(struct page *page)
                .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
        };
 
-       /*
-        * Check PageHWPoison again inside page lock because PageHWPoison
-        * is set by memory_failure() outside page lock. Note that
-        * memory_failure() also double-checks PageHWPoison inside page lock,
-        * so there's no race between soft_offline_page() and memory_failure().
-        */
        lock_page(page);
        if (!PageHuge(page))
                wait_on_page_writeback(page);
@@ -2166,7 +2192,7 @@ static int __soft_offline_page(struct page *page)
                return 0;
        }
 
-       if (!PageHuge(page))
+       if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
                /*
                 * Try to invalidate first. This should work for
                 * non dirty unmapped page cache pages.
@@ -2174,10 +2200,6 @@ static int __soft_offline_page(struct page *page)
                ret = invalidate_inode_page(page);
        unlock_page(page);
 
-       /*
-        * RED-PEN would be better to keep it isolated here, but we
-        * would need to fix isolation locking first.
-        */
        if (ret) {
                pr_info("soft_offline: %#lx: invalidated\n", pfn);
                page_handle_poison(page, false, true);
@@ -2196,7 +2218,7 @@ static int __soft_offline_page(struct page *page)
                        if (!list_empty(&pagelist))
                                putback_movable_pages(&pagelist);
 
-                       pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+                       pr_info("soft offline: %#lx: %s migration failed %ld, type %pGp\n",
                                pfn, msg_page[huge], ret, &page->flags);
                        if (ret > 0)
                                ret = -EBUSY;
@@ -2288,7 +2310,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
        get_online_mems();
-       ret = get_hwpoison_page(page, flags);
+       ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
        put_online_mems();
 
        if (ret > 0) {