diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5d880d4..5a38e9e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -263,8 +263,8 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
 }
 
 /*
- * When a unknown page type is encountered drain as many buffers as possible
- * in the hope to turn the page into a LRU or free page, which we can handle.
+ * Unknown page type encountered. Try to check whether it can be turned
+ * into a LRU page via lru_add_drain_all(), or into a free page by
+ * reclaiming slabs when possible.
  */
 void shake_page(struct page *p, int access)
 {
@@ -273,9 +273,6 @@ void shake_page(struct page *p, int access)
 
        if (!PageSlab(p)) {
                lru_add_drain_all();
-               if (PageLRU(p))
-                       return;
-               drain_all_pages(page_zone(p));
                if (PageLRU(p) || is_free_buddy_page(p))
                        return;
        }
@@ -809,7 +806,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  */
 static int me_huge_page(struct page *p, unsigned long pfn)
 {
-       int res = 0;
+       int res;
        struct page *hpage = compound_head(p);
        struct address_space *mapping;
 
@@ -820,6 +817,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
        if (mapping) {
                res = truncate_error_page(hpage, pfn, mapping);
        } else {
+               res = MF_FAILED;
                unlock_page(hpage);
                /*
                 * migration entry prevents later access on error anonymous
@@ -828,8 +826,10 @@ static int me_huge_page(struct page *p, unsigned long pfn)
                 */
                if (PageAnon(hpage))
                        put_page(hpage);
-               dissolve_free_huge_page(p);
-               res = MF_RECOVERED;
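+               /*
+                * If dissolving the hugepage succeeded, the raw error page
+                * went back to buddy; take it off the free lists and pin it
+                * with an extra reference so it cannot be allocated again.
+                */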
+               if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
+                       page_ref_inc(p);
+                       res = MF_RECOVERED;
+               }
                lock_page(hpage);
        }
 
@@ -946,13 +946,13 @@ static int page_action(struct page_state *ps, struct page *p,
 }
 
 /**
- * get_hwpoison_page() - Get refcount for memory error handling:
+ * __get_hwpoison_page() - Get refcount for memory error handling:
  * @page:      raw error page (hit by memory error)
  *
  * Return: 0 if we failed to grab the refcount, otherwise true (some
  * non-zero value).
  */
-static int get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page)
 {
        struct page *head = compound_head(page);
 
@@ -982,6 +982,73 @@ static int get_hwpoison_page(struct page *page)
        return 0;
 }
 
+/*
+ * Safely get reference count of an arbitrary page.
+ *
+ * Returns 0 for a free page, 1 for an in-use page,
+ * -EIO for a page type we cannot handle and -EBUSY if we raced with an
+ * allocation.
+ * A reference on the page is held on return only in the in-use case,
+ * i.e. when the page is of a known type we can handle.
+ */
+static int get_any_page(struct page *p, unsigned long flags)
+{
+       int ret = 0, pass = 0;
+       bool count_increased = false;
+
+       if (flags & MF_COUNT_INCREASED)
+               count_increased = true;
+
+try_again:
+       if (!count_increased && !__get_hwpoison_page(p)) {
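+               /*
+                * We could not grab a reference: the page may be genuinely
+                * free, or we may have raced with allocation or freeing.
+                */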
+               if (page_count(p)) {
+                       /* We raced with an allocation, retry. */
+                       if (pass++ < 3)
+                               goto try_again;
+                       ret = -EBUSY;
+               } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+                       /* We raced with put_page, retry. */
+                       if (pass++ < 3)
+                               goto try_again;
+                       ret = -EIO;
+               }
+       } else {
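+               /*
+                * A reference is held (grabbed here or passed in by the
+                * caller); check whether the page is of a type we can handle.
+                */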
+               if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+                       ret = 1;
+               } else {
+                       /*
+                        * A page we cannot handle. Check whether we can turn
+                        * it into something we can handle.
+                        */
+                       if (pass++ < 3) {
+                               put_page(p);
+                               shake_page(p, 1);
+                               count_increased = false;
+                               goto try_again;
+                       }
+                       put_page(p);
+                       ret = -EIO;
+               }
+       }
+
+       return ret;
+}
+
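+/*
+ * Grab a reference on the page with the zone's pcplists disabled, so that
+ * free-page checks are not confused by pages sitting on a per-cpu list.
+ * Soft offline (@ctxt == MF_SOFT_OFFLINE) goes through the retrying
+ * get_any_page(); other callers use __get_hwpoison_page() directly.
+ */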
+static int get_hwpoison_page(struct page *p, unsigned long flags,
+                            enum mf_flags ctxt)
+{
+       int ret;
+
+       zone_pcp_disable(page_zone(p));
+       if (ctxt == MF_SOFT_OFFLINE)
+               ret = get_any_page(p, flags);
+       else
+               ret = __get_hwpoison_page(p);
+       zone_pcp_enable(page_zone(p));
+
+       return ret;
+}
+
 /*
  * Do all that is necessary to remove user space mappings. Unmap
  * the pages and send SIGBUS to the processes if the data was dirty.
@@ -989,7 +1056,7 @@ static int get_hwpoison_page(struct page *page)
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                                  int flags, struct page **hpagep)
 {
-       enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       enum ttu_flags ttu = TTU_IGNORE_MLOCK;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        bool unmap_success = true;
@@ -1162,7 +1229,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 
        num_poisoned_pages_inc();
 
-       if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
+       if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
                /*
                 * Check "filter hit" and "race with other subpage."
                 */
@@ -1176,9 +1243,13 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
                        }
                }
                unlock_page(head);
-               dissolve_free_huge_page(p);
-               action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
-               return 0;
+               res = MF_FAILED;
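+               /*
+                * The hugepage is free: dissolve it and take the raw error
+                * page off the buddy free lists so it cannot be reused.
+                */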
+               if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
+                       page_ref_inc(p);
+                       res = MF_RECOVERED;
+               }
+               action_result(pfn, MF_MSG_FREE_HUGE, res);
+               return res == MF_RECOVERED ? 0 : -EBUSY;
        }
 
        lock_page(head);
@@ -1231,6 +1302,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
        loff_t start;
        dax_entry_t cookie;
 
+       if (flags & MF_COUNT_INCREASED)
+               /*
+                * Drop the extra refcount in case we come from madvise().
+                */
+               put_page(page);
+
        /*
         * Prevent the inode from being freed while we are interrogating
         * the address_space, typically this would be handled by
@@ -1319,6 +1396,7 @@ int memory_failure(unsigned long pfn, int flags)
        struct dev_pagemap *pgmap;
        int res;
        unsigned long page_flags;
+       bool retry = true;
 
        if (!sysctl_memory_failure_recovery)
                panic("Memory failure on page %lx", pfn);
@@ -1336,6 +1414,7 @@ int memory_failure(unsigned long pfn, int flags)
                return -ENXIO;
        }
 
+try_again:
        if (PageHuge(p))
                return memory_failure_hugetlb(pfn, flags);
        if (TestSetPageHWPoison(p)) {
@@ -1358,10 +1437,23 @@ int memory_failure(unsigned long pfn, int flags)
         * In fact it's dangerous to directly bump up page count from 0,
         * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
         */
-       if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
+       if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
                if (is_free_buddy_page(p)) {
-                       action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
-                       return 0;
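+                       /*
+                        * Take the poisoned page off the buddy free lists
+                        * and hold a reference so it is never handed out
+                        * again.
+                        */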
+                       if (take_page_off_buddy(p)) {
+                               page_ref_inc(p);
+                               res = MF_RECOVERED;
+                       } else {
+                               /* We lost the race, try again */
+                               if (retry) {
+                                       ClearPageHWPoison(p);
+                                       num_poisoned_pages_dec();
+                                       retry = false;
+                                       goto try_again;
+                               }
+                               res = MF_FAILED;
+                       }
+                       action_result(pfn, MF_MSG_BUDDY, res);
+                       return res == MF_RECOVERED ? 0 : -EBUSY;
                } else {
                        action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
                        return -EBUSY;
@@ -1385,14 +1477,6 @@ int memory_failure(unsigned long pfn, int flags)
         * walked by the page reclaim code, however that's not a big loss.
         */
        shake_page(p, 0);
-       /* shake_page could have turned it free. */
-       if (!PageLRU(p) && is_free_buddy_page(p)) {
-               if (flags & MF_COUNT_INCREASED)
-                       action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
-               else
-                       action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED);
-               return 0;
-       }
 
        lock_page(p);
 
@@ -1596,6 +1680,7 @@ int unpoison_memory(unsigned long pfn)
        struct page *page;
        struct page *p;
        int freeit = 0;
+       unsigned long flags = 0;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
                                        DEFAULT_RATELIMIT_BURST);
 
@@ -1640,7 +1725,7 @@ int unpoison_memory(unsigned long pfn)
                return 0;
        }
 
-       if (!get_hwpoison_page(p)) {
+       if (!get_hwpoison_page(p, flags, 0)) {
                if (TestClearPageHWPoison(p))
                        num_poisoned_pages_dec();
                unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -1671,75 +1756,6 @@ int unpoison_memory(unsigned long pfn)
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-/*
- * Safely get reference count of an arbitrary page.
- * Returns 0 for a free page, -EIO for a zero refcount page
- * that is not free, and 1 for any other page type.
- * For 1 the page is returned with increased page count, otherwise not.
- */
-static int __get_any_page(struct page *p, unsigned long pfn, int flags)
-{
-       int ret;
-
-       if (flags & MF_COUNT_INCREASED)
-               return 1;
-
-       /*
-        * When the target page is a free hugepage, just remove it
-        * from free hugepage list.
-        */
-       if (!get_hwpoison_page(p)) {
-               if (PageHuge(p)) {
-                       pr_info("%s: %#lx free huge page\n", __func__, pfn);
-                       ret = 0;
-               } else if (is_free_buddy_page(p)) {
-                       pr_info("%s: %#lx free buddy page\n", __func__, pfn);
-                       ret = 0;
-               } else if (page_count(p)) {
-                       /* raced with allocation */
-                       ret = -EBUSY;
-               } else {
-                       pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
-                               __func__, pfn, p->flags);
-                       ret = -EIO;
-               }
-       } else {
-               /* Not a free page */
-               ret = 1;
-       }
-       return ret;
-}
-
-static int get_any_page(struct page *page, unsigned long pfn, int flags)
-{
-       int ret = __get_any_page(page, pfn, flags);
-
-       if (ret == -EBUSY)
-               ret = __get_any_page(page, pfn, flags);
-
-       if (ret == 1 && !PageHuge(page) &&
-           !PageLRU(page) && !__PageMovable(page)) {
-               /*
-                * Try to free it.
-                */
-               put_page(page);
-               shake_page(page, 1);
-
-               /*
-                * Did it turn free?
-                */
-               ret = __get_any_page(page, pfn, 0);
-               if (ret == 1 && !PageLRU(page)) {
-                       /* Drop page reference which is from __get_any_page() */
-                       put_page(page);
-                       pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
-                               pfn, page->flags, &page->flags);
-                       return -EIO;
-               }
-       }
-       return ret;
-}
-
 static bool isolate_page(struct page *page, struct list_head *pagelist)
 {
        bool isolated = false;
@@ -1839,11 +1855,11 @@ static int __soft_offline_page(struct page *page)
                        pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
                                pfn, msg_page[huge], ret, page->flags, &page->flags);
                        if (ret > 0)
-                               ret = -EIO;
+                               ret = -EBUSY;
                }
        } else {
-               pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n",
-                       pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags);
+               pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n",
+                       pfn, msg_page[huge], page_count(page), page->flags, &page->flags);
                ret = -EBUSY;
        }
        return ret;
@@ -1905,7 +1921,7 @@ int soft_offline_page(unsigned long pfn, int flags)
                return -EIO;
 
        if (PageHWPoison(page)) {
-               pr_info("soft offline: %#lx page already poisoned\n", pfn);
+               pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
                if (flags & MF_COUNT_INCREASED)
                        put_page(page);
                return 0;
@@ -1913,16 +1929,20 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
        get_online_mems();
-       ret = get_any_page(page, pfn, flags);
+       ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
        put_online_mems();
 
-       if (ret > 0)
+       if (ret > 0) {
                ret = soft_offline_in_use_page(page);
-       else if (ret == 0)
+       } else if (ret == 0) {
                if (soft_offline_free_page(page) && try_again) {
                        try_again = false;
                        goto retry;
                }
+       } else if (ret == -EIO) {
+               pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
+                        __func__, pfn, page->flags, &page->flags);
+       }
 
        return ret;
 }