mm, hwpoison: enable memory error handling on 1GB hugepage
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9a7a228..1443980 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,7 +74,13 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
-static bool __page_handle_poison(struct page *page)
+/*
+ * Return values:
+ *   1:   the page is dissolved (if needed) and taken off the buddy list,
+ *   0:   the page is dissolved (if needed) but not taken off the buddy list,
+ *   < 0: failed to dissolve.
+ */
+static int __page_handle_poison(struct page *page)
 {
        int ret;
 
@@ -84,7 +90,7 @@ static bool __page_handle_poison(struct page *page)
                ret = take_page_off_buddy(page);
        zone_pcp_enable(page_zone(page));
 
-       return ret > 0;
+       return ret;
 }
 
 static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
@@ -94,7 +100,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
                 * Doing this check for free pages is also fine since dissolve_free_huge_page
                 * returns 0 for non-hugetlb pages as well.
                 */
-               if (!__page_handle_poison(page))
+               if (__page_handle_poison(page) <= 0)
                        /*
                         * We could fail to take off the target page from buddy
                         * for example due to racy page allocation, but that's
@@ -762,7 +768,6 @@ static const char * const action_page_types[] = {
        [MF_MSG_DIFFERENT_COMPOUND]     = "different compound page after locking",
        [MF_MSG_HUGE]                   = "huge page",
        [MF_MSG_FREE_HUGE]              = "free huge page",
-       [MF_MSG_NON_PMD_HUGE]           = "non-pmd-sized huge page",
        [MF_MSG_UNMAP_FAILED]           = "unmapping failed page",
        [MF_MSG_DIRTY_SWAPCACHE]        = "dirty swapcache page",
        [MF_MSG_CLEAN_SWAPCACHE]        = "clean swapcache page",
@@ -1078,7 +1083,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
                res = truncate_error_page(hpage, page_to_pfn(p), mapping);
                unlock_page(hpage);
        } else {
-               res = MF_FAILED;
                unlock_page(hpage);
                /*
                 * migration entry prevents later access on error hugepage,
@@ -1086,9 +1090,11 @@ static int me_huge_page(struct page_state *ps, struct page *p)
                 * subpages.
                 */
                put_page(hpage);
-               if (__page_handle_poison(p)) {
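+               /*
+                * A return value >= 0 means the hugepage has been dissolved;
+                * even if the raw error page could not be taken off the buddy
+                * list, it stays marked PageHWPoison, so treat the error as
+                * recovered and take a refcount on the raw page.
+                */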
+               if (__page_handle_poison(p) >= 0) {
                        page_ref_inc(p);
                        res = MF_RECOVERED;
+               } else {
+                       res = MF_FAILED;
                }
        }
 
@@ -1662,6 +1668,113 @@ unlock:
 EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #endif /* CONFIG_FS_DAX */
 
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Struct raw_hwp_page represents information about a "raw error page",
+ * i.e. an individual hwpoisoned subpage.  These entries form a singly
+ * linked list anchored at the ->private field of the
+ * SUBPAGE_INDEX_HWPOISON-th tail page.
+ */
+struct raw_hwp_page {
+       struct llist_node node;
+       struct page *page;
+};
+
+static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
+{
+       return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+}
+
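+/*
+ * Free all raw_hwp_page entries attached to @hpage.  If @move_flag is
+ * true, additionally transfer each entry's hwpoison flag back to the raw
+ * subpage before freeing the entry.  Returns the number of entries freed.
+ */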
+static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
+{
+       struct llist_head *head;
+       struct llist_node *t, *tnode;
+       unsigned long count = 0;
+
+       head = raw_hwp_list_head(hpage);
+       llist_for_each_safe(tnode, t, head->first) {
+               struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
+
+               if (move_flag)
+                       SetPageHWPoison(p->page);
+               kfree(p);
+               count++;
+       }
+       llist_del_all(head);
+       return count;
+}
+
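+/*
+ * Record @page as a raw error page of hugepage @hpage and mark the
+ * hugepage hwpoisoned.  Returns 0 for the first error on this hugepage,
+ * -EHWPOISON if the hugepage is already hwpoisoned or the error is
+ * already recorded.
+ */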
+static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
+{
+       struct llist_head *head;
+       struct raw_hwp_page *raw_hwp;
+       struct llist_node *t, *tnode;
+       int ret = TestSetPageHWPoison(hpage) ? -EHWPOISON : 0;
+
+       /*
+        * Once the hwpoison hugepage has lost reliable raw error info,
+        * there is little point in keeping further error info precisely,
+        * so skip recording any additional raw error info.
+        */
+       if (HPageRawHwpUnreliable(hpage))
+               return -EHWPOISON;
+       head = raw_hwp_list_head(hpage);
+       llist_for_each_safe(tnode, t, head->first) {
+               struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
+
+               if (p->page == page)
+                       return -EHWPOISON;
+       }
+
+       raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
+       if (raw_hwp) {
+               raw_hwp->page = page;
+               llist_add(&raw_hwp->node, head);
+               /* the first error event will be counted in action_result(). */
+               if (ret)
+                       num_poisoned_pages_inc();
+       } else {
+               /*
+                * Failed to save raw error info.  We no longer trace all
+                * hwpoisoned subpages, so we must refuse to free/dissolve
+                * this hwpoisoned hugepage.
+                */
+               SetHPageRawHwpUnreliable(hpage);
+               /*
+                * Once HPageRawHwpUnreliable is set, the raw_hwp_page list
+                * is no longer used, so free it.
+                */
+               __free_raw_hwp_pages(hpage, false);
+       }
+       return ret;
+}
+
+static unsigned long free_raw_hwp_pages(struct page *hpage, bool move_flag)
+{
+       /*
+        * HPageVmemmapOptimized hugepages can't be handled here because
+        * setting PageHWPoison on the raw subpages requires their struct
+        * pages, which no longer exist while the vmemmap is optimized.
+        */
+       if (move_flag && HPageVmemmapOptimized(hpage))
+               return 0;
+
+       /*
+        * HPageRawHwpUnreliable hugepages shouldn't be unpoisoned by
+        * definition.
+        */
+       if (HPageRawHwpUnreliable(hpage))
+               return 0;
+
+       return __free_raw_hwp_pages(hpage, move_flag);
+}
+
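+/*
+ * Clear the hwpoison flag on @hpage, moving it back to the raw error
+ * pages.  No-op for HPageRawHwpUnreliable hugepages, which must stay
+ * hwpoisoned.
+ */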
+void hugetlb_clear_page_hwpoison(struct page *hpage)
+{
+       if (HPageRawHwpUnreliable(hpage))
+               return;
+       ClearPageHWPoison(hpage);
+       free_raw_hwp_pages(hpage, true);
+}
+
 /*
  * Called from hugetlb code with hugetlb_lock held.
  *
@@ -1693,10 +1806,11 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
                        count_increased = true;
        } else {
                ret = -EBUSY;
-               goto out;
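+               /*
+                * On the retry (MF_NO_RETRY set), fall through so that the
+                * error is still recorded on the hugepage even though the
+                * caller ultimately gives up with -EBUSY.
+                */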
+               if (!(flags & MF_NO_RETRY))
+                       goto out;
        }
 
-       if (TestSetPageHWPoison(head)) {
+       if (hugetlb_set_page_hwpoison(head, page)) {
                ret = -EHWPOISON;
                goto out;
        }
@@ -1708,7 +1822,6 @@ out:
        return ret;
 }
 
-#ifdef CONFIG_HUGETLB_PAGE
 /*
  * Taking a refcount on a hugetlb page needs extra care about race conditions
  * with basic operations like hugepage allocation/free/demotion.
@@ -1721,7 +1834,6 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
        struct page *p = pfn_to_page(pfn);
        struct page *head;
        unsigned long page_flags;
-       bool retry = true;
 
        *hugetlb = 1;
 retry:
@@ -1737,8 +1849,8 @@ retry:
                }
                return res;
        } else if (res == -EBUSY) {
-               if (retry) {
-                       retry = false;
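+               /*
+                * Retry once with MF_NO_RETRY so that the second call can
+                * record the error even if the hugepage remains busy.
+                */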
+               if (!(flags & MF_NO_RETRY)) {
+                       flags |= MF_NO_RETRY;
                        goto retry;
                }
                action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
@@ -1749,7 +1861,7 @@ retry:
        lock_page(head);
 
        if (hwpoison_filter(p)) {
-               ClearPageHWPoison(head);
+               hugetlb_clear_page_hwpoison(head);
                res = -EOPNOTSUPP;
                goto out;
        }
@@ -1760,10 +1872,11 @@ retry:
         */
        if (res == 0) {
                unlock_page(head);
-               res = MF_FAILED;
-               if (__page_handle_poison(p)) {
+               if (__page_handle_poison(p) >= 0) {
                        page_ref_inc(p);
                        res = MF_RECOVERED;
+               } else {
+                       res = MF_FAILED;
                }
                action_result(pfn, MF_MSG_FREE_HUGE, res);
                return res == MF_RECOVERED ? 0 : -EBUSY;
@@ -1771,21 +1884,6 @@ retry:
 
        page_flags = head->flags;
 
-       /*
-        * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
-        * simply disable it. In order to make it work properly, we need
-        * make sure that:
-        *  - conversion of a pud that maps an error hugetlb into hwpoison
-        *    entry properly works, and
-        *  - other mm code walking over page table is aware of pud-aligned
-        *    hwpoison entries.
-        */
-       if (huge_page_size(page_hstate(head)) > PMD_SIZE) {
-               action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED);
-               res = -EBUSY;
-               goto out;
-       }
-
        if (!hwpoison_user_mappings(p, pfn, flags, head)) {
                action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
                res = -EBUSY;
@@ -1804,6 +1902,10 @@ static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *
        return 0;
 }
 
+static inline unsigned long free_raw_hwp_pages(struct page *hpage, bool flag)
+{
+       return 0;
+}
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
@@ -2209,6 +2311,7 @@ int unpoison_memory(unsigned long pfn)
        struct page *p;
        int ret = -EBUSY;
        int freeit = 0;
+       unsigned long count = 1;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
                                        DEFAULT_RATELIMIT_BURST);
 
@@ -2256,6 +2359,13 @@ int unpoison_memory(unsigned long pfn)
 
        ret = get_hwpoison_page(p, MF_UNPOISON);
        if (!ret) {
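+               /*
+                * A hugepage's raw error info must be released first; if
+                * none can be freed (the raw error info is unreliable or
+                * missing), unpoisoning is refused.
+                */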
+               if (PageHuge(p)) {
+                       count = free_raw_hwp_pages(page, false);
+                       if (count == 0) {
+                               ret = -EBUSY;
+                               goto unlock_mutex;
+                       }
+               }
                ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
        } else if (ret < 0) {
                if (ret == -EHWPOISON) {
@@ -2264,6 +2374,13 @@ int unpoison_memory(unsigned long pfn)
                        unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
                                         pfn, &unpoison_rs);
        } else {
+               if (PageHuge(p)) {
+                       count = free_raw_hwp_pages(page, false);
+                       if (count == 0) {
+                               ret = -EBUSY;
+                               /* drop the refcount taken by get_hwpoison_page() */
+                               put_page(page);
+                               goto unlock_mutex;
+                       }
+               }
                freeit = !!TestClearPageHWPoison(p);
 
                put_page(page);
@@ -2276,7 +2393,7 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
        mutex_unlock(&mf_mutex);
        if (!ret || freeit) {
-               num_poisoned_pages_dec();
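+               /*
+                * count is 1 for ordinary pages and the number of freed
+                * raw error pages for hugepages.
+                */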
+               num_poisoned_pages_sub(count);
                unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
                                 page_to_pfn(p), &unpoison_rs);
        }