Merge tag 'libnvdimm-for-4.19_dax-memory-failure' of gitolite.kernel.org:pub/scm...
[linux-2.6-microblaze.git] / drivers / nvdimm / pmem.c
index 8b1fd7f..6071e29 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/set_memory.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/badblocks.h>
@@ -51,6 +52,30 @@ static struct nd_region *to_region(struct pmem_device *pmem)
        return to_nd_region(to_dev(pmem)->parent);
 }
 
+static void hwpoison_clear(struct pmem_device *pmem,
+               phys_addr_t phys, unsigned int len)
+{
+       unsigned long pfn_start, pfn_end, pfn;
+
+       /* only pmem in the linear map supports HWPoison */
+       if (is_vmalloc_addr(pmem->virt_addr))
+               return;
+
+       pfn_start = PHYS_PFN(phys);
+       pfn_end = pfn_start + PHYS_PFN(len);
+       for (pfn = pfn_start; pfn < pfn_end; pfn++) {
+               struct page *page = pfn_to_page(pfn);
+
+               /*
+                * Note, no need to hold a get_dev_pagemap() reference
+                * here since we're in the driver I/O path and
+                * outstanding I/O requests pin the dev_pagemap.
+                */
+               if (test_and_clear_pmem_poison(page))
+                       clear_mce_nospec(pfn);
+       }
+}
+
 static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
                phys_addr_t offset, unsigned int len)
 {
@@ -65,6 +90,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
        if (cleared < len)
                rc = BLK_STS_IOERR;
        if (cleared > 0 && cleared / 512) {
+               hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
                cleared /= 512;
                dev_dbg(dev, "%#llx clear %ld sector%s\n",
                                (unsigned long long) sector, cleared,
@@ -120,7 +146,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 }
 
 static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-                       unsigned int len, unsigned int off, bool is_write,
+                       unsigned int len, unsigned int off, unsigned int op,
                        sector_t sector)
 {
        blk_status_t rc = BLK_STS_OK;
@@ -131,7 +157,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
                bad_pmem = true;
 
-       if (!is_write) {
+       if (!op_is_write(op)) {
                if (unlikely(bad_pmem))
                        rc = BLK_STS_IOERR;
                else {
@@ -180,8 +206,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
        do_acct = nd_iostat_start(bio, &start);
        bio_for_each_segment(bvec, bio, iter) {
                rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-                               bvec.bv_offset, op_is_write(bio_op(bio)),
-                               iter.bi_sector);
+                               bvec.bv_offset, bio_op(bio), iter.bi_sector);
                if (rc) {
                        bio->bi_status = rc;
                        break;
@@ -198,13 +223,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
-                      struct page *page, bool is_write)
+                      struct page *page, unsigned int op)
 {
        struct pmem_device *pmem = bdev->bd_queue->queuedata;
        blk_status_t rc;
 
        rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-                         0, is_write, sector);
+                         0, op, sector);
 
        /*
         * The ->rw_page interface is subtle and tricky.  The core
@@ -213,7 +238,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
         * caused by double completion.
         */
        if (rc == 0)
-               page_endio(page, is_write, 0);
+               page_endio(page, op_is_write(op), 0);
 
        return blk_status_to_errno(rc);
 }
@@ -227,8 +252,11 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
        if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
                                        PFN_PHYS(nr_pages))))
                return -EIO;
-       *kaddr = pmem->virt_addr + offset;
-       *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+       if (kaddr)
+               *kaddr = pmem->virt_addr + offset;
+       if (pfn)
+               *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
 
        /*
         * If badblocks are present, limit known good range to the