Merge branch 'for-5.7/libnvdimm' into libnvdimm-for-next
author Dan Williams <dan.j.williams@intel.com>
Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
committer Dan Williams <dan.j.williams@intel.com>
Fri, 3 Apr 2020 02:55:17 +0000 (19:55 -0700)
- Introduce 'zero_page_range' as a dax operation. This allows
  filesystem-dax to operate without a block device.
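
  For reference, a provider can implement the new operation by mapping the
  range and zeroing it through the existing dax helpers, as the dcssblk
  change below does. A minimal sketch (the "example_" name is hypothetical;
  the signature matches the new dax_operations entry added to
  include/linux/dax.h):

      static int example_dax_zero_page_range(struct dax_device *dax_dev,
                                             pgoff_t pgoff, size_t nr_pages)
      {
              long rc;
              void *kaddr;

              /* map the page range, then zero and flush it */
              rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
              if (rc < 0)
                      return rc;
              memset(kaddr, 0, nr_pages << PAGE_SHIFT);
              dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
              return 0;
      }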

- Advertise a persistence domain for the of_pmem and papr_scm drivers.
  The persistence domain indicates where CPU stores need to reach in the
  platform memory subsystem before the platform will consider them
  power-fail protected.
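
  Concretely, both drivers claim the "memory controller" persistence
  domain by setting a region flag before registering the pmem region
  (this is the change the papr_scm and of_pmem hunks below make):

      set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
      region = nvdimm_pmem_region_create(bus, &ndr_desc);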

- Fixup some flexible-array declarations.
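
  That is, trailing zero-length arrays become C99 flexible-array members,
  as in drivers/acpi/nfit/nfit.h:

      -       struct acpi_nfit_system_address spa[0];
      +       struct acpi_nfit_system_address spa[];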

17 files changed:
arch/powerpc/platforms/pseries/papr_scm.c
drivers/acpi/nfit/nfit.h
drivers/dax/bus.c
drivers/dax/super.c
drivers/md/dm-linear.c
drivers/md/dm-log-writes.c
drivers/md/dm-stripe.c
drivers/md/dm.c
drivers/nvdimm/label.h
drivers/nvdimm/nd.h
drivers/nvdimm/of_pmem.c
drivers/nvdimm/pmem.c
drivers/s390/block/dcssblk.c
fs/dax.c
fs/iomap/buffered-io.c
include/linux/dax.h
include/linux/device-mapper.h

index 5499fc0..b642c4a 100644
@@ -342,8 +342,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 
        if (p->is_volatile)
                p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
-       else
+       else {
+               set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
                p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+       }
        if (!p->region) {
                dev_err(dev, "Error registering region %pR from %pOF\n",
                                ndr_desc.res, p->dn);
index b317f40..f5525f8 100644
@@ -145,32 +145,32 @@ struct nfit_spa {
        unsigned long ars_state;
        u32 clear_err_unit;
        u32 max_ars;
-       struct acpi_nfit_system_address spa[0];
+       struct acpi_nfit_system_address spa[];
 };
 
 struct nfit_dcr {
        struct list_head list;
-       struct acpi_nfit_control_region dcr[0];
+       struct acpi_nfit_control_region dcr[];
 };
 
 struct nfit_bdw {
        struct list_head list;
-       struct acpi_nfit_data_region bdw[0];
+       struct acpi_nfit_data_region bdw[];
 };
 
 struct nfit_idt {
        struct list_head list;
-       struct acpi_nfit_interleave idt[0];
+       struct acpi_nfit_interleave idt[];
 };
 
 struct nfit_flush {
        struct list_head list;
-       struct acpi_nfit_flush_address flush[0];
+       struct acpi_nfit_flush_address flush[];
 };
 
 struct nfit_memdev {
        struct list_head list;
-       struct acpi_nfit_memory_map memdev[0];
+       struct acpi_nfit_memory_map memdev[];
 };
 
 enum nfit_mem_flags {
index 46e4604..df238c8 100644
@@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
         * device outside of mmap of the resulting character device.
         */
        dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
-       if (!dax_dev)
+       if (IS_ERR(dax_dev)) {
+               rc = PTR_ERR(dax_dev);
                goto err;
+       }
 
        /* a device_dax instance is dead while the driver is not attached */
        kill_dax(dax_dev);
index 0aa4b6b..8e32345 100644
@@ -344,6 +344,23 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 }
 EXPORT_SYMBOL_GPL(dax_copy_to_iter);
 
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+                       size_t nr_pages)
+{
+       if (!dax_alive(dax_dev))
+               return -ENXIO;
+       /*
+        * There are no callers that want to zero more than one page as of now.
+        * Once such callers exist, this check can be removed after the
+        * device-mapper code has been updated to split ranges across targets.
+        */
+       if (nr_pages != 1)
+               return -EIO;
+
+       return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
+}
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
+
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -551,9 +568,16 @@ struct dax_device *alloc_dax(void *private, const char *__host,
        dev_t devt;
        int minor;
 
+       if (ops && !ops->zero_page_range) {
+               pr_debug("%s: error: device does not provide dax"
+                        " operation zero_page_range()\n",
+                        __host ? __host : "Unknown");
+               return ERR_PTR(-EINVAL);
+       }
+
        host = kstrdup(__host, GFP_KERNEL);
        if (__host && !host)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
        if (minor < 0)
@@ -576,7 +600,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
        ida_simple_remove(&dax_minor_ida, minor);
  err_minor:
        kfree(host);
-       return NULL;
+       return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL_GPL(alloc_dax);
 
index 8d07fdf..e1db434 100644
@@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
        return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+                                     size_t nr_pages)
+{
+       int ret;
+       struct linear_c *lc = ti->private;
+       struct block_device *bdev = lc->dev->bdev;
+       struct dax_device *dax_dev = lc->dev->dax_dev;
+       sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+       dev_sector = linear_map_sector(ti, sector);
+       ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
+       if (ret)
+               return ret;
+       return dax_zero_page_range(dax_dev, pgoff, nr_pages);
+}
+
 #else
 #define linear_dax_direct_access NULL
 #define linear_dax_copy_from_iter NULL
 #define linear_dax_copy_to_iter NULL
+#define linear_dax_zero_page_range NULL
 #endif
 
 static struct target_type linear_target = {
@@ -226,6 +243,7 @@ static struct target_type linear_target = {
        .direct_access = linear_dax_direct_access,
        .dax_copy_from_iter = linear_dax_copy_from_iter,
        .dax_copy_to_iter = linear_dax_copy_to_iter,
+       .dax_zero_page_range = linear_dax_zero_page_range,
 };
 
 int __init dm_linear_init(void)
index 99721c7..8ea20b5 100644
@@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
        return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
 
+static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+                                         size_t nr_pages)
+{
+       int ret;
+       struct log_writes_c *lc = ti->private;
+       sector_t sector = pgoff * PAGE_SECTORS;
+
+       ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
+                            &pgoff);
+       if (ret)
+               return ret;
+       return dax_zero_page_range(lc->dev->dax_dev, pgoff, nr_pages);
+}
+
 #else
 #define log_writes_dax_direct_access NULL
 #define log_writes_dax_copy_from_iter NULL
 #define log_writes_dax_copy_to_iter NULL
+#define log_writes_dax_zero_page_range NULL
 #endif
 
 static struct target_type log_writes_target = {
@@ -1016,6 +1032,7 @@ static struct target_type log_writes_target = {
        .direct_access = log_writes_dax_direct_access,
        .dax_copy_from_iter = log_writes_dax_copy_from_iter,
        .dax_copy_to_iter = log_writes_dax_copy_to_iter,
+       .dax_zero_page_range = log_writes_dax_zero_page_range,
 };
 
 static int __init dm_log_writes_init(void)
index 63bbcc2..fa813c0 100644
@@ -360,10 +360,32 @@ static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
        return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+                                     size_t nr_pages)
+{
+       int ret;
+       sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+       struct stripe_c *sc = ti->private;
+       struct dax_device *dax_dev;
+       struct block_device *bdev;
+       uint32_t stripe;
+
+       stripe_map_sector(sc, sector, &stripe, &dev_sector);
+       dev_sector += sc->stripe[stripe].physical_start;
+       dax_dev = sc->stripe[stripe].dev->dax_dev;
+       bdev = sc->stripe[stripe].dev->bdev;
+
+       ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
+       if (ret)
+               return ret;
+       return dax_zero_page_range(dax_dev, pgoff, nr_pages);
+}
+
 #else
 #define stripe_dax_direct_access NULL
 #define stripe_dax_copy_from_iter NULL
 #define stripe_dax_copy_to_iter NULL
+#define stripe_dax_zero_page_range NULL
 #endif
 
 /*
@@ -486,6 +508,7 @@ static struct target_type stripe_target = {
        .direct_access = stripe_dax_direct_access,
        .dax_copy_from_iter = stripe_dax_copy_from_iter,
        .dax_copy_to_iter = stripe_dax_copy_to_iter,
+       .dax_zero_page_range = stripe_dax_zero_page_range,
 };
 
 int __init dm_stripe_init(void)
index b89f07e..6504f0a 100644
@@ -1198,6 +1198,35 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
        return ret;
 }
 
+static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+                                 size_t nr_pages)
+{
+       struct mapped_device *md = dax_get_private(dax_dev);
+       sector_t sector = pgoff * PAGE_SECTORS;
+       struct dm_target *ti;
+       int ret = -EIO;
+       int srcu_idx;
+
+       ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+       if (!ti)
+               goto out;
+       if (WARN_ON(!ti->type->dax_zero_page_range)) {
+               /*
+                * ->zero_page_range() is a mandatory dax operation. If we
+                * are here, something is wrong.
+                */
+               goto out;
+       }
+       ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
+
+ out:
+       dm_put_live_table(md, srcu_idx);
+
+       return ret;
+}
+
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
  * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
@@ -1976,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor)
        if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
                md->dax_dev = alloc_dax(md, md->disk->disk_name,
                                        &dm_dax_ops, 0);
-               if (!md->dax_dev)
+               if (IS_ERR(md->dax_dev))
                        goto bad;
        }
 
@@ -3199,6 +3228,7 @@ static const struct dax_operations dm_dax_ops = {
        .dax_supported = dm_dax_supported,
        .copy_from_iter = dm_dax_copy_from_iter,
        .copy_to_iter = dm_dax_copy_to_iter,
+       .zero_page_range = dm_dax_zero_page_range,
 };
 
 /*
index 4c7b775..956b6d1 100644
@@ -62,7 +62,7 @@ struct nd_namespace_index {
        __le16 major;
        __le16 minor;
        __le64 checksum;
-       u8 free[0];
+       u8 free[];
 };
 
 /**
index c4d69c1..85dbb2a 100644
@@ -39,7 +39,7 @@ struct nd_region_data {
        int ns_count;
        int ns_active;
        unsigned int hints_shift;
-       void __iomem *flush_wpq[0];
+       void __iomem *flush_wpq[];
 };
 
 static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
@@ -157,7 +157,7 @@ struct nd_region {
        struct nd_interleave_set *nd_set;
        struct nd_percpu_lane __percpu *lane;
        int (*flush)(struct nd_region *nd_region, struct bio *bio);
-       struct nd_mapping mapping[0];
+       struct nd_mapping mapping[];
 };
 
 struct nd_blk_region {
index 8224d14..6826a27 100644
@@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 
                if (is_volatile)
                        region = nvdimm_volatile_region_create(bus, &ndr_desc);
-               else
+               else {
+                       set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
                        region = nvdimm_pmem_region_create(bus, &ndr_desc);
+               }
 
                if (!region)
                        dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
index 4eae441..715cb06 100644
@@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
        return BLK_STS_OK;
 }
 
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-                       unsigned int len, unsigned int off, unsigned int op,
-                       sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+                       struct page *page, unsigned int page_off,
+                       sector_t sector, unsigned int len)
+{
+       blk_status_t rc;
+       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+       void *pmem_addr = pmem->virt_addr + pmem_off;
+
+       if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+               return BLK_STS_IOERR;
+
+       rc = read_pmem(page, page_off, pmem_addr, len);
+       flush_dcache_page(page);
+       return rc;
+}
+
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+                       struct page *page, unsigned int page_off,
+                       sector_t sector, unsigned int len)
 {
        blk_status_t rc = BLK_STS_OK;
        bool bad_pmem = false;
@@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
        if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
                bad_pmem = true;
 
-       if (!op_is_write(op)) {
-               if (unlikely(bad_pmem))
-                       rc = BLK_STS_IOERR;
-               else {
-                       rc = read_pmem(page, off, pmem_addr, len);
-                       flush_dcache_page(page);
-               }
-       } else {
-               /*
-                * Note that we write the data both before and after
-                * clearing poison.  The write before clear poison
-                * handles situations where the latest written data is
-                * preserved and the clear poison operation simply marks
-                * the address range as valid without changing the data.
-                * In this case application software can assume that an
-                * interrupted write will either return the new good
-                * data or an error.
-                *
-                * However, if pmem_clear_poison() leaves the data in an
-                * indeterminate state we need to perform the write
-                * after clear poison.
-                */
-               flush_dcache_page(page);
-               write_pmem(pmem_addr, page, off, len);
-               if (unlikely(bad_pmem)) {
-                       rc = pmem_clear_poison(pmem, pmem_off, len);
-                       write_pmem(pmem_addr, page, off, len);
-               }
+       /*
+        * Note that we write the data both before and after
+        * clearing poison.  The write before clear poison
+        * handles situations where the latest written data is
+        * preserved and the clear poison operation simply marks
+        * the address range as valid without changing the data.
+        * In this case application software can assume that an
+        * interrupted write will either return the new good
+        * data or an error.
+        *
+        * However, if pmem_clear_poison() leaves the data in an
+        * indeterminate state we need to perform the write
+        * after clear poison.
+        */
+       flush_dcache_page(page);
+       write_pmem(pmem_addr, page, page_off, len);
+       if (unlikely(bad_pmem)) {
+               rc = pmem_clear_poison(pmem, pmem_off, len);
+               write_pmem(pmem_addr, page, page_off, len);
        }
 
        return rc;
@@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 
        do_acct = nd_iostat_start(bio, &start);
        bio_for_each_segment(bvec, bio, iter) {
-               rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-                               bvec.bv_offset, bio_op(bio), iter.bi_sector);
+               if (op_is_write(bio_op(bio)))
+                       rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+                               iter.bi_sector, bvec.bv_len);
+               else
+                       rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+                               iter.bi_sector, bvec.bv_len);
                if (rc) {
                        bio->bi_status = rc;
                        break;
@@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        struct pmem_device *pmem = bdev->bd_queue->queuedata;
        blk_status_t rc;
 
-       rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-                         0, op, sector);
-
+       if (op_is_write(op))
+               rc = pmem_do_write(pmem, page, 0, sector,
+                                  hpage_nr_pages(page) * PAGE_SIZE);
+       else
+               rc = pmem_do_read(pmem, page, 0, sector,
+                                  hpage_nr_pages(page) * PAGE_SIZE);
        /*
         * The ->rw_page interface is subtle and tricky.  The core
         * retries on any error, so we can only invoke page_endio() in
@@ -268,6 +282,16 @@ static const struct block_device_operations pmem_fops = {
        .revalidate_disk =      nvdimm_revalidate_disk,
 };
 
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+                                   size_t nr_pages)
+{
+       struct pmem_device *pmem = dax_get_private(dax_dev);
+
+       return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
+                                  PFN_PHYS(pgoff) >> SECTOR_SHIFT,
+                                  PAGE_SIZE));
+}
+
 static long pmem_dax_direct_access(struct dax_device *dax_dev,
                pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -299,6 +323,7 @@ static const struct dax_operations pmem_dax_ops = {
        .dax_supported = generic_fsdax_supported,
        .copy_from_iter = pmem_copy_from_iter,
        .copy_to_iter = pmem_copy_to_iter,
+       .zero_page_range = pmem_dax_zero_page_range,
 };
 
 static const struct attribute_group *pmem_attribute_groups[] = {
@@ -462,9 +487,9 @@ static int pmem_attach_disk(struct device *dev,
        if (is_nvdimm_sync(nd_region))
                flags = DAXDEV_F_SYNC;
        dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
-       if (!dax_dev) {
+       if (IS_ERR(dax_dev)) {
                put_disk(disk);
-               return -ENOMEM;
+               return PTR_ERR(dax_dev);
        }
        dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
        pmem->dax_dev = dax_dev;
index 63502ca..9b05cf1 100644
@@ -57,11 +57,26 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
        return copy_to_iter(addr, bytes, i);
 }
 
+static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
+                                      pgoff_t pgoff, size_t nr_pages)
+{
+       long rc;
+       void *kaddr;
+
+       rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+       if (rc < 0)
+               return rc;
+       memset(kaddr, 0, nr_pages << PAGE_SHIFT);
+       dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
+       return 0;
+}
+
 static const struct dax_operations dcssblk_dax_ops = {
        .direct_access = dcssblk_dax_direct_access,
        .dax_supported = generic_fsdax_supported,
        .copy_from_iter = dcssblk_dax_copy_from_iter,
        .copy_to_iter = dcssblk_dax_copy_to_iter,
+       .zero_page_range = dcssblk_dax_zero_page_range,
 };
 
 struct dcssblk_dev_info {
@@ -680,8 +695,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 
        dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name,
                        &dcssblk_dax_ops, DAXDEV_F_SYNC);
-       if (!dev_info->dax_dev) {
-               rc = -ENOMEM;
+       if (IS_ERR(dev_info->dax_dev)) {
+               rc = PTR_ERR(dev_info->dax_dev);
+               dev_info->dax_dev = NULL;
                goto put_dev;
        }
 
index 35da144..11b1672 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1038,50 +1038,43 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
        return ret;
 }
 
-static bool dax_range_is_aligned(struct block_device *bdev,
-                                unsigned int offset, unsigned int length)
+int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+                  struct iomap *iomap)
 {
-       unsigned short sector_size = bdev_logical_block_size(bdev);
+       sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
+       pgoff_t pgoff;
+       long rc, id;
+       void *kaddr;
+       bool page_aligned = false;
 
-       if (!IS_ALIGNED(offset, sector_size))
-               return false;
-       if (!IS_ALIGNED(length, sector_size))
-               return false;
 
-       return true;
-}
+       if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
+           IS_ALIGNED(size, PAGE_SIZE))
+               page_aligned = true;
 
-int __dax_zero_page_range(struct block_device *bdev,
-               struct dax_device *dax_dev, sector_t sector,
-               unsigned int offset, unsigned int size)
-{
-       if (dax_range_is_aligned(bdev, offset, size)) {
-               sector_t start_sector = sector + (offset >> 9);
+       rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
+       if (rc)
+               return rc;
 
-               return blkdev_issue_zeroout(bdev, start_sector,
-                               size >> 9, GFP_NOFS, 0);
-       } else {
-               pgoff_t pgoff;
-               long rc, id;
-               void *kaddr;
+       id = dax_read_lock();
 
-               rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
-               if (rc)
-                       return rc;
+       if (page_aligned)
+               rc = dax_zero_page_range(iomap->dax_dev, pgoff,
+                                        size >> PAGE_SHIFT);
+       else
+               rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
+       if (rc < 0) {
+               dax_read_unlock(id);
+               return rc;
+       }
 
-               id = dax_read_lock();
-               rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
-               if (rc < 0) {
-                       dax_read_unlock(id);
-                       return rc;
-               }
+       if (!page_aligned) {
                memset(kaddr + offset, 0, size);
-               dax_flush(dax_dev, kaddr + offset, size);
-               dax_read_unlock(id);
+               dax_flush(iomap->dax_dev, kaddr + offset, size);
        }
+       dax_read_unlock(id);
        return 0;
 }
-EXPORT_SYMBOL_GPL(__dax_zero_page_range);
 
 static loff_t
 dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
index 7c84c4c..6f750da 100644
@@ -974,13 +974,6 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
        return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
 }
 
-static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
-               struct iomap *iomap)
-{
-       return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
-                       iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
-}
-
 static loff_t
 iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
                void *data, struct iomap *iomap, struct iomap *srcmap)
@@ -1000,7 +993,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
                bytes = min_t(loff_t, PAGE_SIZE - offset, count);
 
                if (IS_DAX(inode))
-                       status = iomap_dax_zero(pos, offset, bytes, iomap);
+                       status = dax_iomap_zero(pos, offset, bytes, iomap);
                else
                        status = iomap_zero(inode, pos, offset, bytes, iomap,
                                        srcmap);
index 328c2db..d7af5d2 100644
@@ -13,6 +13,7 @@
 typedef unsigned long dax_entry_t;
 
 struct iomap_ops;
+struct iomap;
 struct dax_device;
 struct dax_operations {
        /*
@@ -34,6 +35,8 @@ struct dax_operations {
        /* copy_to_iter: required operation for fs-dax direct-i/o */
        size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
                        struct iov_iter *);
+       /* zero_page_range: required operation. Zero a range of pages. */
+       int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
 };
 
 extern struct attribute_group dax_attribute_group;
@@ -199,6 +202,8 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i);
 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+                       size_t nr_pages);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
@@ -210,20 +215,8 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                                      pgoff_t index);
-
-#ifdef CONFIG_FS_DAX
-int __dax_zero_page_range(struct block_device *bdev,
-               struct dax_device *dax_dev, sector_t sector,
-               unsigned int offset, unsigned int length);
-#else
-static inline int __dax_zero_page_range(struct block_device *bdev,
-               struct dax_device *dax_dev, sector_t sector,
-               unsigned int offset, unsigned int length)
-{
-       return -ENXIO;
-}
-#endif
-
+int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+                       struct iomap *iomap);
 static inline bool dax_mapping(struct address_space *mapping)
 {
        return mapping->host && IS_DAX(mapping->host);
index 475668c..af48d9d 100644
@@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
                long nr_pages, void **kaddr, pfn_t *pfn);
 typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
                void *addr, size_t bytes, struct iov_iter *i);
+typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
+               size_t nr_pages);
 #define PAGE_SECTORS (PAGE_SIZE / 512)
 
 void dm_error(const char *message);
@@ -195,6 +197,7 @@ struct target_type {
        dm_dax_direct_access_fn direct_access;
        dm_dax_copy_iter_fn dax_copy_from_iter;
        dm_dax_copy_iter_fn dax_copy_to_iter;
+       dm_dax_zero_page_range_fn dax_zero_page_range;
 
        /* For internal device-mapper use. */
        struct list_head list;