Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Jun 2018 00:21:52 +0000 (17:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 9 Jun 2018 00:21:52 +0000 (17:21 -0700)
Pull libnvdimm updates from Dan Williams:
 "This adds a user for the new 'bytes-remaining' updates to
  memcpy_mcsafe() that you already received through Ingo via the
  x86-dax-for-linus pull.

  Not included here, but still targeting this cycle, is support for
  handling memory media errors (poison) consumed via userspace dax
  mappings.

  Summary:

   - DAX broke a fundamental assumption of truncate of file mapped
     pages. The truncate path assumed that it is safe to disconnect a
     pinned page from a file and let the filesystem reclaim the physical
     block. With DAX the page is equivalent to the filesystem block.
     Introduce dax_layout_busy_page() to enable filesystems to wait for
     pinned DAX pages to be released. Without this wait a filesystem
     could allocate blocks under active device-DMA to a new file.

   - DAX arranges for the block layer to be bypassed and uses
     dax_direct_access() + copy_to_iter() to satisfy read(2) calls.
     However, the memcpy_mcsafe() facility is available through the pmem
     block driver. In order to safely handle media errors, via the DAX
     block-layer bypass, introduce copy_to_iter_mcsafe().

   - Fix cache management policy relative to the ACPI NFIT Platform
     Capabilities Structure to properly elide cache flushes when they
     are not necessary. The table indicates whether CPU caches are
     power-fail protected. Clarify that a deep flush is always performed
     on REQ_{FUA,PREFLUSH} requests."

* tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits)
  dax: Use dax_write_cache* helpers
  libnvdimm, pmem: Do not flush power-fail protected CPU caches
  libnvdimm, pmem: Unconditionally deep flush on *sync
  libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH
  acpi, nfit: Remove ecc_unit_size
  dax: dax_insert_mapping_entry always succeeds
  libnvdimm, e820: Register all pmem resources
  libnvdimm: Debug probe times
  linvdimm, pmem: Preserve read-only setting for pmem devices
  x86, nfit_test: Add unit test for memcpy_mcsafe()
  pmem: Switch to copy_to_iter_mcsafe()
  dax: Report bytes remaining in dax_iomap_actor()
  dax: Introduce a ->copy_to_iter dax operation
  uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation
  xfs, dax: introduce xfs_break_dax_layouts()
  xfs: prepare xfs_break_layouts() for another layout type
  xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL
  mm, fs, dax: handle layout changes to pinned dax mappings
  mm: fix __gup_device_huge vs unmap
  mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS
  ...

15 files changed:
1  2 
drivers/dax/super.c
drivers/md/dm.c
fs/Kconfig
fs/dax.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
include/linux/dax.h
include/linux/memremap.h
include/linux/mm.h
kernel/resource.c
lib/Kconfig
mm/Kconfig
mm/gup.c

@@@ -80,11 -80,13 +80,12 @@@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev)
   * This is a library function for filesystems to check if the block device
   * can be mounted with dax option.
   *
 - * Return: negative errno if unsupported, 0 if supported.
 + * Return: true if supported, false if unsupported
   */
 -int __bdev_dax_supported(struct super_block *sb, int blocksize)
 +bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
  {
 -      struct block_device *bdev = sb->s_bdev;
        struct dax_device *dax_dev;
+       bool dax_enabled = false;
        pgoff_t pgoff;
        int err, id;
        void *kaddr;
                 * on being able to do (page_address(pfn_to_page())).
                 */
                WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+               dax_enabled = true;
        } else if (pfn_t_devmap(pfn)) {
-               /* pass */;
-       } else {
+               struct dev_pagemap *pgmap;
+               pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
+               if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
+                       dax_enabled = true;
+               put_dev_pagemap(pgmap);
+       }
+       if (!dax_enabled) {
 -              pr_debug("VFS (%s): error: dax support not enabled\n",
 -                              sb->s_id);
 -              return -EOPNOTSUPP;
 +              pr_debug("%s: error: dax support not enabled\n",
 +                              bdevname(bdev, buf));
 +              return false;
        }
 -      return 0;
 +      return true;
  }
  EXPORT_SYMBOL_GPL(__bdev_dax_supported);
  #endif
diff --cc drivers/md/dm.c
Simple merge
diff --cc fs/Kconfig
Simple merge
diff --cc fs/dax.c
+++ b/fs/dax.c
@@@ -910,9 -1007,8 +1007,8 @@@ static vm_fault_t dax_load_hole(struct 
  {
        struct inode *inode = mapping->host;
        unsigned long vaddr = vmf->address;
 -      int ret = VM_FAULT_NOPAGE;
 +      vm_fault_t ret = VM_FAULT_NOPAGE;
        struct page *zero_page;
-       void *entry2;
        pfn_t pfn;
  
        zero_page = ZERO_PAGE(0);
        }
  
        pfn = page_to_pfn_t(zero_page);
-       entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-                       RADIX_DAX_ZERO_PAGE, false);
-       if (IS_ERR(entry2)) {
-               ret = VM_FAULT_SIGBUS;
-               goto out;
-       }
+       dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
+                       false);
 -      vm_insert_mixed(vmf->vma, vaddr, pfn);
 +      ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
  out:
        trace_dax_load_hole(inode, vmf, ret);
        return ret;
@@@ -724,13 -702,85 +724,76 @@@ xfs_file_write_iter
                 * allow an operation to fall back to buffered mode.
                 */
                ret = xfs_file_dio_aio_write(iocb, from);
 -              if (ret == -EREMCHG)
 -                      goto buffered;
 -      } else {
 -buffered:
 -              ret = xfs_file_buffered_aio_write(iocb, from);
 +              if (ret != -EREMCHG)
 +                      return ret;
        }
  
 -      if (ret > 0) {
 -              XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
 -
 -              /* Handle various SYNC-type writes */
 -              ret = generic_write_sync(iocb, ret);
 -      }
 -      return ret;
 +      return xfs_file_buffered_aio_write(iocb, from);
  }
  
+ static void
+ xfs_wait_dax_page(
+       struct inode            *inode,
+       bool                    *did_unlock)
+ {
+       struct xfs_inode        *ip = XFS_I(inode);
+       *did_unlock = true;
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+       schedule();
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ }
+ static int
+ xfs_break_dax_layouts(
+       struct inode            *inode,
+       uint                    iolock,
+       bool                    *did_unlock)
+ {
+       struct page             *page;
+       ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
+       page = dax_layout_busy_page(inode->i_mapping);
+       if (!page)
+               return 0;
+       return ___wait_var_event(&page->_refcount,
+                       atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+                       0, 0, xfs_wait_dax_page(inode, did_unlock));
+ }
+ int
+ xfs_break_layouts(
+       struct inode            *inode,
+       uint                    *iolock,
+       enum layout_break_reason reason)
+ {
+       bool                    retry;
+       int                     error;
+       ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
+       do {
+               retry = false;
+               switch (reason) {
+               case BREAK_UNMAP:
+                       error = xfs_break_dax_layouts(inode, *iolock, &retry);
+                       if (error || retry)
+                               break;
+                       /* fall through */
+               case BREAK_WRITE:
+                       error = xfs_break_leased_layouts(inode, iolock, &retry);
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       error = -EINVAL;
+               }
+       } while (error == 0 && retry);
+       return error;
+ }
  #define       XFS_FALLOC_FL_SUPPORTED                                         \
                (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
                 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
Simple merge
Simple merge
Simple merge
@@@ -83,11 -86,12 +86,13 @@@ static inline void fs_put_dax(struct da
  struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
  int dax_writeback_mapping_range(struct address_space *mapping,
                struct block_device *bdev, struct writeback_control *wbc);
+ struct page *dax_layout_busy_page(struct address_space *mapping);
  #else
 -static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 +static inline bool bdev_dax_supported(struct block_device *bdev,
 +              int blocksize)
  {
 -      return -EOPNOTSUPP;
 +      return false;
  }
  
  static inline struct dax_device *fs_dax_get_by_host(const char *host)
Simple merge
Simple merge
Simple merge
diff --cc lib/Kconfig
Simple merge
diff --cc mm/Kconfig
Simple merge
diff --cc mm/gup.c
Simple merge