Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdim...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 17:51:57 +0000 (09:51 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 17 Nov 2017 17:51:57 +0000 (09:51 -0800)
Pull libnvdimm and dax updates from Dan Williams:
 "Save for a few late fixes, all of these commits have shipped in -next
  releases since before the merge window opened, and 0day has given a
  build success notification.

  The ext4 touches came from Jan, and the xfs touches have Darrick's
  reviewed-by. An xfstest for the MAP_SYNC feature has been through
  a few rounds of reviews and is on track to be merged.

   - Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
     'userspace flush' of persistent memory updates via filesystem-dax
     mappings. It arranges for any filesystem metadata updates that may
     be required to satisfy a write fault to also be flushed ("on disk")
     before the kernel returns to userspace from the fault handler.
     Effectively every write-fault that dirties metadata completes an
     fsync() before returning from the fault handler. The new
     MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
     is validated as supported by the filesystem's ->mmap() file
     operation.

   - Add support for the standard ACPI 6.2 label access methods that
     replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
     This enables interoperability with environments that only implement
     the standardized methods.

   - Add support for the ACPI 6.2 NVDIMM media error injection methods.

   - Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
     latch last shutdown status, firmware update, SMART error injection,
     and SMART alarm threshold control.

   - Clean up physical address information disclosures to be root-only.

   - Fix revalidation of the DIMM "locked label area" status to support
     dynamic unlock of the label area.

   - Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
     (system-physical-address) command and error injection commands.

  Acknowledgements that came after the commits were pushed to -next:

   - 957ac8c421ad ("dax: fix PMD faults on zero-length files"):
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
   - a39e596baa07 ("xfs: support for synchronous DAX faults") and
     7b565c9f965b ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"
* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
  acpi, nfit: add 'Enable Latch System Shutdown Status' command support
  dax: fix general protection fault in dax_alloc_inode
  dax: fix PMD faults on zero-length files
  dax: stop requiring a live device for dax_flush()
  brd: remove dax support
  dax: quiet bdev_dax_supported()
  fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
  tools/testing/nvdimm: unit test clear-error commands
  acpi, nfit: validate commands against the device type
  tools/testing/nvdimm: stricter bounds checking for error injection commands
  xfs: support for synchronous DAX faults
  xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
  ext4: Support for synchronous DAX faults
  ext4: Simplify error handling in ext4_dax_huge_fault()
  dax: Implement dax_finish_sync_fault()
  dax, iomap: Add support for synchronous faults
  mm: Define MAP_SYNC and VM_SYNC flags
  dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
  dax: Allow dax_iomap_fault() to return pfn
  dax: Fix comment describing dax_iomap_fault()
  ...

27 files changed:
1  2 
MAINTAINERS
arch/alpha/include/uapi/asm/mman.h
arch/mips/include/uapi/asm/mman.h
arch/parisc/include/uapi/asm/mman.h
arch/xtensa/include/uapi/asm/mman.h
drivers/block/Kconfig
drivers/block/brd.c
drivers/nvdimm/Makefile
fs/dax.c
fs/ext2/file.c
fs/ext4/file.c
fs/ext4/inode.c
fs/jbd2/journal.c
fs/proc/task_mmu.c
fs/xfs/xfs_file.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_trace.h
include/linux/dax.h
include/linux/fs.h
include/linux/iomap.h
include/linux/mm.h
include/linux/mman.h
include/trace/events/fs_dax.h
include/uapi/asm-generic/mman-common.h
include/uapi/asm-generic/mman.h
tools/include/uapi/asm-generic/mman-common.h
tools/testing/nvdimm/Kbuild

diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
  #include <linux/radix-tree.h>
  #include <linux/fs.h>
  #include <linux/slab.h>
- #ifdef CONFIG_BLK_DEV_RAM_DAX
- #include <linux/pfn_t.h>
- #include <linux/dax.h>
- #include <linux/uio.h>
- #endif
 +#include <linux/backing-dev.h>
  
  #include <linux/uaccess.h>
  
@@@ -449,23 -401,9 +401,10 @@@ static struct brd_device *brd_alloc(in
        disk->flags             = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "ram%d", i);
        set_capacity(disk, rd_size * 2);
 +      disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
  
- #ifdef CONFIG_BLK_DEV_RAM_DAX
-       queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
-       brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
-       if (!brd->dax_dev)
-               goto out_free_inode;
- #endif
        return brd;
  
- #ifdef CONFIG_BLK_DEV_RAM_DAX
- out_free_inode:
-       kill_dax(brd->dax_dev);
-       put_dax(brd->dax_dev);
- #endif
  out_free_queue:
        blk_cleanup_queue(brd->brd_queue);
  out_free_dev:
Simple merge
diff --cc fs/dax.c
+++ b/fs/dax.c
@@@ -825,38 -820,42 +825,42 @@@ out
  }
  EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
  
- static int dax_insert_mapping(struct address_space *mapping,
-               struct block_device *bdev, struct dax_device *dax_dev,
-               sector_t sector, size_t size, void *entry,
-               struct vm_area_struct *vma, struct vm_fault *vmf)
+ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
  {
-       unsigned long vaddr = vmf->address;
-       void *ret, *kaddr;
 -      return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
++      return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
+ }
+ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
+                        pfn_t *pfnp)
+ {
+       const sector_t sector = dax_iomap_sector(iomap, pos);
        pgoff_t pgoff;
+       void *kaddr;
        int id, rc;
-       pfn_t pfn;
+       long length;
  
-       rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
+       rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
        if (rc)
                return rc;
        id = dax_read_lock();
-       rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
-       if (rc < 0) {
-               dax_read_unlock(id);
-               return rc;
+       length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
+                                  &kaddr, pfnp);
+       if (length < 0) {
+               rc = length;
+               goto out;
        }
+       rc = -EINVAL;
+       if (PFN_PHYS(length) < size)
+               goto out;
+       if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
+               goto out;
+       /* For larger pages we need devmap */
+       if (length > 1 && !pfn_t_devmap(*pfnp))
+               goto out;
+       rc = 0;
+ out:
        dax_read_unlock(id);
-       ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0);
-       if (IS_ERR(ret))
-               return PTR_ERR(ret);
-       trace_dax_insert_mapping(mapping->host, vmf, ret);
-       if (vmf->flags & FAULT_FLAG_WRITE)
-               return vm_insert_mixed_mkwrite(vma, vaddr, pfn);
-       else
-               return vm_insert_mixed(vma, vaddr, pfn);
+       return rc;
  }
  
  /*
diff --cc fs/ext2/file.c
Simple merge
diff --cc fs/ext4/file.c
Simple merge
diff --cc fs/ext4/inode.c
@@@ -3384,6 -3393,20 +3384,19 @@@ static int ext4_releasepage(struct pag
                return try_to_free_buffers(page);
  }
  
 -#ifdef CONFIG_FS_DAX
+ static bool ext4_inode_datasync_dirty(struct inode *inode)
+ {
+       journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+       if (journal)
+               return !jbd2_transaction_committed(journal,
+                                       EXT4_I(inode)->i_datasync_tid);
+       /* Any metadata buffers to write? */
+       if (!list_empty(&inode->i_mapping->private_list))
+               return true;
+       return inode->i_state & I_DIRTY_DATASYNC;
+ }
  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                            unsigned flags, struct iomap *iomap)
  {
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -21,9 -20,13 +21,13 @@@ struct vm_fault
  
  /*
   * Flags for all iomap mappings:
 - */
 -#define IOMAP_F_NEW   0x01    /* blocks have been newly allocated */
 -/*
++ *
+  * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
+  * written data and requires fdatasync to commit them to persistent storage.
   */
 -#define IOMAP_F_DIRTY 0x02
 +#define IOMAP_F_NEW           0x01    /* blocks have been newly allocated */
 +#define IOMAP_F_BOUNDARY      0x02    /* mapping ends at metadata boundary */
++#define IOMAP_F_DIRTY         0x04    /* uncommitted metadata */
  
  /*
   * Flags that only need to be reported for IOMAP_REPORT requests:
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge