Merge tag 'hole_punch_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Aug 2021 17:24:50 +0000 (10:24 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 30 Aug 2021 17:24:50 +0000 (10:24 -0700)
diff --combined fs/cifs/smb2ops.c

index 2dfd0d8,458c546..ddc0e8f
--- 1/fs/cifs/smb2ops.c
--- 2/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@@ -557,8 -557,8 +557,8 @@@ parse_server_interfaces(struct network_
         p = buf;
         while (bytes_left >= sizeof(*p)) {
                 info->speed = le64_to_cpu(p->LinkSpeed);
- -              info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE);
- -              info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE);
+ +              info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
+ +              info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
   
                 cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
                 cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
@@@ -2910,8 -2910,6 +2910,8 @@@ smb2_get_dfs_refer(const unsigned int x
                 /* ipc tcons are not refcounted */
                 spin_lock(&cifs_tcp_ses_lock);
                 tcon->tc_count--;
+ +              /* tc_count can never go negative */
+ +              WARN_ON(tcon->tc_count < 0);
                 spin_unlock(&cifs_tcp_ses_lock);
         }
         kfree(utf16_path);
@@@ -3590,6 -3588,7 +3590,7 @@@ static long smb3_punch_hole(struct fil
                 return rc;
         }
   
+       filemap_invalidate_lock(inode->i_mapping);
         /*
          * We implement the punch hole through ioctl, so we need remove the page
          * caches first, otherwise the data may be inconsistent with the server.
@@@ -3607,6 -3606,7 +3608,7 @@@
                         sizeof(struct file_zero_data_information),
                         CIFSMaxBufSize, NULL, NULL);
         free_xid(xid);
+       filemap_invalidate_unlock(inode->i_mapping);
         return rc;
   }
   
@@@ -3618,7 -3618,6 +3620,7 @@@ static int smb3_simple_fallocate_write_
   {
         struct cifs_io_parms io_parms = {0};
         int nbytes;
+ +      int rc = 0;
         struct kvec iov[2];
   
         io_parms.netfid = cfile->fid.netfid;
@@@ -3626,25 -3625,13 +3628,25 @@@
         io_parms.tcon = tcon;
         io_parms.persistent_fid = cfile->fid.persistent_fid;
         io_parms.volatile_fid = cfile->fid.volatile_fid;
- -      io_parms.offset = off;
- -      io_parms.length = len;
   
- -      /* iov[0] is reserved for smb header */
- -      iov[1].iov_base = buf;
- -      iov[1].iov_len = io_parms.length;
- -      return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ +      while (len) {
+ +              io_parms.offset = off;
+ +              io_parms.length = len;
+ +              if (io_parms.length > SMB2_MAX_BUFFER_SIZE)
+ +                      io_parms.length = SMB2_MAX_BUFFER_SIZE;
+ +              /* iov[0] is reserved for smb header */
+ +              iov[1].iov_base = buf;
+ +              iov[1].iov_len = io_parms.length;
+ +              rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ +              if (rc)
+ +                      break;
+ +              if (nbytes > len)
+ +                      return -EINVAL;
+ +              buf += nbytes;
+ +              off += nbytes;
+ +              len -= nbytes;
+ +      }
+ +      return rc;
   }
   
   static int smb3_simple_fallocate_range(unsigned int xid,
@@@ -3668,6 -3655,11 +3670,6 @@@
                         (char **)&out_data, &out_data_len);
         if (rc)
                 goto out;
- -      /*
- -       * It is already all allocated
- -       */
- -      if (out_data_len == 0)
- -              goto out;
   
         buf = kzalloc(1024 * 1024, GFP_KERNEL);
         if (buf == NULL) {
@@@ -3790,24 -3782,6 +3792,24 @@@ static long smb3_simple_falloc(struct f
                 goto out;
         }
   
+ +      if (keep_size == true) {
+ +              /*
+ +               * We can not preallocate pages beyond the end of the file
+ +               * in SMB2
+ +               */
+ +              if (off >= i_size_read(inode)) {
+ +                      rc = 0;
+ +                      goto out;
+ +              }
+ +              /*
+ +               * For fallocates that are partially beyond the end of file,
+ +               * clamp len so we only fallocate up to the end of file.
+ +               */
+ +              if (off + len > i_size_read(inode)) {
+ +                      len = i_size_read(inode) - off;
+ +              }
+ +      }
+ +
         if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
                 /*
                  * At this point, we are trying to fallocate an internal
diff --combined fs/ext2/ext2.h

index e512630,81907a0..3be9dd6
--- 1/fs/ext2/ext2.h
--- 2/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@@ -667,9 -667,6 +667,6 @@@ struct ext2_inode_info 
         struct rw_semaphore xattr_sem;
   #endif
         rwlock_t i_meta_lock;
- #ifdef CONFIG_FS_DAX
-       struct rw_semaphore dax_sem;
- #endif
   
         /*
          * truncate_mutex is for serialising ext2_truncate() against
@@@ -685,14 -682,6 +682,6 @@@
   #endif
   };
   
- #ifdef CONFIG_FS_DAX
- #define dax_sem_down_write(ext2_inode)        down_write(&(ext2_inode)->dax_sem)
- #define dax_sem_up_write(ext2_inode)  up_write(&(ext2_inode)->dax_sem)
- #else
- #define dax_sem_down_write(ext2_inode)
- #define dax_sem_up_write(ext2_inode)
- #endif
- 
   /*
    * Inode dynamic state flags
    */
@@@ -740,8 -729,7 +729,8 @@@ extern int ext2_inode_by_name(struct in
   extern int ext2_make_empty(struct inode *, struct inode *);
   extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
                                                 struct page **, void **res_page_addr);
- -extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+ +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
+ +                           char *kaddr);
   extern int ext2_empty_dir (struct inode *);
   extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
   extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
diff --combined fs/ext2/inode.c

index 04f0def,33f874f..333fa62
--- 1/fs/ext2/inode.c
--- 2/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@@ -799,6 -799,7 +799,6 @@@ int ext2_get_block(struct inode *inode
   
   }
   
- -#ifdef CONFIG_FS_DAX
   static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                 unsigned flags, struct iomap *iomap, struct iomap *srcmap)
   {
@@@ -851,18 -852,16 +851,18 @@@ const struct iomap_ops ext2_iomap_ops 
         .iomap_begin            = ext2_iomap_begin,
         .iomap_end              = ext2_iomap_end,
   };
- -#else
- -/* Define empty ops for !CONFIG_FS_DAX case to avoid ugly ifdefs */
- -const struct iomap_ops ext2_iomap_ops;
- -#endif /* CONFIG_FS_DAX */
   
   int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 u64 start, u64 len)
   {
- -      return generic_block_fiemap(inode, fieinfo, start, len,
- -                                  ext2_get_block);
+ +      int ret;
+ +
+ +      inode_lock(inode);
+ +      len = min_t(u64, len, i_size_read(inode));
+ +      ret = iomap_fiemap(inode, fieinfo, start, len, &ext2_iomap_ops);
+ +      inode_unlock(inode);
+ +
+ +      return ret;
   }
   
   static int ext2_writepage(struct page *page, struct writeback_control *wbc)
@@@ -1178,7 -1177,7 +1178,7 @@@ static void ext2_free_branches(struct i
                 ext2_free_data(inode, p, q);
   }
   
- /* dax_sem must be held when calling this function */
+ /* mapping->invalidate_lock must be held when calling this function */
   static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
   {
         __le32 *i_data = EXT2_I(inode)->i_data;
@@@ -1195,7 -1194,7 +1195,7 @@@
         iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
   
   #ifdef CONFIG_FS_DAX
-       WARN_ON(!rwsem_is_locked(&ei->dax_sem));
+       WARN_ON(!rwsem_is_locked(&inode->i_mapping->invalidate_lock));
   #endif
   
         n = ext2_block_to_path(inode, iblock, offsets, NULL);
@@@ -1277,9 -1276,9 +1277,9 @@@ static void ext2_truncate_blocks(struc
         if (ext2_inode_is_fast_symlink(inode))
                 return;
   
-       dax_sem_down_write(EXT2_I(inode));
+       filemap_invalidate_lock(inode->i_mapping);
         __ext2_truncate_blocks(inode, offset);
-       dax_sem_up_write(EXT2_I(inode));
+       filemap_invalidate_unlock(inode->i_mapping);
   }
   
   static int ext2_setsize(struct inode *inode, loff_t newsize)
@@@ -1309,10 -1308,10 +1309,10 @@@
         if (error)
                 return error;
   
-       dax_sem_down_write(EXT2_I(inode));
+       filemap_invalidate_lock(inode->i_mapping);
         truncate_setsize(inode, newsize);
         __ext2_truncate_blocks(inode, newsize);
-       dax_sem_up_write(EXT2_I(inode));
+       filemap_invalidate_unlock(inode->i_mapping);
   
         inode->i_mtime = inode->i_ctime = current_time(inode);
         if (inode_needs_sync(inode)) {
diff --combined fs/fuse/dax.c

index 9d58371,fc05ce5..281d79f
--- 1/fs/fuse/dax.c
--- 2/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@@ -444,12 -444,12 +444,12 @@@ static int fuse_setup_new_dax_mapping(s
         /*
          * Can't do inline reclaim in fault path. We call
          * dax_layout_busy_page() before we free a range. And
-        * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
-        * In fault path we enter with fi->i_mmap_sem held and can't drop
-        * it. Also in fault path we hold fi->i_mmap_sem shared and not
-        * exclusive, so that creates further issues with fuse_wait_dax_page().
-        * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
-        * range to become free and retry.
+        * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it.
+        * In fault path we enter with mapping->invalidate_lock held and can't
+        * drop it. Also in fault path we hold mapping->invalidate_lock shared
+        * and not exclusive, so that creates further issues with
+        * fuse_wait_dax_page().  Hence return -EAGAIN and fuse_dax_fault()
+        * will wait for a memory range to become free and retry.
          */
         if (flags & IOMAP_FAULT) {
                 alloc_dmap = alloc_dax_mapping(fcd);
@@@ -513,7 -513,7 +513,7 @@@ static int fuse_upgrade_dax_mapping(str
         down_write(&fi->dax->sem);
         node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
   
-       /* We are holding either inode lock or i_mmap_sem, and that should
+       /* We are holding either inode lock or invalidate_lock, and that should
          * ensure that dmap can't be truncated. We are holding a reference
          * on dmap and that should make sure it can't be reclaimed. So dmap
          * should still be there in tree despite the fact we dropped and
@@@ -660,14 -660,12 +660,12 @@@ static const struct iomap_ops fuse_ioma
   
   static void fuse_wait_dax_page(struct inode *inode)
   {
-       struct fuse_inode *fi = get_fuse_inode(inode);
- 
-       up_write(&fi->i_mmap_sem);
+       filemap_invalidate_unlock(inode->i_mapping);
         schedule();
-       down_write(&fi->i_mmap_sem);
+       filemap_invalidate_lock(inode->i_mapping);
   }
   
- /* Should be called with fi->i_mmap_sem lock held exclusively */
+ /* Should be called with mapping->invalidate_lock held exclusively */
   static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
                                     loff_t start, loff_t end)
   {
@@@ -813,18 -811,18 +811,18 @@@ retry
          * we do not want any read/write/mmap to make progress and try
          * to populate page cache or access memory we are trying to free.
          */
-       down_read(&get_fuse_inode(inode)->i_mmap_sem);
+       filemap_invalidate_lock_shared(inode->i_mapping);
         ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
         if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
                 error = 0;
                 retry = true;
-               up_read(&get_fuse_inode(inode)->i_mmap_sem);
+               filemap_invalidate_unlock_shared(inode->i_mapping);
                 goto retry;
         }
   
         if (ret & VM_FAULT_NEEDDSYNC)
                 ret = dax_finish_sync_fault(vmf, pe_size, pfn);
-       up_read(&get_fuse_inode(inode)->i_mmap_sem);
+       filemap_invalidate_unlock_shared(inode->i_mapping);
   
         if (write)
                 sb_end_pagefault(sb);
@@@ -960,7 -958,7 +958,7 @@@ inode_inline_reclaim_one_dmap(struct fu
         int ret;
         struct interval_tree_node *node;
   
-       down_write(&fi->i_mmap_sem);
+       filemap_invalidate_lock(inode->i_mapping);
   
         /* Lookup a dmap and corresponding file offset to reclaim. */
         down_read(&fi->dax->sem);
@@@ -1021,7 -1019,7 +1019,7 @@@
   out_write_dmap_sem:
         up_write(&fi->dax->sem);
   out_mmap_sem:
-       up_write(&fi->i_mmap_sem);
+       filemap_invalidate_unlock(inode->i_mapping);
         return dmap;
   }
   
@@@ -1050,10 -1048,10 +1048,10 @@@ alloc_dax_mapping_reclaim(struct fuse_c
                  * had a reference or some other temporary failure,
                  * Try again. We want to give up inline reclaim only
                  * if there is no range assigned to this node. Otherwise
-                * if a deadlock is possible if we sleep with fi->i_mmap_sem
-                * held and worker to free memory can't make progress due
-                * to unavailability of fi->i_mmap_sem lock. So sleep
-                * only if fi->dax->nr=0
+                * a deadlock is possible if we sleep with
+                * mapping->invalidate_lock held and worker to free memory
+                * can't make progress due to unavailability of
+                * mapping->invalidate_lock.  So sleep only if fi->dax->nr=0
                  */
                 if (retry)
                         continue;
@@@ -1061,8 -1059,8 +1059,8 @@@
                  * There are no mappings which can be reclaimed. Wait for one.
                  * We are not holding fi->dax->sem. So it is possible
                  * that range gets added now. But as we are not holding
-                * fi->i_mmap_sem, worker should still be able to free up
-                * a range and wake us up.
+                * mapping->invalidate_lock, worker should still be able to
+                * free up a range and wake us up.
                  */
                 if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
                         if (wait_event_killable_exclusive(fcd->range_waitq,
@@@ -1108,7 -1106,7 +1106,7 @@@ static int lookup_and_reclaim_dmap_lock
   /*
    * Free a range of memory.
    * Locking:
-  * 1. Take fi->i_mmap_sem to block dax faults.
+  * 1. Take mapping->invalidate_lock to block dax faults.
    * 2. Take fi->dax->sem to protect interval tree and also to make sure
    *    read/write can not reuse a dmap which we might be freeing.
    */
@@@ -1122,7 -1120,7 +1120,7 @@@ static int lookup_and_reclaim_dmap(stru
         loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
         loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
   
-       down_write(&fi->i_mmap_sem);
+       filemap_invalidate_lock(inode->i_mapping);
         ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
         if (ret) {
                 pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
@@@ -1134,7 -1132,7 +1132,7 @@@
         ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
         up_write(&fi->dax->sem);
   out_mmap_sem:
-       up_write(&fi->i_mmap_sem);
+       filemap_invalidate_unlock(inode->i_mapping);
         return ret;
   }
   
@@@ -1235,6 -1233,8 +1233,6 @@@ void fuse_dax_conn_free(struct fuse_con
   static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
   {
         long nr_pages, nr_ranges;
- -      void *kaddr;
- -      pfn_t pfn;
         struct fuse_dax_mapping *range;
         int ret, id;
         size_t dax_size = -1;
@@@ -1246,8 -1246,8 +1244,8 @@@
         INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
   
         id = dax_read_lock();
- -      nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr,
- -                                   &pfn);
+ +      nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
+ +                                   NULL);
         dax_read_unlock(id);
         if (nr_pages < 0) {
                 pr_debug("dax_direct_access() returned %ld\n", nr_pages);
diff --combined fs/xfs/xfs_inode.c

index 990b72a,3c0bb21..f00145e
--- 1/fs/xfs/xfs_inode.c
--- 2/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@@ -132,7 -132,7 +132,7 @@@ xfs_ilock_attr_map_shared
   
   /*
    * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
-  * multi-reader locks: i_mmap_lock and the i_lock.  This routine allows
+  * multi-reader locks: invalidate_lock and the i_lock.  This routine allows
    * various combinations of the locks to be obtained.
    *
    * The 3 locks should always be ordered so that the IO lock is obtained first,
@@@ -140,23 -140,23 +140,23 @@@
    *
    * Basic locking order:
    *
-  * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+  * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
    *
    * mmap_lock locking order:
    *
    * i_rwsem -> page lock -> mmap_lock
-  * mmap_lock -> i_mmap_lock -> page_lock
+  * mmap_lock -> invalidate_lock -> page_lock
    *
    * The difference in mmap_lock locking order mean that we cannot hold the
-  * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
-  * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
-  * in get_user_pages() to map the user pages into the kernel address space for
-  * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
-  * page faults already hold the mmap_lock.
+  * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
+  * can fault in pages during copy in/out (for buffered IO) or require the
+  * mmap_lock in get_user_pages() to map the user pages into the kernel address
+  * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
+  * fault because page faults already hold the mmap_lock.
    *
    * Hence to serialise fully against both syscall and mmap based IO, we need to
-  * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
-  * taken in places where we need to invalidate the page cache in a race
+  * take both the i_rwsem and the invalidate_lock. These locks should *only* be
+  * both taken in places where we need to invalidate the page cache in a race
    * free manner (e.g. truncate, hole punch and other extent manipulation
    * functions).
    */
@@@ -188,10 -188,13 +188,13 @@@ xfs_ilock
                                  XFS_IOLOCK_DEP(lock_flags));
         }
   
-       if (lock_flags & XFS_MMAPLOCK_EXCL)
-               mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
-       else if (lock_flags & XFS_MMAPLOCK_SHARED)
-               mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+       if (lock_flags & XFS_MMAPLOCK_EXCL) {
+               down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+                                 XFS_MMAPLOCK_DEP(lock_flags));
+       } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+               down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
+                                XFS_MMAPLOCK_DEP(lock_flags));
+       }
   
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
@@@ -240,10 -243,10 +243,10 @@@ xfs_ilock_nowait
         }
   
         if (lock_flags & XFS_MMAPLOCK_EXCL) {
-               if (!mrtryupdate(&ip->i_mmaplock))
+               if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
                         goto out_undo_iolock;
         } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
-               if (!mrtryaccess(&ip->i_mmaplock))
+               if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
                         goto out_undo_iolock;
         }
   
@@@ -258,9 -261,9 +261,9 @@@
   
   out_undo_mmaplock:
         if (lock_flags & XFS_MMAPLOCK_EXCL)
-               mrunlock_excl(&ip->i_mmaplock);
+               up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
         else if (lock_flags & XFS_MMAPLOCK_SHARED)
-               mrunlock_shared(&ip->i_mmaplock);
+               up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
   out_undo_iolock:
         if (lock_flags & XFS_IOLOCK_EXCL)
                 up_write(&VFS_I(ip)->i_rwsem);
@@@ -307,9 -310,9 +310,9 @@@ xfs_iunlock
                 up_read(&VFS_I(ip)->i_rwsem);
   
         if (lock_flags & XFS_MMAPLOCK_EXCL)
-               mrunlock_excl(&ip->i_mmaplock);
+               up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
         else if (lock_flags & XFS_MMAPLOCK_SHARED)
-               mrunlock_shared(&ip->i_mmaplock);
+               up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
   
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrunlock_excl(&ip->i_lock);
@@@ -335,7 -338,7 +338,7 @@@ xfs_ilock_demote
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrdemote(&ip->i_lock);
         if (lock_flags & XFS_MMAPLOCK_EXCL)
-               mrdemote(&ip->i_mmaplock);
+               downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
         if (lock_flags & XFS_IOLOCK_EXCL)
                 downgrade_write(&VFS_I(ip)->i_rwsem);
   
@@@ -343,9 -346,29 +346,29 @@@
   }
   
   #if defined(DEBUG) || defined(XFS_WARN)
- int
+ static inline bool
+ __xfs_rwsem_islocked(
+       struct rw_semaphore     *rwsem,
+       bool                    shared)
+ {
+       if (!debug_locks)
+               return rwsem_is_locked(rwsem);
+ 
+       if (!shared)
+               return lockdep_is_held_type(rwsem, 0);
+ 
+       /*
+        * We are checking that the lock is held at least in shared
+        * mode but don't care that it might be held exclusively
+        * (i.e. shared | excl). Hence we check if the lock is held
+        * in any mode rather than an explicit shared mode.
+        */
+       return lockdep_is_held_type(rwsem, -1);
+ }
+ 
+ bool
   xfs_isilocked(
-       xfs_inode_t             *ip,
+       struct xfs_inode        *ip,
         uint                    lock_flags)
   {
         if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
@@@ -355,20 -378,17 +378,17 @@@
         }
   
         if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
-               if (!(lock_flags & XFS_MMAPLOCK_SHARED))
-                       return !!ip->i_mmaplock.mr_writer;
-               return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+               return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+                               (lock_flags & XFS_IOLOCK_SHARED));
         }
   
-       if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
-               if (!(lock_flags & XFS_IOLOCK_SHARED))
-                       return !debug_locks ||
-                               lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
-               return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
+       if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
+               return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
+                               (lock_flags & XFS_IOLOCK_SHARED));
         }
   
         ASSERT(0);
-       return 0;
+       return false;
   }
   #endif
   
@@@ -532,12 -552,10 +552,10 @@@ again
   }
   
   /*
-  * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
-  * the mmaplock or the ilock, but not more than one type at a time. If we lock
-  * more than one at a time, lockdep will report false positives saying we have
-  * violated locking orders.  The iolock must be double-locked separately since
-  * we use i_rwsem for that.  We now support taking one lock EXCL and the other
-  * SHARED.
+  * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
+  * mmaplock must be double-locked separately since we use i_rwsem and
+  * invalidate_lock for that. We now support taking one lock EXCL and the
+  * other SHARED.
    */
   void
   xfs_lock_two_inodes(
@@@ -555,15 -573,8 +573,8 @@@
         ASSERT(hweight32(ip1_mode) == 1);
         ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
         ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
-       ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
-              !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-       ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
-              !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-       ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
-              !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
-       ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
-              !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
- 
+       ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+       ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
         ASSERT(ip0->i_ino != ip1->i_ino);
   
         if (ip0->i_ino > ip1->i_ino) {
@@@ -2763,19 -2774,6 +2774,19 @@@ xfs_remove
                 error = xfs_droplink(tp, ip);
                 if (error)
                         goto out_trans_cancel;
+ +
+ +              /*
+ +               * Point the unlinked child directory's ".." entry to the root
+ +               * directory to eliminate back-references to inodes that may
+ +               * get freed before the child directory is closed.  If the fs
+ +               * gets shrunk, this can lead to dirent inode validation errors.
+ +               */
+ +              if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ +                      error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ +                                      tp->t_mountp->m_sb.sb_rootino, 0);
+ +                      if (error)
+ +                              goto out_trans_cancel;
+ +              }
         } else {
                 /*
                  * When removing a non-directory we need to log the parent
@@@ -3741,11 -3739,8 +3752,8 @@@ xfs_ilock2_io_mmap
         ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
         if (ret)
                 return ret;
-       if (ip1 == ip2)
-               xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
-       else
-               xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL,
-                                   ip2, XFS_MMAPLOCK_EXCL);
+       filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
+                                   VFS_I(ip2)->i_mapping);
         return 0;
   }
   
@@@ -3755,12 -3750,9 +3763,9 @@@ xfs_iunlock2_io_mmap
         struct xfs_inode        *ip1,
         struct xfs_inode        *ip2)
   {
-       bool                    same_inode = (ip1 == ip2);
- 
-       xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
-       if (!same_inode)
-               xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
+       filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
+                                     VFS_I(ip2)->i_mapping);
         inode_unlock(VFS_I(ip2));
-       if (!same_inode)
+       if (ip1 != ip2)
                 inode_unlock(VFS_I(ip1));
   }
diff --combined fs/zonefs/super.c

index 70055d4,f323bf3..ddc346a
--- 1/fs/zonefs/super.c
--- 2/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@@ -462,7 -462,7 +462,7 @@@ static int zonefs_file_truncate(struct 
         inode_dio_wait(inode);
   
         /* Serialize against page faults */
-       down_write(&zi->i_mmap_sem);
+       filemap_invalidate_lock(inode->i_mapping);
   
         /* Serialize against zonefs_iomap_begin() */
         mutex_lock(&zi->i_truncate_mutex);
@@@ -500,7 -500,7 +500,7 @@@
   
   unlock:
         mutex_unlock(&zi->i_truncate_mutex);
-       up_write(&zi->i_mmap_sem);
+       filemap_invalidate_unlock(inode->i_mapping);
   
         return ret;
   }
@@@ -575,18 -575,6 +575,6 @@@ static int zonefs_file_fsync(struct fil
         return ret;
   }
   
- static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
- {
-       struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
-       vm_fault_t ret;
- 
-       down_read(&zi->i_mmap_sem);
-       ret = filemap_fault(vmf);
-       up_read(&zi->i_mmap_sem);
- 
-       return ret;
- }
- 
   static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
   {
         struct inode *inode = file_inode(vmf->vma->vm_file);
@@@ -607,16 -595,16 +595,16 @@@
         file_update_time(vmf->vma->vm_file);
   
         /* Serialize against truncates */
-       down_read(&zi->i_mmap_sem);
+       filemap_invalidate_lock_shared(inode->i_mapping);
         ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
-       up_read(&zi->i_mmap_sem);
+       filemap_invalidate_unlock_shared(inode->i_mapping);
   
         sb_end_pagefault(inode->i_sb);
         return ret;
   }
   
   static const struct vm_operations_struct zonefs_file_vm_ops = {
-       .fault          = zonefs_filemap_fault,
+       .fault          = filemap_fault,
         .map_pages      = filemap_map_pages,
         .page_mkwrite   = zonefs_filemap_page_mkwrite,
   };
@@@ -705,6 -693,9 +693,6 @@@ static ssize_t zonefs_file_dio_append(s
                 return 0;
   
         bio = bio_alloc(GFP_NOFS, nr_pages);
- -      if (!bio)
- -              return -ENOMEM;
- -
         bio_set_dev(bio, bdev);
         bio->bi_iter.bi_sector = zi->i_zsector;
         bio->bi_write_hint = iocb->ki_hint;
@@@ -1155,7 -1146,6 +1143,6 @@@ static struct inode *zonefs_alloc_inode
   
         inode_init_once(&zi->i_vnode);
         mutex_init(&zi->i_truncate_mutex);
-       init_rwsem(&zi->i_mmap_sem);
         zi->i_wr_refcnt = 0;
   
         return &zi->i_vnode;
diff --combined include/linux/fs.h

index bea8ec5,894ff24..eb668b5
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -436,6 -436,10 +436,10 @@@ int pagecache_write_end(struct file *, 
    * struct address_space - Contents of a cacheable, mappable object.
    * @host: Owner, either the inode or the block_device.
    * @i_pages: Cached pages.
+  * @invalidate_lock: Guards coherency between page cache contents and
+  *   file offset->disk block mappings in the filesystem during invalidates.
+  *   It is also used to block modification of page cache contents through
+  *   memory mappings.
    * @gfp_mask: Memory allocation flags to use for allocating pages.
    * @i_mmap_writable: Number of VM_SHARED mappings.
    * @nr_thps: Number of THPs in the pagecache (non-shmem only).
@@@ -453,6 -457,7 +457,7 @@@
   struct address_space {
         struct inode            *host;
         struct xarray           i_pages;
+       struct rw_semaphore     invalidate_lock;
         gfp_t                   gfp_mask;
         atomic_t                i_mmap_writable;
   #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@@ -814,9 -819,42 +819,42 @@@ static inline void inode_lock_shared_ne
         down_read_nested(&inode->i_rwsem, subclass);
   }
   
+ static inline void filemap_invalidate_lock(struct address_space *mapping)
+ {
+       down_write(&mapping->invalidate_lock);
+ }
+ 
+ static inline void filemap_invalidate_unlock(struct address_space *mapping)
+ {
+       up_write(&mapping->invalidate_lock);
+ }
+ 
+ static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
+ {
+       down_read(&mapping->invalidate_lock);
+ }
+ 
+ static inline int filemap_invalidate_trylock_shared(
+                                       struct address_space *mapping)
+ {
+       return down_read_trylock(&mapping->invalidate_lock);
+ }
+ 
+ static inline void filemap_invalidate_unlock_shared(
+                                       struct address_space *mapping)
+ {
+       up_read(&mapping->invalidate_lock);
+ }
+ 
   void lock_two_nondirectories(struct inode *, struct inode*);
   void unlock_two_nondirectories(struct inode *, struct inode*);
   
+ void filemap_invalidate_lock_two(struct address_space *mapping1,
+                                struct address_space *mapping2);
+ void filemap_invalidate_unlock_two(struct address_space *mapping1,
+                                  struct address_space *mapping2);
+ 
+ 
   /*
    * NOTE: in a 32bit arch with a preemptable kernel and
    * an UP compile the i_size_read/write must be atomic
@@@ -1507,11 -1545,8 +1545,11 @@@ struct super_block 
         /* Number of inodes with nlink == 0 but still referenced */
         atomic_long_t s_remove_count;
   
- -      /* Pending fsnotify inode refs */
- -      atomic_long_t s_fsnotify_inode_refs;
+ +      /*
+ +       * Number of inode/mount/sb objects that are being watched, note that
+ +       * inodes objects are currently double-accounted.
+ +       */
+ +      atomic_long_t s_fsnotify_connectors;
   
         /* Being remounted read-only */
         int s_readonly_remount;
@@@ -2490,6 -2525,7 +2528,7 @@@ struct file_system_type 
   
         struct lock_class_key i_lock_key;
         struct lock_class_key i_mutex_key;
+       struct lock_class_key invalidate_lock_key;
         struct lock_class_key i_mutex_dir_key;
   };
   
diff --combined mm/madvise.c

index 5c065bc,012129f..56324a3
--- 1/mm/madvise.c
--- 2/mm/madvise.c
+++ b/mm/madvise.c
@@@ -862,12 -862,10 +862,12 @@@ static long madvise_populate(struct vm_
                         switch (pages) {
                         case -EINTR:
                                 return -EINTR;
- -                      case -EFAULT: /* Incompatible mappings / permissions. */
+ +                      case -EINVAL: /* Incompatible mappings / permissions. */
                                 return -EINVAL;
                         case -EHWPOISON:
                                 return -EHWPOISON;
+ +                      case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */
+ +                              return -EFAULT;
                         default:
                                 pr_warn_once("%s: unhandled return value: %ld\n",
                                              __func__, pages);
@@@ -912,7 -910,7 +912,7 @@@ static long madvise_remove(struct vm_ar
                         + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
   
         /*
-        * Filesystem's fallocate may need to take i_mutex.  We need to
+        * Filesystem's fallocate may need to take i_rwsem.  We need to
          * explicitly grab a reference because the vma (and hence the
          * vma's reference to the file) can go away as soon as we drop
          * mmap_lock.
diff --combined mm/memory-failure.c

index 470400c,0edce65..e1f87cf
--- 1/mm/memory-failure.c
--- 2/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@@ -866,7 -866,7 +866,7 @@@ static int me_pagecache_clean(struct pa
         /*
          * Truncation is a bit tricky. Enable it per file system for now.
          *
-        * Open: to take i_mutex or not for this? Right now we don't.
+        * Open: to take i_rwsem or not for this? Right now we don't.
          */
         ret = truncate_error_page(p, pfn, mapping);
   out:
@@@ -1146,7 -1146,7 +1146,7 @@@ static int __get_hwpoison_page(struct p
          * unexpected races caused by taking a page refcount.
          */
         if (!HWPoisonHandlable(head))
- -              return 0;
+ +              return -EBUSY;
   
         if (PageTransHuge(head)) {
                 /*
@@@ -1199,15 -1199,9 +1199,15 @@@ try_again
                         }
                         goto out;
                 } else if (ret == -EBUSY) {
- -                      /* We raced with freeing huge page to buddy, retry. */
- -                      if (pass++ < 3)
+ +                      /*
+ +                       * We raced with (possibly temporary) unhandlable
+ +                       * page, retry.
+ +                       */
+ +                      if (pass++ < 3) {
+ +                              shake_page(p, 1);
                                 goto try_again;
+ +                      }
+ +                      ret = -EIO;
                         goto out;
                 }
         }
diff --combined mm/rmap.c

index b9eb5c1,86471aa..2d29a57
--- 1/mm/rmap.c
--- 2/mm/rmap.c
+++ b/mm/rmap.c
@@@ -20,28 -20,29 +20,29 @@@
   /*
    * Lock ordering in mm:
    *
-  * inode->i_mutex     (while writing or truncating, not reading or faulting)
+  * inode->i_rwsem     (while writing or truncating, not reading or faulting)
    *   mm->mmap_lock
-  *     page->flags PG_locked (lock_page)   * (see huegtlbfs below)
-  *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
-  *         mapping->i_mmap_rwsem
-  *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
-  *           anon_vma->rwsem
-  *             mm->page_table_lock or pte_lock
-  *               swap_lock (in swap_duplicate, swap_info_get)
-  *                 mmlist_lock (in mmput, drain_mmlist and others)
-  *                 mapping->private_lock (in __set_page_dirty_buffers)
-  *                   lock_page_memcg move_lock (in __set_page_dirty_buffers)
-  *                     i_pages lock (widely used)
-  *                       lruvec->lru_lock (in lock_page_lruvec_irq)
-  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
-  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
-  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
-  *                   i_pages lock (widely used, in set_page_dirty,
-  *                             in arch-dependent flush_dcache_mmap_lock,
-  *                             within bdi.wb->list_lock in __sync_single_inode)
+  *     mapping->invalidate_lock (in filemap_fault)
+  *       page->flags PG_locked (lock_page)   * (see hugetlbfs below)
+  *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+  *           mapping->i_mmap_rwsem
+  *             hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+  *             anon_vma->rwsem
+  *               mm->page_table_lock or pte_lock
+  *                 swap_lock (in swap_duplicate, swap_info_get)
+  *                   mmlist_lock (in mmput, drain_mmlist and others)
+  *                   mapping->private_lock (in __set_page_dirty_buffers)
+  *                     lock_page_memcg move_lock (in __set_page_dirty_buffers)
+  *                       i_pages lock (widely used)
+  *                         lruvec->lru_lock (in lock_page_lruvec_irq)
+  *                   inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+  *                   bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+  *                     sb_lock (within inode_lock in fs/fs-writeback.c)
+  *                     i_pages lock (widely used, in set_page_dirty,
+  *                               in arch-dependent flush_dcache_mmap_lock,
+  *                               within bdi.wb->list_lock in __sync_single_inode)
    *
-  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
+  * anon_vma->rwsem,mapping->i_mmap_rwsem   (memory_failure, collect_procs_anon)
    *   ->tasklist_lock
    *     pte map lock
    *
@@@ -1440,20 -1441,21 +1441,20 @@@ static bool try_to_unmap_one(struct pag
                 /*
                  * If the page is mlock()d, we cannot swap it out.
                  */
- -              if (!(flags & TTU_IGNORE_MLOCK)) {
- -                      if (vma->vm_flags & VM_LOCKED) {
- -                              /* PTE-mapped THP are never marked as mlocked */
- -                              if (!PageTransCompound(page) ||
- -                                  (PageHead(page) && !PageDoubleMap(page))) {
- -                                      /*
- -                                       * Holding pte lock, we do *not* need
- -                                       * mmap_lock here
- -                                       */
- -                                      mlock_vma_page(page);
- -                              }
- -                              ret = false;
- -                              page_vma_mapped_walk_done(&pvmw);
- -                              break;
- -                      }
+ +              if (!(flags & TTU_IGNORE_MLOCK) &&
+ +                  (vma->vm_flags & VM_LOCKED)) {
+ +                      /*
+ +                       * PTE-mapped THP are never marked as mlocked: so do
+ +                       * not set it on a DoubleMap THP, nor on an Anon THP
+ +                       * (which may still be PTE-mapped after DoubleMap was
+ +                       * cleared).  But stop unmapping even in those cases.
+ +                       */
+ +                      if (!PageTransCompound(page) || (PageHead(page) &&
+ +                           !PageDoubleMap(page) && !PageAnon(page)))
+ +                              mlock_vma_page(page);
+ +                      page_vma_mapped_walk_done(&pvmw);
+ +                      ret = false;
+ +                      break;
                 }
   
                 /* Unexpected PMD-mapped THP? */
@@@ -1985,10 -1987,8 +1986,10 @@@ static bool page_mlock_one(struct page 
                  */
                 if (vma->vm_flags & VM_LOCKED) {
                         /*
- -                       * PTE-mapped THP are never marked as mlocked, but
- -                       * this function is never called when PageDoubleMap().
+ +                       * PTE-mapped THP are never marked as mlocked; but
+ +                       * this function is never called on a DoubleMap THP,
+ +                       * nor on an Anon THP (which may still be PTE-mapped
+ +                       * after DoubleMap was cleared).
                          */
                         mlock_vma_page(page);
                         /*
@@@ -2023,10 -2023,6 +2024,10 @@@ void page_mlock(struct page *page
         VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
         VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
   
+ +      /* Anon THP are only marked as mlocked when singly mapped */
+ +      if (PageTransCompound(page) && PageAnon(page))
+ +              return;
+ +
         rmap_walk(page, &rwc);
   }
   
diff --combined mm/shmem.c

index dacda74,9af4b21..3107ace
--- 1/mm/shmem.c
--- 2/mm/shmem.c
+++ b/mm/shmem.c
@@@ -96,7 -96,7 +96,7 @@@ static struct vfsmount *shm_mnt
   
   /*
    * shmem_fallocate communicates with shmem_fault or shmem_writepage via
-  * inode->i_private (with i_mutex making sure that it has only one user at
+  * inode->i_private (with i_rwsem making sure that it has only one user at
    * a time): we would prefer not to enlarge the shmem inode just for that.
    */
   struct shmem_falloc {
@@@ -774,7 -774,7 +774,7 @@@ static int shmem_free_swap(struct addre
    * Determine (in bytes) how many of the shmem object's pages mapped by the
    * given offsets are swapped out.
    *
-  * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+  * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
    * as long as the inode doesn't go away and racy results are not a problem.
    */
   unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@@ -806,7 -806,7 +806,7 @@@
    * Determine (in bytes) how many of the shmem object's pages mapped by the
    * given vma is swapped out.
    *
-  * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+  * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
    * as long as the inode doesn't go away and racy results are not a problem.
    */
   unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@@ -1069,7 -1069,7 +1069,7 @@@ static int shmem_setattr(struct user_na
                 loff_t oldsize = inode->i_size;
                 loff_t newsize = attr->ia_size;
   
-               /* protected by i_mutex */
+               /* protected by i_rwsem */
                 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
                     (newsize > oldsize && (info->seals & F_SEAL_GROW)))
                         return -EPERM;
@@@ -1696,7 -1696,8 +1696,7 @@@ static int shmem_swapin_page(struct ino
         struct address_space *mapping = inode->i_mapping;
         struct shmem_inode_info *info = SHMEM_I(inode);
         struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
- -      struct swap_info_struct *si;
- -      struct page *page = NULL;
+ +      struct page *page;
         swp_entry_t swap;
         int error;
   
@@@ -1704,6 -1705,12 +1704,6 @@@
         swap = radix_to_swp_entry(*pagep);
         *pagep = NULL;
   
- -      /* Prevent swapoff from happening to us. */
- -      si = get_swap_device(swap);
- -      if (!si) {
- -              error = EINVAL;
- -              goto failed;
- -      }
         /* Look it up and read it in.. */
         page = lookup_swap_cache(swap, NULL, 0);
         if (!page) {
@@@ -1765,6 -1772,8 +1765,6 @@@
         swap_free(swap);
   
         *pagep = page;
- -      if (si)
- -              put_swap_device(si);
         return 0;
   failed:
         if (!shmem_confirm_swap(mapping, index, swap))
@@@ -1775,6 -1784,9 +1775,6 @@@ unlock
                 put_page(page);
         }
   
- -      if (si)
- -              put_swap_device(si);
- -
         return error;
   }
   
@@@ -2059,7 -2071,7 +2059,7 @@@ static vm_fault_t shmem_fault(struct vm
         /*
          * Trinity finds that probing a hole which tmpfs is punching can
          * prevent the hole-punch from ever completing: which in turn
-        * locks writers out with its hold on i_mutex.  So refrain from
+        * locks writers out with its hold on i_rwsem.  So refrain from
          * faulting pages into the hole while it's being punched.  Although
          * shmem_undo_range() does remove the additions, it may be unable to
          * keep up, as each new page needs its own unmap_mapping_range() call,
@@@ -2070,7 -2082,7 +2070,7 @@@
          * we just need to make racing faults a rare case.
          *
          * The implementation below would be much simpler if we just used a
-        * standard mutex or completion: but we cannot take i_mutex in fault,
+        * standard mutex or completion: but we cannot take i_rwsem in fault,
          * and bloating every shmem inode for this unlikely case would be sad.
          */
         if (unlikely(inode->i_private)) {
@@@ -2470,7 -2482,7 +2470,7 @@@ shmem_write_begin(struct file *file, st
         struct shmem_inode_info *info = SHMEM_I(inode);
         pgoff_t index = pos >> PAGE_SHIFT;
   
-       /* i_mutex is held by caller */
+       /* i_rwsem is held by caller */
         if (unlikely(info->seals & (F_SEAL_GROW |
                                    F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
                 if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
@@@ -2570,7 -2582,7 +2570,7 @@@ static ssize_t shmem_file_read_iter(str
   
                 /*
                  * We must evaluate after, since reads (unlike writes)
-                * are called without i_mutex protection against truncate
+                * are called without i_rwsem protection against truncate
                  */
                 nr = PAGE_SIZE;
                 i_size = i_size_read(inode);
@@@ -2640,7 -2652,7 +2640,7 @@@ static loff_t shmem_file_llseek(struct 
                 return -ENXIO;
   
         inode_lock(inode);
-       /* We're holding i_mutex so we can access i_size directly */
+       /* We're holding i_rwsem so we can access i_size directly */
         offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
         if (offset >= 0)
                 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
@@@ -2669,7 -2681,7 +2669,7 @@@ static long shmem_fallocate(struct fil
                 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
                 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
   
-               /* protected by i_mutex */
+               /* protected by i_rwsem */
                 if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
                         error = -EPERM;
                         goto out;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 30 Aug 2021 17:24:50 +0000 (10:24 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 30 Aug 2021 17:24:50 +0000 (10:24 -0700)
		1	2
fs/cifs/smb2ops.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext2/ext2.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext2/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fuse/dax.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/zonefs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
mm/madvise.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/memory-failure.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/rmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/shmem.c	patch \|	diff1 \|	diff2 \|	blob \| history