Merge tag 'ovl-update-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)
diff --combined Documentation/filesystems/locking.rst

index 2a75dd5,899fa9a..d36fe79
--- 1/Documentation/filesystems/locking.rst
--- 2/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@@ -70,7 -70,7 +70,7 @@@ prototypes:
         const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
         void (*truncate) (struct inode *);
         int (*permission) (struct inode *, int, unsigned int);
-       int (*get_acl)(struct inode *, int);
+       struct posix_acl * (*get_acl)(struct inode *, int, bool);
         int (*setattr) (struct dentry *, struct iattr *);
         int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
         ssize_t (*listxattr) (struct dentry *, char *, size_t);
@@@ -271,19 -271,19 +271,19 @@@ prototypes:
   locking rules:
         All except set_page_dirty and freepage may block
   
- -======================        ======================== =========
- -ops                   PageLocked(page)         i_rwsem
- -======================        ======================== =========
+ +======================        ======================== =========      ===============
+ +ops                   PageLocked(page)         i_rwsem        invalidate_lock
+ +======================        ======================== =========      ===============
   writepage:            yes, unlocks (see below)
- -readpage:             yes, unlocks
+ +readpage:             yes, unlocks                            shared
   writepages:
   set_page_dirty                no
- -readahead:            yes, unlocks
- -readpages:            no
+ +readahead:            yes, unlocks                            shared
+ +readpages:            no                                      shared
   write_begin:          locks the page           exclusive
   write_end:            yes, unlocks             exclusive
   bmap:
- -invalidatepage:               yes
+ +invalidatepage:               yes                                     exclusive
   releasepage:          yes
   freepage:             yes
   direct_IO:
@@@ -295,7 -295,7 +295,7 @@@ is_partially_uptodate:     ye
   error_remove_page:    yes
   swap_activate:                no
   swap_deactivate:      no
- -======================        ======================== =========
+ +======================        ======================== =========      ===============
   
   ->write_begin(), ->write_end() and ->readpage() may be called from
   the request handler (/dev/loop).
@@@ -378,10 -378,7 +378,10 @@@ keep it that way and don't breed new ca
   ->invalidatepage() is called when the filesystem must attempt to drop
   some or all of the buffers from the page when it is being truncated. It
   returns zero on success. If ->invalidatepage is zero, the kernel uses
- -block_invalidatepage() instead.
+ +block_invalidatepage() instead. The filesystem must exclusively acquire
+ +invalidate_lock before invalidating page cache in truncate / hole punch path
+ +(and thus calling into ->invalidatepage) to block races between page cache
+ +invalidation and page cache filling functions (fault, read, ...).
   
   ->releasepage() is called when the kernel is about to try to drop the
   buffers from the page in preparation for freeing it.  It returns zero to
@@@ -509,7 -506,6 +509,7 @@@ prototypes:
         ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
         ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
         ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ +      int (*iopoll) (struct kiocb *kiocb, bool spin);
         int (*iterate) (struct file *, struct dir_context *);
         int (*iterate_shared) (struct file *, struct dir_context *);
         __poll_t (*poll) (struct file *, struct poll_table_struct *);
@@@ -522,6 -518,12 +522,6 @@@
         int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
         int (*fasync) (int, struct file *, int);
         int (*lock) (struct file *, int, struct file_lock *);
- -      ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
- -                      loff_t *);
- -      ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
- -                      loff_t *);
- -      ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
- -                      void __user *);
         ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
                         loff_t *, int);
         unsigned long (*get_unmapped_area)(struct file *, unsigned long,
@@@ -534,14 -536,6 +534,14 @@@
                         size_t, unsigned int);
         int (*setlease)(struct file *, long, struct file_lock **, void **);
         long (*fallocate)(struct file *, int, loff_t, loff_t);
+ +      void (*show_fdinfo)(struct seq_file *m, struct file *f);
+ +      unsigned (*mmap_capabilities)(struct file *);
+ +      ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
+ +                      loff_t, size_t, unsigned int);
+ +      loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+ +                      struct file *file_out, loff_t pos_out,
+ +                      loff_t len, unsigned int remap_flags);
+ +      int (*fadvise)(struct file *, loff_t, loff_t, int);
   
   locking rules:
         All may block.
@@@ -576,25 -570,6 +576,25 @@@ in sys_read() and friends
   the lease within the individual filesystem to record the result of the
   operation
   
+ +->fallocate implementation must be really careful to maintain page cache
+ +consistency when punching holes or performing other operations that invalidate
+ +page cache contents. Usually the filesystem needs to call
+ +truncate_inode_pages_range() to invalidate the relevant range of the page cache.
+ +However the filesystem usually also needs to update its internal (and on disk)
+ +view of file offset -> disk block mapping. Until this update is finished, the
+ +filesystem needs to block page faults and reads from reloading now-stale page
+ +cache contents from the disk. Since VFS acquires mapping->invalidate_lock in
+ +shared mode when loading pages from disk (filemap_fault(), filemap_read(),
+ +readahead paths), the fallocate implementation must take the invalidate_lock to
+ +prevent reloading.
+ +
+ +->copy_file_range and ->remap_file_range implementations need to serialize
+ +against modifications of file data while the operation is running. For
+ +blocking changes through write(2) and similar operations inode->i_rwsem can be
+ +used. To block changes to file contents via a memory mapping during the
+ +operation, the filesystem must take mapping->invalidate_lock to coordinate
+ +with ->page_mkwrite.
+ +
   dquot_operations
   ================
   
@@@ -652,11 -627,11 +652,11 @@@ pfn_mkwrite:    ye
   access:               yes
   ============= =========       ===========================
   
- -->fault() is called when a previously not present pte is about
- -to be faulted in. The filesystem must find and return the page associated
- -with the passed in "pgoff" in the vm_fault structure. If it is possible that
- -the page may be truncated and/or invalidated, then the filesystem must lock
- -the page, then ensure it is not already truncated (the page lock will block
+ +->fault() is called when a previously not present pte is about to be faulted
+ +in. The filesystem must find and return the page associated with the passed in
+ +"pgoff" in the vm_fault structure. If it is possible that the page may be
+ +truncated and/or invalidated, then the filesystem must lock invalidate_lock,
+ +then ensure the page is not already truncated (invalidate_lock will block
   subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
   locked. The VM will unlock the page.
   
@@@ -669,14 -644,12 +669,14 @@@ page table entry. Pointer to entry asso
   "pte" field in vm_fault structure. Pointers to entries for other offsets
   should be calculated relative to "pte".
   
- -->page_mkwrite() is called when a previously read-only pte is
- -about to become writeable. The filesystem again must ensure that there are
- -no truncate/invalidate races, and then return with the page locked. If
- -the page has been truncated, the filesystem should not look up a new page
- -like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
- -will cause the VM to retry the fault.
+ +->page_mkwrite() is called when a previously read-only pte is about to become
+ +writeable. The filesystem again must ensure that there are no
+ +truncate/invalidate races or races with operations such as ->remap_file_range
+ +or ->copy_file_range, and then return with the page locked. Usually
+ +mapping->invalidate_lock is suitable for proper serialization. If the page has
+ +been truncated, the filesystem should not look up a new page like the ->fault()
+ +handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to
+ +retry the fault.
   
   ->pfn_mkwrite() is the same as page_mkwrite but when the pte is
   VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
diff --combined fs/btrfs/acl.c

index c9f9789,3d00bb5..0a0d0ec
--- 1/fs/btrfs/acl.c
--- 2/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@@ -16,13 -16,16 +16,16 @@@
   #include "btrfs_inode.h"
   #include "xattr.h"
   
- struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+ struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu)
   {
         int size;
         const char *name;
         char *value = NULL;
         struct posix_acl *acl;
   
+       if (rcu)
+               return ERR_PTR(-ECHILD);
+ 
         switch (type) {
         case ACL_TYPE_ACCESS:
                 name = XATTR_NAME_POSIX_ACL_ACCESS;
@@@ -53,8 -56,7 +56,8 @@@
   }
   
   static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
- -                       struct inode *inode, struct posix_acl *acl, int type)
+ +                         struct user_namespace *mnt_userns,
+ +                         struct inode *inode, struct posix_acl *acl, int type)
   {
         int ret, size = 0;
         const char *name;
@@@ -115,12 -117,12 +118,12 @@@ int btrfs_set_acl(struct user_namespac
         umode_t old_mode = inode->i_mode;
   
         if (type == ACL_TYPE_ACCESS && acl) {
- -              ret = posix_acl_update_mode(&init_user_ns, inode,
+ +              ret = posix_acl_update_mode(mnt_userns, inode,
                                             &inode->i_mode, &acl);
                 if (ret)
                         return ret;
         }
- -      ret = __btrfs_set_acl(NULL, inode, acl, type);
+ +      ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
         if (ret)
                 inode->i_mode = old_mode;
         return ret;
@@@ -141,14 -143,14 +144,14 @@@ int btrfs_init_acl(struct btrfs_trans_h
                 return ret;
   
         if (default_acl) {
- -              ret = __btrfs_set_acl(trans, inode, default_acl,
+ +              ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
                                       ACL_TYPE_DEFAULT);
                 posix_acl_release(default_acl);
         }
   
         if (acl) {
                 if (!ret)
- -                      ret = __btrfs_set_acl(trans, inode, acl,
+ +                      ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
                                               ACL_TYPE_ACCESS);
                 posix_acl_release(acl);
         }
diff --combined fs/btrfs/ctree.h

index f07c82f,ca5c7cb..dff2c8a
--- 1/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -281,8 -281,7 +281,8 @@@ struct btrfs_super_block 
   
   #define BTRFS_FEATURE_COMPAT_RO_SUPP                  \
         (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |      \
- -       BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+ +       BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
+ +       BTRFS_FEATURE_COMPAT_RO_VERITY)
   
   #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET      0ULL
   #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR    0ULL
@@@ -1013,6 -1012,8 +1013,6 @@@ struct btrfs_fs_info 
                 u64 zoned;
         };
   
- -      /* Max size to emit ZONE_APPEND write command */
- -      u64 max_zone_append_size;
         struct mutex zoned_meta_io_lock;
         spinlock_t treelog_bg_lock;
         u64 treelog_bg;
@@@ -1483,20 -1484,20 +1483,20 @@@ do 
   /*
    * Inode flags
    */
- -#define BTRFS_INODE_NODATASUM         (1 << 0)
- -#define BTRFS_INODE_NODATACOW         (1 << 1)
- -#define BTRFS_INODE_READONLY          (1 << 2)
- -#define BTRFS_INODE_NOCOMPRESS                (1 << 3)
- -#define BTRFS_INODE_PREALLOC          (1 << 4)
- -#define BTRFS_INODE_SYNC              (1 << 5)
- -#define BTRFS_INODE_IMMUTABLE         (1 << 6)
- -#define BTRFS_INODE_APPEND            (1 << 7)
- -#define BTRFS_INODE_NODUMP            (1 << 8)
- -#define BTRFS_INODE_NOATIME           (1 << 9)
- -#define BTRFS_INODE_DIRSYNC           (1 << 10)
- -#define BTRFS_INODE_COMPRESS          (1 << 11)
- -
- -#define BTRFS_INODE_ROOT_ITEM_INIT    (1 << 31)
+ +#define BTRFS_INODE_NODATASUM         (1U << 0)
+ +#define BTRFS_INODE_NODATACOW         (1U << 1)
+ +#define BTRFS_INODE_READONLY          (1U << 2)
+ +#define BTRFS_INODE_NOCOMPRESS                (1U << 3)
+ +#define BTRFS_INODE_PREALLOC          (1U << 4)
+ +#define BTRFS_INODE_SYNC              (1U << 5)
+ +#define BTRFS_INODE_IMMUTABLE         (1U << 6)
+ +#define BTRFS_INODE_APPEND            (1U << 7)
+ +#define BTRFS_INODE_NODUMP            (1U << 8)
+ +#define BTRFS_INODE_NOATIME           (1U << 9)
+ +#define BTRFS_INODE_DIRSYNC           (1U << 10)
+ +#define BTRFS_INODE_COMPRESS          (1U << 11)
+ +
+ +#define BTRFS_INODE_ROOT_ITEM_INIT    (1U << 31)
   
   #define BTRFS_INODE_FLAG_MASK                                         \
         (BTRFS_INODE_NODATASUM |                                        \
@@@ -1513,10 -1514,6 +1513,10 @@@
          BTRFS_INODE_COMPRESS |                                         \
          BTRFS_INODE_ROOT_ITEM_INIT)
   
+ +#define BTRFS_INODE_RO_VERITY         (1U << 0)
+ +
+ +#define BTRFS_INODE_RO_FLAG_MASK      (BTRFS_INODE_RO_VERITY)
+ +
   struct btrfs_map_token {
         struct extent_buffer *eb;
         char *kaddr;
@@@ -2784,11 -2781,10 +2784,11 @@@ enum btrfs_flush_state 
         FLUSH_DELAYED_REFS      =       4,
         FLUSH_DELALLOC          =       5,
         FLUSH_DELALLOC_WAIT     =       6,
- -      ALLOC_CHUNK             =       7,
- -      ALLOC_CHUNK_FORCE       =       8,
- -      RUN_DELAYED_IPUTS       =       9,
- -      COMMIT_TRANS            =       10,
+ +      FLUSH_DELALLOC_FULL     =       7,
+ +      ALLOC_CHUNK             =       8,
+ +      ALLOC_CHUNK_FORCE       =       9,
+ +      RUN_DELAYED_IPUTS       =       10,
+ +      COMMIT_TRANS            =       11,
   };
   
   int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
@@@ -2905,13 -2901,10 +2905,13 @@@ static inline int btrfs_insert_empty_it
         return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
   }
   
- -int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
   int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
   int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
                         u64 time_seq);
+ +
+ +int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
+ +                         struct btrfs_path *path);
+ +
   static inline int btrfs_next_old_item(struct btrfs_root *root,
                                       struct btrfs_path *p, u64 time_seq)
   {
@@@ -2920,18 -2913,6 +2920,18 @@@
                 return btrfs_next_old_leaf(root, p, time_seq);
         return 0;
   }
+ +
+ +/*
+ + * Search the tree again to find a leaf with greater keys.
+ + *
+ + * Returns 0 if it found something or 1 if there are no greater leaves.
+ + * Returns < 0 on error.
+ + */
+ +static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
+ +{
+ +      return btrfs_next_old_leaf(root, path, 0);
+ +}
+ +
   static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
   {
         return btrfs_next_old_item(root, p, 0);
@@@ -3164,8 -3145,7 +3164,8 @@@ int btrfs_set_extent_delalloc(struct bt
                               struct extent_state **cached_state);
   int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
                              struct btrfs_root *new_root,
- -                           struct btrfs_root *parent_root);
+ +                           struct btrfs_root *parent_root,
+ +                           struct user_namespace *mnt_userns);
    void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
                                unsigned *bits);
   void btrfs_clear_delalloc_extent(struct inode *inode,
@@@ -3214,10 -3194,10 +3214,10 @@@ int btrfs_prealloc_file_range_trans(str
   int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
                 u64 start, u64 end, int *page_started, unsigned long *nr_written,
                 struct writeback_control *wbc);
- -int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
+ +int btrfs_writepage_cow_fixup(struct page *page);
   void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
                                           struct page *page, u64 start,
- -                                        u64 end, int uptodate);
+ +                                        u64 end, bool uptodate);
   extern const struct dentry_operations btrfs_dentry_operations;
   extern const struct iomap_ops btrfs_dio_iomap_ops;
   extern const struct iomap_dio_ops btrfs_dio_ops;
@@@ -3706,7 -3686,7 +3706,7 @@@ static inline int __btrfs_fs_compat_ro(
   
   /* acl.c */
   #ifdef CONFIG_BTRFS_FS_POSIX_ACL
- struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
+ struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
   int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
                   struct posix_acl *acl, int type);
   int btrfs_init_acl(struct btrfs_trans_handle *trans,
@@@ -3799,30 -3779,6 +3799,30 @@@ static inline int btrfs_defrag_cancelle
         return signal_pending(current);
   }
   
+ +/* verity.c */
+ +#ifdef CONFIG_FS_VERITY
+ +
+ +extern const struct fsverity_operations btrfs_verityops;
+ +int btrfs_drop_verity_items(struct btrfs_inode *inode);
+ +
+ +BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item,
+ +                 encryption, 8);
+ +BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item,
+ +                 size, 64);
+ +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption,
+ +                       struct btrfs_verity_descriptor_item, encryption, 8);
+ +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size,
+ +                       struct btrfs_verity_descriptor_item, size, 64);
+ +
+ +#else
+ +
+ +static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
+ +{
+ +      return 0;
+ +}
+ +
+ +#endif
+ +
   /* Sanity test specific functions */
   #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
   void btrfs_test_destroy_inode(struct inode *inode);
diff --combined fs/ceph/super.h

index b1a3636,b951268..c30258f
--- 1/fs/ceph/super.h
--- 2/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@@ -182,9 -182,8 +182,9 @@@ struct ceph_cap 
   
   struct ceph_cap_flush {
         u64 tid;
- -      int caps; /* 0 means capsnap */
+ +      int caps;
         bool wake; /* wake up flush waiters when finish ? */
+ +      bool is_capsnap; /* true means capsnap */
         struct list_head g_list; // global
         struct list_head i_list; // per inode
   };
@@@ -1088,7 -1087,7 +1088,7 @@@ void ceph_release_acl_sec_ctx(struct ce
   /* acl.c */
   #ifdef CONFIG_CEPH_FS_POSIX_ACL
   
- struct posix_acl *ceph_get_acl(struct inode *, int);
+ struct posix_acl *ceph_get_acl(struct inode *, int, bool);
   int ceph_set_acl(struct user_namespace *mnt_userns,
                  struct inode *inode, struct posix_acl *acl, int type);
   int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
diff --combined fs/fuse/fuse_i.h

index 6fb639b,f414094..3d18556
--- 1/fs/fuse/fuse_i.h
--- 2/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@@ -149,6 -149,13 +149,6 @@@ struct fuse_inode 
         /** Lock to protect write related fields */
         spinlock_t lock;
   
- -      /**
- -       * Can't take inode lock in fault path (leads to circular dependency).
- -       * Introduce another semaphore which can be taken in fault path and
- -       * then other filesystem paths can take this to block faults.
- -       */
- -      struct rw_semaphore i_mmap_sem;
- -
   #ifdef CONFIG_FUSE_DAX
         /*
          * Dax specific inode data
@@@ -1209,7 -1216,7 +1209,7 @@@ extern const struct xattr_handler *fuse
   extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
   
   struct posix_acl;
- struct posix_acl *fuse_get_acl(struct inode *inode, int type);
+ struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu);
   int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
                  struct posix_acl *acl, int type);
   
diff --combined fs/xfs/xfs_acl.c

index f7fc1d2,9e8ac9f..5c52ee8
--- 1/fs/xfs/xfs_acl.c
--- 2/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@@ -125,7 -125,7 +125,7 @@@ xfs_acl_to_disk(struct xfs_acl *aclp, c
   }
   
   struct posix_acl *
- xfs_get_acl(struct inode *inode, int type)
+ xfs_get_acl(struct inode *inode, int type, bool rcu)
   {
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
@@@ -137,6 -137,9 +137,9 @@@
         };
         int                     error;
   
+       if (rcu)
+               return ERR_PTR(-ECHILD);
+ 
         trace_xfs_get_acl(ip);
   
         switch (type) {
@@@ -232,7 -235,7 +235,7 @@@ xfs_acl_set_mode
         inode->i_ctime = current_time(inode);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
   
- -      if (mp->m_flags & XFS_MOUNT_WSYNC)
+ +      if (xfs_has_wsync(mp))
                 xfs_trans_set_sync(tp);
         return xfs_trans_commit(tp);
   }
diff --combined include/linux/fs.h

index 1c01f9f,c6e5bcb..a6074cd
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -319,8 -319,6 +319,8 @@@ enum rw_hint 
   /* iocb->ki_waitq is valid */
   #define IOCB_WAITQ            (1 << 19)
   #define IOCB_NOIO             (1 << 20)
+ +/* can use bio alloc cache */
+ +#define IOCB_ALLOC_CACHE      (1 << 21)
   
   struct kiocb {
         struct file             *ki_filp;
@@@ -438,10 -436,6 +438,10 @@@ int pagecache_write_end(struct file *, 
    * struct address_space - Contents of a cacheable, mappable object.
    * @host: Owner, either the inode or the block_device.
    * @i_pages: Cached pages.
+ + * @invalidate_lock: Guards coherency between page cache contents and
+ + *   file offset->disk block mappings in the filesystem during invalidates.
+ + *   It is also used to block modification of page cache contents through
+ + *   memory mappings.
    * @gfp_mask: Memory allocation flags to use for allocating pages.
    * @i_mmap_writable: Number of VM_SHARED mappings.
    * @nr_thps: Number of THPs in the pagecache (non-shmem only).
@@@ -459,7 -453,6 +459,7 @@@
   struct address_space {
         struct inode            *host;
         struct xarray           i_pages;
+ +      struct rw_semaphore     invalidate_lock;
         gfp_t                   gfp_mask;
         atomic_t                i_mmap_writable;
   #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@@ -588,6 -581,11 +588,11 @@@ static inline void mapping_allow_writab
   
   struct posix_acl;
   #define ACL_NOT_CACHED ((void *)(-1))
+ /*
+  * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
+  * cache the ACL.  This also means that ->get_acl() can be called in RCU mode
+  * with the LOOKUP_RCU flag.
+  */
   #define ACL_DONT_CACHE ((void *)(-3))
   
   static inline struct posix_acl *
@@@ -821,42 -819,9 +826,42 @@@ static inline void inode_lock_shared_ne
         down_read_nested(&inode->i_rwsem, subclass);
   }
   
+ +static inline void filemap_invalidate_lock(struct address_space *mapping)
+ +{
+ +      down_write(&mapping->invalidate_lock);
+ +}
+ +
+ +static inline void filemap_invalidate_unlock(struct address_space *mapping)
+ +{
+ +      up_write(&mapping->invalidate_lock);
+ +}
+ +
+ +static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
+ +{
+ +      down_read(&mapping->invalidate_lock);
+ +}
+ +
+ +static inline int filemap_invalidate_trylock_shared(
+ +                                      struct address_space *mapping)
+ +{
+ +      return down_read_trylock(&mapping->invalidate_lock);
+ +}
+ +
+ +static inline void filemap_invalidate_unlock_shared(
+ +                                      struct address_space *mapping)
+ +{
+ +      up_read(&mapping->invalidate_lock);
+ +}
+ +
   void lock_two_nondirectories(struct inode *, struct inode*);
   void unlock_two_nondirectories(struct inode *, struct inode*);
   
+ +void filemap_invalidate_lock_two(struct address_space *mapping1,
+ +                               struct address_space *mapping2);
+ +void filemap_invalidate_unlock_two(struct address_space *mapping1,
+ +                                 struct address_space *mapping2);
+ +
+ +
   /*
    * NOTE: in a 32bit arch with a preemptable kernel and
    * an UP compile the i_size_read/write must be atomic
@@@ -1037,7 -1002,6 +1042,7 @@@ static inline struct file *get_file(str
   #define FL_UNLOCK_PENDING     512 /* Lease is being broken */
   #define FL_OFDLCK     1024    /* lock is "owned" by struct file */
   #define FL_LAYOUT     2048    /* outstanding pNFS layout */
+ +#define FL_RECLAIM    4096    /* reclaiming from a rebooted server */
   
   #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
   
@@@ -1548,11 -1512,8 +1553,11 @@@ struct super_block 
         /* Number of inodes with nlink == 0 but still referenced */
         atomic_long_t s_remove_count;
   
- -      /* Pending fsnotify inode refs */
- -      atomic_long_t s_fsnotify_inode_refs;
+ +      /*
+ +       * Number of inode/mount/sb objects that are being watched, note that
+ +       * inode objects are currently double-accounted.
+ +       */
+ +      atomic_long_t s_fsnotify_connectors;
   
         /* Being remounted read-only */
         int s_readonly_remount;
@@@ -2109,7 -2070,7 +2114,7 @@@ struct inode_operations 
         struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
         const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
         int (*permission) (struct user_namespace *, struct inode *, int);
-       struct posix_acl * (*get_acl)(struct inode *, int);
+       struct posix_acl * (*get_acl)(struct inode *, int, bool);
   
         int (*readlink) (struct dentry *, char __user *,int);
   
@@@ -2501,6 -2462,7 +2506,6 @@@ static inline void file_accessed(struc
   
   extern int file_modified(struct file *file);
   
- -int sync_inode(struct inode *inode, struct writeback_control *wbc);
   int sync_inode_metadata(struct inode *inode, int wait);
   
   struct file_system_type {
@@@ -2530,7 -2492,6 +2535,7 @@@
   
         struct lock_class_key i_lock_key;
         struct lock_class_key i_mutex_key;
+ +      struct lock_class_key invalidate_lock_key;
         struct lock_class_key i_mutex_dir_key;
   };
   
@@@ -2614,6 -2575,90 +2619,6 @@@ extern struct kobject *fs_kobj
   
   #define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
   
- -#ifdef CONFIG_MANDATORY_FILE_LOCKING
- -extern int locks_mandatory_locked(struct file *);
- -extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
- -
- -/*
- - * Candidates for mandatory locking have the setgid bit set
- - * but no group execute bit -  an otherwise meaningless combination.
- - */
- -
- -static inline int __mandatory_lock(struct inode *ino)
- -{
- -      return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
- -}
- -
- -/*
- - * ... and these candidates should be on SB_MANDLOCK mounted fs,
- - * otherwise these will be advisory locks
- - */
- -
- -static inline int mandatory_lock(struct inode *ino)
- -{
- -      return IS_MANDLOCK(ino) && __mandatory_lock(ino);
- -}
- -
- -static inline int locks_verify_locked(struct file *file)
- -{
- -      if (mandatory_lock(locks_inode(file)))
- -              return locks_mandatory_locked(file);
- -      return 0;
- -}
- -
- -static inline int locks_verify_truncate(struct inode *inode,
- -                                  struct file *f,
- -                                  loff_t size)
- -{
- -      if (!inode->i_flctx || !mandatory_lock(inode))
- -              return 0;
- -
- -      if (size < inode->i_size) {
- -              return locks_mandatory_area(inode, f, size, inode->i_size - 1,
- -                              F_WRLCK);
- -      } else {
- -              return locks_mandatory_area(inode, f, inode->i_size, size - 1,
- -                              F_WRLCK);
- -      }
- -}
- -
- -#else /* !CONFIG_MANDATORY_FILE_LOCKING */
- -
- -static inline int locks_mandatory_locked(struct file *file)
- -{
- -      return 0;
- -}
- -
- -static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
- -                                       loff_t start, loff_t end, unsigned char type)
- -{
- -      return 0;
- -}
- -
- -static inline int __mandatory_lock(struct inode *inode)
- -{
- -      return 0;
- -}
- -
- -static inline int mandatory_lock(struct inode *inode)
- -{
- -      return 0;
- -}
- -
- -static inline int locks_verify_locked(struct file *file)
- -{
- -      return 0;
- -}
- -
- -static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
- -                                      size_t size)
- -{
- -      return 0;
- -}
- -
- -#endif /* CONFIG_MANDATORY_FILE_LOCKING */
- -
- -
   #ifdef CONFIG_FILE_LOCKING
   static inline int break_lease(struct inode *inode, unsigned int mode)
   {
@@@ -2746,7 -2791,6 +2751,7 @@@ static inline struct file *file_clone_o
   extern int filp_close(struct file *, fl_owner_t id);
   
   extern struct filename *getname_flags(const char __user *, int, int *);
+ +extern struct filename *getname_uflags(const char __user *, int);
   extern struct filename *getname(const char __user *);
   extern struct filename *getname_kernel(const char *);
   extern void putname(struct filename *name);
@@@ -2852,8 -2896,6 +2857,8 @@@ extern int filemap_fdatawrite_range(str
                                 loff_t start, loff_t end);
   extern int filemap_check_errors(struct address_space *mapping);
   extern void __filemap_set_wb_err(struct address_space *mapping, int err);
+ +int filemap_fdatawrite_wbc(struct address_space *mapping,
+ +                         struct writeback_control *wbc);
   
   static inline int filemap_write_and_wait(struct address_space *mapping)
   {
@@@ -3209,6 -3251,10 +3214,6 @@@ ssize_t vfs_iocb_iter_read(struct file 
   ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
                             struct iov_iter *iter);
   
- -/* fs/block_dev.c */
- -extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- -                      int datasync);
- -
   /* fs/splice.c */
   extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                 struct pipe_inode_info *, size_t, unsigned int);
@@@ -3314,6 -3360,7 +3319,7 @@@ extern int page_symlink(struct inode *i
   extern const struct inode_operations page_symlink_inode_operations;
   extern void kfree_link(void *);
   void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *);
+ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
   extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
   extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
   void __inode_add_bytes(struct inode *inode, loff_t bytes);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 2 Sep 2021 16:21:27 +0000 (09:21 -0700)
		1	2
Documentation/filesystems/locking.rst	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/acl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ctree.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ceph/super.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fuse/fuse_i.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_acl.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history