Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
diff --combined drivers/md/dm-table.c

index b03eabc,b53acca..2111daa
--- 1/drivers/md/dm-table.c
--- 2/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@@ -809,14 -809,9 +809,9 @@@ EXPORT_SYMBOL_GPL(dm_table_set_type)
   int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
                         sector_t start, sector_t len, void *data)
   {
-       int blocksize = *(int *) data, id;
-       bool rc;
+       int blocksize = *(int *) data;
   
-       id = dax_read_lock();
-       rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
-       dax_read_unlock(id);
- 
-       return rc;
+       return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
   }
   
   /* Check devices support synchronous DAX */
@@@ -2076,7 -2071,7 +2071,7 @@@ int dm_table_set_restrictions(struct dm
         }
   
         dm_update_keyslot_manager(q, t);
- -      blk_queue_update_readahead(q);
+ +      disk_update_readahead(t->md->disk);
   
         return 0;
   }
diff --combined drivers/md/dm.c

index 84e9145,4657143..a011d09
--- 1/drivers/md/dm.c
--- 2/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@@ -8,7 -8,6 +8,7 @@@
   #include "dm-core.h"
   #include "dm-rq.h"
   #include "dm-uevent.h"
+ +#include "dm-ima.h"
   
   #include <linux/init.h>
   #include <linux/module.h>
@@@ -262,13 -261,9 +262,13 @@@ static void (*_exits[])(void) = 
   static int __init dm_init(void)
   {
         const int count = ARRAY_SIZE(_inits);
- -
         int r, i;
   
+ +#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
+ +      DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
+ +             " Duplicate IMA measurements will not be recorded in the IMA log.");
+ +#endif
+ +
         for (i = 0; i < count; i++) {
                 r = _inits[i]();
                 if (r)
@@@ -276,7 -271,8 +276,7 @@@
         }
   
         return 0;
- -
- -      bad:
+ +bad:
         while (i--)
                 _exits[i]();
   
@@@ -654,7 -650,7 +654,7 @@@ static int open_table_device(struct tab
         }
   
         td->dm_dev.bdev = bdev;
-       td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+       td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
         return 0;
   }
   
@@@ -1697,13 -1693,14 +1697,13 @@@ static void cleanup_mapped_device(struc
                 spin_lock(&_minor_lock);
                 md->disk->private_data = NULL;
                 spin_unlock(&_minor_lock);
- -              del_gendisk(md->disk);
- -      }
- -
- -      if (md->queue)
+ +              if (dm_get_md_type(md) != DM_TYPE_NONE) {
+ +                      dm_sysfs_exit(md);
+ +                      del_gendisk(md->disk);
+ +              }
                 dm_queue_destroy_keyslot_manager(md->queue);
- -
- -      if (md->disk)
                 blk_cleanup_disk(md->disk);
+ +      }
   
         cleanup_srcu_struct(&md->io_barrier);
   
@@@ -1795,6 -1792,7 +1795,6 @@@ static struct mapped_device *alloc_dev(
                         goto bad;
         }
   
- -      add_disk_no_queue_reg(md->disk);
         format_dev_t(md->name, MKDEV(_major, minor));
   
         md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
@@@ -1995,13 -1993,18 +1995,13 @@@ static struct dm_table *__unbind(struc
    */
   int dm_create(int minor, struct mapped_device **result)
   {
- -      int r;
         struct mapped_device *md;
   
         md = alloc_dev(minor);
         if (!md)
                 return -ENXIO;
   
- -      r = dm_sysfs_init(md);
- -      if (r) {
- -              free_dev(md);
- -              return r;
- -      }
+ +      dm_ima_reset_data(md);
   
         *result = md;
         return 0;
@@@ -2053,9 -2056,9 +2053,9 @@@ EXPORT_SYMBOL_GPL(dm_get_queue_limits)
    */
   int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
   {
- -      int r;
+ +      enum dm_queue_mode type = dm_table_get_type(t);
         struct queue_limits limits;
- -      enum dm_queue_mode type = dm_get_md_type(md);
+ +      int r;
   
         switch (type) {
         case DM_TYPE_REQUEST_BASED:
@@@ -2083,14 -2086,8 +2083,14 @@@
         if (r)
                 return r;
   
- -      blk_register_queue(md->disk);
+ +      add_disk(md->disk);
   
+ +      r = dm_sysfs_init(md);
+ +      if (r) {
+ +              del_gendisk(md->disk);
+ +              return r;
+ +      }
+ +      md->type = type;
         return 0;
   }
   
@@@ -2196,6 -2193,7 +2196,6 @@@ static void __dm_destroy(struct mapped_
                 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
                        dm_device_name(md), atomic_read(&md->holders));
   
- -      dm_sysfs_exit(md);
         dm_table_destroy(__unbind(md));
         free_dev(md);
   }
diff --combined fs/Kconfig

index d8207a1,bd21535..c854725
--- 1/fs/Kconfig
--- 2/fs/Kconfig
+++ b/fs/Kconfig
@@@ -43,7 -43,7 +43,7 @@@ source "fs/f2fs/Kconfig
   source "fs/zonefs/Kconfig"
   
   config FS_DAX
-       bool "Direct Access (DAX) support"
+       bool "File system based Direct Access (DAX) support"
         depends on MMU
         depends on !(ARM || MIPS || SPARC)
         select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
@@@ -53,8 -53,23 +53,23 @@@
           Direct Access (DAX) can be used on memory-backed block devices.
           If the block device supports DAX and the filesystem supports DAX,
           then you can avoid using the pagecache to buffer I/Os.  Turning
-         on this option will compile in support for DAX; you will need to
-         mount the filesystem using the -o dax option.
+         on this option will compile in support for DAX.
+ 
+         For a DAX device to support file system access it needs to have
+         struct pages.  For the nfit based NVDIMMs this can be enabled
+         using the ndctl utility:
+ 
+               # ndctl create-namespace --force --reconfig=namespace0.0 \
+                       --mode=fsdax --map=mem
+ 
+         See the 'create-namespace' man page for details on the overhead of
+         --map=mem:
+         https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace
+ 
+           For ndctl to work CONFIG_DEV_DAX needs to be enabled as well. For most
+         file systems DAX support needs to be manually enabled globally or
+         per-inode using a mount option as well.  See the file documentation in
+         Documentation/filesystems/dax.rst for details.
   
           If you do not have a block device that is capable of using this,
           or if unsure, say N.  Saying Y will increase the size of the kernel
@@@ -101,6 -116,16 +116,6 @@@ config FILE_LOCKIN
             for filesystems like NFS and for the flock() system
             call. Disabling this option saves about 11k.
   
- -config MANDATORY_FILE_LOCKING
- -      bool "Enable Mandatory file locking"
- -      depends on FILE_LOCKING
- -      default y
- -      help
- -        This option enables files appropriately marked files on appropriely
- -        mounted filesystems to support mandatory locking.
- -
- -        To the best of my knowledge this is dead code that no one cares about.
- -
   source "fs/crypto/Kconfig"
   
   source "fs/verity/Kconfig"
@@@ -136,7 -161,6 +151,7 @@@ menu "DOS/FAT/EXFAT/NT Filesystems
   source "fs/fat/Kconfig"
   source "fs/exfat/Kconfig"
   source "fs/ntfs/Kconfig"
+ +source "fs/ntfs3/Kconfig"
   
   endmenu
   endif # BLOCK
@@@ -349,15 -373,7 +364,15 @@@ config NFS_V4_2_SSC_HELPE
   
   source "net/sunrpc/Kconfig"
   source "fs/ceph/Kconfig"
+ +
   source "fs/cifs/Kconfig"
+ +source "fs/ksmbd/Kconfig"
+ +
+ +config CIFS_COMMON
+ +      tristate
+ +      default y if CIFS=y
+ +      default m if CIFS=m
+ +
   source "fs/coda/Kconfig"
   source "fs/afs/Kconfig"
   source "fs/9p/Kconfig"
diff --combined fs/erofs/super.c

index a8d49e8,8fc6c04..11b8855
--- 1/fs/erofs/super.c
--- 2/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@@ -11,7 -11,6 +11,7 @@@
   #include <linux/crc32c.h>
   #include <linux/fs_context.h>
   #include <linux/fs_parser.h>
+ +#include <linux/dax.h>
   #include "xattr.h"
   
   #define CREATE_TRACE_POINTS
@@@ -356,8 -355,6 +356,8 @@@ enum 
         Opt_user_xattr,
         Opt_acl,
         Opt_cache_strategy,
+ +      Opt_dax,
+ +      Opt_dax_enum,
         Opt_err
   };
   
@@@ -368,47 -365,14 +368,47 @@@ static const struct constant_table erof
         {}
   };
   
+ +static const struct constant_table erofs_dax_param_enums[] = {
+ +      {"always",      EROFS_MOUNT_DAX_ALWAYS},
+ +      {"never",       EROFS_MOUNT_DAX_NEVER},
+ +      {}
+ +};
+ +
   static const struct fs_parameter_spec erofs_fs_parameters[] = {
         fsparam_flag_no("user_xattr",   Opt_user_xattr),
         fsparam_flag_no("acl",          Opt_acl),
         fsparam_enum("cache_strategy",  Opt_cache_strategy,
                      erofs_param_cache_strategy),
+ +      fsparam_flag("dax",             Opt_dax),
+ +      fsparam_enum("dax",             Opt_dax_enum, erofs_dax_param_enums),
         {}
   };
   
+ +static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
+ +{
+ +#ifdef CONFIG_FS_DAX
+ +      struct erofs_fs_context *ctx = fc->fs_private;
+ +
+ +      switch (mode) {
+ +      case EROFS_MOUNT_DAX_ALWAYS:
+ +              warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ +              set_opt(ctx, DAX_ALWAYS);
+ +              clear_opt(ctx, DAX_NEVER);
+ +              return true;
+ +      case EROFS_MOUNT_DAX_NEVER:
+ +              set_opt(ctx, DAX_NEVER);
+ +              clear_opt(ctx, DAX_ALWAYS);
+ +              return true;
+ +      default:
+ +              DBG_BUGON(1);
+ +              return false;
+ +      }
+ +#else
+ +      errorfc(fc, "dax options not supported");
+ +      return false;
+ +#endif
+ +}
+ +
   static int erofs_fc_parse_param(struct fs_context *fc,
                                 struct fs_parameter *param)
   {
@@@ -448,14 -412,6 +448,14 @@@
                 errorfc(fc, "compression not supported, cache_strategy ignored");
   #endif
                 break;
+ +      case Opt_dax:
+ +              if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
+ +                      return -EINVAL;
+ +              break;
+ +      case Opt_dax_enum:
+ +              if (!erofs_fc_set_dax_mode(fc, result.uint_32))
+ +                      return -EINVAL;
+ +              break;
         default:
                 return -ENOPARAM;
         }
@@@ -474,7 -430,7 +474,7 @@@ static int erofs_managed_cache_releasep
         DBG_BUGON(mapping->a_ops != &managed_cache_aops);
   
         if (PagePrivate(page))
- -              ret = erofs_try_to_free_cached_page(mapping, page);
+ +              ret = erofs_try_to_free_cached_page(page);
   
         return ret;
   }
@@@ -540,16 -496,10 +540,16 @@@ static int erofs_fc_fill_super(struct s
                 return -ENOMEM;
   
         sb->s_fs_info = sbi;
+ +      sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
         err = erofs_read_superblock(sb);
         if (err)
                 return err;
   
-           !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
+ +      if (test_opt(ctx, DAX_ALWAYS) &&
++          !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
+ +              errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
+ +              clear_opt(ctx, DAX_ALWAYS);
+ +      }
         sb->s_flags |= SB_RDONLY | SB_NOATIME;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sb->s_time_gran = 1;
@@@ -659,7 -609,6 +659,7 @@@ static void erofs_kill_sb(struct super_
         sbi = EROFS_SB(sb);
         if (!sbi)
                 return;
+ +      fs_put_dax(sbi->dax_dev);
         kfree(sbi);
         sb->s_fs_info = NULL;
   }
@@@ -762,8 -711,8 +762,8 @@@ static int erofs_statfs(struct dentry *
   
   static int erofs_show_options(struct seq_file *seq, struct dentry *root)
   {
- -      struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
- -      struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
+ +      struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
+ +      struct erofs_fs_context *ctx = &sbi->ctx;
   
   #ifdef CONFIG_EROFS_FS_XATTR
         if (test_opt(ctx, XATTR_USER))
@@@ -785,10 -734,6 +785,10 @@@
         else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
                 seq_puts(seq, ",cache_strategy=readaround");
   #endif
+ +      if (test_opt(ctx, DAX_ALWAYS))
+ +              seq_puts(seq, ",dax=always");
+ +      if (test_opt(ctx, DAX_NEVER))
+ +              seq_puts(seq, ",dax=never");
         return 0;
   }
   
diff --combined fs/ext2/super.c

index 987bcf3,26e69e4..d8d580b
--- 1/fs/ext2/super.c
--- 2/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@@ -206,6 -206,9 +206,6 @@@ static void init_once(void *foo
         init_rwsem(&ei->xattr_sem);
   #endif
         mutex_init(&ei->truncate_mutex);
- -#ifdef CONFIG_FS_DAX
- -      init_rwsem(&ei->dax_sem);
- -#endif
         inode_init_once(&ei->vfs_inode);
   }
   
@@@ -946,7 -949,8 +946,8 @@@ static int ext2_fill_super(struct super
         blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
   
         if (test_opt(sb, DAX)) {
-               if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+               if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                               bdev_nr_sectors(sb->s_bdev))) {
                         ext2_msg(sb, KERN_ERR,
                                 "DAX unsupported by block device. Turning off DAX.");
                         clear_opt(sbi->s_mount_opt, DAX);
diff --combined fs/ext4/super.c

index 136940a,a1726a8..0775950
--- 1/fs/ext4/super.c
--- 2/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -80,6 -80,7 +80,6 @@@ static struct dentry *ext4_mount(struc
                        const char *dev_name, void *data);
   static inline int ext2_feature_set_ok(struct super_block *sb);
   static inline int ext3_feature_set_ok(struct super_block *sb);
- -static int ext4_feature_set_ok(struct super_block *sb, int readonly);
   static void ext4_destroy_lazyinit_thread(void);
   static void ext4_unregister_li_request(struct super_block *sb);
   static void ext4_clear_request_list(void);
@@@ -89,9 -90,12 +89,9 @@@ static struct inode *ext4_get_journal_i
   /*
    * Lock ordering
    *
- - * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
- - * i_mmap_rwsem (inode->i_mmap_rwsem)!
- - *
    * page fault path:
- - * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
- - *   page lock -> i_data_sem (rw)
+ + * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ + *   -> page lock -> i_data_sem (rw)
    *
    * buffered write path:
    * sb_start_write -> i_mutex -> mmap_lock
@@@ -99,9 -103,8 +99,9 @@@
    *   i_data_sem (rw)
    *
    * truncate:
- - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
- - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ + * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ + *   page lock
+ + * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
    *   i_data_sem (rw)
    *
    * direct IO:
@@@ -1172,7 -1175,6 +1172,7 @@@ static void ext4_put_super(struct super
   
         flush_work(&sbi->s_error_work);
         destroy_workqueue(sbi->rsv_conversion_wq);
+ +      ext4_release_orphan_info(sb);
   
         /*
          * Unregister sysfs before destroying jbd2 journal.
@@@ -1198,7 -1200,6 +1198,7 @@@
   
         if (!sb_rdonly(sb) && !aborted) {
                 ext4_clear_feature_journal_needs_recovery(sb);
+ +              ext4_clear_feature_orphan_present(sb);
                 es->s_state = cpu_to_le16(sbi->s_mount_state);
         }
         if (!sb_rdonly(sb))
@@@ -1359,6 -1360,7 +1359,6 @@@ static void init_once(void *foo
         INIT_LIST_HEAD(&ei->i_orphan);
         init_rwsem(&ei->xattr_sem);
         init_rwsem(&ei->i_data_sem);
- -      init_rwsem(&ei->i_mmap_sem);
         inode_init_once(&ei->vfs_inode);
         ext4_fc_init_inode(&ei->vfs_inode);
   }
@@@ -1583,12 -1585,14 +1583,12 @@@ static int ext4_mark_dquot_dirty(struc
   static int ext4_write_info(struct super_block *sb, int type);
   static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                          const struct path *path);
- -static int ext4_quota_on_mount(struct super_block *sb, int type);
   static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                                size_t len, loff_t off);
   static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                 const char *data, size_t len, loff_t off);
   static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                              unsigned int flags);
- -static int ext4_enable_quotas(struct super_block *sb);
   
   static struct dquot **ext4_get_dquots(struct inode *inode)
   {
@@@ -2683,11 -2687,8 +2683,11 @@@ static int ext4_setup_super(struct supe
                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
         le16_add_cpu(&es->s_mnt_count, 1);
         ext4_update_tstamp(es, s_mtime);
- -      if (sbi->s_journal)
+ +      if (sbi->s_journal) {
                 ext4_set_feature_journal_needs_recovery(sb);
+ +              if (ext4_has_feature_orphan_file(sb))
+ +                      ext4_set_feature_orphan_present(sb);
+ +      }
   
         err = ext4_commit_super(sb);
   done:
@@@ -2969,6 -2970,169 +2969,6 @@@ static int ext4_check_descriptors(struc
         return 1;
   }
   
- -/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- - * the superblock) which were deleted from all directories, but held open by
- - * a process at the time of a crash.  We walk the list and try to delete these
- - * inodes at recovery time (only with a read-write filesystem).
- - *
- - * In order to keep the orphan inode chain consistent during traversal (in
- - * case of crash during recovery), we link each inode into the superblock
- - * orphan list_head and handle it the same way as an inode deletion during
- - * normal operation (which journals the operations for us).
- - *
- - * We only do an iget() and an iput() on each inode, which is very safe if we
- - * accidentally point at an in-use or already deleted inode.  The worst that
- - * can happen in this case is that we get a "bit already cleared" message from
- - * ext4_free_inode().  The only reason we would point at a wrong inode is if
- - * e2fsck was run on this filesystem, and it must have already done the orphan
- - * inode cleanup for us, so we can safely abort without any further action.
- - */
- -static void ext4_orphan_cleanup(struct super_block *sb,
- -                              struct ext4_super_block *es)
- -{
- -      unsigned int s_flags = sb->s_flags;
- -      int ret, nr_orphans = 0, nr_truncates = 0;
- -#ifdef CONFIG_QUOTA
- -      int quota_update = 0;
- -      int i;
- -#endif
- -      if (!es->s_last_orphan) {
- -              jbd_debug(4, "no orphan inodes to clean up\n");
- -              return;
- -      }
- -
- -      if (bdev_read_only(sb->s_bdev)) {
- -              ext4_msg(sb, KERN_ERR, "write access "
- -                      "unavailable, skipping orphan cleanup");
- -              return;
- -      }
- -
- -      /* Check if feature set would not allow a r/w mount */
- -      if (!ext4_feature_set_ok(sb, 0)) {
- -              ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
- -                       "unknown ROCOMPAT features");
- -              return;
- -      }
- -
- -      if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- -              /* don't clear list on RO mount w/ errors */
- -              if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
- -                      ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
- -                                "clearing orphan list.\n");
- -                      es->s_last_orphan = 0;
- -              }
- -              jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- -              return;
- -      }
- -
- -      if (s_flags & SB_RDONLY) {
- -              ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- -              sb->s_flags &= ~SB_RDONLY;
- -      }
- -#ifdef CONFIG_QUOTA
- -      /*
- -       * Turn on quotas which were not enabled for read-only mounts if
- -       * filesystem has quota feature, so that they are updated correctly.
- -       */
- -      if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
- -              int ret = ext4_enable_quotas(sb);
- -
- -              if (!ret)
- -                      quota_update = 1;
- -              else
- -                      ext4_msg(sb, KERN_ERR,
- -                              "Cannot turn on quotas: error %d", ret);
- -      }
- -
- -      /* Turn on journaled quotas used for old sytle */
- -      for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- -              if (EXT4_SB(sb)->s_qf_names[i]) {
- -                      int ret = ext4_quota_on_mount(sb, i);
- -
- -                      if (!ret)
- -                              quota_update = 1;
- -                      else
- -                              ext4_msg(sb, KERN_ERR,
- -                                      "Cannot turn on journaled "
- -                                      "quota: type %d: error %d", i, ret);
- -              }
- -      }
- -#endif
- -
- -      while (es->s_last_orphan) {
- -              struct inode *inode;
- -
- -              /*
- -               * We may have encountered an error during cleanup; if
- -               * so, skip the rest.
- -               */
- -              if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- -                      jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- -                      es->s_last_orphan = 0;
- -                      break;
- -              }
- -
- -              inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
- -              if (IS_ERR(inode)) {
- -                      es->s_last_orphan = 0;
- -                      break;
- -              }
- -
- -              list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- -              dquot_initialize(inode);
- -              if (inode->i_nlink) {
- -                      if (test_opt(sb, DEBUG))
- -                              ext4_msg(sb, KERN_DEBUG,
- -                                      "%s: truncating inode %lu to %lld bytes",
- -                                      __func__, inode->i_ino, inode->i_size);
- -                      jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- -                                inode->i_ino, inode->i_size);
- -                      inode_lock(inode);
- -                      truncate_inode_pages(inode->i_mapping, inode->i_size);
- -                      ret = ext4_truncate(inode);
- -                      if (ret) {
- -                              /*
- -                               * We need to clean up the in-core orphan list
- -                               * manually if ext4_truncate() failed to get a
- -                               * transaction handle.
- -                               */
- -                              ext4_orphan_del(NULL, inode);
- -                              ext4_std_error(inode->i_sb, ret);
- -                      }
- -                      inode_unlock(inode);
- -                      nr_truncates++;
- -              } else {
- -                      if (test_opt(sb, DEBUG))
- -                              ext4_msg(sb, KERN_DEBUG,
- -                                      "%s: deleting unreferenced inode %lu",
- -                                      __func__, inode->i_ino);
- -                      jbd_debug(2, "deleting unreferenced inode %lu\n",
- -                                inode->i_ino);
- -                      nr_orphans++;
- -              }
- -              iput(inode);  /* The delete magic happens here! */
- -      }
- -
- -#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
- -
- -      if (nr_orphans)
- -              ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
- -                     PLURAL(nr_orphans));
- -      if (nr_truncates)
- -              ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
- -                     PLURAL(nr_truncates));
- -#ifdef CONFIG_QUOTA
- -      /* Turn off quotas if they were enabled for orphan cleanup */
- -      if (quota_update) {
- -              for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- -                      if (sb_dqopt(sb)->files[i])
- -                              dquot_quota_off(sb, i);
- -              }
- -      }
- -#endif
- -      sb->s_flags = s_flags; /* Restore SB_RDONLY status */
- -}
- -
   /*
    * Maximal extent format file size.
    * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@@ -3148,7 -3312,7 +3148,7 @@@ static unsigned long ext4_get_stripe_si
    * Returns 1 if this filesystem can be mounted as requested,
    * 0 if it cannot be.
    */
- -static int ext4_feature_set_ok(struct super_block *sb, int readonly)
+ +int ext4_feature_set_ok(struct super_block *sb, int readonly)
   {
         if (ext4_has_unknown_ext4_incompat_features(sb)) {
                 ext4_msg(sb, KERN_ERR,
@@@ -3850,20 -4014,6 +3850,20 @@@ static const char *ext4_quota_mode(stru
   #endif
   }
   
+ +static void ext4_setup_csum_trigger(struct super_block *sb,
+ +                                  enum ext4_journal_trigger_type type,
+ +                                  void (*trigger)(
+ +                                      struct jbd2_buffer_trigger_type *type,
+ +                                      struct buffer_head *bh,
+ +                                      void *mapped_data,
+ +                                      size_t size))
+ +{
+ +      struct ext4_sb_info *sbi = EXT4_SB(sb);
+ +
+ +      sbi->s_journal_triggers[type].sb = sb;
+ +      sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
+ +}
+ +
   static int ext4_fill_super(struct super_block *sb, void *data, int silent)
   {
         struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@@@ -3962,8 -4112,6 +3962,8 @@@
                 silent = 1;
                 goto cantfind_ext4;
         }
+ +      ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+ +                              ext4_orphan_file_block_trigger);
   
         /* Load the checksum driver */
         sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
@@@ -4287,7 -4435,8 +4287,8 @@@
                 goto failed_mount;
         }
   
-       if (bdev_dax_supported(sb->s_bdev, blocksize))
+       if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                       bdev_nr_sectors(sb->s_bdev)))
                 set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
   
         if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
@@@ -4628,7 -4777,6 +4629,7 @@@
         sb->s_root = NULL;
   
         needs_recovery = (es->s_last_orphan != 0 ||
+ +                        ext4_has_feature_orphan_present(sb) ||
                           ext4_has_feature_journal_needs_recovery(sb));
   
         if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
@@@ -4885,14 -5033,6 +4886,14 @@@ no_journal
                 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
                                           GFP_KERNEL);
         }
+ +      /*
+ +       * Update the checksum after updating free space/inode
+ +       * counters.  Otherwise the superblock can have an incorrect
+ +       * checksum in the buffer cache until it is written out and
+ +       * e2fsprogs programs trying to open a file system immediately
+ +       * after it is mounted can fail.
+ +       */
+ +      ext4_superblock_csum_set(sb);
         if (!err)
                 err = percpu_counter_init(&sbi->s_dirs_counter,
                                           ext4_count_dirs(sb), GFP_KERNEL);
@@@ -4927,15 -5067,12 +4928,15 @@@
         if (err)
                 goto failed_mount7;
   
+ +      err = ext4_init_orphan_info(sb);
+ +      if (err)
+ +              goto failed_mount8;
   #ifdef CONFIG_QUOTA
         /* Enable quota usage during mount. */
         if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
                 err = ext4_enable_quotas(sb);
                 if (err)
- -                      goto failed_mount8;
+ +                      goto failed_mount9;
         }
   #endif  /* CONFIG_QUOTA */
   
@@@ -4954,7 -5091,7 +4955,7 @@@
                 ext4_msg(sb, KERN_INFO, "recovery complete");
                 err = ext4_mark_recovery_complete(sb, es);
                 if (err)
- -                      goto failed_mount8;
+ +                      goto failed_mount9;
         }
         if (EXT4_SB(sb)->s_journal) {
                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@@ -5000,8 -5137,6 +5001,8 @@@ cantfind_ext4
                 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
         goto failed_mount;
   
+ +failed_mount9:
+ +      ext4_release_orphan_info(sb);
   failed_mount8:
         ext4_unregister_sysfs(sb);
         kobject_put(&sbi->s_kobj);
@@@ -5512,15 -5647,8 +5513,15 @@@ static int ext4_mark_recovery_complete(
         if (err < 0)
                 goto out;
   
- -      if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
+ +      if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
+ +          ext4_has_feature_orphan_present(sb))) {
+ +              if (!ext4_orphan_file_empty(sb)) {
+ +                      ext4_error(sb, "Orphan file not empty on read-only fs.");
+ +                      err = -EFSCORRUPTED;
+ +                      goto out;
+ +              }
                 ext4_clear_feature_journal_needs_recovery(sb);
+ +              ext4_clear_feature_orphan_present(sb);
                 ext4_commit_super(sb);
         }
   out:
@@@ -5663,8 -5791,6 +5664,8 @@@ static int ext4_freeze(struct super_blo
   
                 /* Journal blocked and flushed, clear needs_recovery flag. */
                 ext4_clear_feature_journal_needs_recovery(sb);
+ +              if (ext4_orphan_file_empty(sb))
+ +                      ext4_clear_feature_orphan_present(sb);
         }
   
         error = ext4_commit_super(sb);
@@@ -5687,8 -5813,6 +5688,8 @@@ static int ext4_unfreeze(struct super_b
         if (EXT4_SB(sb)->s_journal) {
                 /* Reset the needs_recovery flag before the fs is unlocked. */
                 ext4_set_feature_journal_needs_recovery(sb);
+ +              if (ext4_has_feature_orphan_file(sb))
+ +                      ext4_set_feature_orphan_present(sb);
         }
   
         ext4_commit_super(sb);
@@@ -5892,7 -6016,7 +5893,7 @@@ static int ext4_remount(struct super_bl
                          * around from a previously readonly bdev mount,
                          * require a full umount/remount for now.
                          */
- -                      if (es->s_last_orphan) {
+ +                      if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
                                        "remount RDWR because of unprocessed "
                                        "orphan inode list.  Please "
@@@ -6189,6 -6313,16 +6190,6 @@@ static int ext4_write_info(struct super
         return ret;
   }
   
- -/*
- - * Turn on quotas during mount time - we need to find
- - * the quota file and such...
- - */
- -static int ext4_quota_on_mount(struct super_block *sb, int type)
- -{
- -      return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
- -                                      EXT4_SB(sb)->s_jquota_fmt, type);
- -}
- -
   static void lockdep_set_quota_inode(struct inode *inode, int subclass)
   {
         struct ext4_inode_info *ei = EXT4_I(inode);
@@@ -6318,7 -6452,7 +6319,7 @@@ static int ext4_quota_enable(struct sup
   }
   
   /* Enable usage tracking for all quota types. */
- -static int ext4_enable_quotas(struct super_block *sb)
+ +int ext4_enable_quotas(struct super_block *sb)
   {
         int type, err = 0;
         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
@@@ -6476,7 -6610,7 +6477,7 @@@ static ssize_t ext4_quota_write(struct 
         if (!bh)
                 goto out;
         BUFFER_TRACE(bh, "get write access");
- -      err = ext4_journal_get_write_access(handle, bh);
+ +      err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
         if (err) {
                 brelse(bh);
                 return err;
diff --combined fs/xfs/xfs_super.c

index 9a86d3e,f438497..c4e0cd1
--- 1/fs/xfs/xfs_super.c
--- 2/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@@ -49,28 -49,6 +49,28 @@@ static struct kset *xfs_kset;               /* top-l
   static struct xfs_kobj xfs_dbg_kobj;  /* global debug sysfs attrs */
   #endif
   
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +static LIST_HEAD(xfs_mount_list);
+ +static DEFINE_SPINLOCK(xfs_mount_list_lock);
+ +
+ +static inline void xfs_mount_list_add(struct xfs_mount *mp)
+ +{
+ +      spin_lock(&xfs_mount_list_lock);
+ +      list_add(&mp->m_mount_list, &xfs_mount_list);
+ +      spin_unlock(&xfs_mount_list_lock);
+ +}
+ +
+ +static inline void xfs_mount_list_del(struct xfs_mount *mp)
+ +{
+ +      spin_lock(&xfs_mount_list_lock);
+ +      list_del(&mp->m_mount_list);
+ +      spin_unlock(&xfs_mount_list_lock);
+ +}
+ +#else /* !CONFIG_HOTPLUG_CPU */
+ +static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
+ +static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
+ +#endif
+ +
   enum xfs_dax_mode {
         XFS_DAX_INODE = 0,
         XFS_DAX_ALWAYS = 1,
@@@ -84,15 -62,15 +84,15 @@@ xfs_mount_set_dax_mode
   {
         switch (mode) {
         case XFS_DAX_INODE:
- -              mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
+ +              mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
                 break;
         case XFS_DAX_ALWAYS:
- -              mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
- -              mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
+ +              mp->m_features |= XFS_FEAT_DAX_ALWAYS;
+ +              mp->m_features &= ~XFS_FEAT_DAX_NEVER;
                 break;
         case XFS_DAX_NEVER:
- -              mp->m_flags |= XFS_MOUNT_DAX_NEVER;
- -              mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
+ +              mp->m_features |= XFS_FEAT_DAX_NEVER;
+ +              mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
                 break;
         }
   }
@@@ -176,32 -154,33 +176,32 @@@ xfs_fs_show_options
   {
         static struct proc_xfs_info xfs_info_set[] = {
                 /* the few simple ones we can get from the mount struct */
- -              { XFS_MOUNT_IKEEP,              ",ikeep" },
- -              { XFS_MOUNT_WSYNC,              ",wsync" },
- -              { XFS_MOUNT_NOALIGN,            ",noalign" },
- -              { XFS_MOUNT_SWALLOC,            ",swalloc" },
- -              { XFS_MOUNT_NOUUID,             ",nouuid" },
- -              { XFS_MOUNT_NORECOVERY,         ",norecovery" },
- -              { XFS_MOUNT_ATTR2,              ",attr2" },
- -              { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
- -              { XFS_MOUNT_GRPID,              ",grpid" },
- -              { XFS_MOUNT_DISCARD,            ",discard" },
- -              { XFS_MOUNT_LARGEIO,            ",largeio" },
- -              { XFS_MOUNT_DAX_ALWAYS,         ",dax=always" },
- -              { XFS_MOUNT_DAX_NEVER,          ",dax=never" },
+ +              { XFS_FEAT_IKEEP,               ",ikeep" },
+ +              { XFS_FEAT_WSYNC,               ",wsync" },
+ +              { XFS_FEAT_NOALIGN,             ",noalign" },
+ +              { XFS_FEAT_SWALLOC,             ",swalloc" },
+ +              { XFS_FEAT_NOUUID,              ",nouuid" },
+ +              { XFS_FEAT_NORECOVERY,          ",norecovery" },
+ +              { XFS_FEAT_ATTR2,               ",attr2" },
+ +              { XFS_FEAT_FILESTREAMS,         ",filestreams" },
+ +              { XFS_FEAT_GRPID,               ",grpid" },
+ +              { XFS_FEAT_DISCARD,             ",discard" },
+ +              { XFS_FEAT_LARGE_IOSIZE,        ",largeio" },
+ +              { XFS_FEAT_DAX_ALWAYS,          ",dax=always" },
+ +              { XFS_FEAT_DAX_NEVER,           ",dax=never" },
                 { 0, NULL }
         };
         struct xfs_mount        *mp = XFS_M(root->d_sb);
         struct proc_xfs_info    *xfs_infop;
   
         for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
- -              if (mp->m_flags & xfs_infop->flag)
+ +              if (mp->m_features & xfs_infop->flag)
                         seq_puts(m, xfs_infop->str);
         }
   
- -      seq_printf(m, ",inode%d",
- -              (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
+ +      seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
   
- -      if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+ +      if (xfs_has_allocsize(mp))
                 seq_printf(m, ",allocsize=%dk",
                            (1 << mp->m_allocsize_log) >> 10);
   
@@@ -222,20 -201,25 +222,20 @@@
                 seq_printf(m, ",swidth=%d",
                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
   
- -      if (mp->m_qflags & XFS_UQUOTA_ACCT) {
- -              if (mp->m_qflags & XFS_UQUOTA_ENFD)
- -                      seq_puts(m, ",usrquota");
- -              else
- -                      seq_puts(m, ",uqnoenforce");
- -      }
+ +      if (mp->m_qflags & XFS_UQUOTA_ENFD)
+ +              seq_puts(m, ",usrquota");
+ +      else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+ +              seq_puts(m, ",uqnoenforce");
   
- -      if (mp->m_qflags & XFS_PQUOTA_ACCT) {
- -              if (mp->m_qflags & XFS_PQUOTA_ENFD)
- -                      seq_puts(m, ",prjquota");
- -              else
- -                      seq_puts(m, ",pqnoenforce");
- -      }
- -      if (mp->m_qflags & XFS_GQUOTA_ACCT) {
- -              if (mp->m_qflags & XFS_GQUOTA_ENFD)
- -                      seq_puts(m, ",grpquota");
- -              else
- -                      seq_puts(m, ",gqnoenforce");
- -      }
+ +      if (mp->m_qflags & XFS_PQUOTA_ENFD)
+ +              seq_puts(m, ",prjquota");
+ +      else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+ +              seq_puts(m, ",pqnoenforce");
+ +
+ +      if (mp->m_qflags & XFS_GQUOTA_ENFD)
+ +              seq_puts(m, ",grpquota");
+ +      else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+ +              seq_puts(m, ",gqnoenforce");
   
         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                 seq_puts(m, ",noquota");
@@@ -246,11 -230,11 +246,11 @@@
   /*
    * Set parameters for inode allocation heuristics, taking into account
    * filesystem size and inode32/inode64 mount options; i.e. specifically
- - * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ + * whether or not XFS_FEAT_SMALL_INUMS is set.
    *
    * Inode allocation patterns are altered only if inode32 is requested
- - * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
- - * If altered, XFS_MOUNT_32BITINODES is set as well.
+ + * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
+ + * If altered, XFS_OPSTATE_INODE32 is set as well.
    *
    * An agcount independent of that in the mount structure is provided
    * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
@@@ -292,13 -276,13 +292,13 @@@ xfs_set_inode_alloc
   
         /*
          * If user asked for no more than 32-bit inodes, and the fs is
- -       * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+ +       * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
          * the allocator to accommodate the request.
          */
- -      if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
- -              mp->m_flags |= XFS_MOUNT_32BITINODES;
+ +      if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
+ +              set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
         else
- -              mp->m_flags &= ~XFS_MOUNT_32BITINODES;
+ +              clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
   
         for (index = 0; index < agcount; index++) {
                 struct xfs_perag        *pag;
@@@ -307,7 -291,7 +307,7 @@@
   
                 pag = xfs_perag_get(mp, index);
   
- -              if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+ +              if (xfs_is_inode32(mp)) {
                         if (ino > XFS_MAXINUMBER_32) {
                                 pag->pagi_inodeok = 0;
                                 pag->pagf_metadata = 0;
@@@ -327,9 -311,18 +327,18 @@@
                 xfs_perag_put(pag);
         }
   
- -      return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
+ +      return xfs_is_inode32(mp) ? maxagi : agcount;
   }
   
+ static bool
+ xfs_buftarg_is_dax(
+       struct super_block      *sb,
+       struct xfs_buftarg      *bt)
+ {
+       return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
+                       bdev_nr_sectors(bt->bt_bdev));
+ }
+ 
   STATIC int
   xfs_blkdev_get(
         xfs_mount_t             *mp,
@@@ -484,7 -477,7 +493,7 @@@ xfs_setup_devices
         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
                 unsigned int    log_sector_size = BBSIZE;
   
- -              if (xfs_sb_version_hassector(&mp->m_sb))
+ +              if (xfs_has_sector(mp))
                         log_sector_size = mp->m_sb.sb_logsectsize;
                 error = xfs_setsize_buftarg(mp->m_logdev_targp,
                                             log_sector_size);
@@@ -517,37 -510,37 +526,37 @@@ xfs_init_mount_workqueues
         if (!mp->m_unwritten_workqueue)
                 goto out_destroy_buf;
   
- -      mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
- -                      XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
- -                      0, mp->m_super->s_id);
- -      if (!mp->m_cil_workqueue)
- -              goto out_destroy_unwritten;
- -
         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
                         XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
                         0, mp->m_super->s_id);
         if (!mp->m_reclaim_workqueue)
- -              goto out_destroy_cil;
+ +              goto out_destroy_unwritten;
   
- -      mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s",
- -                      WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ +      mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
+ +                      XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
                         0, mp->m_super->s_id);
- -      if (!mp->m_gc_workqueue)
+ +      if (!mp->m_blockgc_wq)
                 goto out_destroy_reclaim;
   
+ +      mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
+ +                      XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+ +                      1, mp->m_super->s_id);
+ +      if (!mp->m_inodegc_wq)
+ +              goto out_destroy_blockgc;
+ +
         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
                         XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
         if (!mp->m_sync_workqueue)
- -              goto out_destroy_eofb;
+ +              goto out_destroy_inodegc;
   
         return 0;
   
- -out_destroy_eofb:
- -      destroy_workqueue(mp->m_gc_workqueue);
+ +out_destroy_inodegc:
+ +      destroy_workqueue(mp->m_inodegc_wq);
+ +out_destroy_blockgc:
+ +      destroy_workqueue(mp->m_blockgc_wq);
   out_destroy_reclaim:
         destroy_workqueue(mp->m_reclaim_workqueue);
- -out_destroy_cil:
- -      destroy_workqueue(mp->m_cil_workqueue);
   out_destroy_unwritten:
         destroy_workqueue(mp->m_unwritten_workqueue);
   out_destroy_buf:
@@@ -561,9 -554,9 +570,9 @@@ xfs_destroy_mount_workqueues
         struct xfs_mount        *mp)
   {
         destroy_workqueue(mp->m_sync_workqueue);
- -      destroy_workqueue(mp->m_gc_workqueue);
+ +      destroy_workqueue(mp->m_blockgc_wq);
+ +      destroy_workqueue(mp->m_inodegc_wq);
         destroy_workqueue(mp->m_reclaim_workqueue);
- -      destroy_workqueue(mp->m_cil_workqueue);
         destroy_workqueue(mp->m_unwritten_workqueue);
         destroy_workqueue(mp->m_buf_workqueue);
   }
@@@ -612,6 -605,32 +621,6 @@@ xfs_fs_alloc_inode
         return NULL;
   }
   
- -#ifdef DEBUG
- -static void
- -xfs_check_delalloc(
- -      struct xfs_inode        *ip,
- -      int                     whichfork)
- -{
- -      struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
- -      struct xfs_bmbt_irec    got;
- -      struct xfs_iext_cursor  icur;
- -
- -      if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
- -              return;
- -      do {
- -              if (isnullstartblock(got.br_startblock)) {
- -                      xfs_warn(ip->i_mount,
- -      "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
- -                              ip->i_ino,
- -                              whichfork == XFS_DATA_FORK ? "data" : "cow",
- -                              got.br_startoff, got.br_blockcount);
- -              }
- -      } while (xfs_iext_next_extent(ifp, &icur, &got));
- -}
- -#else
- -#define xfs_check_delalloc(ip, whichfork)     do { } while (0)
- -#endif
- -
   /*
    * Now that the generic code is guaranteed not to be accessing
    * the linux inode, we can inactivate and reclaim the inode.
@@@ -627,6 -646,30 +636,6 @@@ xfs_fs_destroy_inode
         ASSERT(!rwsem_is_locked(&inode->i_rwsem));
         XFS_STATS_INC(ip->i_mount, vn_rele);
         XFS_STATS_INC(ip->i_mount, vn_remove);
- -
- -      xfs_inactive(ip);
- -
- -      if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
- -              xfs_check_delalloc(ip, XFS_DATA_FORK);
- -              xfs_check_delalloc(ip, XFS_COW_FORK);
- -              ASSERT(0);
- -      }
- -
- -      XFS_STATS_INC(ip->i_mount, vn_reclaim);
- -
- -      /*
- -       * We should never get here with one of the reclaim flags already set.
- -       */
- -      ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- -      ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
- -
- -      /*
- -       * We always use background reclaim here because even if the inode is
- -       * clean, it still may be under IO and hence we have wait for IO
- -       * completion to occur before we can reclaim the inode. The background
- -       * reclaim path handles this more efficiently than we can here, so
- -       * simply let background reclaim tear down all inodes.
- -       */
         xfs_inode_mark_reclaimable(ip);
   }
   
@@@ -675,6 -718,8 +684,6 @@@ xfs_fs_inode_init_once
         atomic_set(&ip->i_pincount, 0);
         spin_lock_init(&ip->i_flags_lock);
   
- -      mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- -                   "xfsino", ip->i_ino);
         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                      "xfsino", ip->i_ino);
   }
@@@ -698,7 -743,7 +707,7 @@@ xfs_fs_drop_inode
          * that.  See the comment for this inode flag.
          */
         if (ip->i_flags & XFS_IRECOVERY) {
- -              ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
+ +              ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
                 return 0;
         }
   
@@@ -721,8 -766,6 +730,8 @@@ xfs_fs_sync_fs
   {
         struct xfs_mount        *mp = XFS_M(sb);
   
+ +      trace_xfs_fs_sync_fs(mp, __return_address);
+ +
         /*
          * Doing anything during the async pass would be counterproductive.
          */
@@@ -739,25 -782,6 +748,25 @@@
                 flush_delayed_work(&mp->m_log->l_work);
         }
   
+ +      /*
+ +       * If we are called with page faults frozen out, it means we are about
+ +       * to freeze the transaction subsystem. Take the opportunity to shut
+ +       * down inodegc because once SB_FREEZE_FS is set it's too late to
+ +       * prevent inactivation races with freeze. The fs doesn't get called
+ +       * again by the freezing process until after SB_FREEZE_FS has been set,
+ +       * so it's now or never.  Same logic applies to speculative allocation
+ +       * garbage collection.
+ +       *
+ +       * We don't care if this is a normal syncfs call that does this or
+ +       * freeze that does this - we can run this multiple times without issue
+ +       * and we won't race with a restart because a restart can only occur
+ +       * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
+ +       */
+ +      if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
+ +              xfs_inodegc_stop(mp);
+ +              xfs_blockgc_stop(mp);
+ +      }
+ +
         return 0;
   }
   
@@@ -776,9 -800,6 +785,9 @@@ xfs_fs_statfs
         xfs_extlen_t            lsize;
         int64_t                 ffree;
   
+ +      /* Wait for whatever inactivations are in progress. */
+ +      xfs_inodegc_flush(mp);
+ +
         statp->f_type = XFS_SUPER_MAGIC;
         statp->f_namelen = MAXNAMELEN - 1;
   
@@@ -874,22 -895,10 +883,22 @@@ xfs_fs_freeze
          * set a GFP_NOFS context here to avoid recursion deadlocks.
          */
         flags = memalloc_nofs_save();
- -      xfs_blockgc_stop(mp);
         xfs_save_resvblks(mp);
         ret = xfs_log_quiesce(mp);
         memalloc_nofs_restore(flags);
+ +
+ +      /*
+ +       * For read-write filesystems, we need to restart the inodegc on error
+ +       * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
+ +       * going to be run to restart it now.  We are at SB_FREEZE_FS level
+ +       * here, so we can restart safely without racing with a stop in
+ +       * xfs_fs_sync_fs().
+ +       */
+ +      if (ret && !xfs_is_readonly(mp)) {
+ +              xfs_blockgc_start(mp);
+ +              xfs_inodegc_start(mp);
+ +      }
+ +
         return ret;
   }
   
@@@ -901,18 -910,7 +910,18 @@@ xfs_fs_unfreeze
   
         xfs_restore_resvblks(mp);
         xfs_log_work_queue(mp);
- -      xfs_blockgc_start(mp);
+ +
+ +      /*
+ +       * Don't reactivate the inodegc worker on a readonly filesystem because
+ +       * inodes are sent directly to reclaim.  Don't reactivate the blockgc
+ +       * worker because there are no speculative preallocations on a readonly
+ +       * filesystem.
+ +       */
+ +      if (!xfs_is_readonly(mp)) {
+ +              xfs_blockgc_start(mp);
+ +              xfs_inodegc_start(mp);
+ +      }
+ +
         return 0;
   }
   
@@@ -924,8 -922,10 +933,8 @@@ STATIC in
   xfs_finish_flags(
         struct xfs_mount        *mp)
   {
- -      int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
- -
         /* Fail a mount where the logbuf is smaller than the log stripe */
- -      if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+ +      if (xfs_has_logv2(mp)) {
                 if (mp->m_logbsize <= 0 &&
                     mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
                         mp->m_logbsize = mp->m_sb.sb_logsunit;
@@@ -947,24 -947,33 +956,24 @@@
         /*
          * V5 filesystems always use attr2 format for attributes.
          */
- -      if (xfs_sb_version_hascrc(&mp->m_sb) &&
- -          (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ +      if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
                 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
                              "attr2 is always enabled for V5 filesystems.");
                 return -EINVAL;
         }
   
- -      /*
- -       * mkfs'ed attr2 will turn on attr2 mount unless explicitly
- -       * told by noattr2 to turn it off
- -       */
- -      if (xfs_sb_version_hasattr2(&mp->m_sb) &&
- -          !(mp->m_flags & XFS_MOUNT_NOATTR2))
- -              mp->m_flags |= XFS_MOUNT_ATTR2;
- -
         /*
          * prohibit r/w mounts of read-only filesystems
          */
- -      if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+ +      if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
                 xfs_warn(mp,
                         "cannot mount a read-only filesystem as read-write");
                 return -EROFS;
         }
   
- -      if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
- -          (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
- -          !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+ +      if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
+ +          (mp->m_qflags & XFS_PQUOTA_ACCT) &&
+ +          !xfs_has_pquotino(mp)) {
                 xfs_warn(mp,
                   "Super block does not support project and group quota together");
                 return -EINVAL;
@@@ -1022,40 -1031,11 +1031,40 @@@ xfs_destroy_percpu_counters
         percpu_counter_destroy(&mp->m_icount);
         percpu_counter_destroy(&mp->m_ifree);
         percpu_counter_destroy(&mp->m_fdblocks);
- -      ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+ +      ASSERT(xfs_is_shutdown(mp) ||
                percpu_counter_sum(&mp->m_delalloc_blks) == 0);
         percpu_counter_destroy(&mp->m_delalloc_blks);
   }
   
+ +static int
+ +xfs_inodegc_init_percpu(
+ +      struct xfs_mount        *mp)
+ +{
+ +      struct xfs_inodegc      *gc;
+ +      int                     cpu;
+ +
+ +      mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
+ +      if (!mp->m_inodegc)
+ +              return -ENOMEM;
+ +
+ +      for_each_possible_cpu(cpu) {
+ +              gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ +              init_llist_head(&gc->list);
+ +              gc->items = 0;
+ +              INIT_WORK(&gc->work, xfs_inodegc_worker);
+ +      }
+ +      return 0;
+ +}
+ +
+ +static void
+ +xfs_inodegc_free_percpu(
+ +      struct xfs_mount        *mp)
+ +{
+ +      if (!mp->m_inodegc)
+ +              return;
+ +      free_percpu(mp->m_inodegc);
+ +}
+ +
   static void
   xfs_fs_put_super(
         struct super_block      *sb)
@@@ -1072,8 -1052,6 +1081,8 @@@
   
         xfs_freesb(mp);
         free_percpu(mp->m_stats.xs_stats);
+ +      xfs_mount_list_del(mp);
+ +      xfs_inodegc_free_percpu(mp);
         xfs_destroy_percpu_counters(mp);
         xfs_destroy_mount_workqueues(mp);
         xfs_close_devices(mp);
@@@ -1162,7 -1140,7 +1171,7 @@@ xfs_fs_warn_deprecated
          * already had the flag set
          */
         if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
- -                      !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value)
+ +            !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
                 return;
         xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
   }
@@@ -1210,27 -1188,27 +1219,27 @@@ xfs_fs_parse_param
                 if (suffix_kstrtoint(param->string, 10, &size))
                         return -EINVAL;
                 parsing_mp->m_allocsize_log = ffs(size) - 1;
- -              parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
+ +              parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
                 return 0;
         case Opt_grpid:
         case Opt_bsdgroups:
- -              parsing_mp->m_flags |= XFS_MOUNT_GRPID;
+ +              parsing_mp->m_features |= XFS_FEAT_GRPID;
                 return 0;
         case Opt_nogrpid:
         case Opt_sysvgroups:
- -              parsing_mp->m_flags &= ~XFS_MOUNT_GRPID;
+ +              parsing_mp->m_features &= ~XFS_FEAT_GRPID;
                 return 0;
         case Opt_wsync:
- -              parsing_mp->m_flags |= XFS_MOUNT_WSYNC;
+ +              parsing_mp->m_features |= XFS_FEAT_WSYNC;
                 return 0;
         case Opt_norecovery:
- -              parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY;
+ +              parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
                 return 0;
         case Opt_noalign:
- -              parsing_mp->m_flags |= XFS_MOUNT_NOALIGN;
+ +              parsing_mp->m_features |= XFS_FEAT_NOALIGN;
                 return 0;
         case Opt_swalloc:
- -              parsing_mp->m_flags |= XFS_MOUNT_SWALLOC;
+ +              parsing_mp->m_features |= XFS_FEAT_SWALLOC;
                 return 0;
         case Opt_sunit:
                 parsing_mp->m_dalign = result.uint_32;
@@@ -1239,58 -1217,62 +1248,58 @@@
                 parsing_mp->m_swidth = result.uint_32;
                 return 0;
         case Opt_inode32:
- -              parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+ +              parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
                 return 0;
         case Opt_inode64:
- -              parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+ +              parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
                 return 0;
         case Opt_nouuid:
- -              parsing_mp->m_flags |= XFS_MOUNT_NOUUID;
+ +              parsing_mp->m_features |= XFS_FEAT_NOUUID;
                 return 0;
         case Opt_largeio:
- -              parsing_mp->m_flags |= XFS_MOUNT_LARGEIO;
+ +              parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
                 return 0;
         case Opt_nolargeio:
- -              parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO;
+ +              parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
                 return 0;
         case Opt_filestreams:
- -              parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+ +              parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
                 return 0;
         case Opt_noquota:
                 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
- -              parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
                 return 0;
         case Opt_quota:
         case Opt_uquota:
         case Opt_usrquota:
- -              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- -                               XFS_UQUOTA_ENFD);
+ +              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
                 return 0;
         case Opt_qnoenforce:
         case Opt_uqnoenforce:
- -              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ +              parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
                 parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                 return 0;
         case Opt_pquota:
         case Opt_prjquota:
- -              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- -                               XFS_PQUOTA_ENFD);
+ +              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
                 return 0;
         case Opt_pqnoenforce:
- -              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ +              parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
                 parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
                 return 0;
         case Opt_gquota:
         case Opt_grpquota:
- -              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- -                               XFS_GQUOTA_ENFD);
+ +              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
                 return 0;
         case Opt_gqnoenforce:
- -              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ +              parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
                 parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
                 return 0;
         case Opt_discard:
- -              parsing_mp->m_flags |= XFS_MOUNT_DISCARD;
+ +              parsing_mp->m_features |= XFS_FEAT_DISCARD;
                 return 0;
         case Opt_nodiscard:
- -              parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ +              parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
                 return 0;
   #ifdef CONFIG_FS_DAX
         case Opt_dax:
@@@ -1302,20 -1284,21 +1311,20 @@@
   #endif
         /* Following mount options will be removed in September 2025 */
         case Opt_ikeep:
- -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true);
- -              parsing_mp->m_flags |= XFS_MOUNT_IKEEP;
+ +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
+ +              parsing_mp->m_features |= XFS_FEAT_IKEEP;
                 return 0;
         case Opt_noikeep:
- -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false);
- -              parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP;
+ +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
+ +              parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
                 return 0;
         case Opt_attr2:
- -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true);
- -              parsing_mp->m_flags |= XFS_MOUNT_ATTR2;
+ +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
+ +              parsing_mp->m_features |= XFS_FEAT_ATTR2;
                 return 0;
         case Opt_noattr2:
- -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true);
- -              parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2;
- -              parsing_mp->m_flags |= XFS_MOUNT_NOATTR2;
+ +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
+ +              parsing_mp->m_features |= XFS_FEAT_NOATTR2;
                 return 0;
         default:
                 xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
@@@ -1329,23 -1312,17 +1338,23 @@@ static in
   xfs_fs_validate_params(
         struct xfs_mount        *mp)
   {
+ +      /* No recovery flag requires a read-only mount */
+ +      if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
+ +              xfs_warn(mp, "no-recovery mounts must be read-only.");
+ +              return -EINVAL;
+ +      }
+ +
         /*
- -       * no recovery flag requires a read-only mount
+ +       * We have not read the superblock at this point, so only the attr2
+ +       * mount option can set the attr2 feature by this stage.
          */
- -      if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
- -          !(mp->m_flags & XFS_MOUNT_RDONLY)) {
- -              xfs_warn(mp, "no-recovery mounts must be read-only.");
+ +      if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
+ +              xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
                 return -EINVAL;
         }
   
- -      if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
- -          (mp->m_dalign || mp->m_swidth)) {
+ +
+ +      if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
                 xfs_warn(mp,
         "sunit and swidth options incompatible with the noalign option");
                 return -EINVAL;
@@@ -1389,7 -1366,7 +1398,7 @@@
                 return -EINVAL;
         }
   
- -      if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
+ +      if (xfs_has_allocsize(mp) &&
             (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
              mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
                 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
@@@ -1450,22 -1427,11 +1459,22 @@@ xfs_fs_fill_super
         if (error)
                 goto out_destroy_workqueues;
   
+ +      error = xfs_inodegc_init_percpu(mp);
+ +      if (error)
+ +              goto out_destroy_counters;
+ +
+ +      /*
+ +       * All percpu data structures requiring cleanup when a cpu goes offline
+ +       * must be allocated before adding this @mp to the cpu-dead handler's
+ +       * mount list.
+ +       */
+ +      xfs_mount_list_add(mp);
+ +
         /* Allocate stats memory before we do operations that might use it */
         mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
         if (!mp->m_stats.xs_stats) {
                 error = -ENOMEM;
- -              goto out_destroy_counters;
+ +              goto out_destroy_inodegc;
         }
   
         error = xfs_readsb(mp, flags);
@@@ -1481,7 -1447,7 +1490,7 @@@
                 goto out_free_sb;
   
         /* V4 support is undergoing deprecation. */
- -      if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+ +      if (!xfs_has_crc(mp)) {
   #ifdef CONFIG_XFS_SUPPORT_V4
                 xfs_warn_once(mp,
         "Deprecated V4 format (crc=0) will not be supported after September 2030.");
@@@ -1494,7 -1460,7 +1503,7 @@@
         }
   
         /* Filesystem claims it needs repair, so refuse the mount. */
- -      if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+ +      if (xfs_has_needsrepair(mp)) {
                 xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
                 error = -EFSCORRUPTED;
                 goto out_free_sb;
@@@ -1566,7 -1532,7 +1575,7 @@@
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sb->s_max_links = XFS_MAXLINK;
         sb->s_time_gran = 1;
- -      if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
+ +      if (xfs_has_bigtime(mp)) {
                 sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
                 sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
         } else {
@@@ -1579,26 -1545,29 +1588,25 @@@
         set_posix_acl_flag(sb);
   
         /* version 5 superblocks support inode version counters. */
- -      if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+ +      if (xfs_has_crc(mp))
                 sb->s_flags |= SB_I_VERSION;
   
- -      if (xfs_sb_version_hasbigtime(&mp->m_sb))
- -              xfs_warn(mp,
- - "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
- -
- -      if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
+ +      if (xfs_has_dax_always(mp)) {
                 bool rtdev_is_dax = false, datadev_is_dax;
   
                 xfs_warn(mp,
                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
   
-               datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
-                       sb->s_blocksize);
+               datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
                 if (mp->m_rtdev_targp)
-                       rtdev_is_dax = bdev_dax_supported(
-                               mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
+                       rtdev_is_dax = xfs_buftarg_is_dax(sb,
+                                               mp->m_rtdev_targp);
                 if (!rtdev_is_dax && !datadev_is_dax) {
                         xfs_alert(mp,
                         "DAX unsupported by block device. Turning off DAX.");
                         xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
                 }
- -              if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ +              if (xfs_has_reflink(mp)) {
                         xfs_alert(mp,
                 "DAX and reflink cannot be used together!");
                         error = -EINVAL;
@@@ -1606,17 -1575,17 +1614,17 @@@
                 }
         }
   
- -      if (mp->m_flags & XFS_MOUNT_DISCARD) {
+ +      if (xfs_has_discard(mp)) {
                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
   
                 if (!blk_queue_discard(q)) {
                         xfs_warn(mp, "mounting with \"discard\" option, but "
                                         "the device does not support discard");
- -                      mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ +                      mp->m_features &= ~XFS_FEAT_DISCARD;
                 }
         }
   
- -      if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ +      if (xfs_has_reflink(mp)) {
                 if (mp->m_sb.sb_rblocks) {
                         xfs_alert(mp,
         "reflink not compatible with realtime device!");
@@@ -1630,13 -1599,17 +1638,13 @@@
                 }
         }
   
- -      if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
+ +      if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
                 xfs_alert(mp,
         "reverse mapping btree not compatible with realtime device!");
                 error = -EINVAL;
                 goto out_filestream_unmount;
         }
   
- -      if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
- -              xfs_warn(mp,
- - "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");
- -
         error = xfs_mountfs(mp);
         if (error)
                 goto out_filestream_unmount;
@@@ -1660,9 -1633,6 +1668,9 @@@
         xfs_freesb(mp);
    out_free_stats:
         free_percpu(mp->m_stats.xs_stats);
+ + out_destroy_inodegc:
+ +      xfs_mount_list_del(mp);
+ +      xfs_inodegc_free_percpu(mp);
    out_destroy_counters:
         xfs_destroy_percpu_counters(mp);
    out_destroy_workqueues:
@@@ -1694,13 -1664,13 +1702,13 @@@ xfs_remount_rw
         struct xfs_sb           *sbp = &mp->m_sb;
         int error;
   
- -      if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+ +      if (xfs_has_norecovery(mp)) {
                 xfs_warn(mp,
                         "ro->rw transition prohibited on norecovery mount");
                 return -EINVAL;
         }
   
- -      if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ +      if (xfs_sb_is_v5(sbp) &&
             xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
                 xfs_warn(mp,
         "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
@@@ -1709,7 -1679,7 +1717,7 @@@
                 return -EINVAL;
         }
   
- -      mp->m_flags &= ~XFS_MOUNT_RDONLY;
+ +      clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
   
         /*
          * If this is the first remount to writeable state we might have some
@@@ -1746,9 -1716,6 +1754,9 @@@
         if (error && error != -ENOSPC)
                 return error;
   
+ +      /* Re-enable the background inode inactivation worker. */
+ +      xfs_inodegc_start(mp);
+ +
         return 0;
   }
   
@@@ -1771,15 -1738,6 +1779,15 @@@ xfs_remount_ro
                 return error;
         }
   
+ +      /*
+ +       * Stop the inodegc background worker.  xfs_fs_reconfigure already
+ +       * flushed all pending inodegc work when it sync'd the filesystem.
+ +       * The VFS holds s_umount, so we know that inodes cannot enter
+ +       * xfs_fs_destroy_inode during a remount operation.  In readonly mode
+ +       * we send inodes straight to reclaim, so no inodes will be queued.
+ +       */
+ +      xfs_inodegc_stop(mp);
+ +
         /* Free the per-AG metadata reservation pool. */
         error = xfs_fs_unreserve_ag_blocks(mp);
         if (error) {
@@@ -1797,7 -1755,7 +1805,7 @@@
         xfs_save_resvblks(mp);
   
         xfs_log_clean(mp);
- -      mp->m_flags |= XFS_MOUNT_RDONLY;
+ +      set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
   
         return 0;
   }
@@@ -1820,11 -1778,12 +1828,11 @@@ xfs_fs_reconfigure
   {
         struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
         struct xfs_mount        *new_mp = fc->s_fs_info;
- -      xfs_sb_t                *sbp = &mp->m_sb;
         int                     flags = fc->sb_flags;
         int                     error;
   
         /* version 5 superblocks always support version counters. */
- -      if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
+ +      if (xfs_has_crc(mp))
                 fc->sb_flags |= SB_I_VERSION;
   
         error = xfs_fs_validate_params(new_mp);
@@@ -1834,26 -1793,28 +1842,26 @@@
         sync_filesystem(mp->m_super);
   
         /* inode32 -> inode64 */
- -      if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
- -          !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
- -              mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- -              mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+ +      if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
+ +              mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
+ +              mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
         }
   
         /* inode64 -> inode32 */
- -      if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
- -          (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
- -              mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- -              mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+ +      if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
+ +              mp->m_features |= XFS_FEAT_SMALL_INUMS;
+ +              mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
         }
   
         /* ro -> rw */
- -      if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
+ +      if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
                 error = xfs_remount_rw(mp);
                 if (error)
                         return error;
         }
   
         /* rw -> ro */
- -      if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
+ +      if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
                 error = xfs_remount_ro(mp);
                 if (error)
                         return error;
@@@ -1920,11 -1881,11 +1928,11 @@@ static int xfs_init_fs_context
          * Copy binary VFS mount flags we are interested in.
          */
         if (fc->sb_flags & SB_RDONLY)
- -              mp->m_flags |= XFS_MOUNT_RDONLY;
+ +              set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
         if (fc->sb_flags & SB_DIRSYNC)
- -              mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ +              mp->m_features |= XFS_FEAT_DIRSYNC;
         if (fc->sb_flags & SB_SYNCHRONOUS)
- -              mp->m_flags |= XFS_MOUNT_WSYNC;
+ +              mp->m_features |= XFS_FEAT_WSYNC;
   
         fc->s_fs_info = mp;
         fc->ops = &xfs_context_ops;
@@@ -2167,48 -2128,6 +2175,48 @@@ xfs_destroy_workqueues(void
         destroy_workqueue(xfs_alloc_wq);
   }
   
+ +#ifdef CONFIG_HOTPLUG_CPU
+ +static int
+ +xfs_cpu_dead(
+ +      unsigned int            cpu)
+ +{
+ +      struct xfs_mount        *mp, *n;
+ +
+ +      spin_lock(&xfs_mount_list_lock);
+ +      list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
+ +              spin_unlock(&xfs_mount_list_lock);
+ +              xfs_inodegc_cpu_dead(mp, cpu);
+ +              spin_lock(&xfs_mount_list_lock);
+ +      }
+ +      spin_unlock(&xfs_mount_list_lock);
+ +      return 0;
+ +}
+ +
+ +static int __init
+ +xfs_cpu_hotplug_init(void)
+ +{
+ +      int     error;
+ +
+ +      error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
+ +                      xfs_cpu_dead);
+ +      if (error < 0)
+ +              xfs_alert(NULL,
+ +"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
+ +                      error);
+ +      return error;
+ +}
+ +
+ +static void
+ +xfs_cpu_hotplug_destroy(void)
+ +{
+ +      cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
+ +}
+ +
+ +#else /* !CONFIG_HOTPLUG_CPU */
+ +static inline int xfs_cpu_hotplug_init(void) { return 0; }
+ +static inline void xfs_cpu_hotplug_destroy(void) {}
+ +#endif
+ +
   STATIC int __init
   init_xfs_fs(void)
   {
@@@ -2221,14 -2140,10 +2229,14 @@@
   
         xfs_dir_startup();
   
- -      error = xfs_init_zones();
+ +      error = xfs_cpu_hotplug_init();
         if (error)
                 goto out;
   
+ +      error = xfs_init_zones();
+ +      if (error)
+ +              goto out_destroy_hp;
+ +
         error = xfs_init_workqueues();
         if (error)
                 goto out_destroy_zones;
@@@ -2308,8 -2223,6 +2316,8 @@@
         xfs_destroy_workqueues();
    out_destroy_zones:
         xfs_destroy_zones();
+ + out_destroy_hp:
+ +      xfs_cpu_hotplug_destroy();
    out:
         return error;
   }
@@@ -2332,7 -2245,6 +2340,7 @@@ exit_xfs_fs(void
         xfs_destroy_workqueues();
         xfs_destroy_zones();
         xfs_uuid_table_free();
+ +      xfs_cpu_hotplug_destroy();
   }
   
   module_init(init_xfs_fs);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
		1	2
drivers/md/dm-table.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
fs/erofs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext2/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_super.c	patch \|	diff1 \|	diff2 \|	blob \| history