Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Sep 2021 18:39:57 +0000 (11:39 -0700)
Pull libnvdimm updates from Dan Williams:

 - Fix a race condition in the teardown path of raw mode pmem
   namespaces.

 - Clean up the code that filesystems use to detect filesystem-dax
   capabilities of their underlying block device.

* tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: remove bdev_dax_supported
  xfs: factor out a xfs_buftarg_is_dax helper
  dax: stub out dax_supported for !CONFIG_FS_DAX
  dax: remove __generic_fsdax_supported
  dax: move the dax_read_lock() locking into dax_supported
  dax: mark dax_get_by_host static
  dm: use fs_dax_get_by_bdev instead of dax_get_by_host
  dax: stop using bdevname
  fsdax: improve the FS_DAX Kconfig description and help text
  libnvdimm/pmem: Fix crash triggered when I/O in-flight during unbind

1  2 
drivers/md/dm-table.c
drivers/md/dm.c
fs/Kconfig
fs/erofs/super.c
fs/ext2/super.c
fs/ext4/super.c
fs/xfs/xfs_super.c

diff --combined drivers/md/dm-table.c
@@@ -809,14 -809,9 +809,9 @@@ EXPORT_SYMBOL_GPL(dm_table_set_type)
  int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
                        sector_t start, sector_t len, void *data)
  {
-       int blocksize = *(int *) data, id;
-       bool rc;
+       int blocksize = *(int *) data;
  
-       id = dax_read_lock();
-       rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
-       dax_read_unlock(id);
-       return rc;
+       return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
  }
  
  /* Check devices support synchronous DAX */
@@@ -2076,7 -2071,7 +2071,7 @@@ int dm_table_set_restrictions(struct dm
        }
  
        dm_update_keyslot_manager(q, t);
 -      blk_queue_update_readahead(q);
 +      disk_update_readahead(t->md->disk);
  
        return 0;
  }
diff --combined drivers/md/dm.c
@@@ -8,7 -8,6 +8,7 @@@
  #include "dm-core.h"
  #include "dm-rq.h"
  #include "dm-uevent.h"
 +#include "dm-ima.h"
  
  #include <linux/init.h>
  #include <linux/module.h>
@@@ -262,13 -261,9 +262,13 @@@ static void (*_exits[])(void) = 
  static int __init dm_init(void)
  {
        const int count = ARRAY_SIZE(_inits);
 -
        int r, i;
  
 +#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
 +      DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
 +             " Duplicate IMA measurements will not be recorded in the IMA log.");
 +#endif
 +
        for (i = 0; i < count; i++) {
                r = _inits[i]();
                if (r)
        }
  
        return 0;
 -
 -      bad:
 +bad:
        while (i--)
                _exits[i]();
  
@@@ -654,7 -650,7 +654,7 @@@ static int open_table_device(struct tab
        }
  
        td->dm_dev.bdev = bdev;
-       td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+       td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
        return 0;
  }
  
@@@ -1697,13 -1693,14 +1697,13 @@@ static void cleanup_mapped_device(struc
                spin_lock(&_minor_lock);
                md->disk->private_data = NULL;
                spin_unlock(&_minor_lock);
 -              del_gendisk(md->disk);
 -      }
 -
 -      if (md->queue)
 +              if (dm_get_md_type(md) != DM_TYPE_NONE) {
 +                      dm_sysfs_exit(md);
 +                      del_gendisk(md->disk);
 +              }
                dm_queue_destroy_keyslot_manager(md->queue);
 -
 -      if (md->disk)
                blk_cleanup_disk(md->disk);
 +      }
  
        cleanup_srcu_struct(&md->io_barrier);
  
@@@ -1795,6 -1792,7 +1795,6 @@@ static struct mapped_device *alloc_dev(
                        goto bad;
        }
  
 -      add_disk_no_queue_reg(md->disk);
        format_dev_t(md->name, MKDEV(_major, minor));
  
        md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
@@@ -1995,13 -1993,18 +1995,13 @@@ static struct dm_table *__unbind(struc
   */
  int dm_create(int minor, struct mapped_device **result)
  {
 -      int r;
        struct mapped_device *md;
  
        md = alloc_dev(minor);
        if (!md)
                return -ENXIO;
  
 -      r = dm_sysfs_init(md);
 -      if (r) {
 -              free_dev(md);
 -              return r;
 -      }
 +      dm_ima_reset_data(md);
  
        *result = md;
        return 0;
@@@ -2053,9 -2056,9 +2053,9 @@@ EXPORT_SYMBOL_GPL(dm_get_queue_limits)
   */
  int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
  {
 -      int r;
 +      enum dm_queue_mode type = dm_table_get_type(t);
        struct queue_limits limits;
 -      enum dm_queue_mode type = dm_get_md_type(md);
 +      int r;
  
        switch (type) {
        case DM_TYPE_REQUEST_BASED:
        if (r)
                return r;
  
 -      blk_register_queue(md->disk);
 +      add_disk(md->disk);
  
 +      r = dm_sysfs_init(md);
 +      if (r) {
 +              del_gendisk(md->disk);
 +              return r;
 +      }
 +      md->type = type;
        return 0;
  }
  
@@@ -2196,6 -2193,7 +2196,6 @@@ static void __dm_destroy(struct mapped_
                DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
                       dm_device_name(md), atomic_read(&md->holders));
  
 -      dm_sysfs_exit(md);
        dm_table_destroy(__unbind(md));
        free_dev(md);
  }
diff --combined fs/Kconfig
@@@ -43,7 -43,7 +43,7 @@@ source "fs/f2fs/Kconfig
  source "fs/zonefs/Kconfig"
  
  config FS_DAX
-       bool "Direct Access (DAX) support"
+       bool "File system based Direct Access (DAX) support"
        depends on MMU
        depends on !(ARM || MIPS || SPARC)
        select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
          Direct Access (DAX) can be used on memory-backed block devices.
          If the block device supports DAX and the filesystem supports DAX,
          then you can avoid using the pagecache to buffer I/Os.  Turning
-         on this option will compile in support for DAX; you will need to
-         mount the filesystem using the -o dax option.
+         on this option will compile in support for DAX.
+         For a DAX device to support file system access it needs to have
+         struct pages.  For the nfit based NVDIMMs this can be enabled
+         using the ndctl utility:
+               # ndctl create-namespace --force --reconfig=namespace0.0 \
+                       --mode=fsdax --map=mem
+         See the 'create-namespace' man page for details on the overhead of
+         --map=mem:
+         https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace
+           For ndctl to work CONFIG_DEV_DAX needs to be enabled as well. For most
+         file systems DAX support needs to be manually enabled globally or
+         per-inode using a mount option as well.  See the file documentation in
+         Documentation/filesystems/dax.rst for details.
  
          If you do not have a block device that is capable of using this,
          or if unsure, say N.  Saying Y will increase the size of the kernel
@@@ -101,6 -116,16 +116,6 @@@ config FILE_LOCKIN
            for filesystems like NFS and for the flock() system
            call. Disabling this option saves about 11k.
  
 -config MANDATORY_FILE_LOCKING
 -      bool "Enable Mandatory file locking"
 -      depends on FILE_LOCKING
 -      default y
 -      help
 -        This option enables files appropriately marked files on appropriely
 -        mounted filesystems to support mandatory locking.
 -
 -        To the best of my knowledge this is dead code that no one cares about.
 -
  source "fs/crypto/Kconfig"
  
  source "fs/verity/Kconfig"
@@@ -136,7 -161,6 +151,7 @@@ menu "DOS/FAT/EXFAT/NT Filesystems
  source "fs/fat/Kconfig"
  source "fs/exfat/Kconfig"
  source "fs/ntfs/Kconfig"
 +source "fs/ntfs3/Kconfig"
  
  endmenu
  endif # BLOCK
@@@ -349,15 -373,7 +364,15 @@@ config NFS_V4_2_SSC_HELPE
  
  source "net/sunrpc/Kconfig"
  source "fs/ceph/Kconfig"
 +
  source "fs/cifs/Kconfig"
 +source "fs/ksmbd/Kconfig"
 +
 +config CIFS_COMMON
 +      tristate
 +      default y if CIFS=y
 +      default m if CIFS=m
 +
  source "fs/coda/Kconfig"
  source "fs/afs/Kconfig"
  source "fs/9p/Kconfig"
diff --combined fs/erofs/super.c
@@@ -11,7 -11,6 +11,7 @@@
  #include <linux/crc32c.h>
  #include <linux/fs_context.h>
  #include <linux/fs_parser.h>
 +#include <linux/dax.h>
  #include "xattr.h"
  
  #define CREATE_TRACE_POINTS
@@@ -356,8 -355,6 +356,8 @@@ enum 
        Opt_user_xattr,
        Opt_acl,
        Opt_cache_strategy,
 +      Opt_dax,
 +      Opt_dax_enum,
        Opt_err
  };
  
@@@ -368,47 -365,14 +368,47 @@@ static const struct constant_table erof
        {}
  };
  
 +static const struct constant_table erofs_dax_param_enums[] = {
 +      {"always",      EROFS_MOUNT_DAX_ALWAYS},
 +      {"never",       EROFS_MOUNT_DAX_NEVER},
 +      {}
 +};
 +
  static const struct fs_parameter_spec erofs_fs_parameters[] = {
        fsparam_flag_no("user_xattr",   Opt_user_xattr),
        fsparam_flag_no("acl",          Opt_acl),
        fsparam_enum("cache_strategy",  Opt_cache_strategy,
                     erofs_param_cache_strategy),
 +      fsparam_flag("dax",             Opt_dax),
 +      fsparam_enum("dax",             Opt_dax_enum, erofs_dax_param_enums),
        {}
  };
  
 +static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
 +{
 +#ifdef CONFIG_FS_DAX
 +      struct erofs_fs_context *ctx = fc->fs_private;
 +
 +      switch (mode) {
 +      case EROFS_MOUNT_DAX_ALWAYS:
 +              warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
 +              set_opt(ctx, DAX_ALWAYS);
 +              clear_opt(ctx, DAX_NEVER);
 +              return true;
 +      case EROFS_MOUNT_DAX_NEVER:
 +              set_opt(ctx, DAX_NEVER);
 +              clear_opt(ctx, DAX_ALWAYS);
 +              return true;
 +      default:
 +              DBG_BUGON(1);
 +              return false;
 +      }
 +#else
 +      errorfc(fc, "dax options not supported");
 +      return false;
 +#endif
 +}
 +
  static int erofs_fc_parse_param(struct fs_context *fc,
                                struct fs_parameter *param)
  {
                errorfc(fc, "compression not supported, cache_strategy ignored");
  #endif
                break;
 +      case Opt_dax:
 +              if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
 +                      return -EINVAL;
 +              break;
 +      case Opt_dax_enum:
 +              if (!erofs_fc_set_dax_mode(fc, result.uint_32))
 +                      return -EINVAL;
 +              break;
        default:
                return -ENOPARAM;
        }
@@@ -474,7 -430,7 +474,7 @@@ static int erofs_managed_cache_releasep
        DBG_BUGON(mapping->a_ops != &managed_cache_aops);
  
        if (PagePrivate(page))
 -              ret = erofs_try_to_free_cached_page(mapping, page);
 +              ret = erofs_try_to_free_cached_page(page);
  
        return ret;
  }
@@@ -540,16 -496,10 +540,16 @@@ static int erofs_fc_fill_super(struct s
                return -ENOMEM;
  
        sb->s_fs_info = sbi;
 +      sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
        err = erofs_read_superblock(sb);
        if (err)
                return err;
  
-           !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
 +      if (test_opt(ctx, DAX_ALWAYS) &&
++          !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
 +              errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
 +              clear_opt(ctx, DAX_ALWAYS);
 +      }
        sb->s_flags |= SB_RDONLY | SB_NOATIME;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_time_gran = 1;
@@@ -659,7 -609,6 +659,7 @@@ static void erofs_kill_sb(struct super_
        sbi = EROFS_SB(sb);
        if (!sbi)
                return;
 +      fs_put_dax(sbi->dax_dev);
        kfree(sbi);
        sb->s_fs_info = NULL;
  }
@@@ -762,8 -711,8 +762,8 @@@ static int erofs_statfs(struct dentry *
  
  static int erofs_show_options(struct seq_file *seq, struct dentry *root)
  {
 -      struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
 -      struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
 +      struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
 +      struct erofs_fs_context *ctx = &sbi->ctx;
  
  #ifdef CONFIG_EROFS_FS_XATTR
        if (test_opt(ctx, XATTR_USER))
        else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
                seq_puts(seq, ",cache_strategy=readaround");
  #endif
 +      if (test_opt(ctx, DAX_ALWAYS))
 +              seq_puts(seq, ",dax=always");
 +      if (test_opt(ctx, DAX_NEVER))
 +              seq_puts(seq, ",dax=never");
        return 0;
  }
  
diff --combined fs/ext2/super.c
@@@ -206,6 -206,9 +206,6 @@@ static void init_once(void *foo
        init_rwsem(&ei->xattr_sem);
  #endif
        mutex_init(&ei->truncate_mutex);
 -#ifdef CONFIG_FS_DAX
 -      init_rwsem(&ei->dax_sem);
 -#endif
        inode_init_once(&ei->vfs_inode);
  }
  
@@@ -946,7 -949,8 +946,8 @@@ static int ext2_fill_super(struct super
        blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
  
        if (test_opt(sb, DAX)) {
-               if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+               if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                               bdev_nr_sectors(sb->s_bdev))) {
                        ext2_msg(sb, KERN_ERR,
                                "DAX unsupported by block device. Turning off DAX.");
                        clear_opt(sbi->s_mount_opt, DAX);
diff --combined fs/ext4/super.c
@@@ -80,6 -80,7 +80,6 @@@ static struct dentry *ext4_mount(struc
                       const char *dev_name, void *data);
  static inline int ext2_feature_set_ok(struct super_block *sb);
  static inline int ext3_feature_set_ok(struct super_block *sb);
 -static int ext4_feature_set_ok(struct super_block *sb, int readonly);
  static void ext4_destroy_lazyinit_thread(void);
  static void ext4_unregister_li_request(struct super_block *sb);
  static void ext4_clear_request_list(void);
@@@ -89,9 -90,12 +89,9 @@@ static struct inode *ext4_get_journal_i
  /*
   * Lock ordering
   *
 - * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
 - * i_mmap_rwsem (inode->i_mmap_rwsem)!
 - *
   * page fault path:
 - * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
 - *   page lock -> i_data_sem (rw)
 + * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 + *   -> page lock -> i_data_sem (rw)
   *
   * buffered write path:
   * sb_start_write -> i_mutex -> mmap_lock
   *   i_data_sem (rw)
   *
   * truncate:
 - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
 - * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
 + * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 + *   page lock
 + * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
   *   i_data_sem (rw)
   *
   * direct IO:
@@@ -1172,7 -1175,6 +1172,7 @@@ static void ext4_put_super(struct super
  
        flush_work(&sbi->s_error_work);
        destroy_workqueue(sbi->rsv_conversion_wq);
 +      ext4_release_orphan_info(sb);
  
        /*
         * Unregister sysfs before destroying jbd2 journal.
  
        if (!sb_rdonly(sb) && !aborted) {
                ext4_clear_feature_journal_needs_recovery(sb);
 +              ext4_clear_feature_orphan_present(sb);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        if (!sb_rdonly(sb))
@@@ -1359,6 -1360,7 +1359,6 @@@ static void init_once(void *foo
        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
 -      init_rwsem(&ei->i_mmap_sem);
        inode_init_once(&ei->vfs_inode);
        ext4_fc_init_inode(&ei->vfs_inode);
  }
@@@ -1583,12 -1585,14 +1583,12 @@@ static int ext4_mark_dquot_dirty(struc
  static int ext4_write_info(struct super_block *sb, int type);
  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                         const struct path *path);
 -static int ext4_quota_on_mount(struct super_block *sb, int type);
  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
                               size_t len, loff_t off);
  static ssize_t ext4_quota_write(struct super_block *sb, int type,
                                const char *data, size_t len, loff_t off);
  static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                             unsigned int flags);
 -static int ext4_enable_quotas(struct super_block *sb);
  
  static struct dquot **ext4_get_dquots(struct inode *inode)
  {
@@@ -2683,11 -2687,8 +2683,11 @@@ static int ext4_setup_super(struct supe
                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
        le16_add_cpu(&es->s_mnt_count, 1);
        ext4_update_tstamp(es, s_mtime);
 -      if (sbi->s_journal)
 +      if (sbi->s_journal) {
                ext4_set_feature_journal_needs_recovery(sb);
 +              if (ext4_has_feature_orphan_file(sb))
 +                      ext4_set_feature_orphan_present(sb);
 +      }
  
        err = ext4_commit_super(sb);
  done:
@@@ -2969,6 -2970,169 +2969,6 @@@ static int ext4_check_descriptors(struc
        return 1;
  }
  
 -/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
 - * the superblock) which were deleted from all directories, but held open by
 - * a process at the time of a crash.  We walk the list and try to delete these
 - * inodes at recovery time (only with a read-write filesystem).
 - *
 - * In order to keep the orphan inode chain consistent during traversal (in
 - * case of crash during recovery), we link each inode into the superblock
 - * orphan list_head and handle it the same way as an inode deletion during
 - * normal operation (which journals the operations for us).
 - *
 - * We only do an iget() and an iput() on each inode, which is very safe if we
 - * accidentally point at an in-use or already deleted inode.  The worst that
 - * can happen in this case is that we get a "bit already cleared" message from
 - * ext4_free_inode().  The only reason we would point at a wrong inode is if
 - * e2fsck was run on this filesystem, and it must have already done the orphan
 - * inode cleanup for us, so we can safely abort without any further action.
 - */
 -static void ext4_orphan_cleanup(struct super_block *sb,
 -                              struct ext4_super_block *es)
 -{
 -      unsigned int s_flags = sb->s_flags;
 -      int ret, nr_orphans = 0, nr_truncates = 0;
 -#ifdef CONFIG_QUOTA
 -      int quota_update = 0;
 -      int i;
 -#endif
 -      if (!es->s_last_orphan) {
 -              jbd_debug(4, "no orphan inodes to clean up\n");
 -              return;
 -      }
 -
 -      if (bdev_read_only(sb->s_bdev)) {
 -              ext4_msg(sb, KERN_ERR, "write access "
 -                      "unavailable, skipping orphan cleanup");
 -              return;
 -      }
 -
 -      /* Check if feature set would not allow a r/w mount */
 -      if (!ext4_feature_set_ok(sb, 0)) {
 -              ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
 -                       "unknown ROCOMPAT features");
 -              return;
 -      }
 -
 -      if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 -              /* don't clear list on RO mount w/ errors */
 -              if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
 -                      ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
 -                                "clearing orphan list.\n");
 -                      es->s_last_orphan = 0;
 -              }
 -              jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
 -              return;
 -      }
 -
 -      if (s_flags & SB_RDONLY) {
 -              ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
 -              sb->s_flags &= ~SB_RDONLY;
 -      }
 -#ifdef CONFIG_QUOTA
 -      /*
 -       * Turn on quotas which were not enabled for read-only mounts if
 -       * filesystem has quota feature, so that they are updated correctly.
 -       */
 -      if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
 -              int ret = ext4_enable_quotas(sb);
 -
 -              if (!ret)
 -                      quota_update = 1;
 -              else
 -                      ext4_msg(sb, KERN_ERR,
 -                              "Cannot turn on quotas: error %d", ret);
 -      }
 -
 -      /* Turn on journaled quotas used for old sytle */
 -      for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 -              if (EXT4_SB(sb)->s_qf_names[i]) {
 -                      int ret = ext4_quota_on_mount(sb, i);
 -
 -                      if (!ret)
 -                              quota_update = 1;
 -                      else
 -                              ext4_msg(sb, KERN_ERR,
 -                                      "Cannot turn on journaled "
 -                                      "quota: type %d: error %d", i, ret);
 -              }
 -      }
 -#endif
 -
 -      while (es->s_last_orphan) {
 -              struct inode *inode;
 -
 -              /*
 -               * We may have encountered an error during cleanup; if
 -               * so, skip the rest.
 -               */
 -              if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 -                      jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
 -                      es->s_last_orphan = 0;
 -                      break;
 -              }
 -
 -              inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
 -              if (IS_ERR(inode)) {
 -                      es->s_last_orphan = 0;
 -                      break;
 -              }
 -
 -              list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 -              dquot_initialize(inode);
 -              if (inode->i_nlink) {
 -                      if (test_opt(sb, DEBUG))
 -                              ext4_msg(sb, KERN_DEBUG,
 -                                      "%s: truncating inode %lu to %lld bytes",
 -                                      __func__, inode->i_ino, inode->i_size);
 -                      jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 -                                inode->i_ino, inode->i_size);
 -                      inode_lock(inode);
 -                      truncate_inode_pages(inode->i_mapping, inode->i_size);
 -                      ret = ext4_truncate(inode);
 -                      if (ret) {
 -                              /*
 -                               * We need to clean up the in-core orphan list
 -                               * manually if ext4_truncate() failed to get a
 -                               * transaction handle.
 -                               */
 -                              ext4_orphan_del(NULL, inode);
 -                              ext4_std_error(inode->i_sb, ret);
 -                      }
 -                      inode_unlock(inode);
 -                      nr_truncates++;
 -              } else {
 -                      if (test_opt(sb, DEBUG))
 -                              ext4_msg(sb, KERN_DEBUG,
 -                                      "%s: deleting unreferenced inode %lu",
 -                                      __func__, inode->i_ino);
 -                      jbd_debug(2, "deleting unreferenced inode %lu\n",
 -                                inode->i_ino);
 -                      nr_orphans++;
 -              }
 -              iput(inode);  /* The delete magic happens here! */
 -      }
 -
 -#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
 -
 -      if (nr_orphans)
 -              ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
 -                     PLURAL(nr_orphans));
 -      if (nr_truncates)
 -              ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
 -                     PLURAL(nr_truncates));
 -#ifdef CONFIG_QUOTA
 -      /* Turn off quotas if they were enabled for orphan cleanup */
 -      if (quota_update) {
 -              for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 -                      if (sb_dqopt(sb)->files[i])
 -                              dquot_quota_off(sb, i);
 -              }
 -      }
 -#endif
 -      sb->s_flags = s_flags; /* Restore SB_RDONLY status */
 -}
 -
  /*
   * Maximal extent format file size.
   * Resulting logical blkno at s_maxbytes must fit in our on-disk
@@@ -3148,7 -3312,7 +3148,7 @@@ static unsigned long ext4_get_stripe_si
   * Returns 1 if this filesystem can be mounted as requested,
   * 0 if it cannot be.
   */
 -static int ext4_feature_set_ok(struct super_block *sb, int readonly)
 +int ext4_feature_set_ok(struct super_block *sb, int readonly)
  {
        if (ext4_has_unknown_ext4_incompat_features(sb)) {
                ext4_msg(sb, KERN_ERR,
@@@ -3850,20 -4014,6 +3850,20 @@@ static const char *ext4_quota_mode(stru
  #endif
  }
  
 +static void ext4_setup_csum_trigger(struct super_block *sb,
 +                                  enum ext4_journal_trigger_type type,
 +                                  void (*trigger)(
 +                                      struct jbd2_buffer_trigger_type *type,
 +                                      struct buffer_head *bh,
 +                                      void *mapped_data,
 +                                      size_t size))
 +{
 +      struct ext4_sb_info *sbi = EXT4_SB(sb);
 +
 +      sbi->s_journal_triggers[type].sb = sb;
 +      sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
 +}
 +
  static int ext4_fill_super(struct super_block *sb, void *data, int silent)
  {
        struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
                silent = 1;
                goto cantfind_ext4;
        }
 +      ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
 +                              ext4_orphan_file_block_trigger);
  
        /* Load the checksum driver */
        sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
                goto failed_mount;
        }
  
-       if (bdev_dax_supported(sb->s_bdev, blocksize))
+       if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                       bdev_nr_sectors(sb->s_bdev)))
                set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
  
        if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
        sb->s_root = NULL;
  
        needs_recovery = (es->s_last_orphan != 0 ||
 +                        ext4_has_feature_orphan_present(sb) ||
                          ext4_has_feature_journal_needs_recovery(sb));
  
        if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
@@@ -4885,14 -5033,6 +4886,14 @@@ no_journal
                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
                                          GFP_KERNEL);
        }
 +      /*
 +       * Update the checksum after updating free space/inode
 +       * counters.  Otherwise the superblock can have an incorrect
 +       * checksum in the buffer cache until it is written out and
 +       * e2fsprogs programs trying to open a file system immediately
 +       * after it is mounted can fail.
 +       */
 +      ext4_superblock_csum_set(sb);
        if (!err)
                err = percpu_counter_init(&sbi->s_dirs_counter,
                                          ext4_count_dirs(sb), GFP_KERNEL);
        if (err)
                goto failed_mount7;
  
 +      err = ext4_init_orphan_info(sb);
 +      if (err)
 +              goto failed_mount8;
  #ifdef CONFIG_QUOTA
        /* Enable quota usage during mount. */
        if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
                err = ext4_enable_quotas(sb);
                if (err)
 -                      goto failed_mount8;
 +                      goto failed_mount9;
        }
  #endif  /* CONFIG_QUOTA */
  
                ext4_msg(sb, KERN_INFO, "recovery complete");
                err = ext4_mark_recovery_complete(sb, es);
                if (err)
 -                      goto failed_mount8;
 +                      goto failed_mount9;
        }
        if (EXT4_SB(sb)->s_journal) {
                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@@ -5000,8 -5137,6 +5001,8 @@@ cantfind_ext4
                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
        goto failed_mount;
  
 +failed_mount9:
 +      ext4_release_orphan_info(sb);
  failed_mount8:
        ext4_unregister_sysfs(sb);
        kobject_put(&sbi->s_kobj);
@@@ -5512,15 -5647,8 +5513,15 @@@ static int ext4_mark_recovery_complete(
        if (err < 0)
                goto out;
  
 -      if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
 +      if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
 +          ext4_has_feature_orphan_present(sb))) {
 +              if (!ext4_orphan_file_empty(sb)) {
 +                      ext4_error(sb, "Orphan file not empty on read-only fs.");
 +                      err = -EFSCORRUPTED;
 +                      goto out;
 +              }
                ext4_clear_feature_journal_needs_recovery(sb);
 +              ext4_clear_feature_orphan_present(sb);
                ext4_commit_super(sb);
        }
  out:
@@@ -5663,8 -5791,6 +5664,8 @@@ static int ext4_freeze(struct super_blo
  
                /* Journal blocked and flushed, clear needs_recovery flag. */
                ext4_clear_feature_journal_needs_recovery(sb);
 +              if (ext4_orphan_file_empty(sb))
 +                      ext4_clear_feature_orphan_present(sb);
        }
  
        error = ext4_commit_super(sb);
@@@ -5687,8 -5813,6 +5688,8 @@@ static int ext4_unfreeze(struct super_b
        if (EXT4_SB(sb)->s_journal) {
                /* Reset the needs_recovery flag before the fs is unlocked. */
                ext4_set_feature_journal_needs_recovery(sb);
 +              if (ext4_has_feature_orphan_file(sb))
 +                      ext4_set_feature_orphan_present(sb);
        }
  
        ext4_commit_super(sb);
@@@ -5892,7 -6016,7 +5893,7 @@@ static int ext4_remount(struct super_bl
                         * around from a previously readonly bdev mount,
                         * require a full umount/remount for now.
                         */
 -                      if (es->s_last_orphan) {
 +                      if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
                                ext4_msg(sb, KERN_WARNING, "Couldn't "
                                       "remount RDWR because of unprocessed "
                                       "orphan inode list.  Please "
@@@ -6189,6 -6313,16 +6190,6 @@@ static int ext4_write_info(struct super
        return ret;
  }
  
 -/*
 - * Turn on quotas during mount time - we need to find
 - * the quota file and such...
 - */
 -static int ext4_quota_on_mount(struct super_block *sb, int type)
 -{
 -      return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
 -                                      EXT4_SB(sb)->s_jquota_fmt, type);
 -}
 -
  static void lockdep_set_quota_inode(struct inode *inode, int subclass)
  {
        struct ext4_inode_info *ei = EXT4_I(inode);
@@@ -6318,7 -6452,7 +6319,7 @@@ static int ext4_quota_enable(struct sup
  }
  
  /* Enable usage tracking for all quota types. */
 -static int ext4_enable_quotas(struct super_block *sb)
 +int ext4_enable_quotas(struct super_block *sb)
  {
        int type, err = 0;
        unsigned long qf_inums[EXT4_MAXQUOTAS] = {
@@@ -6476,7 -6610,7 +6477,7 @@@ static ssize_t ext4_quota_write(struct 
        if (!bh)
                goto out;
        BUFFER_TRACE(bh, "get write access");
 -      err = ext4_journal_get_write_access(handle, bh);
 +      err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
        if (err) {
                brelse(bh);
                return err;
diff --combined fs/xfs/xfs_super.c
@@@ -49,28 -49,6 +49,28 @@@ static struct kset *xfs_kset;               /* top-l
  static struct xfs_kobj xfs_dbg_kobj;  /* global debug sysfs attrs */
  #endif
  
 +#ifdef CONFIG_HOTPLUG_CPU
 +static LIST_HEAD(xfs_mount_list);
 +static DEFINE_SPINLOCK(xfs_mount_list_lock);
 +
 +static inline void xfs_mount_list_add(struct xfs_mount *mp)
 +{
 +      spin_lock(&xfs_mount_list_lock);
 +      list_add(&mp->m_mount_list, &xfs_mount_list);
 +      spin_unlock(&xfs_mount_list_lock);
 +}
 +
 +static inline void xfs_mount_list_del(struct xfs_mount *mp)
 +{
 +      spin_lock(&xfs_mount_list_lock);
 +      list_del(&mp->m_mount_list);
 +      spin_unlock(&xfs_mount_list_lock);
 +}
 +#else /* !CONFIG_HOTPLUG_CPU */
 +static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
 +static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
 +#endif
 +
  enum xfs_dax_mode {
        XFS_DAX_INODE = 0,
        XFS_DAX_ALWAYS = 1,
@@@ -84,15 -62,15 +84,15 @@@ xfs_mount_set_dax_mode
  {
        switch (mode) {
        case XFS_DAX_INODE:
 -              mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
 +              mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
                break;
        case XFS_DAX_ALWAYS:
 -              mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
 -              mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
 +              mp->m_features |= XFS_FEAT_DAX_ALWAYS;
 +              mp->m_features &= ~XFS_FEAT_DAX_NEVER;
                break;
        case XFS_DAX_NEVER:
 -              mp->m_flags |= XFS_MOUNT_DAX_NEVER;
 -              mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
 +              mp->m_features |= XFS_FEAT_DAX_NEVER;
 +              mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
                break;
        }
  }
@@@ -176,32 -154,33 +176,32 @@@ xfs_fs_show_options
  {
        static struct proc_xfs_info xfs_info_set[] = {
                /* the few simple ones we can get from the mount struct */
 -              { XFS_MOUNT_IKEEP,              ",ikeep" },
 -              { XFS_MOUNT_WSYNC,              ",wsync" },
 -              { XFS_MOUNT_NOALIGN,            ",noalign" },
 -              { XFS_MOUNT_SWALLOC,            ",swalloc" },
 -              { XFS_MOUNT_NOUUID,             ",nouuid" },
 -              { XFS_MOUNT_NORECOVERY,         ",norecovery" },
 -              { XFS_MOUNT_ATTR2,              ",attr2" },
 -              { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
 -              { XFS_MOUNT_GRPID,              ",grpid" },
 -              { XFS_MOUNT_DISCARD,            ",discard" },
 -              { XFS_MOUNT_LARGEIO,            ",largeio" },
 -              { XFS_MOUNT_DAX_ALWAYS,         ",dax=always" },
 -              { XFS_MOUNT_DAX_NEVER,          ",dax=never" },
 +              { XFS_FEAT_IKEEP,               ",ikeep" },
 +              { XFS_FEAT_WSYNC,               ",wsync" },
 +              { XFS_FEAT_NOALIGN,             ",noalign" },
 +              { XFS_FEAT_SWALLOC,             ",swalloc" },
 +              { XFS_FEAT_NOUUID,              ",nouuid" },
 +              { XFS_FEAT_NORECOVERY,          ",norecovery" },
 +              { XFS_FEAT_ATTR2,               ",attr2" },
 +              { XFS_FEAT_FILESTREAMS,         ",filestreams" },
 +              { XFS_FEAT_GRPID,               ",grpid" },
 +              { XFS_FEAT_DISCARD,             ",discard" },
 +              { XFS_FEAT_LARGE_IOSIZE,        ",largeio" },
 +              { XFS_FEAT_DAX_ALWAYS,          ",dax=always" },
 +              { XFS_FEAT_DAX_NEVER,           ",dax=never" },
                { 0, NULL }
        };
        struct xfs_mount        *mp = XFS_M(root->d_sb);
        struct proc_xfs_info    *xfs_infop;
  
        for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
 -              if (mp->m_flags & xfs_infop->flag)
 +              if (mp->m_features & xfs_infop->flag)
                        seq_puts(m, xfs_infop->str);
        }
  
 -      seq_printf(m, ",inode%d",
 -              (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
 +      seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
  
 -      if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
 +      if (xfs_has_allocsize(mp))
                seq_printf(m, ",allocsize=%dk",
                           (1 << mp->m_allocsize_log) >> 10);
  
                seq_printf(m, ",swidth=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
  
 -      if (mp->m_qflags & XFS_UQUOTA_ACCT) {
 -              if (mp->m_qflags & XFS_UQUOTA_ENFD)
 -                      seq_puts(m, ",usrquota");
 -              else
 -                      seq_puts(m, ",uqnoenforce");
 -      }
 +      if (mp->m_qflags & XFS_UQUOTA_ENFD)
 +              seq_puts(m, ",usrquota");
 +      else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 +              seq_puts(m, ",uqnoenforce");
  
 -      if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 -              if (mp->m_qflags & XFS_PQUOTA_ENFD)
 -                      seq_puts(m, ",prjquota");
 -              else
 -                      seq_puts(m, ",pqnoenforce");
 -      }
 -      if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 -              if (mp->m_qflags & XFS_GQUOTA_ENFD)
 -                      seq_puts(m, ",grpquota");
 -              else
 -                      seq_puts(m, ",gqnoenforce");
 -      }
 +      if (mp->m_qflags & XFS_PQUOTA_ENFD)
 +              seq_puts(m, ",prjquota");
 +      else if (mp->m_qflags & XFS_PQUOTA_ACCT)
 +              seq_puts(m, ",pqnoenforce");
 +
 +      if (mp->m_qflags & XFS_GQUOTA_ENFD)
 +              seq_puts(m, ",grpquota");
 +      else if (mp->m_qflags & XFS_GQUOTA_ACCT)
 +              seq_puts(m, ",gqnoenforce");
  
        if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
                seq_puts(m, ",noquota");
  /*
   * Set parameters for inode allocation heuristics, taking into account
   * filesystem size and inode32/inode64 mount options; i.e. specifically
 - * whether or not XFS_MOUNT_SMALL_INUMS is set.
 + * whether or not XFS_FEAT_SMALL_INUMS is set.
   *
   * Inode allocation patterns are altered only if inode32 is requested
 - * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 - * If altered, XFS_MOUNT_32BITINODES is set as well.
 + * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 + * If altered, XFS_OPSTATE_INODE32 is set as well.
   *
   * An agcount independent of that in the mount structure is provided
   * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
@@@ -292,13 -276,13 +292,13 @@@ xfs_set_inode_alloc
  
        /*
         * If user asked for no more than 32-bit inodes, and the fs is
 -       * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
 +       * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
         * the allocator to accommodate the request.
         */
 -      if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 -              mp->m_flags |= XFS_MOUNT_32BITINODES;
 +      if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
 +              set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
        else
 -              mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 +              clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
  
        for (index = 0; index < agcount; index++) {
                struct xfs_perag        *pag;
  
                pag = xfs_perag_get(mp, index);
  
 -              if (mp->m_flags & XFS_MOUNT_32BITINODES) {
 +              if (xfs_is_inode32(mp)) {
                        if (ino > XFS_MAXINUMBER_32) {
                                pag->pagi_inodeok = 0;
                                pag->pagf_metadata = 0;
                xfs_perag_put(pag);
        }
  
 -      return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 +      return xfs_is_inode32(mp) ? maxagi : agcount;
  }
  
+ static bool
+ xfs_buftarg_is_dax(
+       struct super_block      *sb,
+       struct xfs_buftarg      *bt)
+ {
+       return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
+                       bdev_nr_sectors(bt->bt_bdev));
+ }
  STATIC int
  xfs_blkdev_get(
        xfs_mount_t             *mp,
@@@ -484,7 -477,7 +493,7 @@@ xfs_setup_devices
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
                unsigned int    log_sector_size = BBSIZE;
  
 -              if (xfs_sb_version_hassector(&mp->m_sb))
 +              if (xfs_has_sector(mp))
                        log_sector_size = mp->m_sb.sb_logsectsize;
                error = xfs_setsize_buftarg(mp->m_logdev_targp,
                                            log_sector_size);
@@@ -517,37 -510,37 +526,37 @@@ xfs_init_mount_workqueues
        if (!mp->m_unwritten_workqueue)
                goto out_destroy_buf;
  
 -      mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
 -                      XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
 -                      0, mp->m_super->s_id);
 -      if (!mp->m_cil_workqueue)
 -              goto out_destroy_unwritten;
 -
        mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
                        XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
                        0, mp->m_super->s_id);
        if (!mp->m_reclaim_workqueue)
 -              goto out_destroy_cil;
 +              goto out_destroy_unwritten;
  
 -      mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s",
 -                      WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM,
 +      mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
 +                      XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
                        0, mp->m_super->s_id);
 -      if (!mp->m_gc_workqueue)
 +      if (!mp->m_blockgc_wq)
                goto out_destroy_reclaim;
  
 +      mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
 +                      XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
 +                      1, mp->m_super->s_id);
 +      if (!mp->m_inodegc_wq)
 +              goto out_destroy_blockgc;
 +
        mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
                        XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
        if (!mp->m_sync_workqueue)
 -              goto out_destroy_eofb;
 +              goto out_destroy_inodegc;
  
        return 0;
  
 -out_destroy_eofb:
 -      destroy_workqueue(mp->m_gc_workqueue);
 +out_destroy_inodegc:
 +      destroy_workqueue(mp->m_inodegc_wq);
 +out_destroy_blockgc:
 +      destroy_workqueue(mp->m_blockgc_wq);
  out_destroy_reclaim:
        destroy_workqueue(mp->m_reclaim_workqueue);
 -out_destroy_cil:
 -      destroy_workqueue(mp->m_cil_workqueue);
  out_destroy_unwritten:
        destroy_workqueue(mp->m_unwritten_workqueue);
  out_destroy_buf:
@@@ -561,9 -554,9 +570,9 @@@ xfs_destroy_mount_workqueues
        struct xfs_mount        *mp)
  {
        destroy_workqueue(mp->m_sync_workqueue);
 -      destroy_workqueue(mp->m_gc_workqueue);
 +      destroy_workqueue(mp->m_blockgc_wq);
 +      destroy_workqueue(mp->m_inodegc_wq);
        destroy_workqueue(mp->m_reclaim_workqueue);
 -      destroy_workqueue(mp->m_cil_workqueue);
        destroy_workqueue(mp->m_unwritten_workqueue);
        destroy_workqueue(mp->m_buf_workqueue);
  }
@@@ -612,6 -605,32 +621,6 @@@ xfs_fs_alloc_inode
        return NULL;
  }
  
 -#ifdef DEBUG
 -static void
 -xfs_check_delalloc(
 -      struct xfs_inode        *ip,
 -      int                     whichfork)
 -{
 -      struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 -      struct xfs_bmbt_irec    got;
 -      struct xfs_iext_cursor  icur;
 -
 -      if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
 -              return;
 -      do {
 -              if (isnullstartblock(got.br_startblock)) {
 -                      xfs_warn(ip->i_mount,
 -      "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
 -                              ip->i_ino,
 -                              whichfork == XFS_DATA_FORK ? "data" : "cow",
 -                              got.br_startoff, got.br_blockcount);
 -              }
 -      } while (xfs_iext_next_extent(ifp, &icur, &got));
 -}
 -#else
 -#define xfs_check_delalloc(ip, whichfork)     do { } while (0)
 -#endif
 -
  /*
   * Now that the generic code is guaranteed not to be accessing
   * the linux inode, we can inactivate and reclaim the inode.
@@@ -627,6 -646,30 +636,6 @@@ xfs_fs_destroy_inode
        ASSERT(!rwsem_is_locked(&inode->i_rwsem));
        XFS_STATS_INC(ip->i_mount, vn_rele);
        XFS_STATS_INC(ip->i_mount, vn_remove);
 -
 -      xfs_inactive(ip);
 -
 -      if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
 -              xfs_check_delalloc(ip, XFS_DATA_FORK);
 -              xfs_check_delalloc(ip, XFS_COW_FORK);
 -              ASSERT(0);
 -      }
 -
 -      XFS_STATS_INC(ip->i_mount, vn_reclaim);
 -
 -      /*
 -       * We should never get here with one of the reclaim flags already set.
 -       */
 -      ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 -      ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 -
 -      /*
 -       * We always use background reclaim here because even if the inode is
 -       * clean, it still may be under IO and hence we have wait for IO
 -       * completion to occur before we can reclaim the inode. The background
 -       * reclaim path handles this more efficiently than we can here, so
 -       * simply let background reclaim tear down all inodes.
 -       */
        xfs_inode_mark_reclaimable(ip);
  }
  
@@@ -675,6 -718,8 +684,6 @@@ xfs_fs_inode_init_once
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
  
 -      mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 -                   "xfsino", ip->i_ino);
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
  }
@@@ -698,7 -743,7 +707,7 @@@ xfs_fs_drop_inode
         * that.  See the comment for this inode flag.
         */
        if (ip->i_flags & XFS_IRECOVERY) {
 -              ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
 +              ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
                return 0;
        }
  
@@@ -721,8 -766,6 +730,8 @@@ xfs_fs_sync_fs
  {
        struct xfs_mount        *mp = XFS_M(sb);
  
 +      trace_xfs_fs_sync_fs(mp, __return_address);
 +
        /*
         * Doing anything during the async pass would be counterproductive.
         */
                flush_delayed_work(&mp->m_log->l_work);
        }
  
 +      /*
 +       * If we are called with page faults frozen out, it means we are about
 +       * to freeze the transaction subsystem. Take the opportunity to shut
 +       * down inodegc because once SB_FREEZE_FS is set it's too late to
 +       * prevent inactivation races with freeze. The fs doesn't get called
 +       * again by the freezing process until after SB_FREEZE_FS has been set,
 +       * so it's now or never.  Same logic applies to speculative allocation
 +       * garbage collection.
 +       *
 +       * We don't care if this is a normal syncfs call that does this or
 +       * freeze that does this - we can run this multiple times without issue
 +       * and we won't race with a restart because a restart can only occur
 +       * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
 +       */
 +      if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
 +              xfs_inodegc_stop(mp);
 +              xfs_blockgc_stop(mp);
 +      }
 +
        return 0;
  }
  
@@@ -776,9 -800,6 +785,9 @@@ xfs_fs_statfs
        xfs_extlen_t            lsize;
        int64_t                 ffree;
  
 +      /* Wait for whatever inactivations are in progress. */
 +      xfs_inodegc_flush(mp);
 +
        statp->f_type = XFS_SUPER_MAGIC;
        statp->f_namelen = MAXNAMELEN - 1;
  
@@@ -874,22 -895,10 +883,22 @@@ xfs_fs_freeze
         * set a GFP_NOFS context here to avoid recursion deadlocks.
         */
        flags = memalloc_nofs_save();
 -      xfs_blockgc_stop(mp);
        xfs_save_resvblks(mp);
        ret = xfs_log_quiesce(mp);
        memalloc_nofs_restore(flags);
 +
 +      /*
 +       * For read-write filesystems, we need to restart the inodegc on error
 +       * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
 +       * going to be run to restart it now.  We are at SB_FREEZE_FS level
 +       * here, so we can restart safely without racing with a stop in
 +       * xfs_fs_sync_fs().
 +       */
 +      if (ret && !xfs_is_readonly(mp)) {
 +              xfs_blockgc_start(mp);
 +              xfs_inodegc_start(mp);
 +      }
 +
        return ret;
  }
  
@@@ -901,18 -910,7 +910,18 @@@ xfs_fs_unfreeze
  
        xfs_restore_resvblks(mp);
        xfs_log_work_queue(mp);
 -      xfs_blockgc_start(mp);
 +
 +      /*
 +       * Don't reactivate the inodegc worker on a readonly filesystem because
 +       * inodes are sent directly to reclaim.  Don't reactivate the blockgc
 +       * worker because there are no speculative preallocations on a readonly
 +       * filesystem.
 +       */
 +      if (!xfs_is_readonly(mp)) {
 +              xfs_blockgc_start(mp);
 +              xfs_inodegc_start(mp);
 +      }
 +
        return 0;
  }
  
@@@ -924,8 -922,10 +933,8 @@@ STATIC in
  xfs_finish_flags(
        struct xfs_mount        *mp)
  {
 -      int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 -
        /* Fail a mount where the logbuf is smaller than the log stripe */
 -      if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 +      if (xfs_has_logv2(mp)) {
                if (mp->m_logbsize <= 0 &&
                    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
                        mp->m_logbsize = mp->m_sb.sb_logsunit;
        /*
         * V5 filesystems always use attr2 format for attributes.
         */
 -      if (xfs_sb_version_hascrc(&mp->m_sb) &&
 -          (mp->m_flags & XFS_MOUNT_NOATTR2)) {
 +      if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
                xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
                             "attr2 is always enabled for V5 filesystems.");
                return -EINVAL;
        }
  
 -      /*
 -       * mkfs'ed attr2 will turn on attr2 mount unless explicitly
 -       * told by noattr2 to turn it off
 -       */
 -      if (xfs_sb_version_hasattr2(&mp->m_sb) &&
 -          !(mp->m_flags & XFS_MOUNT_NOATTR2))
 -              mp->m_flags |= XFS_MOUNT_ATTR2;
 -
        /*
         * prohibit r/w mounts of read-only filesystems
         */
 -      if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
 +      if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
                xfs_warn(mp,
                        "cannot mount a read-only filesystem as read-write");
                return -EROFS;
        }
  
 -      if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
 -          (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
 -          !xfs_sb_version_has_pquotino(&mp->m_sb)) {
 +      if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
 +          (mp->m_qflags & XFS_PQUOTA_ACCT) &&
 +          !xfs_has_pquotino(mp)) {
                xfs_warn(mp,
                  "Super block does not support project and group quota together");
                return -EINVAL;
@@@ -1022,40 -1031,11 +1031,40 @@@ xfs_destroy_percpu_counters
        percpu_counter_destroy(&mp->m_icount);
        percpu_counter_destroy(&mp->m_ifree);
        percpu_counter_destroy(&mp->m_fdblocks);
 -      ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
 +      ASSERT(xfs_is_shutdown(mp) ||
               percpu_counter_sum(&mp->m_delalloc_blks) == 0);
        percpu_counter_destroy(&mp->m_delalloc_blks);
  }
  
 +static int
 +xfs_inodegc_init_percpu(
 +      struct xfs_mount        *mp)
 +{
 +      struct xfs_inodegc      *gc;
 +      int                     cpu;
 +
 +      mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
 +      if (!mp->m_inodegc)
 +              return -ENOMEM;
 +
 +      for_each_possible_cpu(cpu) {
 +              gc = per_cpu_ptr(mp->m_inodegc, cpu);
 +              init_llist_head(&gc->list);
 +              gc->items = 0;
 +              INIT_WORK(&gc->work, xfs_inodegc_worker);
 +      }
 +      return 0;
 +}
 +
 +static void
 +xfs_inodegc_free_percpu(
 +      struct xfs_mount        *mp)
 +{
 +      if (!mp->m_inodegc)
 +              return;
 +      free_percpu(mp->m_inodegc);
 +}
 +
  static void
  xfs_fs_put_super(
        struct super_block      *sb)
  
        xfs_freesb(mp);
        free_percpu(mp->m_stats.xs_stats);
 +      xfs_mount_list_del(mp);
 +      xfs_inodegc_free_percpu(mp);
        xfs_destroy_percpu_counters(mp);
        xfs_destroy_mount_workqueues(mp);
        xfs_close_devices(mp);
@@@ -1162,7 -1140,7 +1171,7 @@@ xfs_fs_warn_deprecated
         * already had the flag set
         */
        if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
 -                      !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value)
 +            !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
                return;
        xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
  }
@@@ -1210,27 -1188,27 +1219,27 @@@ xfs_fs_parse_param
                if (suffix_kstrtoint(param->string, 10, &size))
                        return -EINVAL;
                parsing_mp->m_allocsize_log = ffs(size) - 1;
 -              parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
 +              parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
                return 0;
        case Opt_grpid:
        case Opt_bsdgroups:
 -              parsing_mp->m_flags |= XFS_MOUNT_GRPID;
 +              parsing_mp->m_features |= XFS_FEAT_GRPID;
                return 0;
        case Opt_nogrpid:
        case Opt_sysvgroups:
 -              parsing_mp->m_flags &= ~XFS_MOUNT_GRPID;
 +              parsing_mp->m_features &= ~XFS_FEAT_GRPID;
                return 0;
        case Opt_wsync:
 -              parsing_mp->m_flags |= XFS_MOUNT_WSYNC;
 +              parsing_mp->m_features |= XFS_FEAT_WSYNC;
                return 0;
        case Opt_norecovery:
 -              parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY;
 +              parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
                return 0;
        case Opt_noalign:
 -              parsing_mp->m_flags |= XFS_MOUNT_NOALIGN;
 +              parsing_mp->m_features |= XFS_FEAT_NOALIGN;
                return 0;
        case Opt_swalloc:
 -              parsing_mp->m_flags |= XFS_MOUNT_SWALLOC;
 +              parsing_mp->m_features |= XFS_FEAT_SWALLOC;
                return 0;
        case Opt_sunit:
                parsing_mp->m_dalign = result.uint_32;
                parsing_mp->m_swidth = result.uint_32;
                return 0;
        case Opt_inode32:
 -              parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 +              parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
                return 0;
        case Opt_inode64:
 -              parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 +              parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
                return 0;
        case Opt_nouuid:
 -              parsing_mp->m_flags |= XFS_MOUNT_NOUUID;
 +              parsing_mp->m_features |= XFS_FEAT_NOUUID;
                return 0;
        case Opt_largeio:
 -              parsing_mp->m_flags |= XFS_MOUNT_LARGEIO;
 +              parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
                return 0;
        case Opt_nolargeio:
 -              parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO;
 +              parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
                return 0;
        case Opt_filestreams:
 -              parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS;
 +              parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
                return 0;
        case Opt_noquota:
                parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
 -              parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
                return 0;
        case Opt_quota:
        case Opt_uquota:
        case Opt_usrquota:
 -              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
 -                               XFS_UQUOTA_ENFD);
 +              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
                return 0;
        case Opt_qnoenforce:
        case Opt_uqnoenforce:
 -              parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
 +              parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                return 0;
        case Opt_pquota:
        case Opt_prjquota:
 -              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
 -                               XFS_PQUOTA_ENFD);
 +              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
                return 0;
        case Opt_pqnoenforce:
 -              parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
 +              parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
                return 0;
        case Opt_gquota:
        case Opt_grpquota:
 -              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
 -                               XFS_GQUOTA_ENFD);
 +              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
                return 0;
        case Opt_gqnoenforce:
 -              parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 +              parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
                parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
                return 0;
        case Opt_discard:
 -              parsing_mp->m_flags |= XFS_MOUNT_DISCARD;
 +              parsing_mp->m_features |= XFS_FEAT_DISCARD;
                return 0;
        case Opt_nodiscard:
 -              parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD;
 +              parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
                return 0;
  #ifdef CONFIG_FS_DAX
        case Opt_dax:
  #endif
        /* Following mount options will be removed in September 2025 */
        case Opt_ikeep:
 -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true);
 -              parsing_mp->m_flags |= XFS_MOUNT_IKEEP;
 +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
 +              parsing_mp->m_features |= XFS_FEAT_IKEEP;
                return 0;
        case Opt_noikeep:
 -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false);
 -              parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP;
 +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
 +              parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
                return 0;
        case Opt_attr2:
 -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true);
 -              parsing_mp->m_flags |= XFS_MOUNT_ATTR2;
 +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
 +              parsing_mp->m_features |= XFS_FEAT_ATTR2;
                return 0;
        case Opt_noattr2:
 -              xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true);
 -              parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2;
 -              parsing_mp->m_flags |= XFS_MOUNT_NOATTR2;
 +              xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
 +              parsing_mp->m_features |= XFS_FEAT_NOATTR2;
                return 0;
        default:
                xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
@@@ -1329,23 -1312,17 +1338,23 @@@ static in
  xfs_fs_validate_params(
        struct xfs_mount        *mp)
  {
 +      /* No recovery flag requires a read-only mount */
 +      if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
 +              xfs_warn(mp, "no-recovery mounts must be read-only.");
 +              return -EINVAL;
 +      }
 +
        /*
 -       * no recovery flag requires a read-only mount
 +       * We have not read the superblock at this point, so only the attr2
 +       * mount option can set the attr2 feature by this stage.
         */
 -      if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
 -          !(mp->m_flags & XFS_MOUNT_RDONLY)) {
 -              xfs_warn(mp, "no-recovery mounts must be read-only.");
 +      if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
 +              xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
                return -EINVAL;
        }
  
 -      if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
 -          (mp->m_dalign || mp->m_swidth)) {
 +
 +      if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
                xfs_warn(mp,
        "sunit and swidth options incompatible with the noalign option");
                return -EINVAL;
                return -EINVAL;
        }
  
 -      if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
 +      if (xfs_has_allocsize(mp) &&
            (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
             mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
                xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
@@@ -1450,22 -1427,11 +1459,22 @@@ xfs_fs_fill_super
        if (error)
                goto out_destroy_workqueues;
  
 +      error = xfs_inodegc_init_percpu(mp);
 +      if (error)
 +              goto out_destroy_counters;
 +
 +      /*
 +       * All percpu data structures requiring cleanup when a cpu goes offline
 +       * must be allocated before adding this @mp to the cpu-dead handler's
 +       * mount list.
 +       */
 +      xfs_mount_list_add(mp);
 +
        /* Allocate stats memory before we do operations that might use it */
        mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
        if (!mp->m_stats.xs_stats) {
                error = -ENOMEM;
 -              goto out_destroy_counters;
 +              goto out_destroy_inodegc;
        }
  
        error = xfs_readsb(mp, flags);
                goto out_free_sb;
  
        /* V4 support is undergoing deprecation. */
 -      if (!xfs_sb_version_hascrc(&mp->m_sb)) {
 +      if (!xfs_has_crc(mp)) {
  #ifdef CONFIG_XFS_SUPPORT_V4
                xfs_warn_once(mp,
        "Deprecated V4 format (crc=0) will not be supported after September 2030.");
        }
  
        /* Filesystem claims it needs repair, so refuse the mount. */
 -      if (xfs_sb_version_needsrepair(&mp->m_sb)) {
 +      if (xfs_has_needsrepair(mp)) {
                xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
                error = -EFSCORRUPTED;
                goto out_free_sb;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_max_links = XFS_MAXLINK;
        sb->s_time_gran = 1;
 -      if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
 +      if (xfs_has_bigtime(mp)) {
                sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
                sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
        } else {
        set_posix_acl_flag(sb);
  
        /* version 5 superblocks support inode version counters. */
 -      if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
 +      if (xfs_has_crc(mp))
                sb->s_flags |= SB_I_VERSION;
  
 -      if (xfs_sb_version_hasbigtime(&mp->m_sb))
 -              xfs_warn(mp,
 - "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");
 -
 -      if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
 +      if (xfs_has_dax_always(mp)) {
                bool rtdev_is_dax = false, datadev_is_dax;
  
                xfs_warn(mp,
                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
  
-               datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
-                       sb->s_blocksize);
+               datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
                if (mp->m_rtdev_targp)
-                       rtdev_is_dax = bdev_dax_supported(
-                               mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
+                       rtdev_is_dax = xfs_buftarg_is_dax(sb,
+                                               mp->m_rtdev_targp);
                if (!rtdev_is_dax && !datadev_is_dax) {
                        xfs_alert(mp,
                        "DAX unsupported by block device. Turning off DAX.");
                        xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
                }
 -              if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 +              if (xfs_has_reflink(mp)) {
                        xfs_alert(mp,
                "DAX and reflink cannot be used together!");
                        error = -EINVAL;
                }
        }
  
 -      if (mp->m_flags & XFS_MOUNT_DISCARD) {
 +      if (xfs_has_discard(mp)) {
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
  
                if (!blk_queue_discard(q)) {
                        xfs_warn(mp, "mounting with \"discard\" option, but "
                                        "the device does not support discard");
 -                      mp->m_flags &= ~XFS_MOUNT_DISCARD;
 +                      mp->m_features &= ~XFS_FEAT_DISCARD;
                }
        }
  
 -      if (xfs_sb_version_hasreflink(&mp->m_sb)) {
 +      if (xfs_has_reflink(mp)) {
                if (mp->m_sb.sb_rblocks) {
                        xfs_alert(mp,
        "reflink not compatible with realtime device!");
                }
        }
  
 -      if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
 +      if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
                xfs_alert(mp,
        "reverse mapping btree not compatible with realtime device!");
                error = -EINVAL;
                goto out_filestream_unmount;
        }
  
 -      if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
 -              xfs_warn(mp,
 - "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");
 -
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
        xfs_freesb(mp);
   out_free_stats:
        free_percpu(mp->m_stats.xs_stats);
 + out_destroy_inodegc:
 +      xfs_mount_list_del(mp);
 +      xfs_inodegc_free_percpu(mp);
   out_destroy_counters:
        xfs_destroy_percpu_counters(mp);
   out_destroy_workqueues:
@@@ -1694,13 -1664,13 +1702,13 @@@ xfs_remount_rw
        struct xfs_sb           *sbp = &mp->m_sb;
        int error;
  
 -      if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
 +      if (xfs_has_norecovery(mp)) {
                xfs_warn(mp,
                        "ro->rw transition prohibited on norecovery mount");
                return -EINVAL;
        }
  
 -      if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
 +      if (xfs_sb_is_v5(sbp) &&
            xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
                xfs_warn(mp,
        "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
                return -EINVAL;
        }
  
 -      mp->m_flags &= ~XFS_MOUNT_RDONLY;
 +      clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
  
        /*
         * If this is the first remount to writeable state we might have some
        if (error && error != -ENOSPC)
                return error;
  
 +      /* Re-enable the background inode inactivation worker. */
 +      xfs_inodegc_start(mp);
 +
        return 0;
  }
  
@@@ -1771,15 -1738,6 +1779,15 @@@ xfs_remount_ro
                return error;
        }
  
 +      /*
 +       * Stop the inodegc background worker.  xfs_fs_reconfigure already
 +       * flushed all pending inodegc work when it sync'd the filesystem.
 +       * The VFS holds s_umount, so we know that inodes cannot enter
 +       * xfs_fs_destroy_inode during a remount operation.  In readonly mode
 +       * we send inodes straight to reclaim, so no inodes will be queued.
 +       */
 +      xfs_inodegc_stop(mp);
 +
        /* Free the per-AG metadata reservation pool. */
        error = xfs_fs_unreserve_ag_blocks(mp);
        if (error) {
        xfs_save_resvblks(mp);
  
        xfs_log_clean(mp);
 -      mp->m_flags |= XFS_MOUNT_RDONLY;
 +      set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
  
        return 0;
  }
@@@ -1820,11 -1778,12 +1828,11 @@@ xfs_fs_reconfigure
  {
        struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
        struct xfs_mount        *new_mp = fc->s_fs_info;
 -      xfs_sb_t                *sbp = &mp->m_sb;
        int                     flags = fc->sb_flags;
        int                     error;
  
        /* version 5 superblocks always support version counters. */
 -      if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
 +      if (xfs_has_crc(mp))
                fc->sb_flags |= SB_I_VERSION;
  
        error = xfs_fs_validate_params(new_mp);
        sync_filesystem(mp->m_super);
  
        /* inode32 -> inode64 */
 -      if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
 -          !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
 -              mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 -              mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
 +      if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
 +              mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
 +              mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
        }
  
        /* inode64 -> inode32 */
 -      if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
 -          (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
 -              mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 -              mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
 +      if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
 +              mp->m_features |= XFS_FEAT_SMALL_INUMS;
 +              mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
        }
  
        /* ro -> rw */
 -      if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
 +      if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
                error = xfs_remount_rw(mp);
                if (error)
                        return error;
        }
  
        /* rw -> ro */
 -      if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
 +      if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
                error = xfs_remount_ro(mp);
                if (error)
                        return error;
@@@ -1920,11 -1881,11 +1928,11 @@@ static int xfs_init_fs_context
         * Copy binary VFS mount flags we are interested in.
         */
        if (fc->sb_flags & SB_RDONLY)
 -              mp->m_flags |= XFS_MOUNT_RDONLY;
 +              set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
        if (fc->sb_flags & SB_DIRSYNC)
 -              mp->m_flags |= XFS_MOUNT_DIRSYNC;
 +              mp->m_features |= XFS_FEAT_DIRSYNC;
        if (fc->sb_flags & SB_SYNCHRONOUS)
 -              mp->m_flags |= XFS_MOUNT_WSYNC;
 +              mp->m_features |= XFS_FEAT_WSYNC;
  
        fc->s_fs_info = mp;
        fc->ops = &xfs_context_ops;
@@@ -2167,48 -2128,6 +2175,48 @@@ xfs_destroy_workqueues(void
        destroy_workqueue(xfs_alloc_wq);
  }
  
 +#ifdef CONFIG_HOTPLUG_CPU
 +static int
 +xfs_cpu_dead(
 +      unsigned int            cpu)
 +{
 +      struct xfs_mount        *mp, *n;
 +
 +      spin_lock(&xfs_mount_list_lock);
 +      list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
 +              spin_unlock(&xfs_mount_list_lock);
 +              xfs_inodegc_cpu_dead(mp, cpu);
 +              spin_lock(&xfs_mount_list_lock);
 +      }
 +      spin_unlock(&xfs_mount_list_lock);
 +      return 0;
 +}
 +
 +static int __init
 +xfs_cpu_hotplug_init(void)
 +{
 +      int     error;
 +
 +      error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
 +                      xfs_cpu_dead);
 +      if (error < 0)
 +              xfs_alert(NULL,
 +"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
 +                      error);
 +      return error;
 +}
 +
 +static void
 +xfs_cpu_hotplug_destroy(void)
 +{
 +      cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
 +}
 +
 +#else /* !CONFIG_HOTPLUG_CPU */
 +static inline int xfs_cpu_hotplug_init(void) { return 0; }
 +static inline void xfs_cpu_hotplug_destroy(void) {}
 +#endif
 +
  STATIC int __init
  init_xfs_fs(void)
  {
  
        xfs_dir_startup();
  
 -      error = xfs_init_zones();
 +      error = xfs_cpu_hotplug_init();
        if (error)
                goto out;
  
 +      error = xfs_init_zones();
 +      if (error)
 +              goto out_destroy_hp;
 +
        error = xfs_init_workqueues();
        if (error)
                goto out_destroy_zones;
        xfs_destroy_workqueues();
   out_destroy_zones:
        xfs_destroy_zones();
 + out_destroy_hp:
 +      xfs_cpu_hotplug_destroy();
   out:
        return error;
  }
@@@ -2332,7 -2245,6 +2340,7 @@@ exit_xfs_fs(void
        xfs_destroy_workqueues();
        xfs_destroy_zones();
        xfs_uuid_table_free();
 +      xfs_cpu_hotplug_destroy();
  }
  
  module_init(init_xfs_fs);