Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 22 Oct 2020 17:31:08 +0000 (10:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 22 Oct 2020 17:31:08 +0000 (10:31 -0700)
Pull ext4 updates from Ted Ts'o:
 "The significant new ext4 feature this time around is Harshad's new
  fast_commit mode.

  In addition, thanks to Mauricio for fixing a race where mmap'ed pages
  that are being changed in parallel with a data=journal transaction
  commit could result in bad checksums in the failure that could cause
  journal replays to fail.

  Also notable is Ritesh's buffered write optimization which can result
  in significant improvements on parallel write workloads. (The kernel
  test robot reported a 330.6% improvement on fio.write_iops on a 96
  core system using DAX)

  Besides that, we have the usual miscellaneous cleanups and bug fixes"

Link: https://lore.kernel.org/r/20200925071217.GO28663@shao2-debian
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits)
  ext4: fix invalid inode checksum
  ext4: add fast commit stats in procfs
  ext4: add a mount opt to forcefully turn fast commits on
  ext4: fast commit recovery path
  jbd2: fast commit recovery path
  ext4: main fast-commit commit path
  jbd2: add fast commit machinery
  ext4 / jbd2: add fast commit initialization
  ext4: add fast_commit feature and handling for extended mount options
  doc: update ext4 and journalling docs to include fast commit feature
  ext4: Detect already used quota file early
  jbd2: avoid transaction reuse after reformatting
  ext4: use the normal helper to get the actual inode
  ext4: fix bs < ps issue reported with dioread_nolock mount opt
  ext4: data=journal: write-protect pages on j_submit_inode_data_buffers()
  ext4: data=journal: fixes for ext4_page_mkwrite()
  jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers()
  jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers()
  ext4: introduce ext4_sb_bread_unmovable() to replace sb_bread_unmovable()
  ext4: use ext4_sb_bread() instead of sb_bread()
  ...

1  2 
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ialloc.c
fs/ext4/namei.c
fs/ext4/super.c

diff --cc fs/ext4/dir.c
Simple merge
diff --cc fs/ext4/ext4.h
Simple merge
@@@ -742,53 -746,122 +746,169 @@@ not_found
        return 1;
  }
  
+ /*
+  * ext4_mark_inode_used - mark inode number @ino as in use in its block group.
+  * @sb:  super block of the filesystem
+  * @ino: inode number to mark
+  *
+  * Sets the inode's bit in the group's inode bitmap and updates the group
+  * descriptor (free inode count, itable-unused count, UNINIT flags and
+  * checksums).  Every buffer is dirtied with a NULL handle and then written
+  * with sync_dirty_buffer().
+  *
+  * Returns 0 on success or if the bit was already set, -EFSCORRUPTED if @ino
+  * is below EXT4_FIRST_INO() or above s_inodes_count, -EINVAL if the group
+  * descriptor cannot be obtained, or the error reported by the bitmap
+  * read / dirty / sync helpers.
+  */
+ int ext4_mark_inode_used(struct super_block *sb, int ino)
+ {
+       unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
+       struct buffer_head *inode_bitmap_bh = NULL, *group_desc_bh = NULL;
+       struct ext4_group_desc *gdp;
+       ext4_group_t group;
+       int bit;
+       int err = -EFSCORRUPTED;
+       if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
+               goto out;
+       /* Inode numbers are 1-based; split into group index and bit offset */
+       group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
+       bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
+       inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
+       if (IS_ERR(inode_bitmap_bh))
+               return PTR_ERR(inode_bitmap_bh);
+       /* Already marked in use: nothing to update */
+       if (ext4_test_bit(bit, inode_bitmap_bh->b_data)) {
+               err = 0;
+               goto out;
+       }
+       gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+       if (!gdp || !group_desc_bh) {
+               err = -EINVAL;
+               goto out;
+       }
+       ext4_set_bit(bit, inode_bitmap_bh->b_data);
+       BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+       err = ext4_handle_dirty_metadata(NULL, NULL, inode_bitmap_bh);
+       if (err) {
+               ext4_std_error(sb, err);
+               goto out;
+       }
+       err = sync_dirty_buffer(inode_bitmap_bh);
+       if (err) {
+               ext4_std_error(sb, err);
+               goto out;
+       }
+       /* We may have to initialize the block bitmap if it isn't already */
+       if (ext4_has_group_desc_csum(sb) &&
+           gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+               struct buffer_head *block_bitmap_bh;
+               block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+               if (IS_ERR(block_bitmap_bh)) {
+                       err = PTR_ERR(block_bitmap_bh);
+                       goto out;
+               }
+               BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
+               err = ext4_handle_dirty_metadata(NULL, NULL, block_bitmap_bh);
+               sync_dirty_buffer(block_bitmap_bh);
+               /* recheck and clear flag under lock if we still need to */
+               ext4_lock_group(sb, group);
+               if (ext4_has_group_desc_csum(sb) &&
+                   (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+                       ext4_free_group_clusters_set(sb, gdp,
+                               ext4_free_clusters_after_init(sb, group, gdp));
+                       ext4_block_bitmap_csum_set(sb, group, gdp,
+                                                  block_bitmap_bh);
+                       ext4_group_desc_csum_set(sb, group, gdp);
+               }
+               ext4_unlock_group(sb, group);
+               brelse(block_bitmap_bh);
+               /* err still holds the result of dirtying the block bitmap */
+               if (err) {
+                       ext4_std_error(sb, err);
+                       goto out;
+               }
+       }
+       /* Update the relevant bg descriptor fields */
+       if (ext4_has_group_desc_csum(sb)) {
+               int free;
+               ext4_lock_group(sb, group); /* while we modify the bg desc */
+               free = EXT4_INODES_PER_GROUP(sb) -
+                       ext4_itable_unused_count(sb, gdp);
+               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+                       free = 0;
+               }
+               /*
+                * Check the relative inode number against the last used
+                * relative inode number in this group. if it is greater
+                * we need to update the bg_itable_unused count
+                */
+               if (bit >= free)
+                       ext4_itable_unused_set(sb, gdp,
+                                       (EXT4_INODES_PER_GROUP(sb) - bit - 1));
+       } else {
+               /* Taken here so both branches reach the unlock below locked */
+               ext4_lock_group(sb, group);
+       }
+       ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+       if (ext4_has_group_desc_csum(sb)) {
+               ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+                                          EXT4_INODES_PER_GROUP(sb) / 8);
+               ext4_group_desc_csum_set(sb, group, gdp);
+       }
+       ext4_unlock_group(sb, group);
+       err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh);
+       sync_dirty_buffer(group_desc_bh);
+ out:
+       /*
+        * Drop the reference taken by ext4_read_inode_bitmap().  brelse()
+        * ignores NULL, which covers the range-check path that jumps here
+        * before the bitmap has been read.
+        */
+       brelse(inode_bitmap_bh);
+       return err;
+ }
 +static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode,
 +                                          bool encrypt)
 +{
 +      struct super_block *sb = dir->i_sb;
 +      int nblocks = 0;
 +#ifdef CONFIG_EXT4_FS_POSIX_ACL
 +      struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
 +
 +      if (IS_ERR(p))
 +              return PTR_ERR(p);
 +      if (p) {
 +              int acl_size = p->a_count * sizeof(ext4_acl_entry);
 +
 +              nblocks += (S_ISDIR(mode) ? 2 : 1) *
 +                      __ext4_xattr_set_credits(sb, NULL /* inode */,
 +                                               NULL /* block_bh */, acl_size,
 +                                               true /* is_create */);
 +              posix_acl_release(p);
 +      }
 +#endif
 +
 +#ifdef CONFIG_SECURITY
 +      {
 +              int num_security_xattrs = 1;
 +
 +#ifdef CONFIG_INTEGRITY
 +              num_security_xattrs++;
 +#endif
 +              /*
 +               * We assume that security xattrs are never more than 1k.
 +               * In practice they are under 128 bytes.
 +               */
 +              nblocks += num_security_xattrs *
 +                      __ext4_xattr_set_credits(sb, NULL /* inode */,
 +                                               NULL /* block_bh */, 1024,
 +                                               true /* is_create */);
 +      }
 +#endif
 +      if (encrypt)
 +              nblocks += __ext4_xattr_set_credits(sb,
 +                                                  NULL /* inode */,
 +                                                  NULL /* block_bh */,
 +                                                  FSCRYPT_SET_CONTEXT_MAX_SIZE,
 +                                                  true /* is_create */);
 +      return nblocks;
 +}
 +
  /*
   * There are two policies for allocating an inode.  If the new inode is
   * a directory, then a forward search is made for a block group with both
@@@ -818,8 -891,8 +938,8 @@@ struct inode *__ext4_new_inode(handle_
        struct inode *ret;
        ext4_group_t i;
        ext4_group_t flex_group;
-       struct ext4_group_info *grp;
+       struct ext4_group_info *grp = NULL;
 -      int encrypt = 0;
 +      bool encrypt = false;
  
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
diff --cc fs/ext4/namei.c
Simple merge
diff --cc fs/ext4/super.c
Simple merge