Merge tag 'ext4_for_linus_cleanups' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
diff --git a/Documentation/filesystems/ext4/journal.rst b/Documentation/filesystems/ext4/journal.rst

index 805a1e9..849d5b1 100644 (file)
--- a/Documentation/filesystems/ext4/journal.rst
+++ b/Documentation/filesystems/ext4/journal.rst
@@ -256,6 +256,10 @@ which is 1024 bytes long:
       - s\_padding2
       -
     * - 0x54
+     - \_\_be32
+     - s\_num\_fc\_blks
+     - Number of fast commit blocks in the journal.
+   * - 0x58
       - \_\_u32
       - s\_padding[42]
       -
@@ -310,6 +314,8 @@ The journal incompat features are any combination of the following:
       - This journal uses v3 of the checksum on-disk format. This is the same as
         v2, but the journal block tag size is fixed regardless of the size of
         block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
+   * - 0x20
+     - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
  
  .. _jbd2_checksum_type:
  
diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst

index 93e55d7..2eb1ab2 100644 (file)
--- a/Documentation/filesystems/ext4/super.rst
+++ b/Documentation/filesystems/ext4/super.rst
@@ -596,6 +596,13 @@ following:
       - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
         points to the two block groups that contain backup superblocks
         (COMPAT\_SPARSE\_SUPER2).
+   * - 0x400
+     - Fast commits supported. Although fast commit blocks are
+       backward incompatible, fast commit blocks are not always
+       present in the journal. If fast commit blocks are present in
+       the journal, the JBD2 incompat feature
+       (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
+       set (COMPAT\_FAST\_COMMIT).
  
  .. _super_incompat:
  
diff --git a/Documentation/filesystems/journalling.rst b/Documentation/filesystems/journalling.rst

index 5a5f70b..e18f90f 100644 (file)
--- a/Documentation/filesystems/journalling.rst
+++ b/Documentation/filesystems/journalling.rst
@@ -136,10 +136,8 @@ Fast commits
  ~~~~~~~~~~~~
  
  JBD2 to also allows you to perform file-system specific delta commits known as
-fast commits. In order to use fast commits, you first need to call
-:c:func:`jbd2_fc_init` and tell how many blocks at the end of journal
-area should be reserved for fast commits. Along with that, you will also need
-to set following callbacks that perform correspodning work:
+fast commits. In order to use fast commits, you will need to set the following
+callbacks that perform the corresponding work:
  
  `journal->j_fc_cleanup_cb`: Cleanup function called after every full commit and
  fast commit.
diff --git a/MAINTAINERS b/MAINTAINERS

index 3da6d8c..94ac10a 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6614,6 +6614,7 @@ Q:        http://patchwork.ozlabs.org/project/linux-ext4/list/
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
  F:     Documentation/filesystems/ext4/
  F:     fs/ext4/
+F:     include/trace/events/ext4.h
  
  Extended Verification Module (EVM)
  M:     Mimi Zohar <zohar@linux.ibm.com>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index 45fcdbf..1b399ca 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1028,9 +1028,6 @@ struct ext4_inode_info {
                                          * protected by sbi->s_fc_lock.
                                          */
  
-       /* Fast commit subtid when this inode was committed */
-       unsigned int i_fc_committed_subtid;
-
         /* Start of lblk range that needs to be committed in this fast commit */
         ext4_lblk_t i_fc_lblk_start;
  
@@ -1422,16 +1419,6 @@ struct ext4_super_block {
  
  #ifdef __KERNEL__
  
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED         0x0001
-#define EXT4_MF_FS_ABORTED             0x0002  /* Fatal error detected */
-#define EXT4_MF_FC_INELIGIBLE          0x0004  /* Fast commit ineligible */
-#define EXT4_MF_FC_COMMITTING          0x0008  /* File system underoing a fast
-                                                * commit.
-                                                */
-
  #ifdef CONFIG_FS_ENCRYPTION
  #define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
  #else
@@ -1466,7 +1453,7 @@ struct ext4_sb_info {
         struct buffer_head * __rcu *s_group_desc;
         unsigned int s_mount_opt;
         unsigned int s_mount_opt2;
-       unsigned int s_mount_flags;
+       unsigned long s_mount_flags;
         unsigned int s_def_mount_opt;
         ext4_fsblk_t s_sb_block;
         atomic64_t s_resv_clusters;
@@ -1694,6 +1681,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
         _v;                                                                \
  })
  
+/*
+ * run-time mount flags
+ */
+enum {
+       EXT4_MF_MNTDIR_SAMPLED,
+       EXT4_MF_FS_ABORTED,     /* Fatal error detected */
+       EXT4_MF_FC_INELIGIBLE,  /* Fast commit ineligible */
+       EXT4_MF_FC_COMMITTING   /* File system undergoing a fast
+                                * commit.
+                                */
+};
+
+static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+{
+       set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline void ext4_clear_mount_flag(struct super_block *sb, int bit)
+{
+       clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline int ext4_test_mount_flag(struct super_block *sb, int bit)
+{
+       return test_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+
  /*
   * Simulate_fail codes
   */
@@ -1863,6 +1878,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
  #define EXT4_FEATURE_COMPAT_RESIZE_INODE       0x0010
  #define EXT4_FEATURE_COMPAT_DIR_INDEX          0x0020
  #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2      0x0200
+/*
+ * The reason why "FAST_COMMIT" is a compat feature is that the FS becomes
+ * incompatible only if fast commit blocks are present in the FS. Since we
+ * clear the journal (and thus the fast commit blocks), we don't mark the FS
+ * as incompatible. We also have a JBD2 incompat feature, which gets set when
+ * there are fast commit blocks present in the journal.
+ */
  #define EXT4_FEATURE_COMPAT_FAST_COMMIT                0x0400
  #define EXT4_FEATURE_COMPAT_STABLE_INODES      0x0800
  
@@ -2731,12 +2753,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
  int ext4_fc_info_show(struct seq_file *seq, void *v);
  void ext4_fc_init(struct super_block *sb, journal_t *journal);
  void ext4_fc_init_inode(struct inode *inode);
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
                          ext4_lblk_t end);
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_inode(struct inode *inode);
+void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
+       struct dentry *dentry);
+void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
+       struct dentry *dentry);
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
  void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
  void ext4_fc_start_ineligible(struct super_block *sb, int reason);
  void ext4_fc_stop_ineligible(struct super_block *sb);
@@ -3452,7 +3478,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
  extern int ext4_ci_compare(const struct inode *parent,
                            const struct qstr *fname,
                            const struct qstr *entry, bool quick);
-extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
                          struct inode *inode);
  extern int __ext4_link(struct inode *dir, struct inode *inode,
                        struct dentry *dentry);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c

index 57cfa28..17d7096 100644 (file)
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3724,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
         err = ext4_ext_dirty(handle, inode, path + path->p_depth);
  out:
         ext4_ext_show_leaf(inode, path);
-       ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
         return err;
  }
  
@@ -3796,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
         if (*allocated > map->m_len)
                 *allocated = map->m_len;
         map->m_len = *allocated;
-       ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
         return 0;
  }
  
@@ -4329,7 +4327,6 @@ got_allocated_blocks:
         map->m_len = ar.len;
         allocated = map->m_len;
         ext4_ext_show_leaf(inode, path);
-       ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
  out:
         ext4_ext_drop_refs(path);
         kfree(path);
@@ -4602,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
         ret = ext4_mark_inode_dirty(handle, inode);
         if (unlikely(ret))
                 goto out_handle;
-       ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
+       ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
                         (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
         /* Zero out partial block at the edges of the range */
         ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                      FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
                      FALLOC_FL_INSERT_RANGE))
                 return -EOPNOTSUPP;
-       ext4_fc_track_range(inode, offset >> blkbits,
-                       (offset + len - 1) >> blkbits);
  
         ext4_fc_start_update(inode);
  
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c

index 8d43058..f2033e1 100644 (file)
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -83,7 +83,7 @@
   *
   * Atomicity of commits
   * --------------------
- * In order to gaurantee atomicity during the commit operation, fast commit
+ * In order to guarantee atomicity during the commit operation, fast commit
   * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
   * tag contains CRC of the contents and TID of the transaction after which
   * this fast commit should be applied. Recovery code replays fast commit
@@ -152,7 +152,31 @@ void ext4_fc_init_inode(struct inode *inode)
         INIT_LIST_HEAD(&ei->i_fc_list);
         init_waitqueue_head(&ei->i_fc_wait);
         atomic_set(&ei->i_fc_updates, 0);
-       ei->i_fc_committed_subtid = 0;
+}
+
+/* This function must be called with sbi->s_fc_lock held. */
+static void ext4_fc_wait_committing_inode(struct inode *inode)
+__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
+{
+       wait_queue_head_t *wq;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+
+#if (BITS_PER_LONG < 64)
+       DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
+                       EXT4_STATE_FC_COMMITTING);
+       wq = bit_waitqueue(&ei->i_state_flags,
+                               EXT4_STATE_FC_COMMITTING);
+#else
+       DEFINE_WAIT_BIT(wait, &ei->i_flags,
+                       EXT4_STATE_FC_COMMITTING);
+       wq = bit_waitqueue(&ei->i_flags,
+                               EXT4_STATE_FC_COMMITTING);
+#endif
+       lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
+       prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+       spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+       schedule();
+       finish_wait(wq, &wait.wq_entry);
  }
  
  /*
@@ -176,22 +200,7 @@ restart:
                 goto out;
  
         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-               wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-               DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_state_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#else
-               DEFINE_WAIT_BIT(wait, &ei->i_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#endif
-               prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-               schedule();
-               finish_wait(wq, &wait.wq_entry);
+               ext4_fc_wait_committing_inode(inode);
                 goto restart;
         }
  out:
@@ -234,26 +243,10 @@ restart:
         }
  
         if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-               wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-               DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_state_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#else
-               DEFINE_WAIT_BIT(wait, &ei->i_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#endif
-               prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-               schedule();
-               finish_wait(wq, &wait.wq_entry);
+               ext4_fc_wait_committing_inode(inode);
                 goto restart;
         }
-       if (!list_empty(&ei->i_fc_list))
-               list_del_init(&ei->i_fc_list);
+       list_del_init(&ei->i_fc_list);
         spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
  }
  
@@ -269,7 +262,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
             (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                 return;
  
-       sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
         WARN_ON(reason >= EXT4_FC_REASON_MAX);
         sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
  }
@@ -302,14 +295,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
             (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                 return;
  
-       EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
         atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
  }
  
  static inline int ext4_fc_is_ineligible(struct super_block *sb)
  {
-       return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
-               atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
+       return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+               atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
  }
  
  /*
@@ -323,13 +316,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb)
   * If enqueue is set, this function enqueues the inode in fast commit list.
   */
  static int ext4_fc_track_template(
-       struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
+       handle_t *handle, struct inode *inode,
+       int (*__fc_track_fn)(struct inode *, void *, bool),
         void *args, int enqueue)
  {
-       tid_t running_txn_tid;
         bool update = false;
         struct ext4_inode_info *ei = EXT4_I(inode);
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       tid_t tid = 0;
         int ret;
  
         if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
@@ -339,15 +333,13 @@ static int ext4_fc_track_template(
         if (ext4_fc_is_ineligible(inode->i_sb))
                 return -EINVAL;
  
-       running_txn_tid = sbi->s_journal ?
-               sbi->s_journal->j_commit_sequence + 1 : 0;
-
+       tid = handle->h_transaction->t_tid;
         mutex_lock(&ei->i_fc_lock);
-       if (running_txn_tid == ei->i_sync_tid) {
+       if (tid == ei->i_sync_tid) {
                 update = true;
         } else {
                 ext4_fc_reset_inode(inode);
-               ei->i_sync_tid = running_txn_tid;
+               ei->i_sync_tid = tid;
         }
         ret = __fc_track_fn(inode, args, update);
         mutex_unlock(&ei->i_fc_lock);
@@ -358,7 +350,7 @@ static int ext4_fc_track_template(
         spin_lock(&sbi->s_fc_lock);
         if (list_empty(&EXT4_I(inode)->i_fc_list))
                 list_add_tail(&EXT4_I(inode)->i_fc_list,
-                               (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
+                               (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
                                 &sbi->s_fc_q[FC_Q_STAGING] :
                                 &sbi->s_fc_q[FC_Q_MAIN]);
         spin_unlock(&sbi->s_fc_lock);
@@ -384,7 +376,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
         mutex_unlock(&ei->i_fc_lock);
         node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
         if (!node) {
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
                 mutex_lock(&ei->i_fc_lock);
                 return -ENOMEM;
         }
@@ -397,7 +389,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
                 if (!node->fcd_name.name) {
                         kmem_cache_free(ext4_fc_dentry_cachep, node);
                         ext4_fc_mark_ineligible(inode->i_sb,
-                               EXT4_FC_REASON_MEM);
+                               EXT4_FC_REASON_NOMEM);
                         mutex_lock(&ei->i_fc_lock);
                         return -ENOMEM;
                 }
@@ -411,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
         node->fcd_name.len = dentry->d_name.len;
  
         spin_lock(&sbi->s_fc_lock);
-       if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
+       if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
                 list_add_tail(&node->fcd_list,
                                 &sbi->s_fc_dentry_q[FC_Q_STAGING]);
         else
@@ -422,7 +414,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
         return 0;
  }
  
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
+void __ext4_fc_track_unlink(handle_t *handle,
+               struct inode *inode, struct dentry *dentry)
  {
         struct __track_dentry_update_args args;
         int ret;
@@ -430,12 +423,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
         args.dentry = dentry;
         args.op = EXT4_FC_TAG_UNLINK;
  
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                         (void *)&args, 0);
         trace_ext4_fc_track_unlink(inode, dentry, ret);
  }
  
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
+{
+       __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+}
+
+void __ext4_fc_track_link(handle_t *handle,
+       struct inode *inode, struct dentry *dentry)
  {
         struct __track_dentry_update_args args;
         int ret;
@@ -443,20 +442,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
         args.dentry = dentry;
         args.op = EXT4_FC_TAG_LINK;
  
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                         (void *)&args, 0);
         trace_ext4_fc_track_link(inode, dentry, ret);
  }
  
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
+{
+       __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+}
+
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
  {
         struct __track_dentry_update_args args;
+       struct inode *inode = d_inode(dentry);
         int ret;
  
         args.dentry = dentry;
         args.op = EXT4_FC_TAG_CREAT;
  
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                         (void *)&args, 0);
         trace_ext4_fc_track_create(inode, dentry, ret);
  }
@@ -472,14 +477,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
         return 0;
  }
  
-void ext4_fc_track_inode(struct inode *inode)
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
  {
         int ret;
  
         if (S_ISDIR(inode->i_mode))
                 return;
  
-       ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
+       if (ext4_should_journal_data(inode)) {
+               ext4_fc_mark_ineligible(inode->i_sb,
+                                       EXT4_FC_REASON_INODE_JOURNAL_DATA);
+               return;
+       }
+
+       ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
         trace_ext4_fc_track_inode(inode, ret);
  }
  
@@ -515,7 +526,7 @@ static int __track_range(struct inode *inode, void *arg, bool update)
         return 0;
  }
  
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
                          ext4_lblk_t end)
  {
         struct __track_range_args args;
@@ -527,7 +538,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
         args.start = start;
         args.end = end;
  
-       ret = ext4_fc_track_template(inode,  __track_range, &args, 1);
+       ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
  
         trace_ext4_fc_track_range(inode, start, end, ret);
  }
@@ -537,10 +548,11 @@ static void ext4_fc_submit_bh(struct super_block *sb)
         int write_flags = REQ_SYNC;
         struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
  
+       /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
         if (test_opt(sb, BARRIER))
                 write_flags |= REQ_FUA | REQ_PREFLUSH;
         lock_buffer(bh);
-       clear_buffer_dirty(bh);
+       set_buffer_dirty(bh);
         set_buffer_uptodate(bh);
         bh->b_end_io = ext4_end_buffer_io_sync;
         submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -846,7 +858,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
         int ret = 0;
  
         spin_lock(&sbi->s_fc_lock);
-       sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
         list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
                 ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
                 ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -900,6 +912,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
  
  /* Commit all the directory entry updates */
  static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
+__acquires(&sbi->s_fc_lock)
+__releases(&sbi->s_fc_lock)
  {
         struct super_block *sb = (struct super_block *)(journal->j_private);
         struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -996,6 +1010,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
         if (ret)
                 return ret;
  
+       /*
+        * If file system device is different from journal device, issue a cache
+        * flush before we start writing fast commit blocks.
+        */
+       if (journal->j_fs_dev != journal->j_dev)
+               blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
+
         blk_start_plug(&plug);
         if (sbi->s_fc_bytes == 0) {
                 /*
@@ -1031,8 +1052,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
                 if (ret)
                         goto out;
                 spin_lock(&sbi->s_fc_lock);
-               EXT4_I(inode)->i_fc_committed_subtid =
-                       atomic_read(&sbi->s_fc_subtid);
         }
         spin_unlock(&sbi->s_fc_lock);
  
@@ -1131,7 +1150,7 @@ out:
                 "Fast commit ended with blks = %d, reason = %d, subtid - %d",
                 nblks, reason, subtid);
         if (reason == EXT4_FC_REASON_FC_FAILED)
-               return jbd2_fc_end_commit_fallback(journal, commit_tid);
+               return jbd2_fc_end_commit_fallback(journal);
         if (reason == EXT4_FC_REASON_FC_START_FAILED ||
                 reason == EXT4_FC_REASON_INELIGIBLE)
                 return jbd2_complete_transaction(journal, commit_tid);
@@ -1190,8 +1209,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
         list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
                                 &sbi->s_fc_q[FC_Q_STAGING]);
  
-       sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
  
         if (full)
                 sbi->s_fc_bytes = 0;
@@ -1263,7 +1282,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
                 return 0;
         }
  
-       ret = __ext4_unlink(old_parent, &entry, inode);
+       ret = __ext4_unlink(NULL, old_parent, &entry, inode);
         /* -ENOENT ok coz it might not exist anymore. */
         if (ret == -ENOENT)
                 ret = 0;
@@ -2079,8 +2098,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
  
  void ext4_fc_init(struct super_block *sb, journal_t *journal)
  {
-       int num_fc_blocks;
-
         /*
          * We set replay callback even if fast commit disabled because we may
          * could still have fast commit blocks that need to be replayed even if
@@ -2090,21 +2107,9 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
         if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
                 return;
         journal->j_fc_cleanup_callback = ext4_fc_cleanup;
-       if (!buffer_uptodate(journal->j_sb_buffer)
-               && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
-                                       true)) {
-               ext4_msg(sb, KERN_ERR, "I/O error on journal");
-               return;
-       }
-       num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
-       if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
-                                       EXT4_NUM_FC_BLKS)) {
-               pr_warn("Error while enabling fast commits, turning off.");
-               ext4_clear_feature_fast_commit(sb);
-       }
  }
  
-const char *fc_ineligible_reasons[] = {
+static const char *fc_ineligible_reasons[] = {
         "Extended attributes changed",
         "Cross rename",
         "Journal flag changed",
@@ -2113,6 +2118,7 @@ const char *fc_ineligible_reasons[] = {
         "Resize",
         "Dir renamed",
         "Falloc range op",
+       "Data journalling",
         "FC Commit Failed"
  };
  
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h

index 06907d4..3a6e5a1 100644 (file)
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -3,9 +3,6 @@
  #ifndef __FAST_COMMIT_H__
  #define __FAST_COMMIT_H__
  
-/* Number of blocks in journal area to allocate for fast commits */
-#define EXT4_NUM_FC_BLKS               256
-
  /* Fast commit tags */
  #define EXT4_FC_TAG_ADD_RANGE          0x0001
  #define EXT4_FC_TAG_DEL_RANGE          0x0002
@@ -100,11 +97,12 @@ enum {
         EXT4_FC_REASON_XATTR = 0,
         EXT4_FC_REASON_CROSS_RENAME,
         EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
-       EXT4_FC_REASON_MEM,
+       EXT4_FC_REASON_NOMEM,
         EXT4_FC_REASON_SWAP_BOOT,
         EXT4_FC_REASON_RESIZE,
         EXT4_FC_REASON_RENAME_DIR,
         EXT4_FC_REASON_FALLOC_RANGE,
+       EXT4_FC_REASON_INODE_JOURNAL_DATA,
         EXT4_FC_COMMIT_FAILED,
         EXT4_FC_REASON_MAX
  };
diff --git a/fs/ext4/file.c b/fs/ext4/file.c

index d85412d..3ed8c04 100644 (file)
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -761,7 +761,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
         if (!daxdev_mapping_supported(vma, dax_dev))
                 return -EOPNOTSUPP;
  
-       ext4_fc_start_update(inode);
         file_accessed(file);
         if (IS_DAX(file_inode(file))) {
                 vma->vm_ops = &ext4_dax_vm_ops;
@@ -769,7 +768,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
         } else {
                 vma->vm_ops = &ext4_file_vm_ops;
         }
-       ext4_fc_stop_update(inode);
         return 0;
  }
  
@@ -782,13 +780,13 @@ static int ext4_sample_last_mounted(struct super_block *sb,
         handle_t *handle;
         int err;
  
-       if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+       if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
                 return 0;
  
         if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
                 return 0;
  
-       sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+       ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
         /*
          * Sample where the filesystem has been mounted and
          * store it in the superblock for sysadmin convenience
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c

index b232c27..4c2a9fe 100644 (file)
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys,
  
         /* Fabricate an rmap entry for the external log device. */
         irec.fmr_physical = journal->j_blk_offset;
-       irec.fmr_length = journal->j_maxlen;
+       irec.fmr_length = journal->j_total_len;
         irec.fmr_owner = EXT4_FMR_OWN_LOG;
         irec.fmr_flags = 0;
  
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c

index 81a545f..a42ca95 100644 (file)
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -143,7 +143,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         if (sb_rdonly(inode->i_sb)) {
                 /* Make sure that we read updated s_mount_flags value */
                 smp_rmb();
-               if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+               if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
                         ret = -EROFS;
                 goto out;
         }
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c

index caa5147..b41512d 100644 (file)
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1880,6 +1880,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
  
         ext4_write_lock_xattr(inode, &no_expand);
         if (!ext4_has_inline_data(inode)) {
+               ext4_write_unlock_xattr(inode, &no_expand);
                 *has_inline = 0;
                 ext4_journal_stop(handle);
                 return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index b96a186..0d8385a 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -327,6 +327,8 @@ stop_handle:
         ext4_xattr_inode_array_free(ea_inode_array);
         return;
  no_delete:
+       if (!list_empty(&EXT4_I(inode)->i_fc_list))
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
         ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
  }
  
@@ -730,7 +732,7 @@ out_sem:
                         if (ret)
                                 return ret;
                 }
-               ext4_fc_track_range(inode, map->m_lblk,
+               ext4_fc_track_range(handle, inode, map->m_lblk,
                             map->m_lblk + map->m_len - 1);
         }
  
@@ -2440,7 +2442,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                         struct super_block *sb = inode->i_sb;
  
                         if (ext4_forced_shutdown(EXT4_SB(sb)) ||
-                           EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+                           ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                                 goto invalidate_dirty_pages;
                         /*
                          * Let the upper layers retry transient errors.
@@ -2674,7 +2676,7 @@ static int ext4_writepages(struct address_space *mapping,
          * the stack trace.
          */
         if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
-                    sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                    ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
                 ret = -EROFS;
                 goto out_writepages;
         }
@@ -3310,8 +3312,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
                         EXT4_I(inode)->i_datasync_tid))
                         return false;
                 if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
-                       return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) <
-                               EXT4_I(inode)->i_fc_committed_subtid;
+                       return !list_empty(&EXT4_I(inode)->i_fc_list);
                 return true;
         }
  
@@ -4109,7 +4110,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
  
                 up_write(&EXT4_I(inode)->i_data_sem);
         }
-       ext4_fc_track_range(inode, first_block, stop_block);
+       ext4_fc_track_range(handle, inode, first_block, stop_block);
         if (IS_SYNC(inode))
                 ext4_handle_sync(handle);
  
@@ -5442,14 +5443,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                         }
  
                         if (shrink)
-                               ext4_fc_track_range(inode,
+                               ext4_fc_track_range(handle, inode,
                                         (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
                                         inode->i_sb->s_blocksize_bits,
                                         (oldsize > 0 ? oldsize - 1 : 0) >>
                                         inode->i_sb->s_blocksize_bits);
                         else
                                 ext4_fc_track_range(
-                                       inode,
+                                       handle, inode,
                                         (oldsize > 0 ? oldsize - 1 : oldsize) >>
                                         inode->i_sb->s_blocksize_bits,
                                         (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
@@ -5699,7 +5700,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
                 put_bh(iloc->bh);
                 return -EIO;
         }
-       ext4_fc_track_inode(inode);
+       ext4_fc_track_inode(handle, inode);
  
         if (IS_I_VERSION(inode))
                 inode_inc_iversion(inode);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index 85abbfb..24af9ed 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4477,7 +4477,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
  {
         ext4_group_t i, ngroups;
  
-       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                 return;
  
         ngroups = ext4_get_groups_count(sb);
@@ -4508,7 +4508,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  {
         struct super_block *sb = ac->ac_sb;
  
-       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                 return;
  
         mb_debug(sb, "Can't allocate:"
@@ -5167,7 +5167,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
         struct super_block *sb = ar->inode->i_sb;
         ext4_group_t group;
         ext4_grpblk_t blkoff;
-       int  i;
+       int i = sb->s_blocksize;
         ext4_fsblk_t goal, block;
         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index f458d1d..3350926 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2606,7 +2606,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                        bool excl)
  {
         handle_t *handle;
-       struct inode *inode, *inode_save;
+       struct inode *inode;
         int err, credits, retries = 0;
  
         err = dquot_initialize(dir);
@@ -2624,11 +2624,9 @@ retry:
                 inode->i_op = &ext4_file_inode_operations;
                 inode->i_fop = &ext4_file_operations;
                 ext4_set_aops(inode);
-               inode_save = inode;
-               ihold(inode_save);
                 err = ext4_add_nondir(handle, dentry, &inode);
-               ext4_fc_track_create(inode_save, dentry);
-               iput(inode_save);
+               if (!err)
+                       ext4_fc_track_create(handle, dentry);
         }
         if (handle)
                 ext4_journal_stop(handle);
@@ -2643,7 +2641,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
                       umode_t mode, dev_t rdev)
  {
         handle_t *handle;
-       struct inode *inode, *inode_save;
+       struct inode *inode;
         int err, credits, retries = 0;
  
         err = dquot_initialize(dir);
@@ -2660,12 +2658,9 @@ retry:
         if (!IS_ERR(inode)) {
                 init_special_inode(inode, inode->i_mode, rdev);
                 inode->i_op = &ext4_special_inode_operations;
-               inode_save = inode;
-               ihold(inode_save);
                 err = ext4_add_nondir(handle, dentry, &inode);
                 if (!err)
-                       ext4_fc_track_create(inode_save, dentry);
-               iput(inode_save);
+                       ext4_fc_track_create(handle, dentry);
         }
         if (handle)
                 ext4_journal_stop(handle);
@@ -2829,7 +2824,6 @@ out_clear_inode:
                 iput(inode);
                 goto out_retry;
         }
-       ext4_fc_track_create(inode, dentry);
         ext4_inc_count(dir);
  
         ext4_update_dx_flag(dir);
@@ -2837,6 +2831,7 @@ out_clear_inode:
         if (err)
                 goto out_clear_inode;
         d_instantiate_new(dentry, inode);
+       ext4_fc_track_create(handle, dentry);
         if (IS_DIRSYNC(dir))
                 ext4_handle_sync(handle);
  
@@ -3171,7 +3166,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
                 goto end_rmdir;
         ext4_dec_count(dir);
         ext4_update_dx_flag(dir);
-       ext4_fc_track_unlink(inode, dentry);
+       ext4_fc_track_unlink(handle, dentry);
         retval = ext4_mark_inode_dirty(handle, dir);
  
  #ifdef CONFIG_UNICODE
@@ -3192,13 +3187,12 @@ end_rmdir:
         return retval;
  }
  
-int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
                   struct inode *inode)
  {
         int retval = -ENOENT;
         struct buffer_head *bh;
         struct ext4_dir_entry_2 *de;
-       handle_t *handle = NULL;
         int skip_remove_dentry = 0;
  
         bh = ext4_find_entry(dir, d_name, &de, NULL);
@@ -3217,14 +3211,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
                 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                         skip_remove_dentry = 1;
                 else
-                       goto out_bh;
-       }
-
-       handle = ext4_journal_start(dir, EXT4_HT_DIR,
-                                   EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
-       if (IS_ERR(handle)) {
-               retval = PTR_ERR(handle);
-               goto out_bh;
+                       goto out;
         }
  
         if (IS_DIRSYNC(dir))
@@ -3233,12 +3220,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
         if (!skip_remove_dentry) {
                 retval = ext4_delete_entry(handle, dir, de, bh);
                 if (retval)
-                       goto out_handle;
+                       goto out;
                 dir->i_ctime = dir->i_mtime = current_time(dir);
                 ext4_update_dx_flag(dir);
                 retval = ext4_mark_inode_dirty(handle, dir);
                 if (retval)
-                       goto out_handle;
+                       goto out;
         } else {
                 retval = 0;
         }
@@ -3252,15 +3239,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
         inode->i_ctime = current_time(inode);
         retval = ext4_mark_inode_dirty(handle, inode);
  
-out_handle:
-       ext4_journal_stop(handle);
-out_bh:
+out:
         brelse(bh);
         return retval;
  }
  
  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
  {
+       handle_t *handle;
         int retval;
  
         if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
@@ -3278,9 +3264,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
         if (retval)
                 goto out_trace;
  
-       retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+       handle = ext4_journal_start(dir, EXT4_HT_DIR,
+                                   EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle)) {
+               retval = PTR_ERR(handle);
+               goto out_trace;
+       }
+
+       retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
         if (!retval)
-               ext4_fc_track_unlink(d_inode(dentry), dentry);
+               ext4_fc_track_unlink(handle, dentry);
  #ifdef CONFIG_UNICODE
         /* VFS negative dentries are incompatible with Encoding and
          * Case-insensitiveness. Eventually we'll want avoid
@@ -3291,6 +3284,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
         if (IS_CASEFOLDED(dir))
                 d_invalidate(dentry);
  #endif
+       if (handle)
+               ext4_journal_stop(handle);
  
  out_trace:
         trace_ext4_unlink_exit(dentry, retval);
@@ -3447,7 +3442,6 @@ retry:
  
         err = ext4_add_entry(handle, dentry, inode);
         if (!err) {
-               ext4_fc_track_link(inode, dentry);
                 err = ext4_mark_inode_dirty(handle, inode);
                 /* this can happen only for tmpfile being
                  * linked the first time
@@ -3455,6 +3449,7 @@ retry:
                 if (inode->i_nlink == 1)
                         ext4_orphan_del(handle, inode);
                 d_instantiate(dentry, inode);
+               ext4_fc_track_link(handle, dentry);
         } else {
                 drop_nlink(inode);
                 iput(inode);
@@ -3915,9 +3910,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                         EXT4_FC_REASON_RENAME_DIR);
         } else {
                 if (new.inode)
-                       ext4_fc_track_unlink(new.inode, new.dentry);
-               ext4_fc_track_link(old.inode, new.dentry);
-               ext4_fc_track_unlink(old.inode, old.dentry);
+                       ext4_fc_track_unlink(handle, new.dentry);
+               __ext4_fc_track_link(handle, old.inode, new.dentry);
+               __ext4_fc_track_unlink(handle, old.inode, old.dentry);
         }
  
         if (new.inode) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index ef4734b..c3b8645 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -686,7 +686,7 @@ static void ext4_handle_error(struct super_block *sb)
         if (!test_opt(sb, ERRORS_CONT)) {
                 journal_t *journal = EXT4_SB(sb)->s_journal;
  
-               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                 if (journal)
                         jbd2_journal_abort(journal, -EIO);
         }
@@ -904,7 +904,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
         va_end(args);
  
         if (sb_rdonly(sb) == 0) {
-               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                 if (EXT4_SB(sb)->s_journal)
                         jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
  
@@ -1716,11 +1716,10 @@ enum {
         Opt_dioread_nolock, Opt_dioread_lock,
         Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
         Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
-       Opt_prefetch_block_bitmaps, Opt_no_fc,
+       Opt_prefetch_block_bitmaps,
  #ifdef CONFIG_EXT4_DEBUG
-       Opt_fc_debug_max_replay,
+       Opt_fc_debug_max_replay, Opt_fc_debug_force
  #endif
-       Opt_fc_debug_force
  };
  
  static const match_table_t tokens = {
@@ -1807,9 +1806,8 @@ static const match_table_t tokens = {
         {Opt_init_itable, "init_itable=%u"},
         {Opt_init_itable, "init_itable"},
         {Opt_noinit_itable, "noinit_itable"},
-       {Opt_no_fc, "no_fc"},
-       {Opt_fc_debug_force, "fc_debug_force"},
  #ifdef CONFIG_EXT4_DEBUG
+       {Opt_fc_debug_force, "fc_debug_force"},
         {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
  #endif
         {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
@@ -2027,8 +2025,8 @@ static const struct mount_opts {
         {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                        EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
                                                         MOPT_CLEAR | MOPT_Q},
-       {Opt_usrjquota, 0, MOPT_Q},
-       {Opt_grpjquota, 0, MOPT_Q},
+       {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+       {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
         {Opt_offusrjquota, 0, MOPT_Q},
         {Opt_offgrpjquota, 0, MOPT_Q},
         {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -2039,11 +2037,9 @@ static const struct mount_opts {
         {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
         {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
          MOPT_SET},
-       {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
-        MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
+#ifdef CONFIG_EXT4_DEBUG
         {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
          MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
-#ifdef CONFIG_EXT4_DEBUG
         {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
  #endif
         {Opt_err, 0, 0}
@@ -2153,7 +2149,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
                 return 1;
         case Opt_abort:
-               sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                 return 1;
         case Opt_i_version:
                 sb->s_flags |= SB_I_VERSION;
@@ -3976,7 +3972,7 @@ int ext4_calculate_overhead(struct super_block *sb)
          * loaded or not
          */
         if (sbi->s_journal && !sbi->s_journal_bdev)
-               overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+               overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
         else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
                 /* j_inum for internal journal is non-zero */
                 j_inode = ext4_get_journal_inode(sb, j_inum);
@@ -4340,9 +4336,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
  #endif
  
         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
                 /* can't mount with both data=journal and dioread_nolock. */
                 clear_opt(sb, DIOREAD_NOLOCK);
+               clear_opt2(sb, JOURNAL_FAST_COMMIT);
                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
                         ext4_msg(sb, KERN_ERR, "can't mount with "
                                  "both data=journal and delalloc");
@@ -4777,8 +4774,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
         sbi->s_fc_bytes = 0;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
         spin_lock_init(&sbi->s_fc_lock);
         memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
         sbi->s_fc_replay_state.fc_regions = NULL;
@@ -4857,6 +4854,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 goto failed_mount_wq;
         }
  
+       if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+               !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+                                         JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+               ext4_msg(sb, KERN_ERR,
+                       "Failed to set fast commit journal feature");
+               goto failed_mount_wq;
+       }
+
         /* We have now updated the journal if required, so we can
          * validate the data journaling mode. */
         switch (test_opt(sb, DATA_FLAGS)) {
@@ -5872,7 +5877,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                 goto restore_opts;
         }
  
-       if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                 ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
  
         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -5886,7 +5891,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
         }
  
         if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
-               if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+               if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
                         err = -EROFS;
                         goto restore_opts;
                 }
@@ -6560,10 +6565,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
         brelse(bh);
  out:
         if (inode->i_size < off + len) {
-               ext4_fc_track_range(inode,
-                       (inode->i_size > 0 ? inode->i_size - 1 : 0)
-                               >> inode->i_sb->s_blocksize_bits,
-                       (off + len) >> inode->i_sb->s_blocksize_bits);
                 i_size_write(inode, off + len);
                 EXT4_I(inode)->i_disksize = inode->i_size;
                 err2 = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c

index 263f02a..472932b 100644 (file)
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
   * for a checkpoint to free up some space in the log.
   */
  void __jbd2_log_wait_for_space(journal_t *journal)
+__acquires(&journal->j_state_lock)
+__releases(&journal->j_state_lock)
  {
         int nblocks, space_left;
         /* assert_spin_locked(&journal->j_state_lock); */
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c

index fa688e1..b121d7d 100644 (file)
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                 schedule();
                 write_lock(&journal->j_state_lock);
                 finish_wait(&journal->j_fc_wait, &wait);
+               /*
+                * TODO: by blocking fast commits here, we are increasing
+                * fsync() latency slightly. Strictly speaking, we don't need
+                * to block fast commits until the transaction enters T_FLUSH
+                * state. So an optimization is possible where we block new fast
+                * commits here and wait for existing ones to complete
+                * just before we enter T_FLUSH. That way, the existing fast
+                * commits and this full commit can proceed in parallel.
+                */
         }
         write_unlock(&journal->j_state_lock);
  
@@ -801,7 +810,7 @@ start_journal_io:
                 if (first_block < journal->j_tail)
                         freed += journal->j_last - journal->j_first;
                 /* Update tail only if we free significant amount of space */
-               if (freed < journal->j_maxlen / 4)
+               if (freed < jbd2_journal_get_max_txn_bufs(journal))
                         update_tail = 0;
         }
         J_ASSERT(commit_transaction->t_state == T_COMMIT);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c

index 0c7c42b..0c3d5e3 100644 (file)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -727,6 +727,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
   */
  int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
  {
+       if (unlikely(is_journal_aborted(journal)))
+               return -EIO;
         /*
          * Fast commits only allowed if at least one full commit has
          * been processed.
@@ -734,10 +736,12 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
         if (!journal->j_stats.ts_tid)
                 return -EINVAL;
  
-       if (tid <= journal->j_commit_sequence)
+       write_lock(&journal->j_state_lock);
+       if (tid <= journal->j_commit_sequence) {
+               write_unlock(&journal->j_state_lock);
                 return -EALREADY;
+       }
  
-       write_lock(&journal->j_state_lock);
         if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
             (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
                 DEFINE_WAIT(wait);
@@ -777,13 +781,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
  
  int jbd2_fc_end_commit(journal_t *journal)
  {
-       return __jbd2_fc_end_commit(journal, 0, 0);
+       return __jbd2_fc_end_commit(journal, 0, false);
  }
  EXPORT_SYMBOL(jbd2_fc_end_commit);
  
-int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+int jbd2_fc_end_commit_fallback(journal_t *journal)
  {
-       return __jbd2_fc_end_commit(journal, tid, 1);
+       tid_t tid;
+
+       read_lock(&journal->j_state_lock);
+       tid = journal->j_running_transaction ?
+               journal->j_running_transaction->t_tid : 0;
+       read_unlock(&journal->j_state_lock);
+       return __jbd2_fc_end_commit(journal, tid, true);
  }
  EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
  
@@ -865,7 +875,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
         int fc_off;
  
         *bh_out = NULL;
-       write_lock(&journal->j_state_lock);
  
         if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
                 fc_off = journal->j_fc_off;
@@ -874,7 +883,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
         } else {
                 ret = -EINVAL;
         }
-       write_unlock(&journal->j_state_lock);
  
         if (ret)
                 return ret;
@@ -887,11 +895,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
         if (!bh)
                 return -ENOMEM;
  
-       lock_buffer(bh);
  
-       clear_buffer_uptodate(bh);
-       set_buffer_dirty(bh);
-       unlock_buffer(bh);
         journal->j_fc_wbuf[fc_off] = bh;
  
         *bh_out = bh;
@@ -909,9 +913,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
         struct buffer_head *bh;
         int i, j_fc_off;
  
-       read_lock(&journal->j_state_lock);
         j_fc_off = journal->j_fc_off;
-       read_unlock(&journal->j_state_lock);
  
         /*
          * Wait in reverse order to minimize chances of us being woken up before
@@ -939,9 +941,7 @@ int jbd2_fc_release_bufs(journal_t *journal)
         struct buffer_head *bh;
         int i, j_fc_off;
  
-       read_lock(&journal->j_state_lock);
         j_fc_off = journal->j_fc_off;
-       read_unlock(&journal->j_state_lock);
  
         /*
          * Wait in reverse order to minimize chances of us being woken up before
@@ -1348,23 +1348,16 @@ static journal_t *journal_init_common(struct block_device *bdev,
         journal->j_dev = bdev;
         journal->j_fs_dev = fs_dev;
         journal->j_blk_offset = start;
-       journal->j_maxlen = len;
+       journal->j_total_len = len;
         /* We need enough buffers to write out full descriptor block. */
         n = journal->j_blocksize / jbd2_min_tag_size();
         journal->j_wbufsize = n;
+       journal->j_fc_wbuf = NULL;
         journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
                                         GFP_KERNEL);
         if (!journal->j_wbuf)
                 goto err_cleanup;
  
-       if (journal->j_fc_wbufsize > 0) {
-               journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-                                       sizeof(struct buffer_head *),
-                                       GFP_KERNEL);
-               if (!journal->j_fc_wbuf)
-                       goto err_cleanup;
-       }
-
         bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
         if (!bh) {
                 pr_err("%s: Cannot get buffer for journal superblock\n",
@@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev,
  
  err_cleanup:
         kfree(journal->j_wbuf);
-       kfree(journal->j_fc_wbuf);
         jbd2_journal_destroy_revoke(journal);
         kfree(journal);
         return NULL;
  }
  
-int jbd2_fc_init(journal_t *journal, int num_fc_blks)
-{
-       journal->j_fc_wbufsize = num_fc_blks;
-       journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-                               sizeof(struct buffer_head *), GFP_KERNEL);
-       if (!journal->j_fc_wbuf)
-               return -ENOMEM;
-       return 0;
-}
-EXPORT_SYMBOL(jbd2_fc_init);
-
  /* jbd2_journal_init_dev and jbd2_journal_init_inode:
   *
   * Create a journal structure assigned some fixed set of disk blocks to
@@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal)
         }
  
         journal->j_first = first;
-
-       if (jbd2_has_feature_fast_commit(journal) &&
-           journal->j_fc_wbufsize > 0) {
-               journal->j_fc_last = last;
-               journal->j_last = last - journal->j_fc_wbufsize;
-               journal->j_fc_first = journal->j_last + 1;
-               journal->j_fc_off = 0;
-       } else {
-               journal->j_last = last;
-       }
+       journal->j_last = last;
  
         journal->j_head = journal->j_first;
         journal->j_tail = journal->j_first;
@@ -1531,7 +1503,14 @@ static int journal_reset(journal_t *journal)
         journal->j_commit_sequence = journal->j_transaction_sequence - 1;
         journal->j_commit_request = journal->j_commit_sequence;
  
-       journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+       journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
+
+       /*
+        * Now that journal recovery is done, turn fast commits off here. This
+        * way, if fast commit was enabled before the crash but the FS has
+        * since disabled it, we don't enable fast commits.
+        */
+       jbd2_clear_feature_fast_commit(journal);
  
         /*
          * As a special case, if the on-disk copy is already marked as needing
@@ -1792,15 +1771,15 @@ static int journal_get_superblock(journal_t *journal)
                 goto out;
         }
  
-       if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
-               journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
-       else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+       if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+               journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+       else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
                 printk(KERN_WARNING "JBD2: journal file too short\n");
                 goto out;
         }
  
         if (be32_to_cpu(sb->s_first) == 0 ||
-           be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+           be32_to_cpu(sb->s_first) >= journal->j_total_len) {
                 printk(KERN_WARNING
                         "JBD2: Invalid start block of journal: %u\n",
                         be32_to_cpu(sb->s_first));
@@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal)
  {
         int err;
         journal_superblock_t *sb;
+       int num_fc_blocks;
  
         err = journal_get_superblock(journal);
         if (err)
@@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal)
         journal->j_tail = be32_to_cpu(sb->s_start);
         journal->j_first = be32_to_cpu(sb->s_first);
         journal->j_errno = be32_to_cpu(sb->s_errno);
+       journal->j_last = be32_to_cpu(sb->s_maxlen);
  
-       if (jbd2_has_feature_fast_commit(journal) &&
-           journal->j_fc_wbufsize > 0) {
+       if (jbd2_has_feature_fast_commit(journal)) {
                 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
-               journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+               num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+               if (!num_fc_blocks)
+                       num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+               if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
+                       journal->j_last = journal->j_fc_last - num_fc_blocks;
                 journal->j_fc_first = journal->j_last + 1;
                 journal->j_fc_off = 0;
-       } else {
-               journal->j_last = be32_to_cpu(sb->s_maxlen);
         }
  
         return 0;
@@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal)
          */
         journal->j_flags &= ~JBD2_ABORT;
  
-       if (journal->j_fc_wbufsize > 0)
-               jbd2_journal_set_features(journal, 0, 0,
-                                         JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
         /* OK, we've finished with the dynamic journal bits:
          * reinitialise the dynamic contents of the superblock in memory
          * and reset them on disk. */
@@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal)
                 jbd2_journal_destroy_revoke(journal);
         if (journal->j_chksum_driver)
                 crypto_free_shash(journal->j_chksum_driver);
-       if (journal->j_fc_wbufsize > 0)
-               kfree(journal->j_fc_wbuf);
+       kfree(journal->j_fc_wbuf);
         kfree(journal->j_wbuf);
         kfree(journal);
  
@@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp
         return 0;
  }
  
+static int
+jbd2_journal_initialize_fast_commit(journal_t *journal)
+{
+       journal_superblock_t *sb = journal->j_superblock;
+       unsigned long long num_fc_blks;
+
+       num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
+       if (num_fc_blks == 0)
+               num_fc_blks = JBD2_MIN_FC_BLOCKS;
+       if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
+               return -ENOSPC;
+
+       /* Are we called twice? */
+       WARN_ON(journal->j_fc_wbuf != NULL);
+       journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
+                               sizeof(struct buffer_head *), GFP_KERNEL);
+       if (!journal->j_fc_wbuf)
+               return -ENOMEM;
+
+       journal->j_fc_wbufsize = num_fc_blks;
+       journal->j_fc_last = journal->j_last;
+       journal->j_last = journal->j_fc_last - num_fc_blks;
+       journal->j_fc_first = journal->j_last + 1;
+       journal->j_fc_off = 0;
+       journal->j_free = journal->j_last - journal->j_first;
+       journal->j_max_transaction_buffers =
+               jbd2_journal_get_max_txn_bufs(journal);
+
+       return 0;
+}
+
  /**
   * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
   * @journal: Journal to act on.
@@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
  
         sb = journal->j_superblock;
  
+       if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
+               if (jbd2_journal_initialize_fast_commit(journal)) {
+                       pr_err("JBD2: Cannot enable fast commits.\n");
+                       return 0;
+               }
+       }
+
         /* Load the checksum driver if necessary */
         if ((journal->j_chksum_driver == NULL) &&
             INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c

index eb26061..dc0694f 100644 (file)
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start)
  
         /* Do up to 128K of readahead */
         max = start + (128 * 1024 / journal->j_blocksize);
-       if (max > journal->j_maxlen)
-               max = journal->j_maxlen;
+       if (max > journal->j_total_len)
+               max = journal->j_total_len;
  
         /* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
          * a time to the block device IO layer. */
@@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
  
         *bhp = NULL;
  
-       if (offset >= journal->j_maxlen) {
+       if (offset >= journal->j_total_len) {
                 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
                 return -EFSCORRUPTED;
         }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c

index 4398573..d54f046 100644 (file)
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -195,8 +195,10 @@ static void wait_transaction_switching(journal_t *journal)
         DEFINE_WAIT(wait);
  
         if (WARN_ON(!journal->j_running_transaction ||
-                   journal->j_running_transaction->t_state != T_SWITCH))
+                   journal->j_running_transaction->t_state != T_SWITCH)) {
+               read_unlock(&journal->j_state_lock);
                 return;
+       }
         prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
                         TASK_UNINTERRUPTIBLE);
         read_unlock(&journal->j_state_lock);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c

index b9a9d69..db52e84 100644 (file)
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
                 goto done;
         }
  
-       trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
+       trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);
  
         *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
                   OCFS2_JOURNAL_DIRTY_FL);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h

index 1d5566a..1c49fd6 100644 (file)
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags);
  extern void jbd2_free(void *ptr, size_t size);
  
  #define JBD2_MIN_JOURNAL_BLOCKS 1024
+#define JBD2_MIN_FC_BLOCKS     256
  
  #ifdef __KERNEL__
  
@@ -944,8 +945,9 @@ struct journal_s
         /**
          * @j_fc_off:
          *
-        * Number of fast commit blocks currently allocated.
-        * [j_state_lock].
+        * Number of fast commit blocks currently allocated. Accessed only
+        * during fast commit. Currently only one process can do fast
+        * commit, so this field is not protected by any lock.
          */
         unsigned long           j_fc_off;
  
@@ -988,9 +990,9 @@ struct journal_s
         struct block_device     *j_fs_dev;
  
         /**
-        * @j_maxlen: Total maximum capacity of the journal region on disk.
+        * @j_total_len: Total maximum capacity of the journal region on disk.
          */
-       unsigned int            j_maxlen;
+       unsigned int            j_total_len;
  
         /**
          * @j_reserved_credits:
@@ -1108,8 +1110,9 @@ struct journal_s
         struct buffer_head      **j_wbuf;
  
         /**
-        * @j_fc_wbuf: Array of fast commit bhs for
-        * jbd2_journal_commit_transaction.
+        * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only
+        * during a fast commit. Currently only one process can do fast
+        * commit, so this field is not protected by any lock.
          */
         struct buffer_head      **j_fc_wbuf;
  
@@ -1614,16 +1617,20 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
  extern int jbd2_cleanup_journal_tail(journal_t *);
  
  /* Fast commit related APIs */
-int jbd2_fc_init(journal_t *journal, int num_fc_blks);
  int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
  int jbd2_fc_end_commit(journal_t *journal);
-int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
+int jbd2_fc_end_commit_fallback(journal_t *journal);
  int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
  int jbd2_submit_inode_data(struct jbd2_inode *jinode);
  int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
  int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
  int jbd2_fc_release_bufs(journal_t *journal);
  
+static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal)
+{
+       return (journal->j_total_len - journal->j_fc_wbufsize) / 4;
+}
+
  /*
   * is_journal_abort
   *
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index b14314f..70ae549 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -100,11 +100,12 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
                 { EXT4_FC_REASON_XATTR,         "XATTR"},               \
                 { EXT4_FC_REASON_CROSS_RENAME,  "CROSS_RENAME"},        \
                 { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \
-               { EXT4_FC_REASON_MEM,   "NO_MEM"},                      \
+               { EXT4_FC_REASON_NOMEM, "NO_MEM"},                      \
                 { EXT4_FC_REASON_SWAP_BOOT,     "SWAP_BOOT"},           \
                 { EXT4_FC_REASON_RESIZE,        "RESIZE"},              \
                 { EXT4_FC_REASON_RENAME_DIR,    "RENAME_DIR"},          \
-               { EXT4_FC_REASON_FALLOC_RANGE,  "FALLOC_RANGE"})
+               { EXT4_FC_REASON_FALLOC_RANGE,  "FALLOC_RANGE"},        \
+               { EXT4_FC_REASON_INODE_JOURNAL_DATA,    "INODE_JOURNAL_DATA"})
  
  TRACE_EVENT(ext4_other_inode_update_time,
         TP_PROTO(struct inode *inode, ino_t orig_ino),
@@ -2917,17 +2918,18 @@ TRACE_EVENT(ext4_fc_stats,
                     ),
  
             TP_printk("dev %d:%d fc ineligible reasons:\n"
-                     "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; "
+                     "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
                       "num_commits:%ld, ineligible: %ld, numblks: %ld",
                       MAJOR(__entry->dev), MINOR(__entry->dev),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
-                     FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM),
+                     FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
                       FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+                     FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
                       __entry->sbi->s_fc_stats.fc_num_commits,
                       __entry->sbi->s_fc_stats.fc_ineligible_commits,
                       __entry->sbi->s_fc_stats.fc_numblks)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
Documentation/filesystems/ext4/journal.rst		patch \| blob \| history
Documentation/filesystems/ext4/super.rst		patch \| blob \| history
Documentation/filesystems/journalling.rst		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
fs/ext4/ext4.h		patch \| blob \| history
fs/ext4/extents.c		patch \| blob \| history
fs/ext4/fast_commit.c		patch \| blob \| history
fs/ext4/fast_commit.h		patch \| blob \| history
fs/ext4/file.c		patch \| blob \| history
fs/ext4/fsmap.c		patch \| blob \| history
fs/ext4/fsync.c		patch \| blob \| history
fs/ext4/inline.c		patch \| blob \| history
fs/ext4/inode.c		patch \| blob \| history
fs/ext4/mballoc.c		patch \| blob \| history
fs/ext4/namei.c		patch \| blob \| history
fs/ext4/super.c		patch \| blob \| history
fs/jbd2/checkpoint.c		patch \| blob \| history
fs/jbd2/commit.c		patch \| blob \| history
fs/jbd2/journal.c		patch \| blob \| history
fs/jbd2/recovery.c		patch \| blob \| history
fs/jbd2/transaction.c		patch \| blob \| history
fs/ocfs2/journal.c		patch \| blob \| history
include/linux/jbd2.h		patch \| blob \| history
include/trace/events/ext4.h		patch \| blob \| history