Merge tag 'ext4_for_linus_cleanups' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 9 Nov 2020 20:36:58 +0000 (12:36 -0800)
Pull ext4 fixes and cleanups from Ted Ts'o:
 "More fixes and cleanups for the new fast_commit features, but also a
  few other miscellaneous bug fixes and a cleanup for the MAINTAINERS
  file"

* tag 'ext4_for_linus_cleanups' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (28 commits)
  jbd2: fix up sparse warnings in checkpoint code
  ext4: fix sparse warnings in fast_commit code
  ext4: cleanup fast commit mount options
  jbd2: don't start fast commit on aborted journal
  ext4: make s_mount_flags modifications atomic
  ext4: issue fsdev cache flush before starting fast commit
  ext4: disable fast commit with data journalling
  ext4: fix inode dirty check in case of fast commits
  ext4: remove unnecessary fast commit calls from ext4_file_mmap
  ext4: mark buf dirty before submitting fast commit buffer
  ext4: fix code documentatioon
  ext4: dedpulicate the code to wait on inode that's being committed
  jbd2: don't read journal->j_commit_sequence without taking a lock
  jbd2: don't touch buffer state until it is filled
  jbd2: add todo for a fast commit performance optimization
  jbd2: don't pass tid to jbd2_fc_end_commit_fallback()
  jbd2: don't use state lock during commit path
  jbd2: drop jbd2_fc_init documentation
  ext4: clean up the JBD2 API that initializes fast commits
  jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs
  ...

24 files changed:
Documentation/filesystems/ext4/journal.rst
Documentation/filesystems/ext4/super.rst
Documentation/filesystems/journalling.rst
MAINTAINERS
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/fast_commit.c
fs/ext4/fast_commit.h
fs/ext4/file.c
fs/ext4/fsmap.c
fs/ext4/fsync.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/super.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/transaction.c
fs/ocfs2/journal.c
include/linux/jbd2.h
include/trace/events/ext4.h

index 805a1e9..849d5b1 100644 (file)
@@ -256,6 +256,10 @@ which is 1024 bytes long:
      - s\_padding2
      -
    * - 0x54
+     - \_\_be32
+     - s\_num\_fc\_blocks
+     - Number of fast commit blocks in the journal.
+   * - 0x58
      - \_\_u32
      - s\_padding[42]
      -
@@ -310,6 +314,8 @@ The journal incompat features are any combination of the following:
      - This journal uses v3 of the checksum on-disk format. This is the same as
        v2, but the journal block tag size is fixed regardless of the size of
        block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
+   * - 0x20
+     - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
 
 .. _jbd2_checksum_type:
 
index 93e55d7..2eb1ab2 100644 (file)
@@ -596,6 +596,13 @@ following:
      - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
        points to the two block groups that contain backup superblocks
        (COMPAT\_SPARSE\_SUPER2).
+   * - 0x400
+     - Fast commits supported. Although fast commits blocks are
+       backward incompatible, fast commit blocks are not always
+       present in the journal. If fast commit blocks are present in
+       the journal, JBD2 incompat feature
+       (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
+       set (COMPAT\_FAST\_COMMIT).
 
 .. _super_incompat:
 
index 5a5f70b..e18f90f 100644 (file)
@@ -136,10 +136,8 @@ Fast commits
 ~~~~~~~~~~~~
 
 JBD2 to also allows you to perform file-system specific delta commits known as
-fast commits. In order to use fast commits, you first need to call
-:c:func:`jbd2_fc_init` and tell how many blocks at the end of journal
-area should be reserved for fast commits. Along with that, you will also need
-to set following callbacks that perform correspodning work:
+fast commits. In order to use fast commits, you will need to set following
+callbacks that perform correspodning work:
 
 `journal->j_fc_cleanup_cb`: Cleanup function called after every full commit and
 fast commit.
index 3da6d8c..94ac10a 100644 (file)
@@ -6614,6 +6614,7 @@ Q:        http://patchwork.ozlabs.org/project/linux-ext4/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
 F:     Documentation/filesystems/ext4/
 F:     fs/ext4/
+F:     include/trace/events/ext4.h
 
 Extended Verification Module (EVM)
 M:     Mimi Zohar <zohar@linux.ibm.com>
index 45fcdbf..1b399ca 100644 (file)
@@ -1028,9 +1028,6 @@ struct ext4_inode_info {
                                         * protected by sbi->s_fc_lock.
                                         */
 
-       /* Fast commit subtid when this inode was committed */
-       unsigned int i_fc_committed_subtid;
-
        /* Start of lblk range that needs to be committed in this fast commit */
        ext4_lblk_t i_fc_lblk_start;
 
@@ -1422,16 +1419,6 @@ struct ext4_super_block {
 
 #ifdef __KERNEL__
 
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED         0x0001
-#define EXT4_MF_FS_ABORTED             0x0002  /* Fatal error detected */
-#define EXT4_MF_FC_INELIGIBLE          0x0004  /* Fast commit ineligible */
-#define EXT4_MF_FC_COMMITTING          0x0008  /* File system underoing a fast
-                                                * commit.
-                                                */
-
 #ifdef CONFIG_FS_ENCRYPTION
 #define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
 #else
@@ -1466,7 +1453,7 @@ struct ext4_sb_info {
        struct buffer_head * __rcu *s_group_desc;
        unsigned int s_mount_opt;
        unsigned int s_mount_opt2;
-       unsigned int s_mount_flags;
+       unsigned long s_mount_flags;
        unsigned int s_def_mount_opt;
        ext4_fsblk_t s_sb_block;
        atomic64_t s_resv_clusters;
@@ -1694,6 +1681,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
        _v;                                                                \
 })
 
+/*
+ * run-time mount flags
+ */
+enum {
+       EXT4_MF_MNTDIR_SAMPLED,
+       EXT4_MF_FS_ABORTED,     /* Fatal error detected */
+       EXT4_MF_FC_INELIGIBLE,  /* Fast commit ineligible */
+       EXT4_MF_FC_COMMITTING   /* File system underoing a fast
+                                * commit.
+                                */
+};
+
+static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+{
+       set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline void ext4_clear_mount_flag(struct super_block *sb, int bit)
+{
+       clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline int ext4_test_mount_flag(struct super_block *sb, int bit)
+{
+       return test_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+
 /*
  * Simulate_fail codes
  */
@@ -1863,6 +1878,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
 #define EXT4_FEATURE_COMPAT_RESIZE_INODE       0x0010
 #define EXT4_FEATURE_COMPAT_DIR_INDEX          0x0020
 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2      0x0200
+/*
+ * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes
+ * incompatible only if fast commit blocks are present in the FS. Since we
+ * clear the journal (and thus the fast commit blocks), we don't mark FS as
+ * incompatible. We also have a JBD2 incompat feature, which gets set when
+ * there are fast commit blocks present in the journal.
+ */
 #define EXT4_FEATURE_COMPAT_FAST_COMMIT                0x0400
 #define EXT4_FEATURE_COMPAT_STABLE_INODES      0x0800
 
@@ -2731,12 +2753,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 int ext4_fc_info_show(struct seq_file *seq, void *v);
 void ext4_fc_init(struct super_block *sb, journal_t *journal);
 void ext4_fc_init_inode(struct inode *inode);
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
                         ext4_lblk_t end);
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_inode(struct inode *inode);
+void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
+       struct dentry *dentry);
+void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
+       struct dentry *dentry);
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
 void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
 void ext4_fc_start_ineligible(struct super_block *sb, int reason);
 void ext4_fc_stop_ineligible(struct super_block *sb);
@@ -3452,7 +3478,7 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
 extern int ext4_ci_compare(const struct inode *parent,
                           const struct qstr *fname,
                           const struct qstr *entry, bool quick);
-extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
                         struct inode *inode);
 extern int __ext4_link(struct inode *dir, struct inode *inode,
                       struct dentry *dentry);
index 57cfa28..17d7096 100644 (file)
@@ -3724,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
        err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
        ext4_ext_show_leaf(inode, path);
-       ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
        return err;
 }
 
@@ -3796,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
        if (*allocated > map->m_len)
                *allocated = map->m_len;
        map->m_len = *allocated;
-       ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
        return 0;
 }
 
@@ -4329,7 +4327,6 @@ got_allocated_blocks:
        map->m_len = ar.len;
        allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
 out:
        ext4_ext_drop_refs(path);
        kfree(path);
@@ -4602,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        ret = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(ret))
                goto out_handle;
-       ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
+       ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
                        (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
        /* Zero out partial block at the edges of the range */
        ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
                     FALLOC_FL_INSERT_RANGE))
                return -EOPNOTSUPP;
-       ext4_fc_track_range(inode, offset >> blkbits,
-                       (offset + len - 1) >> blkbits);
 
        ext4_fc_start_update(inode);
 
index 8d43058..f2033e1 100644 (file)
@@ -83,7 +83,7 @@
  *
  * Atomicity of commits
  * --------------------
- * In order to gaurantee atomicity during the commit operation, fast commit
+ * In order to guarantee atomicity during the commit operation, fast commit
  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
  * tag contains CRC of the contents and TID of the transaction after which
  * this fast commit should be applied. Recovery code replays fast commit
@@ -152,7 +152,31 @@ void ext4_fc_init_inode(struct inode *inode)
        INIT_LIST_HEAD(&ei->i_fc_list);
        init_waitqueue_head(&ei->i_fc_wait);
        atomic_set(&ei->i_fc_updates, 0);
-       ei->i_fc_committed_subtid = 0;
+}
+
+/* This function must be called with sbi->s_fc_lock held. */
+static void ext4_fc_wait_committing_inode(struct inode *inode)
+__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
+{
+       wait_queue_head_t *wq;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+
+#if (BITS_PER_LONG < 64)
+       DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
+                       EXT4_STATE_FC_COMMITTING);
+       wq = bit_waitqueue(&ei->i_state_flags,
+                               EXT4_STATE_FC_COMMITTING);
+#else
+       DEFINE_WAIT_BIT(wait, &ei->i_flags,
+                       EXT4_STATE_FC_COMMITTING);
+       wq = bit_waitqueue(&ei->i_flags,
+                               EXT4_STATE_FC_COMMITTING);
+#endif
+       lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
+       prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+       spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+       schedule();
+       finish_wait(wq, &wait.wq_entry);
 }
 
 /*
@@ -176,22 +200,7 @@ restart:
                goto out;
 
        if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-               wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-               DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_state_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#else
-               DEFINE_WAIT_BIT(wait, &ei->i_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#endif
-               prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-               schedule();
-               finish_wait(wq, &wait.wq_entry);
+               ext4_fc_wait_committing_inode(inode);
                goto restart;
        }
 out:
@@ -234,26 +243,10 @@ restart:
        }
 
        if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-               wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-               DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_state_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#else
-               DEFINE_WAIT_BIT(wait, &ei->i_flags,
-                               EXT4_STATE_FC_COMMITTING);
-               wq = bit_waitqueue(&ei->i_flags,
-                                  EXT4_STATE_FC_COMMITTING);
-#endif
-               prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-               schedule();
-               finish_wait(wq, &wait.wq_entry);
+               ext4_fc_wait_committing_inode(inode);
                goto restart;
        }
-       if (!list_empty(&ei->i_fc_list))
-               list_del_init(&ei->i_fc_list);
+       list_del_init(&ei->i_fc_list);
        spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 }
 
@@ -269,7 +262,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
            (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                return;
 
-       sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
        WARN_ON(reason >= EXT4_FC_REASON_MAX);
        sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 }
@@ -302,14 +295,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
            (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                return;
 
-       EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
        atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
 }
 
 static inline int ext4_fc_is_ineligible(struct super_block *sb)
 {
-       return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
-               atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
+       return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+               atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
 }
 
 /*
@@ -323,13 +316,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb)
  * If enqueue is set, this function enqueues the inode in fast commit list.
  */
 static int ext4_fc_track_template(
-       struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
+       handle_t *handle, struct inode *inode,
+       int (*__fc_track_fn)(struct inode *, void *, bool),
        void *args, int enqueue)
 {
-       tid_t running_txn_tid;
        bool update = false;
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       tid_t tid = 0;
        int ret;
 
        if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
@@ -339,15 +333,13 @@ static int ext4_fc_track_template(
        if (ext4_fc_is_ineligible(inode->i_sb))
                return -EINVAL;
 
-       running_txn_tid = sbi->s_journal ?
-               sbi->s_journal->j_commit_sequence + 1 : 0;
-
+       tid = handle->h_transaction->t_tid;
        mutex_lock(&ei->i_fc_lock);
-       if (running_txn_tid == ei->i_sync_tid) {
+       if (tid == ei->i_sync_tid) {
                update = true;
        } else {
                ext4_fc_reset_inode(inode);
-               ei->i_sync_tid = running_txn_tid;
+               ei->i_sync_tid = tid;
        }
        ret = __fc_track_fn(inode, args, update);
        mutex_unlock(&ei->i_fc_lock);
@@ -358,7 +350,7 @@ static int ext4_fc_track_template(
        spin_lock(&sbi->s_fc_lock);
        if (list_empty(&EXT4_I(inode)->i_fc_list))
                list_add_tail(&EXT4_I(inode)->i_fc_list,
-                               (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
+                               (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
                                &sbi->s_fc_q[FC_Q_STAGING] :
                                &sbi->s_fc_q[FC_Q_MAIN]);
        spin_unlock(&sbi->s_fc_lock);
@@ -384,7 +376,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        mutex_unlock(&ei->i_fc_lock);
        node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
        if (!node) {
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
                mutex_lock(&ei->i_fc_lock);
                return -ENOMEM;
        }
@@ -397,7 +389,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
                if (!node->fcd_name.name) {
                        kmem_cache_free(ext4_fc_dentry_cachep, node);
                        ext4_fc_mark_ineligible(inode->i_sb,
-                               EXT4_FC_REASON_MEM);
+                               EXT4_FC_REASON_NOMEM);
                        mutex_lock(&ei->i_fc_lock);
                        return -ENOMEM;
                }
@@ -411,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        node->fcd_name.len = dentry->d_name.len;
 
        spin_lock(&sbi->s_fc_lock);
-       if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
+       if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
                list_add_tail(&node->fcd_list,
                                &sbi->s_fc_dentry_q[FC_Q_STAGING]);
        else
@@ -422,7 +414,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        return 0;
 }
 
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
+void __ext4_fc_track_unlink(handle_t *handle,
+               struct inode *inode, struct dentry *dentry)
 {
        struct __track_dentry_update_args args;
        int ret;
@@ -430,12 +423,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
        args.dentry = dentry;
        args.op = EXT4_FC_TAG_UNLINK;
 
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                        (void *)&args, 0);
        trace_ext4_fc_track_unlink(inode, dentry, ret);
 }
 
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
+{
+       __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+}
+
+void __ext4_fc_track_link(handle_t *handle,
+       struct inode *inode, struct dentry *dentry)
 {
        struct __track_dentry_update_args args;
        int ret;
@@ -443,20 +442,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
        args.dentry = dentry;
        args.op = EXT4_FC_TAG_LINK;
 
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                        (void *)&args, 0);
        trace_ext4_fc_track_link(inode, dentry, ret);
 }
 
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
+{
+       __ext4_fc_track_link(handle, d_inode(dentry), dentry);
+}
+
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
 {
        struct __track_dentry_update_args args;
+       struct inode *inode = d_inode(dentry);
        int ret;
 
        args.dentry = dentry;
        args.op = EXT4_FC_TAG_CREAT;
 
-       ret = ext4_fc_track_template(inode, __track_dentry_update,
+       ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
                                        (void *)&args, 0);
        trace_ext4_fc_track_create(inode, dentry, ret);
 }
@@ -472,14 +477,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
        return 0;
 }
 
-void ext4_fc_track_inode(struct inode *inode)
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 {
        int ret;
 
        if (S_ISDIR(inode->i_mode))
                return;
 
-       ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
+       if (ext4_should_journal_data(inode)) {
+               ext4_fc_mark_ineligible(inode->i_sb,
+                                       EXT4_FC_REASON_INODE_JOURNAL_DATA);
+               return;
+       }
+
+       ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
        trace_ext4_fc_track_inode(inode, ret);
 }
 
@@ -515,7 +526,7 @@ static int __track_range(struct inode *inode, void *arg, bool update)
        return 0;
 }
 
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
                         ext4_lblk_t end)
 {
        struct __track_range_args args;
@@ -527,7 +538,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
        args.start = start;
        args.end = end;
 
-       ret = ext4_fc_track_template(inode,  __track_range, &args, 1);
+       ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
 
        trace_ext4_fc_track_range(inode, start, end, ret);
 }
@@ -537,10 +548,11 @@ static void ext4_fc_submit_bh(struct super_block *sb)
        int write_flags = REQ_SYNC;
        struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
 
+       /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
        if (test_opt(sb, BARRIER))
                write_flags |= REQ_FUA | REQ_PREFLUSH;
        lock_buffer(bh);
-       clear_buffer_dirty(bh);
+       set_buffer_dirty(bh);
        set_buffer_uptodate(bh);
        bh->b_end_io = ext4_end_buffer_io_sync;
        submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -846,7 +858,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
        int ret = 0;
 
        spin_lock(&sbi->s_fc_lock);
-       sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
+       ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
        list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
                ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
                ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -900,6 +912,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
 
 /* Commit all the directory entry updates */
 static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
+__acquires(&sbi->s_fc_lock)
+__releases(&sbi->s_fc_lock)
 {
        struct super_block *sb = (struct super_block *)(journal->j_private);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -996,6 +1010,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
        if (ret)
                return ret;
 
+       /*
+        * If file system device is different from journal device, issue a cache
+        * flush before we start writing fast commit blocks.
+        */
+       if (journal->j_fs_dev != journal->j_dev)
+               blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
+
        blk_start_plug(&plug);
        if (sbi->s_fc_bytes == 0) {
                /*
@@ -1031,8 +1052,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
                if (ret)
                        goto out;
                spin_lock(&sbi->s_fc_lock);
-               EXT4_I(inode)->i_fc_committed_subtid =
-                       atomic_read(&sbi->s_fc_subtid);
        }
        spin_unlock(&sbi->s_fc_lock);
 
@@ -1131,7 +1150,7 @@ out:
                "Fast commit ended with blks = %d, reason = %d, subtid - %d",
                nblks, reason, subtid);
        if (reason == EXT4_FC_REASON_FC_FAILED)
-               return jbd2_fc_end_commit_fallback(journal, commit_tid);
+               return jbd2_fc_end_commit_fallback(journal);
        if (reason == EXT4_FC_REASON_FC_START_FAILED ||
                reason == EXT4_FC_REASON_INELIGIBLE)
                return jbd2_complete_transaction(journal, commit_tid);
@@ -1190,8 +1209,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
        list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
                                &sbi->s_fc_q[FC_Q_STAGING]);
 
-       sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 
        if (full)
                sbi->s_fc_bytes = 0;
@@ -1263,7 +1282,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
                return 0;
        }
 
-       ret = __ext4_unlink(old_parent, &entry, inode);
+       ret = __ext4_unlink(NULL, old_parent, &entry, inode);
        /* -ENOENT ok coz it might not exist anymore. */
        if (ret == -ENOENT)
                ret = 0;
@@ -2079,8 +2098,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 
 void ext4_fc_init(struct super_block *sb, journal_t *journal)
 {
-       int num_fc_blocks;
-
        /*
         * We set replay callback even if fast commit disabled because we may
         * could still have fast commit blocks that need to be replayed even if
@@ -2090,21 +2107,9 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
        if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
                return;
        journal->j_fc_cleanup_callback = ext4_fc_cleanup;
-       if (!buffer_uptodate(journal->j_sb_buffer)
-               && ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
-                                       true)) {
-               ext4_msg(sb, KERN_ERR, "I/O error on journal");
-               return;
-       }
-       num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
-       if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
-                                       EXT4_NUM_FC_BLKS)) {
-               pr_warn("Error while enabling fast commits, turning off.");
-               ext4_clear_feature_fast_commit(sb);
-       }
 }
 
-const char *fc_ineligible_reasons[] = {
+static const char *fc_ineligible_reasons[] = {
        "Extended attributes changed",
        "Cross rename",
        "Journal flag changed",
@@ -2113,6 +2118,7 @@ const char *fc_ineligible_reasons[] = {
        "Resize",
        "Dir renamed",
        "Falloc range op",
+       "Data journalling",
        "FC Commit Failed"
 };
 
index 06907d4..3a6e5a1 100644 (file)
@@ -3,9 +3,6 @@
 #ifndef __FAST_COMMIT_H__
 #define __FAST_COMMIT_H__
 
-/* Number of blocks in journal area to allocate for fast commits */
-#define EXT4_NUM_FC_BLKS               256
-
 /* Fast commit tags */
 #define EXT4_FC_TAG_ADD_RANGE          0x0001
 #define EXT4_FC_TAG_DEL_RANGE          0x0002
@@ -100,11 +97,12 @@ enum {
        EXT4_FC_REASON_XATTR = 0,
        EXT4_FC_REASON_CROSS_RENAME,
        EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
-       EXT4_FC_REASON_MEM,
+       EXT4_FC_REASON_NOMEM,
        EXT4_FC_REASON_SWAP_BOOT,
        EXT4_FC_REASON_RESIZE,
        EXT4_FC_REASON_RENAME_DIR,
        EXT4_FC_REASON_FALLOC_RANGE,
+       EXT4_FC_REASON_INODE_JOURNAL_DATA,
        EXT4_FC_COMMIT_FAILED,
        EXT4_FC_REASON_MAX
 };
index d85412d..3ed8c04 100644 (file)
@@ -761,7 +761,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
        if (!daxdev_mapping_supported(vma, dax_dev))
                return -EOPNOTSUPP;
 
-       ext4_fc_start_update(inode);
        file_accessed(file);
        if (IS_DAX(file_inode(file))) {
                vma->vm_ops = &ext4_dax_vm_ops;
@@ -769,7 +768,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
        } else {
                vma->vm_ops = &ext4_file_vm_ops;
        }
-       ext4_fc_stop_update(inode);
        return 0;
 }
 
@@ -782,13 +780,13 @@ static int ext4_sample_last_mounted(struct super_block *sb,
        handle_t *handle;
        int err;
 
-       if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+       if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
                return 0;
 
        if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
                return 0;
 
-       sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+       ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
        /*
         * Sample where the filesystem has been mounted and
         * store it in the superblock for sysadmin convenience
index b232c27..4c2a9fe 100644 (file)
@@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys,
 
        /* Fabricate an rmap entry for the external log device. */
        irec.fmr_physical = journal->j_blk_offset;
-       irec.fmr_length = journal->j_maxlen;
+       irec.fmr_length = journal->j_total_len;
        irec.fmr_owner = EXT4_FMR_OWN_LOG;
        irec.fmr_flags = 0;
 
index 81a545f..a42ca95 100644 (file)
@@ -143,7 +143,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (sb_rdonly(inode->i_sb)) {
                /* Make sure that we read updated s_mount_flags value */
                smp_rmb();
-               if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+               if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
                        ret = -EROFS;
                goto out;
        }
index caa5147..b41512d 100644 (file)
@@ -1880,6 +1880,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
 
        ext4_write_lock_xattr(inode, &no_expand);
        if (!ext4_has_inline_data(inode)) {
+               ext4_write_unlock_xattr(inode, &no_expand);
                *has_inline = 0;
                ext4_journal_stop(handle);
                return 0;
index b96a186..0d8385a 100644 (file)
@@ -327,6 +327,8 @@ stop_handle:
        ext4_xattr_inode_array_free(ea_inode_array);
        return;
 no_delete:
+       if (!list_empty(&EXT4_I(inode)->i_fc_list))
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
 }
 
@@ -730,7 +732,7 @@ out_sem:
                        if (ret)
                                return ret;
                }
-               ext4_fc_track_range(inode, map->m_lblk,
+               ext4_fc_track_range(handle, inode, map->m_lblk,
                            map->m_lblk + map->m_len - 1);
        }
 
@@ -2440,7 +2442,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                        struct super_block *sb = inode->i_sb;
 
                        if (ext4_forced_shutdown(EXT4_SB(sb)) ||
-                           EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+                           ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                                goto invalidate_dirty_pages;
                        /*
                         * Let the uper layers retry transient errors.
@@ -2674,7 +2676,7 @@ static int ext4_writepages(struct address_space *mapping,
         * the stack trace.
         */
        if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
-                    sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                    ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
                ret = -EROFS;
                goto out_writepages;
        }
@@ -3310,8 +3312,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
                        EXT4_I(inode)->i_datasync_tid))
                        return false;
                if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
-                       return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) <
-                               EXT4_I(inode)->i_fc_committed_subtid;
+                       return !list_empty(&EXT4_I(inode)->i_fc_list);
                return true;
        }
 
@@ -4109,7 +4110,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
                up_write(&EXT4_I(inode)->i_data_sem);
        }
-       ext4_fc_track_range(inode, first_block, stop_block);
+       ext4_fc_track_range(handle, inode, first_block, stop_block);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
@@ -5442,14 +5443,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        }
 
                        if (shrink)
-                               ext4_fc_track_range(inode,
+                               ext4_fc_track_range(handle, inode,
                                        (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
                                        inode->i_sb->s_blocksize_bits,
                                        (oldsize > 0 ? oldsize - 1 : 0) >>
                                        inode->i_sb->s_blocksize_bits);
                        else
                                ext4_fc_track_range(
-                                       inode,
+                                       handle, inode,
                                        (oldsize > 0 ? oldsize - 1 : oldsize) >>
                                        inode->i_sb->s_blocksize_bits,
                                        (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
@@ -5699,7 +5700,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
                put_bh(iloc->bh);
                return -EIO;
        }
-       ext4_fc_track_inode(inode);
+       ext4_fc_track_inode(handle, inode);
 
        if (IS_I_VERSION(inode))
                inode_inc_iversion(inode);
index 85abbfb..24af9ed 100644 (file)
@@ -4477,7 +4477,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
 {
        ext4_group_t i, ngroups;
 
-       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                return;
 
        ngroups = ext4_get_groups_count(sb);
@@ -4508,7 +4508,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
 
-       if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                return;
 
        mb_debug(sb, "Can't allocate:"
@@ -5167,7 +5167,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
        struct super_block *sb = ar->inode->i_sb;
        ext4_group_t group;
        ext4_grpblk_t blkoff;
-       int  i;
+       int i = sb->s_blocksize;
        ext4_fsblk_t goal, block;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
index f458d1d..3350926 100644 (file)
@@ -2606,7 +2606,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                       bool excl)
 {
        handle_t *handle;
-       struct inode *inode, *inode_save;
+       struct inode *inode;
        int err, credits, retries = 0;
 
        err = dquot_initialize(dir);
@@ -2624,11 +2624,9 @@ retry:
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
-               inode_save = inode;
-               ihold(inode_save);
                err = ext4_add_nondir(handle, dentry, &inode);
-               ext4_fc_track_create(inode_save, dentry);
-               iput(inode_save);
+               if (!err)
+                       ext4_fc_track_create(handle, dentry);
        }
        if (handle)
                ext4_journal_stop(handle);
@@ -2643,7 +2641,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
                      umode_t mode, dev_t rdev)
 {
        handle_t *handle;
-       struct inode *inode, *inode_save;
+       struct inode *inode;
        int err, credits, retries = 0;
 
        err = dquot_initialize(dir);
@@ -2660,12 +2658,9 @@ retry:
        if (!IS_ERR(inode)) {
                init_special_inode(inode, inode->i_mode, rdev);
                inode->i_op = &ext4_special_inode_operations;
-               inode_save = inode;
-               ihold(inode_save);
                err = ext4_add_nondir(handle, dentry, &inode);
                if (!err)
-                       ext4_fc_track_create(inode_save, dentry);
-               iput(inode_save);
+                       ext4_fc_track_create(handle, dentry);
        }
        if (handle)
                ext4_journal_stop(handle);
@@ -2829,7 +2824,6 @@ out_clear_inode:
                iput(inode);
                goto out_retry;
        }
-       ext4_fc_track_create(inode, dentry);
        ext4_inc_count(dir);
 
        ext4_update_dx_flag(dir);
@@ -2837,6 +2831,7 @@ out_clear_inode:
        if (err)
                goto out_clear_inode;
        d_instantiate_new(dentry, inode);
+       ext4_fc_track_create(handle, dentry);
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
@@ -3171,7 +3166,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
                goto end_rmdir;
        ext4_dec_count(dir);
        ext4_update_dx_flag(dir);
-       ext4_fc_track_unlink(inode, dentry);
+       ext4_fc_track_unlink(handle, dentry);
        retval = ext4_mark_inode_dirty(handle, dir);
 
 #ifdef CONFIG_UNICODE
@@ -3192,13 +3187,12 @@ end_rmdir:
        return retval;
 }
 
-int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
                  struct inode *inode)
 {
        int retval = -ENOENT;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de;
-       handle_t *handle = NULL;
        int skip_remove_dentry = 0;
 
        bh = ext4_find_entry(dir, d_name, &de, NULL);
@@ -3217,14 +3211,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
                if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                        skip_remove_dentry = 1;
                else
-                       goto out_bh;
-       }
-
-       handle = ext4_journal_start(dir, EXT4_HT_DIR,
-                                   EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
-       if (IS_ERR(handle)) {
-               retval = PTR_ERR(handle);
-               goto out_bh;
+                       goto out;
        }
 
        if (IS_DIRSYNC(dir))
@@ -3233,12 +3220,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
        if (!skip_remove_dentry) {
                retval = ext4_delete_entry(handle, dir, de, bh);
                if (retval)
-                       goto out_handle;
+                       goto out;
                dir->i_ctime = dir->i_mtime = current_time(dir);
                ext4_update_dx_flag(dir);
                retval = ext4_mark_inode_dirty(handle, dir);
                if (retval)
-                       goto out_handle;
+                       goto out;
        } else {
                retval = 0;
        }
@@ -3252,15 +3239,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
        inode->i_ctime = current_time(inode);
        retval = ext4_mark_inode_dirty(handle, inode);
 
-out_handle:
-       ext4_journal_stop(handle);
-out_bh:
+out:
        brelse(bh);
        return retval;
 }
 
 static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 {
+       handle_t *handle;
        int retval;
 
        if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
@@ -3278,9 +3264,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        if (retval)
                goto out_trace;
 
-       retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+       handle = ext4_journal_start(dir, EXT4_HT_DIR,
+                                   EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle)) {
+               retval = PTR_ERR(handle);
+               goto out_trace;
+       }
+
+       retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
        if (!retval)
-               ext4_fc_track_unlink(d_inode(dentry), dentry);
+               ext4_fc_track_unlink(handle, dentry);
 #ifdef CONFIG_UNICODE
        /* VFS negative dentries are incompatible with Encoding and
         * Case-insensitiveness. Eventually we'll want avoid
@@ -3291,6 +3284,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        if (IS_CASEFOLDED(dir))
                d_invalidate(dentry);
 #endif
+       if (handle)
+               ext4_journal_stop(handle);
 
 out_trace:
        trace_ext4_unlink_exit(dentry, retval);
@@ -3447,7 +3442,6 @@ retry:
 
        err = ext4_add_entry(handle, dentry, inode);
        if (!err) {
-               ext4_fc_track_link(inode, dentry);
                err = ext4_mark_inode_dirty(handle, inode);
                /* this can happen only for tmpfile being
                 * linked the first time
@@ -3455,6 +3449,7 @@ retry:
                if (inode->i_nlink == 1)
                        ext4_orphan_del(handle, inode);
                d_instantiate(dentry, inode);
+               ext4_fc_track_link(handle, dentry);
        } else {
                drop_nlink(inode);
                iput(inode);
@@ -3915,9 +3910,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                        EXT4_FC_REASON_RENAME_DIR);
        } else {
                if (new.inode)
-                       ext4_fc_track_unlink(new.inode, new.dentry);
-               ext4_fc_track_link(old.inode, new.dentry);
-               ext4_fc_track_unlink(old.inode, old.dentry);
+                       ext4_fc_track_unlink(handle, new.dentry);
+               __ext4_fc_track_link(handle, old.inode, new.dentry);
+               __ext4_fc_track_unlink(handle, old.inode, old.dentry);
        }
 
        if (new.inode) {
index ef4734b..c3b8645 100644 (file)
@@ -686,7 +686,7 @@ static void ext4_handle_error(struct super_block *sb)
        if (!test_opt(sb, ERRORS_CONT)) {
                journal_t *journal = EXT4_SB(sb)->s_journal;
 
-               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                if (journal)
                        jbd2_journal_abort(journal, -EIO);
        }
@@ -904,7 +904,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
        va_end(args);
 
        if (sb_rdonly(sb) == 0) {
-               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                if (EXT4_SB(sb)->s_journal)
                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 
@@ -1716,11 +1716,10 @@ enum {
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
-       Opt_prefetch_block_bitmaps, Opt_no_fc,
+       Opt_prefetch_block_bitmaps,
 #ifdef CONFIG_EXT4_DEBUG
-       Opt_fc_debug_max_replay,
+       Opt_fc_debug_max_replay, Opt_fc_debug_force
 #endif
-       Opt_fc_debug_force
 };
 
 static const match_table_t tokens = {
@@ -1807,9 +1806,8 @@ static const match_table_t tokens = {
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
-       {Opt_no_fc, "no_fc"},
-       {Opt_fc_debug_force, "fc_debug_force"},
 #ifdef CONFIG_EXT4_DEBUG
+       {Opt_fc_debug_force, "fc_debug_force"},
        {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
 #endif
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
@@ -2027,8 +2025,8 @@ static const struct mount_opts {
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
                                                        MOPT_CLEAR | MOPT_Q},
-       {Opt_usrjquota, 0, MOPT_Q},
-       {Opt_grpjquota, 0, MOPT_Q},
+       {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+       {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -2039,11 +2037,9 @@ static const struct mount_opts {
        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
        {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
         MOPT_SET},
-       {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
-        MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
+#ifdef CONFIG_EXT4_DEBUG
        {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
         MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
-#ifdef CONFIG_EXT4_DEBUG
        {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
 #endif
        {Opt_err, 0, 0}
@@ -2153,7 +2149,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
                return 1;
        case Opt_abort:
-               sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
                return 1;
        case Opt_i_version:
                sb->s_flags |= SB_I_VERSION;
@@ -3976,7 +3972,7 @@ int ext4_calculate_overhead(struct super_block *sb)
         * loaded or not
         */
        if (sbi->s_journal && !sbi->s_journal_bdev)
-               overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+               overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
        else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
                /* j_inum for internal journal is non-zero */
                j_inode = ext4_get_journal_inode(sb, j_inum);
@@ -4340,9 +4336,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #endif
 
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+               printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
                /* can't mount with both data=journal and dioread_nolock. */
                clear_opt(sb, DIOREAD_NOLOCK);
+               clear_opt2(sb, JOURNAL_FAST_COMMIT);
                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "both data=journal and delalloc");
@@ -4777,8 +4774,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
        INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
        sbi->s_fc_bytes = 0;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
-       sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
        spin_lock_init(&sbi->s_fc_lock);
        memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
        sbi->s_fc_replay_state.fc_regions = NULL;
@@ -4857,6 +4854,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount_wq;
        }
 
+       if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+               !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+                                         JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+               ext4_msg(sb, KERN_ERR,
+                       "Failed to set fast commit journal feature");
+               goto failed_mount_wq;
+       }
+
        /* We have now updated the journal if required, so we can
         * validate the data journaling mode. */
        switch (test_opt(sb, DATA_FLAGS)) {
@@ -5872,7 +5877,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
-       if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
 
        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -5886,7 +5891,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
        }
 
        if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
-               if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+               if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
                        err = -EROFS;
                        goto restore_opts;
                }
@@ -6560,10 +6565,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
        brelse(bh);
 out:
        if (inode->i_size < off + len) {
-               ext4_fc_track_range(inode,
-                       (inode->i_size > 0 ? inode->i_size - 1 : 0)
-                               >> inode->i_sb->s_blocksize_bits,
-                       (off + len) >> inode->i_sb->s_blocksize_bits);
                i_size_write(inode, off + len);
                EXT4_I(inode)->i_disksize = inode->i_size;
                err2 = ext4_mark_inode_dirty(handle, inode);
index 263f02a..472932b 100644 (file)
@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
  * for a checkpoint to free up some space in the log.
  */
 void __jbd2_log_wait_for_space(journal_t *journal)
+__acquires(&journal->j_state_lock)
+__releases(&journal->j_state_lock)
 {
        int nblocks, space_left;
        /* assert_spin_locked(&journal->j_state_lock); */
index fa688e1..b121d7d 100644 (file)
@@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                schedule();
                write_lock(&journal->j_state_lock);
                finish_wait(&journal->j_fc_wait, &wait);
+               /*
+                * TODO: by blocking fast commits here, we are increasing
+                * fsync() latency slightly. Strictly speaking, we don't need
+                * to block fast commits until the transaction enters T_FLUSH
+                * state. So an optimization is possible where we block new fast
+                * commits here and wait for existing ones to complete
+                * just before we enter T_FLUSH. That way, the existing fast
+                * commits and this full commit can proceed parallely.
+                */
        }
        write_unlock(&journal->j_state_lock);
 
@@ -801,7 +810,7 @@ start_journal_io:
                if (first_block < journal->j_tail)
                        freed += journal->j_last - journal->j_first;
                /* Update tail only if we free significant amount of space */
-               if (freed < journal->j_maxlen / 4)
+               if (freed < jbd2_journal_get_max_txn_bufs(journal))
                        update_tail = 0;
        }
        J_ASSERT(commit_transaction->t_state == T_COMMIT);
index 0c7c42b..0c3d5e3 100644 (file)
@@ -727,6 +727,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
  */
 int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
 {
+       if (unlikely(is_journal_aborted(journal)))
+               return -EIO;
        /*
         * Fast commits only allowed if at least one full commit has
         * been processed.
@@ -734,10 +736,12 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
        if (!journal->j_stats.ts_tid)
                return -EINVAL;
 
-       if (tid <= journal->j_commit_sequence)
+       write_lock(&journal->j_state_lock);
+       if (tid <= journal->j_commit_sequence) {
+               write_unlock(&journal->j_state_lock);
                return -EALREADY;
+       }
 
-       write_lock(&journal->j_state_lock);
        if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
            (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
                DEFINE_WAIT(wait);
@@ -777,13 +781,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
 
 int jbd2_fc_end_commit(journal_t *journal)
 {
-       return __jbd2_fc_end_commit(journal, 0, 0);
+       return __jbd2_fc_end_commit(journal, 0, false);
 }
 EXPORT_SYMBOL(jbd2_fc_end_commit);
 
-int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+int jbd2_fc_end_commit_fallback(journal_t *journal)
 {
-       return __jbd2_fc_end_commit(journal, tid, 1);
+       tid_t tid;
+
+       read_lock(&journal->j_state_lock);
+       tid = journal->j_running_transaction ?
+               journal->j_running_transaction->t_tid : 0;
+       read_unlock(&journal->j_state_lock);
+       return __jbd2_fc_end_commit(journal, tid, true);
 }
 EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
 
@@ -865,7 +875,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
        int fc_off;
 
        *bh_out = NULL;
-       write_lock(&journal->j_state_lock);
 
        if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
                fc_off = journal->j_fc_off;
@@ -874,7 +883,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
        } else {
                ret = -EINVAL;
        }
-       write_unlock(&journal->j_state_lock);
 
        if (ret)
                return ret;
@@ -887,11 +895,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
        if (!bh)
                return -ENOMEM;
 
-       lock_buffer(bh);
 
-       clear_buffer_uptodate(bh);
-       set_buffer_dirty(bh);
-       unlock_buffer(bh);
        journal->j_fc_wbuf[fc_off] = bh;
 
        *bh_out = bh;
@@ -909,9 +913,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
        struct buffer_head *bh;
        int i, j_fc_off;
 
-       read_lock(&journal->j_state_lock);
        j_fc_off = journal->j_fc_off;
-       read_unlock(&journal->j_state_lock);
 
        /*
         * Wait in reverse order to minimize chances of us being woken up before
@@ -939,9 +941,7 @@ int jbd2_fc_release_bufs(journal_t *journal)
        struct buffer_head *bh;
        int i, j_fc_off;
 
-       read_lock(&journal->j_state_lock);
        j_fc_off = journal->j_fc_off;
-       read_unlock(&journal->j_state_lock);
 
        /*
         * Wait in reverse order to minimize chances of us being woken up before
@@ -1348,23 +1348,16 @@ static journal_t *journal_init_common(struct block_device *bdev,
        journal->j_dev = bdev;
        journal->j_fs_dev = fs_dev;
        journal->j_blk_offset = start;
-       journal->j_maxlen = len;
+       journal->j_total_len = len;
        /* We need enough buffers to write out full descriptor block. */
        n = journal->j_blocksize / jbd2_min_tag_size();
        journal->j_wbufsize = n;
+       journal->j_fc_wbuf = NULL;
        journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
                                        GFP_KERNEL);
        if (!journal->j_wbuf)
                goto err_cleanup;
 
-       if (journal->j_fc_wbufsize > 0) {
-               journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-                                       sizeof(struct buffer_head *),
-                                       GFP_KERNEL);
-               if (!journal->j_fc_wbuf)
-                       goto err_cleanup;
-       }
-
        bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
        if (!bh) {
                pr_err("%s: Cannot get buffer for journal superblock\n",
@@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev,
 
 err_cleanup:
        kfree(journal->j_wbuf);
-       kfree(journal->j_fc_wbuf);
        jbd2_journal_destroy_revoke(journal);
        kfree(journal);
        return NULL;
 }
 
-int jbd2_fc_init(journal_t *journal, int num_fc_blks)
-{
-       journal->j_fc_wbufsize = num_fc_blks;
-       journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
-                               sizeof(struct buffer_head *), GFP_KERNEL);
-       if (!journal->j_fc_wbuf)
-               return -ENOMEM;
-       return 0;
-}
-EXPORT_SYMBOL(jbd2_fc_init);
-
 /* jbd2_journal_init_dev and jbd2_journal_init_inode:
  *
  * Create a journal structure assigned some fixed set of disk blocks to
@@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal)
        }
 
        journal->j_first = first;
-
-       if (jbd2_has_feature_fast_commit(journal) &&
-           journal->j_fc_wbufsize > 0) {
-               journal->j_fc_last = last;
-               journal->j_last = last - journal->j_fc_wbufsize;
-               journal->j_fc_first = journal->j_last + 1;
-               journal->j_fc_off = 0;
-       } else {
-               journal->j_last = last;
-       }
+       journal->j_last = last;
 
        journal->j_head = journal->j_first;
        journal->j_tail = journal->j_first;
@@ -1531,7 +1503,14 @@ static int journal_reset(journal_t *journal)
        journal->j_commit_sequence = journal->j_transaction_sequence - 1;
        journal->j_commit_request = journal->j_commit_sequence;
 
-       journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+       journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
+
+       /*
+        * Now that journal recovery is done, turn fast commits off here. This
+        * way, if fast commit was enabled before the crash but if now FS has
+        * disabled it, we don't enable fast commits.
+        */
+       jbd2_clear_feature_fast_commit(journal);
 
        /*
         * As a special case, if the on-disk copy is already marked as needing
@@ -1792,15 +1771,15 @@ static int journal_get_superblock(journal_t *journal)
                goto out;
        }
 
-       if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
-               journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
-       else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+       if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+               journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+       else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
                printk(KERN_WARNING "JBD2: journal file too short\n");
                goto out;
        }
 
        if (be32_to_cpu(sb->s_first) == 0 ||
-           be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+           be32_to_cpu(sb->s_first) >= journal->j_total_len) {
                printk(KERN_WARNING
                        "JBD2: Invalid start block of journal: %u\n",
                        be32_to_cpu(sb->s_first));
@@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal)
 {
        int err;
        journal_superblock_t *sb;
+       int num_fc_blocks;
 
        err = journal_get_superblock(journal);
        if (err)
@@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal)
        journal->j_tail = be32_to_cpu(sb->s_start);
        journal->j_first = be32_to_cpu(sb->s_first);
        journal->j_errno = be32_to_cpu(sb->s_errno);
+       journal->j_last = be32_to_cpu(sb->s_maxlen);
 
-       if (jbd2_has_feature_fast_commit(journal) &&
-           journal->j_fc_wbufsize > 0) {
+       if (jbd2_has_feature_fast_commit(journal)) {
                journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
-               journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+               num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+               if (!num_fc_blocks)
+                       num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+               if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
+                       journal->j_last = journal->j_fc_last - num_fc_blocks;
                journal->j_fc_first = journal->j_last + 1;
                journal->j_fc_off = 0;
-       } else {
-               journal->j_last = be32_to_cpu(sb->s_maxlen);
        }
 
        return 0;
@@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal)
         */
        journal->j_flags &= ~JBD2_ABORT;
 
-       if (journal->j_fc_wbufsize > 0)
-               jbd2_journal_set_features(journal, 0, 0,
-                                         JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
        /* OK, we've finished with the dynamic journal bits:
         * reinitialise the dynamic contents of the superblock in memory
         * and reset them on disk. */
@@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal)
                jbd2_journal_destroy_revoke(journal);
        if (journal->j_chksum_driver)
                crypto_free_shash(journal->j_chksum_driver);
-       if (journal->j_fc_wbufsize > 0)
-               kfree(journal->j_fc_wbuf);
+       kfree(journal->j_fc_wbuf);
        kfree(journal->j_wbuf);
        kfree(journal);
 
@@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp
        return 0;
 }
 
+static int
+jbd2_journal_initialize_fast_commit(journal_t *journal)
+{
+       journal_superblock_t *sb = journal->j_superblock;
+       unsigned long long num_fc_blks;
+
+       num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
+       if (num_fc_blks == 0)
+               num_fc_blks = JBD2_MIN_FC_BLOCKS;
+       if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
+               return -ENOSPC;
+
+       /* Are we called twice? */
+       WARN_ON(journal->j_fc_wbuf != NULL);
+       journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
+                               sizeof(struct buffer_head *), GFP_KERNEL);
+       if (!journal->j_fc_wbuf)
+               return -ENOMEM;
+
+       journal->j_fc_wbufsize = num_fc_blks;
+       journal->j_fc_last = journal->j_last;
+       journal->j_last = journal->j_fc_last - num_fc_blks;
+       journal->j_fc_first = journal->j_last + 1;
+       journal->j_fc_off = 0;
+       journal->j_free = journal->j_last - journal->j_first;
+       journal->j_max_transaction_buffers =
+               jbd2_journal_get_max_txn_bufs(journal);
+
+       return 0;
+}
+
 /**
  * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
  * @journal: Journal to act on.
@@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
 
        sb = journal->j_superblock;
 
+       if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
+               if (jbd2_journal_initialize_fast_commit(journal)) {
+                       pr_err("JBD2: Cannot enable fast commits.\n");
+                       return 0;
+               }
+       }
+
        /* Load the checksum driver if necessary */
        if ((journal->j_chksum_driver == NULL) &&
            INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
index eb26061..dc0694f 100644 (file)
@@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start)
 
        /* Do up to 128K of readahead */
        max = start + (128 * 1024 / journal->j_blocksize);
-       if (max > journal->j_maxlen)
-               max = journal->j_maxlen;
+       if (max > journal->j_total_len)
+               max = journal->j_total_len;
 
        /* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
         * a time to the block device IO layer. */
@@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 
        *bhp = NULL;
 
-       if (offset >= journal->j_maxlen) {
+       if (offset >= journal->j_total_len) {
                printk(KERN_ERR "JBD2: corrupted journal superblock\n");
                return -EFSCORRUPTED;
        }
index 4398573..d54f046 100644 (file)
@@ -195,8 +195,10 @@ static void wait_transaction_switching(journal_t *journal)
        DEFINE_WAIT(wait);
 
        if (WARN_ON(!journal->j_running_transaction ||
-                   journal->j_running_transaction->t_state != T_SWITCH))
+                   journal->j_running_transaction->t_state != T_SWITCH)) {
+               read_unlock(&journal->j_state_lock);
                return;
+       }
        prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
                        TASK_UNINTERRUPTIBLE);
        read_unlock(&journal->j_state_lock);
index b9a9d69..db52e84 100644 (file)
@@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
                goto done;
        }
 
-       trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
+       trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);
 
        *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
                  OCFS2_JOURNAL_DIRTY_FL);
index 1d5566a..1c49fd6 100644 (file)
@@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags);
 extern void jbd2_free(void *ptr, size_t size);
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
+#define JBD2_MIN_FC_BLOCKS     256
 
 #ifdef __KERNEL__
 
@@ -944,8 +945,9 @@ struct journal_s
        /**
         * @j_fc_off:
         *
-        * Number of fast commit blocks currently allocated.
-        * [j_state_lock].
+        * Number of fast commit blocks currently allocated. Accessed only
+        * during fast commit. Currently only process can do fast commit, so
+        * this field is not protected by any lock.
         */
        unsigned long           j_fc_off;
 
@@ -988,9 +990,9 @@ struct journal_s
        struct block_device     *j_fs_dev;
 
        /**
-        * @j_maxlen: Total maximum capacity of the journal region on disk.
+        * @j_total_len: Total maximum capacity of the journal region on disk.
         */
-       unsigned int            j_maxlen;
+       unsigned int            j_total_len;
 
        /**
         * @j_reserved_credits:
@@ -1108,8 +1110,9 @@ struct journal_s
        struct buffer_head      **j_wbuf;
 
        /**
-        * @j_fc_wbuf: Array of fast commit bhs for
-        * jbd2_journal_commit_transaction.
+        * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only
+        * during a fast commit. Currently only process can do fast commit, so
+        * this field is not protected by any lock.
         */
        struct buffer_head      **j_fc_wbuf;
 
@@ -1614,16 +1617,20 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
 extern int jbd2_cleanup_journal_tail(journal_t *);
 
 /* Fast commit related APIs */
-int jbd2_fc_init(journal_t *journal, int num_fc_blks);
 int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
 int jbd2_fc_end_commit(journal_t *journal);
-int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
+int jbd2_fc_end_commit_fallback(journal_t *journal);
 int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
 int jbd2_submit_inode_data(struct jbd2_inode *jinode);
 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
 int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
 int jbd2_fc_release_bufs(journal_t *journal);
 
+static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal)
+{
+       return (journal->j_total_len - journal->j_fc_wbufsize) / 4;
+}
+
 /*
  * is_journal_abort
  *
index b14314f..70ae549 100644 (file)
@@ -100,11 +100,12 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
                { EXT4_FC_REASON_XATTR,         "XATTR"},               \
                { EXT4_FC_REASON_CROSS_RENAME,  "CROSS_RENAME"},        \
                { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \
-               { EXT4_FC_REASON_MEM,   "NO_MEM"},                      \
+               { EXT4_FC_REASON_NOMEM, "NO_MEM"},                      \
                { EXT4_FC_REASON_SWAP_BOOT,     "SWAP_BOOT"},           \
                { EXT4_FC_REASON_RESIZE,        "RESIZE"},              \
                { EXT4_FC_REASON_RENAME_DIR,    "RENAME_DIR"},          \
-               { EXT4_FC_REASON_FALLOC_RANGE,  "FALLOC_RANGE"})
+               { EXT4_FC_REASON_FALLOC_RANGE,  "FALLOC_RANGE"},        \
+               { EXT4_FC_REASON_INODE_JOURNAL_DATA,    "INODE_JOURNAL_DATA"})
 
 TRACE_EVENT(ext4_other_inode_update_time,
        TP_PROTO(struct inode *inode, ino_t orig_ino),
@@ -2917,17 +2918,18 @@ TRACE_EVENT(ext4_fc_stats,
                    ),
 
            TP_printk("dev %d:%d fc ineligible reasons:\n"
-                     "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; "
+                     "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
                      "num_commits:%ld, ineligible: %ld, numblks: %ld",
                      MAJOR(__entry->dev), MINOR(__entry->dev),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
-                     FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM),
+                     FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
                      FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+                     FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
                      __entry->sbi->s_fc_stats.fc_num_commits,
                      __entry->sbi->s_fc_stats.fc_ineligible_commits,
                      __entry->sbi->s_fc_stats.fc_numblks)