ext4 / jbd2: add fast commit initialization
[linux-2.6-microblaze.git] / fs / ext4 / super.c
index 61af903..41da649 100644 (file)
@@ -141,27 +141,115 @@ MODULE_ALIAS_FS("ext3");
 MODULE_ALIAS("ext3");
 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 
+
+/*
+ * Submit a read of @bh.  Completion is reported through @end_io, or through
+ * end_buffer_read_sync when @end_io is NULL.  get_bh() is called on the
+ * buffer head right before the read is submitted.
+ */
+static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags,
+                                 bh_end_io_t *end_io)
+{
+       bh_end_io_t *completion = end_io;
+
+       if (!completion)
+               completion = end_buffer_read_sync;
+
+       /*
+        * Whatever was verified earlier says nothing about the contents
+        * that are about to be read back from disk (we may be re-reading
+        * after a write out error), so drop the verified bit and have the
+        * buffer contents rechecked.
+        */
+       clear_buffer_verified(bh);
+
+       bh->b_end_io = completion;
+       get_bh(bh);
+       submit_bh(REQ_OP_READ, op_flags, bh);
+}
+
+/*
+ * Start reading @bh without waiting for the I/O to finish.  The buffer must
+ * be locked on entry.  If it is already up to date no read is submitted and
+ * the buffer is just unlocked.
+ */
+void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags,
+                        bh_end_io_t *end_io)
+{
+       BUG_ON(!buffer_locked(bh));
+
+       if (!ext4_buffer_uptodate(bh)) {
+               __ext4_read_bh(bh, op_flags, end_io);
+               return;
+       }
+
+       unlock_buffer(bh);
+}
+
+/*
+ * Read @bh and wait for the read to complete.  The buffer must be locked on
+ * entry.  Returns 0 when the buffer was already up to date or is up to date
+ * after the read, -EIO otherwise.
+ */
+int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io)
+{
+       BUG_ON(!buffer_locked(bh));
+
+       /* Nothing to read: release the lock and report success. */
+       if (ext4_buffer_uptodate(bh)) {
+               unlock_buffer(bh);
+               return 0;
+       }
+
+       __ext4_read_bh(bh, op_flags, end_io);
+       wait_on_buffer(bh);
+
+       return buffer_uptodate(bh) ? 0 : -EIO;
+}
+
+/*
+ * Try to lock @bh and read it.  With @wait set the result is 0 or -EIO
+ * depending on whether the buffer ended up up to date; without @wait the
+ * function always returns 0.  When the buffer lock cannot be taken no read
+ * is submitted from here.
+ */
+int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
+{
+       if (!trylock_buffer(bh)) {
+               /* Lock is held elsewhere: at most wait on the buffer. */
+               if (!wait)
+                       return 0;
+
+               wait_on_buffer(bh);
+               return buffer_uptodate(bh) ? 0 : -EIO;
+       }
+
+       /* We own the buffer lock; the read helpers take it from here. */
+       if (!wait) {
+               ext4_read_bh_nowait(bh, op_flags, NULL);
+               return 0;
+       }
+
+       return ext4_read_bh(bh, op_flags, NULL);
+}
+
 /*
- * This works like sb_bread() except it uses ERR_PTR for error
+ * This works like __bread_gfp() except it uses ERR_PTR for error
  * returns.  Currently with sb_bread it's impossible to distinguish
  * between ENOMEM and EIO situations (since both result in a NULL
  * return.
  */
-struct buffer_head *
-ext4_sb_bread(struct super_block *sb, sector_t block, int op_flags)
+static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
+                                              sector_t block, int op_flags,
+                                              gfp_t gfp)
 {
-       struct buffer_head *bh = sb_getblk(sb, block);
+       struct buffer_head *bh;
+       int ret;
 
+       bh = sb_getblk_gfp(sb, block, gfp);
        if (bh == NULL)
                return ERR_PTR(-ENOMEM);
        if (ext4_buffer_uptodate(bh))
                return bh;
-       ll_rw_block(REQ_OP_READ, REQ_META | op_flags, 1, &bh);
-       wait_on_buffer(bh);
-       if (buffer_uptodate(bh))
-               return bh;
-       put_bh(bh);
-       return ERR_PTR(-EIO);
+
+       /* Not up to date yet: read it as metadata and wait for the result. */
+       ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
+       if (ret) {
+               /* Read failed: release the buffer head, hand back the error. */
+               put_bh(bh);
+               return ERR_PTR(ret);
+       }
+       return bh;
+}
+
+/*
+ * Read @block of @sb through __ext4_sb_bread_gfp(), passing __GFP_MOVABLE
+ * as the gfp argument.  Returns the buffer head or an ERR_PTR() value.
+ */
+struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
+                                  int op_flags)
+{
+       const gfp_t gfp = __GFP_MOVABLE;
+
+       return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
+}
+
+/*
+ * Read @block of @sb through __ext4_sb_bread_gfp() with no additional op
+ * flags and a gfp argument of 0.  Returns the buffer head or an ERR_PTR()
+ * value.
+ */
+struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
+                                           sector_t block)
+{
+       const int op_flags = 0;
+       const gfp_t gfp = 0;
+
+       return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
+}
+
+/*
+ * Kick off a REQ_RAHEAD read of @block without waiting for it.  Nothing is
+ * reported to the caller; failing to get a buffer head is simply ignored.
+ */
+void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
+{
+       struct buffer_head *bh;
+
+       bh = sb_getblk_gfp(sb, block, 0);
+       if (unlikely(!bh))
+               return;
+
+       ext4_read_bh_lock(bh, REQ_RAHEAD, false);
+       brelse(bh);
 }
 
 static int ext4_verify_csum_type(struct super_block *sb,
@@ -483,6 +571,89 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
        spin_unlock(&sbi->s_md_lock);
 }
 
+/*
+ * This writepage callback for write_cache_pages()
+ * takes care of a few cases after page cleaning.
+ *
+ * write_cache_pages() already checks for dirty pages
+ * and calls clear_page_dirty_for_io(), which we want,
+ * to write protect the pages.
+ *
+ * However, we may have to redirty a page (see below.)
+ *
+ * @data is the transaction the page's buffers are compared against.
+ * The callback does not submit any I/O itself: it either redirties the
+ * page or leaves it alone, and returns AOP_WRITEPAGE_ACTIVATE in both
+ * cases.
+ */
+static int ext4_journalled_writepage_callback(struct page *page,
+                                             struct writeback_control *wbc,
+                                             void *data)
+{
+       transaction_t *transaction = (transaction_t *) data;
+       struct buffer_head *bh, *head;
+       struct journal_head *jh;
+
+       /* Walk the circular list of buffers attached to the page. */
+       bh = head = page_buffers(page);
+       do {
+               /*
+                * We have to redirty a page in these cases:
+                * 1) If buffer is dirty, it means the page was dirty because it
+                * contains a buffer that needs checkpointing. So the dirty bit
+                * needs to be preserved so that checkpointing writes the buffer
+                * properly.
+                * 2) If buffer is not part of the committing transaction
+                * (we may have just accidentally come across this buffer because
+                * inode range tracking is not exact) or if the currently running
+                * transaction already contains this buffer as well, dirty bit
+                * needs to be preserved so that the buffer gets writeprotected
+                * properly on running transaction's commit.
+                */
+               jh = bh2jh(bh);
+               if (buffer_dirty(bh) ||
+                   (jh && (jh->b_transaction != transaction ||
+                           jh->b_next_transaction))) {
+                       redirty_page_for_writepage(wbc, page);
+                       /* One such buffer is enough; skip the remaining ones. */
+                       goto out;
+               }
+       } while ((bh = bh->b_this_page) != head);
+
+out:
+       return AOP_WRITEPAGE_ACTIVATE;
+}
+
+/*
+ * Run ext4_journalled_writepage_callback() over the pages of @jinode's inode
+ * that fall into the recorded dirty range [i_dirty_start, i_dirty_end],
+ * passing jinode->i_transaction as the callback data.  Returns whatever
+ * write_cache_pages() returns.
+ */
+static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+       struct writeback_control range_wbc = {
+               .range_start = jinode->i_dirty_start,
+               .range_end = jinode->i_dirty_end,
+               .nr_to_write = LONG_MAX,
+               .sync_mode = WB_SYNC_ALL,
+       };
+       struct address_space *mapping;
+
+       mapping = jinode->i_vfs_inode->i_mapping;
+
+       return write_cache_pages(mapping, &range_wbc,
+                                ext4_journalled_writepage_callback,
+                                jinode->i_transaction);
+}
+
+/*
+ * Submit the data buffers of @jinode.  Inodes whose data is journalled go
+ * through the ext4 specific helper, all others through the generic jbd2
+ * one.  The helper's return value is passed back unchanged.
+ */
+static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+       if (ext4_should_journal_data(jinode->i_vfs_inode))
+               return ext4_journalled_submit_inode_data_buffers(jinode);
+
+       return jbd2_journal_submit_inode_data_buffers(jinode);
+}
+
+/*
+ * Finish the data buffers of @jinode.  For inodes whose data is journalled
+ * nothing is done and 0 is returned; for all others the generic jbd2 helper
+ * is called and its result returned.
+ */
+static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
+{
+       if (ext4_should_journal_data(jinode->i_vfs_inode))
+               return 0;
+
+       return jbd2_journal_finish_inode_data_buffers(jinode);
+}
+
 static bool system_going_down(void)
 {
        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
@@ -1538,7 +1709,7 @@ enum {
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
-       Opt_prefetch_block_bitmaps,
+       Opt_prefetch_block_bitmaps, Opt_no_fc,
 };
 
 static const match_table_t tokens = {
@@ -1625,6 +1796,7 @@ static const match_table_t tokens = {
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
+       {Opt_no_fc, "no_fc"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
        {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
@@ -1751,6 +1923,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
 #define MOPT_STRING    0x0400
 #define MOPT_SKIP      0x0800
+#define        MOPT_2          0x1000  /* mount_opt bit is kept in s_mount_opt2 */
 
 static const struct mount_opts {
        int     token;
@@ -1851,6 +2024,8 @@ static const struct mount_opts {
        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
        {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
         MOPT_SET},
+       {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
+        MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
        {Opt_err, 0, 0}
 };
 
@@ -2227,10 +2402,17 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                        WARN_ON(1);
                        return -1;
                }
-               if (arg != 0)
-                       sbi->s_mount_opt |= m->mount_opt;
-               else
-                       sbi->s_mount_opt &= ~m->mount_opt;
+               if (m->flags & MOPT_2) {
+                       if (arg != 0)
+                               sbi->s_mount_opt2 |= m->mount_opt;
+                       else
+                               sbi->s_mount_opt2 &= ~m->mount_opt;
+               } else {
+                       if (arg != 0)
+                               sbi->s_mount_opt |= m->mount_opt;
+                       else
+                               sbi->s_mount_opt &= ~m->mount_opt;
+               }
        }
        return 1;
 }
@@ -2447,6 +2629,9 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
                SEQ_OPTS_PUTS("dax=inode");
        }
 
+       if (test_opt2(sb, JOURNAL_FAST_COMMIT))
+               SEQ_OPTS_PUTS("fast_commit");
+
        ext4_show_quota_options(seq, sb);
        return 0;
 }
@@ -3879,8 +4064,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                logical_sb_block = sb_block;
        }
 
-       if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
+       bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+       if (IS_ERR(bh)) {
                ext4_msg(sb, KERN_ERR, "unable to read superblock");
+               ret = PTR_ERR(bh);
+               bh = NULL;
                goto out_fail;
        }
        /*
@@ -3947,6 +4135,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
        set_opt(sb, POSIX_ACL);
 #endif
+       if (ext4_has_feature_fast_commit(sb))
+               set_opt2(sb, JOURNAL_FAST_COMMIT);
        /* don't forget to enable journal_csum when metadata_csum is enabled. */
        if (ext4_has_metadata_csum(sb))
                set_opt(sb, JOURNAL_CHECKSUM);
@@ -4276,10 +4466,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                brelse(bh);
                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
                offset = do_div(logical_sb_block, blocksize);
-               bh = sb_bread_unmovable(sb, logical_sb_block);
-               if (!bh) {
+               bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+               if (IS_ERR(bh)) {
                        ext4_msg(sb, KERN_ERR,
                               "Can't read superblock on 2nd try");
+                       ret = PTR_ERR(bh);
+                       bh = NULL;
                        goto failed_mount;
                }
                es = (struct ext4_super_block *)(bh->b_data + offset);
@@ -4491,18 +4683,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        /* Pre-read the descriptors into the buffer cache */
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logical_sb_block, i);
-               sb_breadahead_unmovable(sb, block);
+               ext4_sb_breadahead_unmovable(sb, block);
        }
 
        for (i = 0; i < db_count; i++) {
                struct buffer_head *bh;
 
                block = descriptor_loc(sb, logical_sb_block, i);
-               bh = sb_bread_unmovable(sb, block);
-               if (!bh) {
+               bh = ext4_sb_bread_unmovable(sb, block);
+               if (IS_ERR(bh)) {
                        ext4_msg(sb, KERN_ERR,
                               "can't read group descriptor %d", i);
                        db_count = i;
+                       ret = PTR_ERR(bh);
+                       bh = NULL;
                        goto failed_mount2;
                }
                rcu_read_lock();
@@ -4599,6 +4793,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
                clear_opt(sb, JOURNAL_CHECKSUM);
                clear_opt(sb, DATA_FLAGS);
+               clear_opt2(sb, JOURNAL_FAST_COMMIT);
                sbi->s_journal = NULL;
                needs_recovery = 0;
                goto no_journal;
@@ -4657,6 +4852,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
+       sbi->s_journal->j_submit_inode_data_buffers =
+               ext4_journal_submit_inode_data_buffers;
+       sbi->s_journal->j_finish_inode_data_buffers =
+               ext4_journal_finish_inode_data_buffers;
 
 no_journal:
        if (!test_opt(sb, NO_MBCACHE)) {
@@ -4971,6 +5170,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
        journal->j_commit_interval = sbi->s_commit_interval;
        journal->j_min_batch_time = sbi->s_min_batch_time;
        journal->j_max_batch_time = sbi->s_max_batch_time;
+       ext4_fc_init(sb, journal);
 
        write_lock(&journal->j_state_lock);
        if (test_opt(sb, BARRIER))
@@ -5113,9 +5313,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
                goto out_bdev;
        }
        journal->j_private = sb;
-       ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
-       wait_on_buffer(journal->j_sb_buffer);
-       if (!buffer_uptodate(journal->j_sb_buffer)) {
+       if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
                goto out_journal;
        }
@@ -6045,6 +6243,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
        /* Quotafile not on the same filesystem? */
        if (path->dentry->d_sb != sb)
                return -EXDEV;
+
+       /* Quota already enabled for this file? */
+       if (IS_NOQUOTA(d_inode(path->dentry)))
+               return -EBUSY;
+
        /* Journaling quota? */
        if (EXT4_SB(sb)->s_qf_names[type]) {
                /* Quotafile not in fs root? */