ext4: add discard/zeroout flags to journal flush
author: Leah Rumancik <leah.rumancik@gmail.com>
Tue, 18 May 2021 15:13:25 +0000 (15:13 +0000)
committer: Theodore Ts'o <tytso@mit.edu>
Tue, 22 Jun 2021 23:27:10 +0000 (19:27 -0400)
Add a flags argument to jbd2_journal_flush to enable discarding or
zero-filling the journal blocks while flushing the journal.

Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Link: https://lore.kernel.org/r/20210518151327.130198-1-leah.rumancik@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/super.c
fs/jbd2/journal.c
fs/ocfs2/alloc.c
fs/ocfs2/journal.c
include/linux/jbd2.h

index 211acfb..e1ff4eb 100644 (file)
@@ -3223,7 +3223,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
                ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
                journal = EXT4_JOURNAL(inode);
                jbd2_journal_lock_updates(journal);
-               err = jbd2_journal_flush(journal);
+               err = jbd2_journal_flush(journal, 0);
                jbd2_journal_unlock_updates(journal);
 
                if (err)
@@ -6005,7 +6005,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
        if (val)
                ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else {
-               err = jbd2_journal_flush(journal);
+               err = jbd2_journal_flush(journal, 0);
                if (err < 0) {
                        jbd2_journal_unlock_updates(journal);
                        percpu_up_write(&sbi->s_writepages_rwsem);
index a96d672..93e9419 100644 (file)
@@ -706,7 +706,7 @@ static long ext4_ioctl_group_add(struct file *file,
        err = ext4_group_add(sb, input);
        if (EXT4_SB(sb)->s_journal) {
                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-               err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+               err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        }
        if (err == 0)
@@ -884,7 +884,7 @@ setversion_out:
                err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
                if (EXT4_SB(sb)->s_journal) {
                        jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
                }
                if (err == 0)
@@ -1027,7 +1027,7 @@ mext_out:
                if (EXT4_SB(sb)->s_journal) {
                        ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
                        jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+                       err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
                }
                if (err == 0)
index 3b62035..ad3919d 100644 (file)
@@ -5653,7 +5653,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
                return 0;
        }
        jbd2_journal_lock_updates(journal);
-       err = jbd2_journal_flush(journal);
+       err = jbd2_journal_flush(journal, 0);
        if (err < 0)
                goto out;
 
@@ -5795,7 +5795,7 @@ static int ext4_freeze(struct super_block *sb)
                 * Don't clear the needs_recovery flag if we failed to
                 * flush the journal.
                 */
-               error = jbd2_journal_flush(journal);
+               error = jbd2_journal_flush(journal, 0);
                if (error < 0)
                        goto out;
 
@@ -6389,7 +6389,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                 * otherwise be livelocked...
                 */
                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-               err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+               err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
                if (err)
                        return err;
index 2dc9444..3a2ed60 100644 (file)
@@ -1686,6 +1686,110 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
        write_unlock(&journal->j_state_lock);
 }
 
+/**
+ * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
+ * @journal: The journal to erase.
+ * @flags: A discard/zeroout request is sent for each physically contiguous
+ *     region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
+ *     JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
+ *     to perform.
+ *
+ * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
+ * will be explicitly written if no hardware offload is available, see
+ * blkdev_issue_zeroout for more details.
+ *
+ * Return: 0 on success; -EINVAL if @flags is empty, contains unknown bits or
+ * requests both operations; -ENXIO if the journal device has no request
+ * queue; -EOPNOTSUPP if a discard is requested but the queue does not
+ * support it; otherwise the error returned by the block mapping lookup, the
+ * discard/zeroout request or the final cache flush.
+ */
+static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
+{
+       int err = 0;
+       unsigned long block, log_offset; /* logical */
+       unsigned long long phys_block, block_start, block_stop; /* physical */
+       loff_t byte_start, byte_stop, byte_count;
+       struct request_queue *q = bdev_get_queue(journal->j_dev);
+
+       /* flags must be set to either discard or zeroout */
+       if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
+                       ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
+                       (flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
+               return -EINVAL;
+
+       if (!q)
+               return -ENXIO;
+
+       if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
+               return -EOPNOTSUPP;
+
+       /*
+        * lookup block mapping and issue discard/zeroout for each
+        * contiguous region
+        */
+       log_offset = be32_to_cpu(journal->j_superblock->s_first);
+       /* ~0ULL in block_start means "no region is currently open" */
+       block_start = ~0ULL;
+       block_stop = 0;
+       for (block = log_offset; block < journal->j_total_len; block++) {
+               err = jbd2_journal_bmap(journal, block, &phys_block);
+               if (err) {
+                       pr_err("JBD2: bad block at offset %lu\n", block);
+                       return err;
+               }
+
+               /*
+                * open a new region; block_stop is set one below the start
+                * so that the contiguity test below accepts this block
+                */
+               if (block_start == ~0ULL) {
+                       block_start = phys_block;
+                       block_stop = block_start - 1;
+               }
+
+               /*
+                * last block not contiguous with current block,
+                * process last contiguous region and return to this block on
+                * next loop
+                */
+               if (phys_block != block_stop + 1) {
+                       block--;
+               } else {
+                       block_stop++;
+                       /*
+                        * if this isn't the last block of journal,
+                        * no need to process now because next block may also
+                        * be part of this contiguous region
+                        */
+                       if (block != journal->j_total_len - 1)
+                               continue;
+               }
+
+               /*
+                * end of contiguous region or this is last block of journal,
+                * take care of the region
+                *
+                * block_stop is the last block of the region (inclusive), so
+                * byte_stop must be the last byte of that block:
+                * truncate_inode_pages_range() takes an inclusive end offset,
+                * and stopping at the first byte of the last block would
+                * leave that block's stale contents in the page cache.
+                */
+               byte_start = block_start * journal->j_blocksize;
+               byte_stop = (block_stop + 1) * journal->j_blocksize - 1;
+               byte_count = byte_stop - byte_start + 1;
+
+               /* drop cached copies before the on-disk blocks are erased */
+               truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
+                               byte_start, byte_stop);
+
+               if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
+                       err = blkdev_issue_discard(journal->j_dev,
+                                       byte_start >> SECTOR_SHIFT,
+                                       byte_count >> SECTOR_SHIFT,
+                                       GFP_NOFS, 0);
+               } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
+                       err = blkdev_issue_zeroout(journal->j_dev,
+                                       byte_start >> SECTOR_SHIFT,
+                                       byte_count >> SECTOR_SHIFT,
+                                       GFP_NOFS, 0);
+               }
+
+               if (unlikely(err != 0)) {
+                       pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu\n",
+                                       err, block_start, block_stop);
+                       return err;
+               }
+
+               /* reset start and stop after processing a region */
+               block_start = ~0ULL;
+       }
+
+       return blkdev_issue_flush(journal->j_dev);
+}
 
 /**
  * jbd2_journal_update_sb_errno() - Update error in the journal.
@@ -2246,13 +2350,18 @@ EXPORT_SYMBOL(jbd2_journal_clear_features);
 /**
  * jbd2_journal_flush() - Flush journal
  * @journal: Journal to act on.
+ * @flags: optional operation on the journal blocks after the flush (see below)
  *
  * Flush all data for a given journal to disk and empty the journal.
  * Filesystems can use this when remounting readonly to ensure that
- * recovery does not need to happen on remount.
+ * recovery does not need to happen on remount. Optionally, a discard or zeroout
+ * can be issued on the journal blocks after flushing.
+ *
+ * flags:
+ *     JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
+ *     JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
  */
-
-int jbd2_journal_flush(journal_t *journal)
+int jbd2_journal_flush(journal_t *journal, unsigned int flags)
 {
        int err = 0;
        transaction_t *transaction = NULL;
@@ -2306,6 +2415,10 @@ int jbd2_journal_flush(journal_t *journal)
         * commits of data to the journal will restore the current
         * s_start value. */
        jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
+
+       if (flags)
+               err = __jbd2_journal_erase(journal, flags);
+
        mutex_unlock(&journal->j_checkpoint_mutex);
        write_lock(&journal->j_state_lock);
        J_ASSERT(!journal->j_running_transaction);
index e032f2e..f1cc825 100644 (file)
@@ -6018,7 +6018,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
         * Then truncate log will be replayed resulting in cluster double free.
         */
        jbd2_journal_lock_updates(journal->j_journal);
-       status = jbd2_journal_flush(journal->j_journal);
+       status = jbd2_journal_flush(journal->j_journal, 0);
        jbd2_journal_unlock_updates(journal->j_journal);
        if (status < 0) {
                mlog_errno(status);
index 4e589ce..4f15750 100644 (file)
@@ -308,7 +308,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
        }
 
        jbd2_journal_lock_updates(journal->j_journal);
-       status = jbd2_journal_flush(journal->j_journal);
+       status = jbd2_journal_flush(journal->j_journal, 0);
        jbd2_journal_unlock_updates(journal->j_journal);
        if (status < 0) {
                up_write(&journal->j_trans_barrier);
@@ -1000,7 +1000,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 
        if (ocfs2_mount_local(osb)) {
                jbd2_journal_lock_updates(journal->j_journal);
-               status = jbd2_journal_flush(journal->j_journal);
+               status = jbd2_journal_flush(journal->j_journal, 0);
                jbd2_journal_unlock_updates(journal->j_journal);
                if (status < 0)
                        mlog_errno(status);
@@ -1070,7 +1070,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
 
        if (replayed) {
                jbd2_journal_lock_updates(journal->j_journal);
-               status = jbd2_journal_flush(journal->j_journal);
+               status = jbd2_journal_flush(journal->j_journal, 0);
                jbd2_journal_unlock_updates(journal->j_journal);
                if (status < 0)
                        mlog_errno(status);
@@ -1666,7 +1666,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 
        /* wipe the journal */
        jbd2_journal_lock_updates(journal);
-       status = jbd2_journal_flush(journal);
+       status = jbd2_journal_flush(journal, 0);
        jbd2_journal_unlock_updates(journal);
        if (status < 0)
                mlog_errno(status);
index db0e192..8543233 100644 (file)
@@ -1370,6 +1370,10 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
                                                 * mode */
 #define JBD2_FAST_COMMIT_ONGOING       0x100   /* Fast commit is ongoing */
 #define JBD2_FULL_COMMIT_ONGOING       0x200   /* Full commit is ongoing */
+/*
+ * Values for the flags argument of jbd2_journal_flush(); these are not
+ * journal state bits. DISCARD and ZEROOUT select the operation issued on the
+ * journal blocks after the flush and may not be combined. VALID is the mask
+ * of all recognised bits.
+ */
+#define JBD2_JOURNAL_FLUSH_DISCARD     0x0001
+#define JBD2_JOURNAL_FLUSH_ZEROOUT     0x0002
+#define JBD2_JOURNAL_FLUSH_VALID       (JBD2_JOURNAL_FLUSH_DISCARD | \
+                                       JBD2_JOURNAL_FLUSH_ZEROOUT)
 
 /*
  * Function declarations for the journaling transaction and buffer
@@ -1500,7 +1504,7 @@ extern int         jbd2_journal_invalidatepage(journal_t *,
                                struct page *, unsigned int, unsigned int);
 extern int      jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
 extern int      jbd2_journal_stop(handle_t *);
-extern int      jbd2_journal_flush (journal_t *);
+extern int      jbd2_journal_flush(journal_t *journal, unsigned int flags);
 extern void     jbd2_journal_lock_updates (journal_t *);
 extern void     jbd2_journal_unlock_updates (journal_t *);