xfs: external logs need to flush data device
authorDave Chinner <dchinner@redhat.com>
Tue, 27 Jul 2021 23:23:47 +0000 (16:23 -0700)
committerDarrick J. Wong <djwong@kernel.org>
Thu, 29 Jul 2021 16:27:27 +0000 (09:27 -0700)
The recent journal flush/FUA changes replaced the flushing of the
data device on every iclog write with an up-front async data device
cache flush. Unfortunately, the assumption of which this was based
on has been proven incorrect by the flush vs log tail update
ordering issue. As the fix for that issue uses the
XLOG_ICL_NEED_FLUSH flag to indicate that data device needs a cache
flush, we now need to (once again) ensure that an iclog write to
external logs that need a cache flush to be issued actually issue a
cache flush to the data device as well as the log device.

Fixes: eef983ffeae7 ("xfs: journal IO cache flush reductions")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
fs/xfs/xfs_log.c

index 96434cc..a3c4d48 100644 (file)
@@ -827,13 +827,6 @@ xlog_write_unmount_record(
        /* account for space used by record data */
        ticket->t_curr_res -= sizeof(ulf);
 
-       /*
-        * For external log devices, we need to flush the data device cache
-        * first to ensure all metadata writeback is on stable storage before we
-        * stamp the tail LSN into the unmount record.
-        */
-       if (log->l_targ != log->l_mp->m_ddev_targp)
-               blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
        return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
 }
 
@@ -1796,10 +1789,20 @@ xlog_write_iclog(
         * metadata writeback and causing priority inversions.
         */
        iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
-       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH)
+       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
                iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+               /*
+                * For external log devices, we also need to flush the data
+                * device cache first to ensure all metadata writeback covered
+                * by the LSN in this iclog is on stable storage. This is slow,
+                * but it *must* complete before we issue the external log IO.
+                */
+               if (log->l_targ != log->l_mp->m_ddev_targp)
+                       blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+       }
        if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
                iclog->ic_bio.bi_opf |= REQ_FUA;
+
        iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
        if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {