Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)
diff --combined fs/xfs/xfs_log_recover.c

index 594f7e6,e7aa82f..be55688
--- 1/fs/xfs/xfs_log_recover.c
--- 2/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@@ -1109,27 -1109,10 +1109,10 @@@ xlog_verify_head
         bool                    tmp_wrapped;
   
         /*
-        * Search backwards through the log looking for the log record header
-        * block. This wraps all the way back around to the head so something is
-        * seriously wrong if we can't find it.
-        */
-       found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-                                     rhead, wrapped);
-       if (found < 0)
-               return found;
-       if (!found) {
-               xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-               return -EIO;
-       }
- 
-       *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
- 
-       /*
-        * Now that we have a tail block, check the head of the log for torn
-        * writes. Search again until we hit the tail or the maximum number of
-        * log record I/Os that could have been in flight at one time. Use a
-        * temporary buffer so we don't trash the rhead/bp pointer from the
-        * call above.
+        * Check the head of the log for torn writes. Search backwards from the
+        * head until we hit the tail or the maximum number of log record I/Os
+        * that could have been in flight at one time. Use a temporary buffer so
+        * we don't trash the rhead/bp pointers from the caller.
          */
         tmp_bp = xlog_get_bp(log, 1);
         if (!tmp_bp)
@@@ -1215,6 -1198,115 +1198,115 @@@
         return error;
   }
   
+ /*
+  * Check whether the head of the log points to an unmount record. In other
+  * words, determine whether the log is clean. If so, update the in-core state
+  * appropriately.
+  */
+ static int
+ xlog_check_unmount_rec(
+       struct xlog             *log,
+       xfs_daddr_t             *head_blk,
+       xfs_daddr_t             *tail_blk,
+       struct xlog_rec_header  *rhead,
+       xfs_daddr_t             rhead_blk,
+       struct xfs_buf          *bp,
+       bool                    *clean)
+ {
+       struct xlog_op_header   *op_head;
+       xfs_daddr_t             umount_data_blk;
+       xfs_daddr_t             after_umount_blk;
+       int                     hblks;
+       int                     error;
+       char                    *offset;
+ 
+       *clean = false;
+ 
+       /*
+        * Look for unmount record. If we find it, then we know there was a
+        * clean unmount. Since 'i' could be the last block in the physical
+        * log, we convert to a log block before comparing to the head_blk.
+        *
+        * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+        * below. We won't want to clear the unmount record if there is one, so
+        * we pass the lsn of the unmount record rather than the block after it.
+        */
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               int     h_size = be32_to_cpu(rhead->h_size);
+               int     h_version = be32_to_cpu(rhead->h_version);
+ 
+               if ((h_version & XLOG_VERSION_2) &&
+                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
+                               hblks++;
+               } else {
+                       hblks = 1;
+               }
+       } else {
+               hblks = 1;
+       }
+       after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+       after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+       if (*head_blk == after_umount_blk &&
+           be32_to_cpu(rhead->h_num_logops) == 1) {
+               umount_data_blk = rhead_blk + hblks;
+               umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               if (error)
+                       return error;
+ 
+               op_head = (struct xlog_op_header *)offset;
+               if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+                       /*
+                        * Set tail and last sync so that newly written log
+                        * records will point recovery to after the current
+                        * unmount record.
+                        */
+                       xlog_assign_atomic_lsn(&log->l_tail_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
+                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
+                       *tail_blk = after_umount_blk;
+ 
+                       *clean = true;
+               }
+       }
+ 
+       return 0;
+ }
+ 
+ static void
+ xlog_set_state(
+       struct xlog             *log,
+       xfs_daddr_t             head_blk,
+       struct xlog_rec_header  *rhead,
+       xfs_daddr_t             rhead_blk,
+       bool                    bump_cycle)
+ {
+       /*
+        * Reset log values according to the state of the log when we
+        * crashed.  In the case where head_blk == 0, we bump curr_cycle
+        * one because the next write starts a new cycle rather than
+        * continuing the cycle of the last good log record.  At this
+        * point we have guaranteed that all partial log records have been
+        * accounted for.  Therefore, we know that the last good log record
+        * written was complete and ended exactly on the end boundary
+        * of the physical log.
+        */
+       log->l_prev_block = rhead_blk;
+       log->l_curr_block = (int)head_blk;
+       log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+       if (bump_cycle)
+               log->l_curr_cycle++;
+       atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+       xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
+       xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
+ }
+ 
   /*
    * Find the sync block number or the tail of the log.
    *
@@@ -1238,22 -1330,20 +1330,20 @@@ xlog_find_tail
         xfs_daddr_t             *tail_blk)
   {
         xlog_rec_header_t       *rhead;
-       xlog_op_header_t        *op_head;
         char                    *offset = NULL;
         xfs_buf_t               *bp;
         int                     error;
-       xfs_daddr_t             umount_data_blk;
-       xfs_daddr_t             after_umount_blk;
         xfs_daddr_t             rhead_blk;
         xfs_lsn_t               tail_lsn;
-       int                     hblks;
         bool                    wrapped = false;
+       bool                    clean = false;
   
         /*
          * Find previous log record
          */
         if ((error = xlog_find_head(log, head_blk)))
                 return error;
+       ASSERT(*head_blk < INT_MAX);
   
         bp = xlog_get_bp(log, 1);
         if (!bp)
@@@ -1271,99 -1361,74 +1361,74 @@@
         }
   
         /*
-        * Trim the head block back to skip over torn records. We can have
-        * multiple log I/Os in flight at any time, so we assume CRC failures
-        * back through the previous several records are torn writes and skip
-        * them.
+        * Search backwards through the log looking for the log record header
+        * block. This wraps all the way back around to the head so something is
+        * seriously wrong if we can't find it.
          */
-       ASSERT(*head_blk < INT_MAX);
-       error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-                                &rhead, &wrapped);
-       if (error)
-               goto done;
+       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+                                     &rhead_blk, &rhead, &wrapped);
+       if (error < 0)
+               return error;
+       if (!error) {
+               xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+               return -EIO;
+       }
+       *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
   
         /*
-        * Reset log values according to the state of the log when we
-        * crashed.  In the case where head_blk == 0, we bump curr_cycle
-        * one because the next write starts a new cycle rather than
-        * continuing the cycle of the last good log record.  At this
-        * point we have guaranteed that all partial log records have been
-        * accounted for.  Therefore, we know that the last good log record
-        * written was complete and ended exactly on the end boundary
-        * of the physical log.
+        * Set the log state based on the current head record.
          */
-       log->l_prev_block = rhead_blk;
-       log->l_curr_block = (int)*head_blk;
-       log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-       if (wrapped)
-               log->l_curr_cycle++;
-       atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-       xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-                                       BBTOB(log->l_curr_block));
-       xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-                                       BBTOB(log->l_curr_block));
+       xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+       tail_lsn = atomic64_read(&log->l_tail_lsn);
+ 
+       /*
+        * Look for an unmount record at the head of the log. This sets the log
+        * state to determine whether recovery is necessary.
+        */
+       error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+                                      rhead_blk, bp, &clean);
+       if (error)
+               goto done;
   
         /*
-        * Look for unmount record.  If we find it, then we know there
-        * was a clean unmount.  Since 'i' could be the last block in
-        * the physical log, we convert to a log block before comparing
-        * to the head_blk.
+        * Verify the log head if the log is not clean (e.g., we have anything
+        * but an unmount record at the head). This uses CRC verification to
+        * detect and trim torn writes. If discovered, CRC failures are
+        * considered torn writes and the log head is trimmed accordingly.
          *
-        * Save the current tail lsn to use to pass to
-        * xlog_clear_stale_blocks() below.  We won't want to clear the
-        * unmount record if there is one, so we pass the lsn of the
-        * unmount record rather than the block after it.
+        * Note that we can only run CRC verification when the log is dirty
+        * because there's no guarantee that the log data behind an unmount
+        * record is compatible with the current architecture.
          */
-       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               int     h_size = be32_to_cpu(rhead->h_size);
-               int     h_version = be32_to_cpu(rhead->h_version);
+       if (!clean) {
+               xfs_daddr_t     orig_head = *head_blk;
   
-               if ((h_version & XLOG_VERSION_2) &&
-                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
-                               hblks++;
-               } else {
-                       hblks = 1;
-               }
-       } else {
-               hblks = 1;
-       }
-       after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-       after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-       tail_lsn = atomic64_read(&log->l_tail_lsn);
-       if (*head_blk == after_umount_blk &&
-           be32_to_cpu(rhead->h_num_logops) == 1) {
-               umount_data_blk = rhead_blk + hblks;
-               umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               error = xlog_verify_head(log, head_blk, tail_blk, bp,
+                                        &rhead_blk, &rhead, &wrapped);
                 if (error)
                         goto done;
   
-               op_head = (xlog_op_header_t *)offset;
-               if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-                       /*
-                        * Set tail and last sync so that newly written
-                        * log records will point recovery to after the
-                        * current unmount record.
-                        */
-                       xlog_assign_atomic_lsn(&log->l_tail_lsn,
-                                       log->l_curr_cycle, after_umount_blk);
-                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-                                       log->l_curr_cycle, after_umount_blk);
-                       *tail_blk = after_umount_blk;
- 
-                       /*
-                        * Note that the unmount was clean. If the unmount
-                        * was not clean, we need to know this to rebuild the
-                        * superblock counters from the perag headers if we
-                        * have a filesystem using non-persistent counters.
-                        */
-                       log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+               /* update in-core state again if the head changed */
+               if (*head_blk != orig_head) {
+                       xlog_set_state(log, *head_blk, rhead, rhead_blk,
+                                      wrapped);
+                       tail_lsn = atomic64_read(&log->l_tail_lsn);
+                       error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+                                                      rhead, rhead_blk, bp,
+                                                      &clean);
+                       if (error)
+                               goto done;
                 }
         }
   
+       /*
+        * Note that the unmount was clean. If the unmount was not clean, we
+        * need to know this to rebuild the superblock counters from the perag
+        * headers if we have a filesystem using non-persistent counters.
+        */
+       if (clean)
+               log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+ 
         /*
          * Make sure that there are no blocks in front of the head
          * with the same cycle number as the head.  This can happen
@@@ -4491,7 -4556,7 +4556,7 @@@ xlog_recover_process
          * know precisely what failed.
          */
         if (pass == XLOG_RECOVER_CRCPASS) {
- -              if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+ +              if (rhead->h_crc && crc != rhead->h_crc)
                         return -EFSBADCRC;
                 return 0;
         }
@@@ -4502,7 -4567,7 +4567,7 @@@
          * zero CRC check prevents warnings from being emitted when upgrading
          * the kernel from one that does not add CRCs by default.
          */
- -      if (crc != le32_to_cpu(rhead->h_crc)) {
+ +      if (crc != rhead->h_crc) {
                 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
                         xfs_alert(log->l_mp,
                 "log record CRC mismatch: found 0x%x, expected 0x%x.",
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 11 Mar 2016 18:21:32 +0000 (10:21 -0800)