xfs: repair dquots based on live quotacheck results
author Darrick J. Wong <djwong@kernel.org>
Thu, 22 Feb 2024 20:30:57 +0000 (12:30 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Thu, 22 Feb 2024 20:30:57 +0000 (12:30 -0800)
Use the shadow quota counters that live quotacheck creates to reset the
incore dquot counters.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/Makefile
fs/xfs/scrub/quotacheck.c
fs/xfs/scrub/quotacheck.h
fs/xfs/scrub/quotacheck_repair.c [new file with mode: 0644]
fs/xfs/scrub/repair.c
fs/xfs/scrub/repair.h
fs/xfs/scrub/scrub.c
fs/xfs/scrub/trace.h

index 951404c..68891e6 100644 (file)
@@ -204,6 +204,7 @@ xfs-$(CONFIG_XFS_RT)                += $(addprefix scrub/, \
 
 xfs-$(CONFIG_XFS_QUOTA)                += $(addprefix scrub/, \
                                   quota_repair.o \
+                                  quotacheck_repair.o \
                                   )
 endif
 endif
index 2cd55f2..c77eb2d 100644 (file)
@@ -102,7 +102,9 @@ xchk_setup_quotacheck(
  * set the INCOMPLETE flag even when a negative errno is returned.  This care
  * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
  * ECANCELED) that are absorbed into a scrub state flag update by
- * xchk_*_process_error.
+ * xchk_*_process_error.  Scrub and repair share the same incore data
+ * structures, so the INCOMPLETE flag is critical to prevent a repair based on
+ * insufficient information.
  *
  * Because we are scanning a live filesystem, it's possible that another thread
  * will try to update the quota counters for an inode that we've already
index 244e8cc..4ea5f24 100644 (file)
@@ -30,6 +30,9 @@ struct xqcheck_dquot {
 /* Already checked this dquot. */
 #define XQCHECK_DQUOT_COMPARE_SCANNED  (1U << 1)
 
+/* Already repaired this dquot. */
+#define XQCHECK_DQUOT_REPAIR_SCANNED   (1U << 2)
+
 /* Live quotacheck control structure. */
 struct xqcheck {
        struct xfs_scrub        *sc;
diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c
new file mode 100644 (file)
index 0000000..dd8554c
--- /dev/null
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_util.h"
+#include "xfs_iwalk.h"
+#include "xfs_ialloc.h"
+#include "xfs_sb.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/quota.h"
+#include "scrub/quotacheck.h"
+#include "scrub/trace.h"
+
+/*
+ * Live Quotacheck Repair
+ * ======================
+ *
+ * Use the live quota counter information that we collected to replace the
+ * counter values in the incore dquots.  A scrub->repair cycle should have left
+ * the live data and hooks active, so this is safe so long as we make sure the
+ * dquot is locked.
+ */
+
+/*
+ * Commit new counters to a dquot.
+ *
+ * Loads the shadow counter record for dq->q_id from the counter array that
+ * xqcheck_counters_for() selects for @dqtype, and adjusts the inode, block and
+ * realtime block counts of @dq (plus the matching reservations) by the
+ * difference between the shadow value and the incore value.  The shadow
+ * record is then stored back with the REPAIR_SCANNED and WRITTEN flags set.
+ * If any counter changed, the dquot is logged and the transaction committed;
+ * if nothing changed (or an error occurred), the transaction is cancelled.
+ *
+ * The dquot is unlocked first thing, so the caller is expected to pass it in
+ * locked.  It is locked again on every return path.
+ *
+ * Returns 0 on success (including the "nothing to change" case) or a negative
+ * errno.  -ECANCELED is returned if the inode scan was aborted, or if storing
+ * the shadow record failed with -EFBIG.
+ */
+static int
+xqcheck_commit_dquot(
+       struct xqcheck          *xqc,
+       xfs_dqtype_t            dqtype,
+       struct xfs_dquot        *dq)
+{
+       struct xqcheck_dquot    xcdq;
+       struct xfarray          *counts = xqcheck_counters_for(xqc, dqtype);
+       int64_t                 delta;
+       bool                    dirty = false;
+       int                     error = 0;
+
+       /*
+        * Unlock the dquot just long enough to allocate a transaction.  The
+        * dquot is re-locked before the error check, so a failed allocation
+        * still returns with the dquot locked.
+        */
+       xfs_dqunlock(dq);
+       error = xchk_trans_alloc(xqc->sc, 0);
+       xfs_dqlock(dq);
+       if (error)
+               return error;
+
+       xfs_trans_dqjoin(xqc->sc->tp, dq);
+
+       if (xchk_iscan_aborted(&xqc->iscan)) {
+               error = -ECANCELED;
+               goto out_cancel;
+       }
+
+       mutex_lock(&xqc->lock);
+       error = xfarray_load_sparse(counts, dq->q_id, &xcdq);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Adjust counters as needed.  For each resource, delta is the shadow
+        * count minus the incore count; the same delta is applied to both the
+        * reservation and the count, and any nonzero delta marks the dquot as
+        * needing to be logged.
+        */
+       delta = (int64_t)xcdq.icount - dq->q_ino.count;
+       if (delta) {
+               dq->q_ino.reserved += delta;
+               dq->q_ino.count += delta;
+               dirty = true;
+       }
+
+       delta = (int64_t)xcdq.bcount - dq->q_blk.count;
+       if (delta) {
+               dq->q_blk.reserved += delta;
+               dq->q_blk.count += delta;
+               dirty = true;
+       }
+
+       delta = (int64_t)xcdq.rtbcount - dq->q_rtb.count;
+       if (delta) {
+               dq->q_rtb.reserved += delta;
+               dq->q_rtb.count += delta;
+               dirty = true;
+       }
+
+       xcdq.flags |= (XQCHECK_DQUOT_REPAIR_SCANNED | XQCHECK_DQUOT_WRITTEN);
+       error = xfarray_store(counts, dq->q_id, &xcdq);
+       if (error == -EFBIG) {
+               /*
+                * EFBIG means we tried to store data at too high a byte offset
+                * in the sparse array.  IOWs, we cannot complete the repair
+                * and must cancel the whole operation.  This should never
+                * happen, but we need to catch it anyway.
+                */
+               error = -ECANCELED;
+       }
+       mutex_unlock(&xqc->lock);
+       if (error || !dirty)
+               goto out_cancel;
+
+       trace_xrep_quotacheck_dquot(xqc->sc->mp, dq->q_type, dq->q_id);
+
+       /*
+        * Commit the dirty dquot to disk.  Timers are only adjusted for
+        * dquots with a nonzero id.
+        */
+       dq->q_flags |= XFS_DQFLAG_DIRTY;
+       if (dq->q_id)
+               xfs_qm_adjust_dqtimers(dq);
+       xfs_trans_log_dquot(xqc->sc->tp, dq);
+
+       /*
+        * Transaction commit unlocks the dquot, so we must re-lock it so that
+        * the caller can put the reference (which apparently requires a locked
+        * dquot).
+        */
+       error = xrep_trans_commit(xqc->sc);
+       xfs_dqlock(dq);
+       return error;
+
+out_unlock:
+       mutex_unlock(&xqc->lock);
+out_cancel:
+       xchk_trans_cancel(xqc->sc);
+
+       /*
+        * Re-lock the dquot so the caller can put the reference.  This path
+        * is also taken with error == 0 when no counter needed changing.
+        */
+       xfs_dqlock(dq);
+       return error;
+}
+
+/*
+ * Commit new quota counters for a particular quota type.
+ *
+ * This runs in two passes.  The first walks every dquot handed back by the
+ * dquot iterator and commits the shadow counters to it.  The second walks the
+ * shadow counter array for @dqtype and, for every record not yet flagged
+ * REPAIR_SCANNED, gets the dquot by id and commits the shadow counters to it.
+ *
+ * Returns a negative errno as soon as any step fails; otherwise the return
+ * value is whatever xfarray_iter() reported when the second walk stopped.
+ */
+STATIC int
+xqcheck_commit_dqtype(
+       struct xqcheck          *xqc,
+       unsigned int            dqtype)
+{
+       struct xchk_dqiter      cursor = { };
+       struct xqcheck_dquot    xcdq;
+       struct xfs_scrub        *sc = xqc->sc;
+       struct xfs_mount        *mp = sc->mp;
+       struct xfarray          *counts = xqcheck_counters_for(xqc, dqtype);
+       struct xfs_dquot        *dq;
+       xfarray_idx_t           cur = XFARRAY_CURSOR_INIT;
+       int                     error;
+
+       /*
+        * Update the counters of every dquot that the quota file knows about.
+        * The reference obtained from the iterator is always put, even when
+        * the commit fails, before the error is examined.
+        */
+       xchk_dqiter_init(&cursor, sc, dqtype);
+       while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
+               error = xqcheck_commit_dquot(xqc, dqtype, dq);
+               xfs_qm_dqput(dq);
+               if (error)
+                       break;
+       }
+       if (error)
+               return error;
+
+       /*
+        * Make a second pass to deal with the dquots that we know about but
+        * the quota file previously did not know about.  xqc->lock is held
+        * while iterating the shadow array (including across the "continue"
+        * for records that were already repaired) and is dropped while we
+        * work on an individual dquot.  The dquot id is taken as cur - 1,
+        * presumably because xfarray_iter() leaves the cursor one past the
+        * record it just returned -- TODO confirm against xfarray_iter().
+        */
+       mutex_lock(&xqc->lock);
+       while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) {
+               xfs_dqid_t      id = cur - 1;
+
+               if (xcdq.flags & XQCHECK_DQUOT_REPAIR_SCANNED)
+                       continue;
+
+               mutex_unlock(&xqc->lock);
+
+               /*
+                * Grab the dquot, allowing for dquot block allocation in a
+                * separate transaction.  We committed the scrub transaction
+                * in a previous step, so we will not be creating nested
+                * transactions here.  xqc->lock was dropped just above, so
+                * the error returns below leave with the mutex unlocked.
+                */
+               error = xfs_qm_dqget(mp, id, dqtype, true, &dq);
+               if (error)
+                       return error;
+
+               error = xqcheck_commit_dquot(xqc, dqtype, dq);
+               xfs_qm_dqput(dq);
+               if (error)
+                       return error;
+
+               mutex_lock(&xqc->lock);
+       }
+       mutex_unlock(&xqc->lock);
+
+       return error;
+}
+
+/*
+ * Build the mask of quota CHKD flags that corresponds to whichever quota
+ * types (user, group, project) are currently turned on for this mount.
+ */
+static inline unsigned int
+xqcheck_chkd_flags(
+       struct xfs_mount        *mp)
+{
+       unsigned int            chkd;
+
+       chkd  = XFS_IS_UQUOTA_ON(mp) ? XFS_UQUOTA_CHKD : 0;
+       chkd |= XFS_IS_GQUOTA_ON(mp) ? XFS_GQUOTA_CHKD : 0;
+       chkd |= XFS_IS_PQUOTA_ON(mp) ? XFS_PQUOTA_CHKD : 0;
+
+       return chkd;
+}
+
+/*
+ * Commit the new dquot counters.
+ *
+ * Picks up the live quotacheck state from sc->buf, clears the CHKD flags for
+ * the running quota types and commits the current transaction, then commits
+ * the shadow counters for each of the user, group and project quota types
+ * whose counter array pointer is set.  Finally, a fresh transaction is
+ * allocated to set the CHKD flags again.
+ *
+ * Returns 0 on success or a negative errno.  Any failure after the first
+ * commit returns before the CHKD flags are set again.
+ */
+int
+xrep_quotacheck(
+       struct xfs_scrub        *sc)
+{
+       struct xqcheck          *xqc = sc->buf;
+       unsigned int            qflags = xqcheck_chkd_flags(sc->mp);
+       int                     error;
+
+       /*
+        * Clear the CHKD flag for the running quota types and commit the scrub
+        * transaction so that we can allocate new quota block mappings if we
+        * have to.  If we crash after this point, the sb still has the CHKD
+        * flags cleared, so mount quotacheck will fix all of this up.
+        */
+       xrep_update_qflags(sc, qflags, 0);
+       error = xrep_trans_commit(sc);
+       if (error)
+               return error;
+
+       /*
+        * Commit the new counters to the dquots.  Quota types whose counter
+        * array pointer is NULL are skipped.
+        */
+       if (xqc->ucounts) {
+               error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_USER);
+               if (error)
+                       return error;
+       }
+       if (xqc->gcounts) {
+               error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_GROUP);
+               if (error)
+                       return error;
+       }
+       if (xqc->pcounts) {
+               error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_PROJ);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Set the CHKD flags now that we've fixed quota counts.  The same
+        * qflags mask that was cleared at the top is what gets set here.
+        */
+       error = xchk_trans_alloc(sc, 0);
+       if (error)
+               return error;
+
+       xrep_update_qflags(sc, 0, qflags);
+       return xrep_trans_commit(sc);
+}
index 3d2c4db..7141b17 100644 (file)
@@ -688,21 +688,26 @@ xrep_find_ag_btree_roots(
 
 #ifdef CONFIG_XFS_QUOTA
 /* Update some quota flags in the superblock. */
-static void
+void
 xrep_update_qflags(
        struct xfs_scrub        *sc,
-       unsigned int            clear_flags)
+       unsigned int            clear_flags,
+       unsigned int            set_flags)
 {
        struct xfs_mount        *mp = sc->mp;
        struct xfs_buf          *bp;
 
        mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
-       if ((mp->m_qflags & clear_flags) == 0)
+       if ((mp->m_qflags & clear_flags) == 0 &&
+           (mp->m_qflags & set_flags) == set_flags)
                goto no_update;
 
        mp->m_qflags &= ~clear_flags;
+       mp->m_qflags |= set_flags;
+
        spin_lock(&mp->m_sb_lock);
        mp->m_sb.sb_qflags &= ~clear_flags;
+       mp->m_sb.sb_qflags |= set_flags;
        spin_unlock(&mp->m_sb_lock);
 
        /*
@@ -732,7 +737,7 @@ xrep_force_quotacheck(
        if (!(flag & sc->mp->m_qflags))
                return;
 
-       xrep_update_qflags(sc, flag);
+       xrep_update_qflags(sc, flag, 0);
 }
 
 /*
index 1711432..fdfa066 100644 (file)
@@ -72,6 +72,8 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
                struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp);
 
 #ifdef CONFIG_XFS_QUOTA
+void xrep_update_qflags(struct xfs_scrub *sc, unsigned int clear_flags,
+               unsigned int set_flags);
 void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type);
 int xrep_ino_dqattach(struct xfs_scrub *sc);
 #else
@@ -123,8 +125,10 @@ int xrep_rtbitmap(struct xfs_scrub *sc);
 
 #ifdef CONFIG_XFS_QUOTA
 int xrep_quota(struct xfs_scrub *sc);
+int xrep_quotacheck(struct xfs_scrub *sc);
 #else
 # define xrep_quota                    xrep_notsupported
+# define xrep_quotacheck               xrep_notsupported
 #endif /* CONFIG_XFS_QUOTA */
 
 int xrep_reinit_pagf(struct xfs_scrub *sc);
@@ -191,6 +195,7 @@ xrep_setup_nothing(
 #define xrep_bmap_cow                  xrep_notsupported
 #define xrep_rtbitmap                  xrep_notsupported
 #define xrep_quota                     xrep_notsupported
+#define xrep_quotacheck                        xrep_notsupported
 
 #endif /* CONFIG_XFS_ONLINE_REPAIR */
 
index 71a9eb4..9112c09 100644 (file)
@@ -367,7 +367,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
                .type   = ST_FS,
                .setup  = xchk_setup_quotacheck,
                .scrub  = xchk_quotacheck,
-               .repair = xrep_notsupported,
+               .repair = xrep_quotacheck,
        },
 };
 
index 6c90bc7..fedcebf 100644 (file)
@@ -2004,6 +2004,7 @@ DEFINE_EVENT(xrep_dquot_class, name, \
 DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item);
 DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot);
 DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item_fill_bmap_hole);
+DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
 #endif /* CONFIG_XFS_QUOTA */
 
 #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */