Merge tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 11 May 2023 21:51:11 +0000 (16:51 -0500)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 11 May 2023 21:51:11 +0000 (16:51 -0500)
Pull xfs bug fixes from Dave Chinner:
 "Largely minor bug fixes and cleanups, th emost important of which are
  probably the fixes for regressions in the extent allocation code:

   - fixes for inode garbage collection shutdown racing with work queue
     updates

   - ensure inodegc workers run on the CPU they are supposed to

   - disable counter scrubbing until we can exclusively freeze the
     filesystem from the kernel

   - regression fixes for new allocation-related bugs

   - a couple of minor cleanups"

* tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix xfs_inodegc_stop racing with mod_delayed_work
  xfs: disable reaping in fscounters scrub
  xfs: check that per-cpu inodegc workers actually run on that cpu
  xfs: explicitly specify cpu when forcing inodegc delayed work to run immediately
  xfs: fix negative array access in xfs_getbmap
  xfs: don't allocate into the data fork for an unshare request
  xfs: flush dirty data and drain directios before scrubbing cow fork
  xfs: set bnobt/cntbt numrecs correctly when formatting new AGs
  xfs: don't unconditionally null args->pag in xfs_bmap_btalloc_at_eof
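
Several of the inodegc entries above revolve around one invariant: each
per-cpu inodegc list must be drained by a worker running on the CPU that owns
it. Below is a hedged, simplified sketch of that per-cpu deferred-work
pattern with hypothetical names (my_gc, my_wq, my_gc_worker); it illustrates
the idea behind the fixes, not the code being merged.

#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/llist.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

struct my_gc {
	struct llist_head	list;	/* items queued on this CPU */
	struct delayed_work	work;	/* worker that drains the list */
	unsigned int		cpu;	/* owning CPU (DEBUG-only in the patch) */
};

static struct my_gc __percpu	*my_gc;
static struct workqueue_struct	*my_wq;

static void
my_gc_worker(
	struct work_struct	*work)
{
	struct my_gc		*gc = container_of(to_delayed_work(work),
						   struct my_gc, work);

	/* The fix asserts the worker runs on the CPU that owns the data. */
	WARN_ON_ONCE(gc->cpu != smp_processor_id());

	llist_del_all(&gc->list);	/* the real worker walks the items */
}

static void
my_gc_queue(
	struct llist_node	*node)
{
	struct my_gc		*gc = get_cpu_ptr(my_gc);

	llist_add(node, &gc->list);
	/*
	 * mod_delayed_work() lets the workqueue pick a pool; pinning with
	 * mod_delayed_work_on() keeps the worker on the CPU that owns gc.
	 */
	mod_delayed_work_on(smp_processor_id(), my_wq, &gc->work, 0);
	put_cpu_ptr(gc);
}

static int __init
my_gc_init(void)
{
	int	cpu;

	my_wq = alloc_workqueue("my_gc", WQ_MEM_RECLAIM, 0);
	if (!my_wq)
		return -ENOMEM;
	my_gc = alloc_percpu(struct my_gc);
	if (!my_gc) {
		destroy_workqueue(my_wq);
		return -ENOMEM;
	}

	for_each_possible_cpu(cpu) {
		struct my_gc	*gc = per_cpu_ptr(my_gc, cpu);

		gc->cpu = cpu;
		init_llist_head(&gc->list);
		INIT_DELAYED_WORK(&gc->work, my_gc_worker);
	}
	return 0;
}

The xfs_icache.c, xfs_mount.h and xfs_super.c hunks below apply the same idea
to struct xfs_inodegc: record the owning CPU at init time (DEBUG/XFS_WARN
builds only), assert it in the worker, and always queue the delayed work with
mod_delayed_work_on().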

14 files changed:
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/common.c
fs/xfs/scrub/common.h
fs/xfs/scrub/fscounters.c
fs/xfs/scrub/scrub.c
fs/xfs/scrub/scrub.h
fs/xfs/scrub/trace.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c

diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1b078bb..9b373a0 100644
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
                ASSERT(start >= mp->m_ag_prealloc_blocks);
                if (start != mp->m_ag_prealloc_blocks) {
                        /*
-                        * Modify first record to pad stripe align of log
+                        * Modify first record to pad stripe align of log and
+                        * bump the record count.
                         */
                        arec->ar_blockcount = cpu_to_be32(start -
                                                mp->m_ag_prealloc_blocks);
+                       be16_add_cpu(&block->bb_numrecs, 1);
                        nrec = arec + 1;
 
                        /*
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
                                        be32_to_cpu(arec->ar_startblock) +
                                        be32_to_cpu(arec->ar_blockcount));
                        arec = nrec;
-                       be16_add_cpu(&block->bb_numrecs, 1);
                }
                /*
                 * Change record start to after the internal log
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
        }
 
        /*
-        * Calculate the record block count and check for the case where
-        * the log might have consumed all available space in the AG. If
-        * so, reset the record count to 0 to avoid exposure of an invalid
-        * record start block.
+        * Calculate the block count of this record; if it is nonzero,
+        * increment the record count.
         */
        arec->ar_blockcount = cpu_to_be32(id->agsize -
                                          be32_to_cpu(arec->ar_startblock));
-       if (!arec->ar_blockcount)
-               block->bb_numrecs = 0;
+       if (arec->ar_blockcount)
+               be16_add_cpu(&block->bb_numrecs, 1);
 }
 
 /*
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
 {
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
        xfs_freesp_init_recs(mp, bp, id);
 }
 
@@ -548,7 +547,7 @@ xfs_cntroot_init(
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
 {
-       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+       xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
        xfs_freesp_init_recs(mp, bp, id);
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b512de0..cd8870a 100644
@@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof(
                if (!caller_pag)
                        args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
                error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
-               if (!caller_pag)
+               if (!caller_pag) {
                        xfs_perag_put(args->pag);
+                       args->pag = NULL;
+               }
                if (error)
                        return error;
 
@@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof(
                 * Exact allocation failed. Reset to try an aligned allocation
                 * according to the original allocation specification.
                 */
-               args->pag = NULL;
                args->alignment = stripe_align;
                args->minlen = nextminlen;
                args->minalignslop = 0;
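
The hunk above pairs xfs_perag_put() with clearing args->pag at the same
spot, so the aligned-allocation retry that follows a failed exact allocation
can never see a stale perag reference. A tiny hedged illustration of that
put-and-clear idiom (my_perag_put_and_clear is a hypothetical helper, not an
XFS API):

static inline void
my_perag_put_and_clear(
	struct xfs_perag	**pagp)
{
	/* Drop the reference and poison the caller's pointer together. */
	if (*pagp) {
		xfs_perag_put(*pagp);
		*pagp = NULL;
	}
}
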
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 87ab9f9..69bc89d 100644
@@ -42,12 +42,12 @@ xchk_setup_inode_bmap(
        xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
 
        /*
-        * We don't want any ephemeral data fork updates sitting around
+        * We don't want any ephemeral data/cow fork updates sitting around
         * while we inspect block mappings, so wait for directio to finish
         * and flush dirty data if we have delalloc reservations.
         */
        if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
-           sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
+           sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
                struct address_space    *mapping = VFS_I(sc->ip)->i_mapping;
 
                sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
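
Outside this hunk, "wait for directio to finish and flush dirty data" maps to
the usual quiesce sequence for a regular file. A hedged sketch of what that
typically looks like (my_quiesce_file_data is an assumed name; the real scrub
code differs in detail):

/* Settle a regular file's data before inspecting its block mappings. */
static int
my_quiesce_file_data(
	struct xfs_inode	*ip)
{
	struct inode		*inode = VFS_I(ip);

	/* Wait for in-flight direct I/O so mappings stop changing under us. */
	inode_dio_wait(inode);

	/* Push delalloc/dirty pagecache to disk so the CoW fork is settled. */
	return filemap_write_and_wait(inode->i_mapping);
}
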
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9aa7966..7a20256 100644
@@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks(
        return 0;
 }
 
-/* Pause background reaping of resources. */
-void
-xchk_stop_reaping(
-       struct xfs_scrub        *sc)
-{
-       sc->flags |= XCHK_REAPING_DISABLED;
-       xfs_blockgc_stop(sc->mp);
-       xfs_inodegc_stop(sc->mp);
-}
-
-/* Restart background reaping of resources. */
-void
-xchk_start_reaping(
-       struct xfs_scrub        *sc)
-{
-       /*
-        * Readonly filesystems do not perform inactivation or speculative
-        * preallocation, so there's no need to restart the workers.
-        */
-       if (!xfs_is_readonly(sc->mp)) {
-               xfs_inodegc_start(sc->mp);
-               xfs_blockgc_start(sc->mp);
-       }
-       sc->flags &= ~XCHK_REAPING_DISABLED;
-}
-
 /*
  * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
  * operation.  Callers must not hold any locks that intersect with the CPU
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 18b5f2b..791235c 100644
@@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
 }
 
 int xchk_metadata_inode_forks(struct xfs_scrub *sc);
-void xchk_stop_reaping(struct xfs_scrub *sc);
-void xchk_start_reaping(struct xfs_scrub *sc);
 
 /*
  * Setting up a hook to wait for intents to drain is costly -- we have to take
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index faa315b..e382a35 100644
@@ -150,13 +150,6 @@ xchk_setup_fscounters(
        if (error)
                return error;
 
-       /*
-        * Pause background reclaim while we're scrubbing to reduce the
-        * likelihood of background perturbations to the counters throwing off
-        * our calculations.
-        */
-       xchk_stop_reaping(sc);
-
        return xchk_trans_alloc(sc, 0);
 }
 
@@ -453,6 +446,12 @@ xchk_fscounters(
        if (frextents > mp->m_sb.sb_rextents)
                xchk_set_corrupt(sc);
 
+       /*
+        * XXX: We can't quiesce percpu counter updates, so exit early.
+        * This can be re-enabled when we gain exclusive freeze functionality.
+        */
+       return 0;
+
        /*
         * If ifree exceeds icount by more than the minimum variance then
         * something's probably wrong with the counters.
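
The early return above exists because the summary counters are percpu
counters: every CPU keeps adding local deltas while the scrubber folds them,
so there is no stable snapshot to compare ifree against icount until the
filesystem can be frozen exclusively from the kernel. A hedged, generic
illustration of that interface (plain percpu_counter usage, not the scrub
code):

#include <linux/gfp.h>
#include <linux/percpu_counter.h>
#include <linux/printk.h>

static struct percpu_counter my_ifree;

static int
my_counter_example(void)
{
	int	error = percpu_counter_init(&my_ifree, 0, GFP_KERNEL);

	if (error)
		return error;

	/* Cheap per-cpu delta; no shared cacheline bouncing. */
	percpu_counter_add(&my_ifree, 42);

	/*
	 * percpu_counter_sum() folds all per-cpu deltas, but other CPUs may
	 * keep adding while the fold runs, so two back-to-back sums of a live
	 * counter can legitimately disagree -- hence the scrub early return.
	 */
	pr_info("sum=%lld\n", percpu_counter_sum(&my_ifree));

	percpu_counter_destroy(&my_ifree);
	return 0;
}
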
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 02819be..3d98f60 100644
@@ -186,8 +186,6 @@ xchk_teardown(
        }
        if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
                mnt_drop_write_file(sc->file);
-       if (sc->flags & XCHK_REAPING_DISABLED)
-               xchk_start_reaping(sc);
        if (sc->buf) {
                if (sc->buf_cleanup)
                        sc->buf_cleanup(sc->buf);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e719034..b38e938 100644
@@ -106,7 +106,6 @@ struct xfs_scrub {
 
 /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
 #define XCHK_TRY_HARDER                (1 << 0)  /* can't get resources, try again */
-#define XCHK_REAPING_DISABLED  (1 << 1)  /* background block reaping paused */
 #define XCHK_FSGATES_DRAIN     (1 << 2)  /* defer ops draining enabled */
 #define XCHK_NEED_DRAIN                (1 << 3)  /* scrub needs to drain defer ops */
 #define XREP_ALREADY_FIXED     (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 68efd6f..b3894da 100644
@@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
 
 #define XFS_SCRUB_STATE_STRINGS \
        { XCHK_TRY_HARDER,                      "try_harder" }, \
-       { XCHK_REAPING_DISABLED,                "reaping_disabled" }, \
        { XCHK_FSGATES_DRAIN,                   "fsgates_drain" }, \
        { XCHK_NEED_DRAIN,                      "need_drain" }, \
        { XREP_ALREADY_FIXED,                   "already_fixed" }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f032d3a..fbb6755 100644
@@ -558,7 +558,9 @@ xfs_getbmap(
                if (!xfs_iext_next_extent(ifp, &icur, &got)) {
                        xfs_fileoff_t   end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
 
-                       out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
+                       if (bmv->bmv_entries > 0)
+                               out[bmv->bmv_entries - 1].bmv_oflags |=
+                                                               BMV_OF_LAST;
 
                        if (whichfork != XFS_ATTR_FORK && bno < end &&
                            !xfs_getbmap_full(bmv)) {
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 351849f..0f60e30 100644
@@ -435,18 +435,23 @@ xfs_iget_check_free_state(
 }
 
 /* Make all pending inactivation work start immediately. */
-static void
+static bool
 xfs_inodegc_queue_all(
        struct xfs_mount        *mp)
 {
        struct xfs_inodegc      *gc;
        int                     cpu;
+       bool                    ret = false;
 
        for_each_online_cpu(cpu) {
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
-               if (!llist_empty(&gc->list))
+               if (!llist_empty(&gc->list)) {
                        mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
+                       ret = true;
+               }
        }
+
+       return ret;
 }
 
 /*
@@ -1856,6 +1861,8 @@ xfs_inodegc_worker(
        struct xfs_inode        *ip, *n;
        unsigned int            nofs_flag;
 
+       ASSERT(gc->cpu == smp_processor_id());
+
        WRITE_ONCE(gc->items, 0);
 
        if (!node)
@@ -1909,24 +1916,41 @@ xfs_inodegc_flush(
 
 /*
  * Flush all the pending work and then disable the inode inactivation background
- * workers and wait for them to stop.
+ * workers and wait for them to stop.  Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
  */
 void
 xfs_inodegc_stop(
        struct xfs_mount        *mp)
 {
+       bool                    rerun;
+
        if (!xfs_clear_inodegc_enabled(mp))
                return;
 
+       /*
+        * Drain all pending inodegc work, including inodes that could be
+        * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
+        * threads that sample the inodegc state just prior to us clearing it.
+        * The inodegc flag state prevents new threads from queuing more
+        * inodes, so we queue pending work items and flush the workqueue until
+        * all inodegc lists are empty.  IOWs, we cannot use drain_workqueue
+        * here because it does not allow other unserialized mechanisms to
+        * reschedule inodegc work while this draining is in progress.
+        */
        xfs_inodegc_queue_all(mp);
-       drain_workqueue(mp->m_inodegc_wq);
+       do {
+               flush_workqueue(mp->m_inodegc_wq);
+               rerun = xfs_inodegc_queue_all(mp);
+       } while (rerun);
 
        trace_xfs_inodegc_stop(mp, __return_address);
 }
 
 /*
  * Enable the inode inactivation background workers and schedule deferred inode
- * inactivation work if there is any.
+ * inactivation work if there is any.  Caller must hold sb->s_umount to
+ * coordinate changes in the inodegc_enabled state.
  */
 void
 xfs_inodegc_start(
@@ -2069,7 +2093,8 @@ xfs_inodegc_queue(
                queue_delay = 0;
 
        trace_xfs_inodegc_queue(mp, __return_address);
-       mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
+       mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+                       queue_delay);
        put_cpu_ptr(gc);
 
        if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
@@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead(
 
        if (xfs_is_inodegc_enabled(mp)) {
                trace_xfs_inodegc_queue(mp, __return_address);
-               mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
+               mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
+                               0);
        }
        put_cpu_ptr(gc);
 }
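
The xfs_inodegc_stop() rework above is the general recipe for stopping
self-rearming per-cpu work while other threads may still queue into it: clear
the enabled flag first, then alternate "queue everything pending" with
flush_workqueue() until a pass queues nothing, instead of drain_workqueue(),
which forbids exactly that external requeueing. A hedged sketch of the stop
side, reusing the hypothetical my_gc/my_wq names from the earlier sketch:

static bool my_gc_enabled = true;	/* cleared before draining */

/* Kick a worker for every per-cpu list with entries; report whether any had. */
static bool
my_gc_queue_all(void)
{
	bool	queued = false;
	int	cpu;

	for_each_online_cpu(cpu) {
		struct my_gc	*gc = per_cpu_ptr(my_gc, cpu);

		if (!llist_empty(&gc->list)) {
			mod_delayed_work_on(cpu, my_wq, &gc->work, 0);
			queued = true;
		}
	}
	return queued;
}

static void
my_gc_stop(void)
{
	bool	rerun;

	WRITE_ONCE(my_gc_enabled, false);

	/* Racing producers may still add entries until they see the flag. */
	my_gc_queue_all();
	do {
		flush_workqueue(my_wq);
		rerun = my_gc_queue_all();
	} while (rerun);
}
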
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 285885c..18c8f16 100644
@@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin(
        if (eof)
                imap.br_startoff = end_fsb; /* fake hole until the end */
 
-       /* We never need to allocate blocks for zeroing a hole. */
-       if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
+       /* We never need to allocate blocks for zeroing or unsharing a hole. */
+       if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
+           imap.br_startoff > offset_fsb) {
                xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
                goto out_unlock;
        }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3269c0..aaaf5ec 100644
@@ -66,6 +66,9 @@ struct xfs_inodegc {
        /* approximate count of inodes in the list */
        unsigned int            items;
        unsigned int            shrinker_hits;
+#if defined(DEBUG) || defined(XFS_WARN)
+       unsigned int            cpu;
+#endif
 };
 
 /*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 4d2e874..7e70625 100644
@@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu(
 
        for_each_possible_cpu(cpu) {
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
+#if defined(DEBUG) || defined(XFS_WARN)
+               gc->cpu = cpu;
+#endif
                init_llist_head(&gc->list);
                gc->items = 0;
                INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);