xfs: support reserved blocks for the rt extent counter
author Christoph Hellwig <hch@lst.de>
Sun, 9 Feb 2025 05:19:06 +0000 (06:19 +0100)
committer Christoph Hellwig <hch@lst.de>
Mon, 3 Mar 2025 15:16:43 +0000 (08:16 -0700)
The zoned space allocator will need reserved RT extents for garbage
collection and zeroing of partial blocks.  Move the resblks related
fields into the freecounter array so that they can be used for all
counters.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
fs/xfs/scrub/fscounters.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c

index 207a238..9dd893e 100644 (file)
@@ -350,7 +350,7 @@ retry:
         * The global incore space reservation is taken from the incore
         * counters, so leave that out of the computation.
         */
-       fsc->fdblocks -= mp->m_resblks_avail;
+       fsc->fdblocks -= mp->m_free[XC_FREE_BLOCKS].res_avail;
 
        /*
         * Delayed allocation reservations are taken out of the incore counters
index 58249f3..f055aeb 100644 (file)
@@ -366,6 +366,7 @@ xfs_growfs_log(
 int
 xfs_reserve_blocks(
        struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr,
        uint64_t                request)
 {
        int64_t                 lcounter, delta;
@@ -373,6 +374,8 @@ xfs_reserve_blocks(
        int64_t                 free;
        int                     error = 0;
 
+       ASSERT(ctr < XC_FREE_NR);
+
        /*
         * With per-cpu counters, this becomes an interesting problem. we need
         * to work out if we are freeing or allocation blocks first, then we can
@@ -391,16 +394,16 @@ xfs_reserve_blocks(
         * counters directly since we shouldn't have any problems unreserving
         * space.
         */
-       if (mp->m_resblks > request) {
-               lcounter = mp->m_resblks_avail - request;
+       if (mp->m_free[ctr].res_total > request) {
+               lcounter = mp->m_free[ctr].res_avail - request;
                if (lcounter > 0) {             /* release unused blocks */
                        fdblks_delta = lcounter;
-                       mp->m_resblks_avail -= lcounter;
+                       mp->m_free[ctr].res_avail -= lcounter;
                }
-               mp->m_resblks = request;
+               mp->m_free[ctr].res_total = request;
                if (fdblks_delta) {
                        spin_unlock(&mp->m_sb_lock);
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                        spin_lock(&mp->m_sb_lock);
                }
 
@@ -419,10 +422,10 @@ xfs_reserve_blocks(
         * space to fill it because mod_fdblocks will refill an undersized
         * reserve when it can.
         */
-       free = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS) -
-               xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS);
-       delta = request - mp->m_resblks;
-       mp->m_resblks = request;
+       free = xfs_sum_freecounter_raw(mp, ctr) -
+               xfs_freecounter_unavailable(mp, ctr);
+       delta = request - mp->m_free[ctr].res_total;
+       mp->m_free[ctr].res_total = request;
        if (delta > 0 && free > 0) {
                /*
                 * We'll either succeed in getting space from the free block
@@ -436,9 +439,9 @@ xfs_reserve_blocks(
                 */
                fdblks_delta = min(free, delta);
                spin_unlock(&mp->m_sb_lock);
-               error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+               error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
                if (!error)
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                spin_lock(&mp->m_sb_lock);
        }
 out:
index 3e2f73b..9d23c36 100644 (file)
@@ -8,7 +8,8 @@
 
 int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
 int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
+               uint64_t request);
 int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
 
 int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
index 0418aad..d250f7f 100644 (file)
@@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
                error = mnt_want_write_file(filp);
                if (error)
                        return error;
-               error = xfs_reserve_blocks(mp, fsop.resblks);
+               error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
                mnt_drop_write_file(filp);
                if (error)
                        return error;
        }
 
        spin_lock(&mp->m_sb_lock);
-       fsop.resblks = mp->m_resblks;
-       fsop.resblks_avail = mp->m_resblks_avail;
+       fsop.resblks = mp->m_free[XC_FREE_BLOCKS].res_total;
+       fsop.resblks_avail = mp->m_free[XC_FREE_BLOCKS].res_avail;
        spin_unlock(&mp->m_sb_lock);
 
        if (copy_to_user(arg, &fsop, sizeof(fsop)))
index f444b41..01f3877 100644 (file)
@@ -461,11 +461,21 @@ xfs_mount_reset_sbqflags(
        return xfs_sync_sb(mp, false);
 }
 
+static const char *const xfs_free_pool_name[] = {
+       [XC_FREE_BLOCKS]        = "free blocks",
+       [XC_FREE_RTEXTENTS]     = "free rt extents",
+};
+
 uint64_t
-xfs_default_resblks(xfs_mount_t *mp)
+xfs_default_resblks(
+       struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr)
 {
        uint64_t resblks;
 
+       if (ctr == XC_FREE_RTEXTENTS)
+               return 0;
+
        /*
         * We default to 5% or 8192 fsbs of space reserved, whichever is
         * smaller.  This is intended to cover concurrent allocation
@@ -678,6 +688,7 @@ xfs_mountfs(
        uint                    quotamount = 0;
        uint                    quotaflags = 0;
        int                     error = 0;
+       int                     i;
 
        xfs_sb_mount_common(mp, sbp);
 
@@ -1046,17 +1057,21 @@ xfs_mountfs(
         * privileged transactions. This is needed so that transaction
         * space required for critical operations can dip into this pool
         * when at ENOSPC. This is needed for operations like create with
-        * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
-        * are not allowed to use this reserved space.
+        * attr, unwritten extent conversion at ENOSPC, garbage collection
+        * etc. Data allocations are not allowed to use this reserved space.
         *
         * This may drive us straight to ENOSPC on mount, but that implies
         * we were already there on the last unmount. Warn if this occurs.
         */
        if (!xfs_is_readonly(mp)) {
-               error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
-               if (error)
-                       xfs_warn(mp,
-       "Unable to allocate reserve blocks. Continuing without reserve pool.");
+               for (i = 0; i < XC_FREE_NR; i++) {
+                       error = xfs_reserve_blocks(mp, i,
+                                       xfs_default_resblks(mp, i));
+                       if (error)
+                               xfs_warn(mp,
+"Unable to allocate reserve blocks. Continuing without reserve pool for %s.",
+                                       xfs_free_pool_name[i]);
+               }
 
                /* Reserve AG blocks for future btree expansion. */
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1173,7 +1188,7 @@ xfs_unmountfs(
         * we only every apply deltas to the superblock and hence the incore
         * value does not matter....
         */
-       error = xfs_reserve_blocks(mp, 0);
+       error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
        if (error)
                xfs_warn(mp, "Unable to free reserved block pool. "
                                "Freespace may not be correct on next mount.");
@@ -1244,26 +1259,26 @@ xfs_add_freecounter(
        enum xfs_free_counter   ctr,
        uint64_t                delta)
 {
-       bool                    has_resv_pool = (ctr == XC_FREE_BLOCKS);
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
        uint64_t                res_used;
 
        /*
         * If the reserve pool is depleted, put blocks back into it first.
         * Most of the time the pool is full.
         */
-       if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+       if (likely(counter->res_avail == counter->res_total)) {
+               percpu_counter_add(&counter->count, delta);
                return;
        }
 
        spin_lock(&mp->m_sb_lock);
-       res_used = mp->m_resblks - mp->m_resblks_avail;
+       res_used = counter->res_total - counter->res_avail;
        if (res_used > delta) {
-               mp->m_resblks_avail += delta;
+               counter->res_avail += delta;
        } else {
                delta -= res_used;
-               mp->m_resblks_avail = mp->m_resblks;
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+               counter->res_avail = counter->res_total;
+               percpu_counter_add(&counter->count, delta);
        }
        spin_unlock(&mp->m_sb_lock);
 }
@@ -1277,15 +1292,10 @@ xfs_dec_freecounter(
        uint64_t                delta,
        bool                    rsvd)
 {
-       struct percpu_counter   *counter = &mp->m_free[ctr].count;
-       uint64_t                set_aside = 0;
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
        s32                     batch;
-       bool                    has_resv_pool;
 
        ASSERT(ctr < XC_FREE_NR);
-       has_resv_pool = (ctr == XC_FREE_BLOCKS);
-       if (rsvd)
-               ASSERT(has_resv_pool);
 
        /*
         * Taking blocks away, need to be more accurate the closer we
@@ -1295,7 +1305,7 @@ xfs_dec_freecounter(
         * then make everything serialise as we are real close to
         * ENOSPC.
         */
-       if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
+       if (__percpu_counter_compare(&counter->count, 2 * XFS_FDBLOCKS_BATCH,
                                     XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
@@ -1312,25 +1322,25 @@ xfs_dec_freecounter(
         * problems (i.e. transaction abort, pagecache discards, etc.) than
         * slightly premature -ENOSPC.
         */
-       if (has_resv_pool)
-               set_aside = xfs_freecounter_unavailable(mp, ctr);
-       percpu_counter_add_batch(counter, -((int64_t)delta), batch);
-       if (__percpu_counter_compare(counter, set_aside,
+       percpu_counter_add_batch(&counter->count, -((int64_t)delta), batch);
+       if (__percpu_counter_compare(&counter->count,
+                       xfs_freecounter_unavailable(mp, ctr),
                        XFS_FDBLOCKS_BATCH) < 0) {
                /*
                 * Lock up the sb for dipping into reserves before releasing the
                 * space that took us to ENOSPC.
                 */
                spin_lock(&mp->m_sb_lock);
-               percpu_counter_add(counter, delta);
+               percpu_counter_add(&counter->count, delta);
                if (!rsvd)
                        goto fdblocks_enospc;
-               if (delta > mp->m_resblks_avail) {
-                       xfs_warn_once(mp,
+               if (delta > counter->res_avail) {
+                       if (ctr == XC_FREE_BLOCKS)
+                               xfs_warn_once(mp,
 "Reserve blocks depleted! Consider increasing reserve pool size.");
                        goto fdblocks_enospc;
                }
-               mp->m_resblks_avail -= delta;
+               counter->res_avail -= delta;
                spin_unlock(&mp->m_sb_lock);
        }
 
index 7f3265d..579eaf0 100644 (file)
@@ -108,6 +108,15 @@ struct xfs_groups {
 struct xfs_freecounter {
        /* free blocks for general use: */
        struct percpu_counter   count;
+
+       /* total reserved blocks: */
+       uint64_t                res_total;
+
+       /* available reserved blocks: */
+       uint64_t                res_avail;
+
+       /* reserved blks @ remount,ro: */
+       uint64_t                res_saved;
 };
 
 /*
@@ -250,9 +259,6 @@ typedef struct xfs_mount {
        atomic64_t              m_allocbt_blks;
 
        struct xfs_groups       m_groups[XG_TYPE_MAX];
-       uint64_t                m_resblks;      /* total reserved blocks */
-       uint64_t                m_resblks_avail;/* available reserved blocks */
-       uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
        struct dentry           *m_debugfs;     /* debugfs parent */
        struct xfs_kobj         m_kobj;
@@ -638,7 +644,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 }
 
 extern void    xfs_uuid_table_free(void);
-extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
+uint64_t       xfs_default_resblks(struct xfs_mount *mp,
+                       enum xfs_free_counter ctr);
 extern int     xfs_mountfs(xfs_mount_t *mp);
 extern void    xfs_unmountfs(xfs_mount_t *);
 
index b08d28a..366837e 100644 (file)
@@ -924,24 +924,32 @@ xfs_fs_statfs(
 }
 
 STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+       struct xfs_mount        *mp)
 {
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, 0);
+       enum xfs_free_counter   i;
+
+       for (i = 0; i < XC_FREE_NR; i++) {
+               mp->m_free[i].res_saved = mp->m_free[i].res_total;
+               xfs_reserve_blocks(mp, i, 0);
+       }
 }
 
 STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+       struct xfs_mount        *mp)
 {
-       uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
+       uint64_t                resblks;
+       enum xfs_free_counter   i;
 
-       xfs_reserve_blocks(mp, resblks);
+       for (i = 0; i < XC_FREE_NR; i++) {
+               if (mp->m_free[i].res_saved) {
+                       resblks = mp->m_free[i].res_saved;
+                       mp->m_free[i].res_saved = 0;
+               } else
+                       resblks = xfs_default_resblks(mp, i);
+               xfs_reserve_blocks(mp, i, resblks);
+       }
 }
 
 /*