xfs: support reserved blocks for the rt extent counter

author Christoph Hellwig <hch@lst.de>

Sun, 9 Feb 2025 05:19:06 +0000 (06:19 +0100)

committer Christoph Hellwig <hch@lst.de>

Mon, 3 Mar 2025 15:16:43 +0000 (08:16 -0700)
author Christoph Hellwig <hch@lst.de>
Sun, 9 Feb 2025 05:19:06 +0000 (06:19 +0100)
committer Christoph Hellwig <hch@lst.de>
Mon, 3 Mar 2025 15:16:43 +0000 (08:16 -0700)
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c

index 207a238..9dd893e 100644 (file)
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -350,7 +350,7 @@ retry:
          * The global incore space reservation is taken from the incore
          * counters, so leave that out of the computation.
          */
-       fsc->fdblocks -= mp->m_resblks_avail;
+       fsc->fdblocks -= mp->m_free[XC_FREE_BLOCKS].res_avail;
  
         /*
          * Delayed allocation reservations are taken out of the incore counters
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 58249f3..f055aeb 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -366,6 +366,7 @@ xfs_growfs_log(
  int
  xfs_reserve_blocks(
         struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr,
         uint64_t                request)
  {
         int64_t                 lcounter, delta;
@@ -373,6 +374,8 @@ xfs_reserve_blocks(
         int64_t                 free;
         int                     error = 0;
  
+       ASSERT(ctr < XC_FREE_NR);
+
         /*
          * With per-cpu counters, this becomes an interesting problem. we need
          * to work out if we are freeing or allocation blocks first, then we can
@@ -391,16 +394,16 @@ xfs_reserve_blocks(
          * counters directly since we shouldn't have any problems unreserving
          * space.
          */
-       if (mp->m_resblks > request) {
-               lcounter = mp->m_resblks_avail - request;
+       if (mp->m_free[ctr].res_total > request) {
+               lcounter = mp->m_free[ctr].res_avail - request;
                 if (lcounter > 0) {             /* release unused blocks */
                         fdblks_delta = lcounter;
-                       mp->m_resblks_avail -= lcounter;
+                       mp->m_free[ctr].res_avail -= lcounter;
                 }
-               mp->m_resblks = request;
+               mp->m_free[ctr].res_total = request;
                 if (fdblks_delta) {
                         spin_unlock(&mp->m_sb_lock);
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                         spin_lock(&mp->m_sb_lock);
                 }
  
@@ -419,10 +422,10 @@ xfs_reserve_blocks(
          * space to fill it because mod_fdblocks will refill an undersized
          * reserve when it can.
          */
-       free = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS) -
-               xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS);
-       delta = request - mp->m_resblks;
-       mp->m_resblks = request;
+       free = xfs_sum_freecounter_raw(mp, ctr) -
+               xfs_freecounter_unavailable(mp, ctr);
+       delta = request - mp->m_free[ctr].res_total;
+       mp->m_free[ctr].res_total = request;
         if (delta > 0 && free > 0) {
                 /*
                  * We'll either succeed in getting space from the free block
@@ -436,9 +439,9 @@ xfs_reserve_blocks(
                  */
                 fdblks_delta = min(free, delta);
                 spin_unlock(&mp->m_sb_lock);
-               error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+               error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
                 if (!error)
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                 spin_lock(&mp->m_sb_lock);
         }
  out:
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h

index 3e2f73b..9d23c36 100644 (file)
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -8,7 +8,8 @@
  
  int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
  int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
+               uint64_t request);
  int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
  
  int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index 0418aad..d250f7f 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
                 error = mnt_want_write_file(filp);
                 if (error)
                         return error;
-               error = xfs_reserve_blocks(mp, fsop.resblks);
+               error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
                 mnt_drop_write_file(filp);
                 if (error)
                         return error;
         }
  
         spin_lock(&mp->m_sb_lock);
-       fsop.resblks = mp->m_resblks;
-       fsop.resblks_avail = mp->m_resblks_avail;
+       fsop.resblks = mp->m_free[XC_FREE_BLOCKS].res_total;
+       fsop.resblks_avail = mp->m_free[XC_FREE_BLOCKS].res_avail;
         spin_unlock(&mp->m_sb_lock);
  
         if (copy_to_user(arg, &fsop, sizeof(fsop)))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index f444b41..01f3877 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -461,11 +461,21 @@ xfs_mount_reset_sbqflags(
         return xfs_sync_sb(mp, false);
  }
  
+static const char *const xfs_free_pool_name[] = {
+       [XC_FREE_BLOCKS]        = "free blocks",
+       [XC_FREE_RTEXTENTS]     = "free rt extents",
+};
+
  uint64_t
-xfs_default_resblks(xfs_mount_t *mp)
+xfs_default_resblks(
+       struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr)
  {
         uint64_t resblks;
  
+       if (ctr == XC_FREE_RTEXTENTS)
+               return 0;
+
         /*
          * We default to 5% or 8192 fsbs of space reserved, whichever is
          * smaller.  This is intended to cover concurrent allocation
@@ -678,6 +688,7 @@ xfs_mountfs(
         uint                    quotamount = 0;
         uint                    quotaflags = 0;
         int                     error = 0;
+       int                     i;
  
         xfs_sb_mount_common(mp, sbp);
  
@@ -1046,17 +1057,21 @@ xfs_mountfs(
          * privileged transactions. This is needed so that transaction
          * space required for critical operations can dip into this pool
          * when at ENOSPC. This is needed for operations like create with
-        * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
-        * are not allowed to use this reserved space.
+        * attr, unwritten extent conversion at ENOSPC, garbage collection,
+        * etc. Data allocations are not allowed to use this reserved space.
          *
          * This may drive us straight to ENOSPC on mount, but that implies
          * we were already there on the last unmount. Warn if this occurs.
          */
         if (!xfs_is_readonly(mp)) {
-               error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
-               if (error)
-                       xfs_warn(mp,
-       "Unable to allocate reserve blocks. Continuing without reserve pool.");
+               for (i = 0; i < XC_FREE_NR; i++) {
+                       error = xfs_reserve_blocks(mp, i,
+                                       xfs_default_resblks(mp, i));
+                       if (error)
+                               xfs_warn(mp,
+"Unable to allocate reserve blocks. Continuing without reserve pool for %s.",
+                                       xfs_free_pool_name[i]);
+               }
  
                 /* Reserve AG blocks for future btree expansion. */
                 error = xfs_fs_reserve_ag_blocks(mp);
@@ -1173,7 +1188,7 @@ xfs_unmountfs(
          * we only every apply deltas to the superblock and hence the incore
          * value does not matter....
          */
-       error = xfs_reserve_blocks(mp, 0);
+       error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
         if (error)
                 xfs_warn(mp, "Unable to free reserved block pool. "
                                 "Freespace may not be correct on next mount.");
@@ -1244,26 +1259,26 @@ xfs_add_freecounter(
         enum xfs_free_counter   ctr,
         uint64_t                delta)
  {
-       bool                    has_resv_pool = (ctr == XC_FREE_BLOCKS);
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
         uint64_t                res_used;
  
         /*
          * If the reserve pool is depleted, put blocks back into it first.
          * Most of the time the pool is full.
          */
-       if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+       if (likely(counter->res_avail == counter->res_total)) {
+               percpu_counter_add(&counter->count, delta);
                 return;
         }
  
         spin_lock(&mp->m_sb_lock);
-       res_used = mp->m_resblks - mp->m_resblks_avail;
+       res_used = counter->res_total - counter->res_avail;
         if (res_used > delta) {
-               mp->m_resblks_avail += delta;
+               counter->res_avail += delta;
         } else {
                 delta -= res_used;
-               mp->m_resblks_avail = mp->m_resblks;
-               percpu_counter_add(&mp->m_free[ctr].count, delta);
+               counter->res_avail = counter->res_total;
+               percpu_counter_add(&counter->count, delta);
         }
         spin_unlock(&mp->m_sb_lock);
  }
@@ -1277,15 +1292,10 @@ xfs_dec_freecounter(
         uint64_t                delta,
         bool                    rsvd)
  {
-       struct percpu_counter   *counter = &mp->m_free[ctr].count;
-       uint64_t                set_aside = 0;
+       struct xfs_freecounter  *counter = &mp->m_free[ctr];
         s32                     batch;
-       bool                    has_resv_pool;
  
         ASSERT(ctr < XC_FREE_NR);
-       has_resv_pool = (ctr == XC_FREE_BLOCKS);
-       if (rsvd)
-               ASSERT(has_resv_pool);
  
         /*
          * Taking blocks away, need to be more accurate the closer we
@@ -1295,7 +1305,7 @@ xfs_dec_freecounter(
          * then make everything serialise as we are real close to
          * ENOSPC.
          */
-       if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
+       if (__percpu_counter_compare(&counter->count, 2 * XFS_FDBLOCKS_BATCH,
                                      XFS_FDBLOCKS_BATCH) < 0)
                 batch = 1;
         else
@@ -1312,25 +1322,25 @@ xfs_dec_freecounter(
          * problems (i.e. transaction abort, pagecache discards, etc.) than
          * slightly premature -ENOSPC.
          */
-       if (has_resv_pool)
-               set_aside = xfs_freecounter_unavailable(mp, ctr);
-       percpu_counter_add_batch(counter, -((int64_t)delta), batch);
-       if (__percpu_counter_compare(counter, set_aside,
+       percpu_counter_add_batch(&counter->count, -((int64_t)delta), batch);
+       if (__percpu_counter_compare(&counter->count,
+                       xfs_freecounter_unavailable(mp, ctr),
                         XFS_FDBLOCKS_BATCH) < 0) {
                 /*
                  * Lock up the sb for dipping into reserves before releasing the
                  * space that took us to ENOSPC.
                  */
                 spin_lock(&mp->m_sb_lock);
-               percpu_counter_add(counter, delta);
+               percpu_counter_add(&counter->count, delta);
                 if (!rsvd)
                         goto fdblocks_enospc;
-               if (delta > mp->m_resblks_avail) {
-                       xfs_warn_once(mp,
+               if (delta > counter->res_avail) {
+                       if (ctr == XC_FREE_BLOCKS)
+                               xfs_warn_once(mp,
  "Reserve blocks depleted! Consider increasing reserve pool size.");
                         goto fdblocks_enospc;
                 }
-               mp->m_resblks_avail -= delta;
+               counter->res_avail -= delta;
                 spin_unlock(&mp->m_sb_lock);
         }
  
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index 7f3265d..579eaf0 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -108,6 +108,15 @@ struct xfs_groups {
  struct xfs_freecounter {
         /* free blocks for general use: */
         struct percpu_counter   count;
+
+       /* total reserved blocks: */
+       uint64_t                res_total;
+
+       /* available reserved blocks: */
+       uint64_t                res_avail;
+
+       /* reserved blks @ remount,ro: */
+       uint64_t                res_saved;
  };
  
  /*
@@ -250,9 +259,6 @@ typedef struct xfs_mount {
         atomic64_t              m_allocbt_blks;
  
         struct xfs_groups       m_groups[XG_TYPE_MAX];
-       uint64_t                m_resblks;      /* total reserved blocks */
-       uint64_t                m_resblks_avail;/* available reserved blocks */
-       uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
         struct delayed_work     m_reclaim_work; /* background inode reclaim */
         struct dentry           *m_debugfs;     /* debugfs parent */
         struct xfs_kobj         m_kobj;
@@ -638,7 +644,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
  }
  
  extern void    xfs_uuid_table_free(void);
-extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
+uint64_t       xfs_default_resblks(struct xfs_mount *mp,
+                       enum xfs_free_counter ctr);
  extern int     xfs_mountfs(xfs_mount_t *mp);
  extern void    xfs_unmountfs(xfs_mount_t *);
  
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index b08d28a..366837e 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -924,24 +924,32 @@ xfs_fs_statfs(
  }
  
  STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+       struct xfs_mount        *mp)
  {
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, 0);
+       enum xfs_free_counter   i;
+
+       for (i = 0; i < XC_FREE_NR; i++) {
+               mp->m_free[i].res_saved = mp->m_free[i].res_total;
+               xfs_reserve_blocks(mp, i, 0);
+       }
  }
  
  STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+       struct xfs_mount        *mp)
  {
-       uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
+       uint64_t                resblks;
+       enum xfs_free_counter   i;
  
-       xfs_reserve_blocks(mp, resblks);
+       for (i = 0; i < XC_FREE_NR; i++) {
+               if (mp->m_free[i].res_saved) {
+                       resblks = mp->m_free[i].res_saved;
+                       mp->m_free[i].res_saved = 0;
+               } else
+                       resblks = xfs_default_resblks(mp, i);
+               xfs_reserve_blocks(mp, i, resblks);
+       }
  }
  
  /*
author	Christoph Hellwig <hch@lst.de>
	Sun, 9 Feb 2025 05:19:06 +0000 (06:19 +0100)
committer	Christoph Hellwig <hch@lst.de>
	Mon, 3 Mar 2025 15:16:43 +0000 (08:16 -0700)
fs/xfs/scrub/fscounters.c		patch \| blob \| history
fs/xfs/xfs_fsops.c		patch \| blob \| history
fs/xfs/xfs_fsops.h		patch \| blob \| history
fs/xfs/xfs_ioctl.c		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_mount.h		patch \| blob \| history
fs/xfs/xfs_super.c		patch \| blob \| history