bcachefs: Change how replicated data is accounted
author		Kent Overstreet <kent.overstreet@gmail.com>
		Tue, 24 Jul 2018 20:42:49 +0000 (16:42 -0400)
committer	Kent Overstreet <kent.overstreet@linux.dev>
		Sun, 22 Oct 2023 21:08:08 +0000 (17:08 -0400)
Due to compression, the different replicas of a replicated extent don't
necessarily take up the same amount of space on disk - so replicated
data sector counts shouldn't be stored divided by the number of
replicas.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_update_interior.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets_types.h
fs/bcachefs/super.c
fs/bcachefs/sysfs.c

diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index a37b5ed..b60eb3d 100644
@@ -185,7 +185,7 @@ found:
        replicas = bch2_extent_nr_dirty_ptrs(k);
        if (replicas)
                stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -=
-                       c->opts.btree_node_size;
+                       c->opts.btree_node_size * replicas;
 
        /*
         * We're dropping @k from the btree, but it's still live until the
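
Worked example (hypothetical numbers): with c->opts.btree_node_size ==
512 sectors and replicas == 3, dropping the node now subtracts 1536
sectors from data[BCH_DATA_BTREE] - the full footprint of all three
copies - where it previously subtracted 512 and relied on
__fs_usage_sum() to multiply the stored count back out.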
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 56b197b..ab61abd 100644
@@ -248,29 +248,28 @@ bch2_fs_usage_read(struct bch_fs *c)
 struct fs_usage_sum {
        u64     hidden;
        u64     data;
+       u64     cached;
        u64     reserved;
 };
 
 static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats)
 {
        struct fs_usage_sum sum = { 0 };
-       unsigned i, j;
+       unsigned i;
 
        /*
         * For superblock and journal we count bucket usage, not sector usage,
         * because any internal fragmentation should _not_ be counted as
         * free space:
         */
-       for (j = 1; j < BCH_DATA_BTREE; j++)
-               sum.hidden += stats.buckets[j];
+       sum.hidden += stats.buckets[BCH_DATA_SB];
+       sum.hidden += stats.buckets[BCH_DATA_JOURNAL];
 
        for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) {
-               for (j = BCH_DATA_BTREE;
-                    j < ARRAY_SIZE(stats.replicas[i].data);
-                    j++)
-                       sum.data += stats.replicas[i].data[j] * (i + 1);
-
-               sum.reserved += stats.replicas[i].persistent_reserved * (i + 1);
+               sum.data        += stats.replicas[i].data[BCH_DATA_BTREE];
+               sum.data        += stats.replicas[i].data[BCH_DATA_USER];
+               sum.cached      += stats.replicas[i].data[BCH_DATA_CACHED];
+               sum.reserved    += stats.replicas[i].persistent_reserved;
        }
 
        sum.reserved += stats.online_reserved;
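
A consumer of the new sum might derive used space along these lines (a
minimal sketch - fs_usage_used() is a hypothetical helper, not part of
this patch; cached data is reclaimable, so only hidden, data and
reserved count against free space):

	static inline u64 fs_usage_used(struct fs_usage_sum sum)
	{
		/*
		 * cached sectors can be dropped at any time, so they
		 * don't reduce the space available for new writes:
		 */
		return sum.hidden + sum.data + sum.reserved;
	}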
@@ -379,17 +378,13 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 {
        struct bch_dev_usage *dev_usage;
 
-       if (c)
-               percpu_rwsem_assert_held(&c->usage_lock);
+       percpu_rwsem_assert_held(&c->usage_lock);
 
-       if (old.data_type && new.data_type &&
-           old.data_type != new.data_type) {
-               BUG_ON(!c);
-               bch2_fs_inconsistent(c,
-                       "different types of data in same bucket: %s, %s",
-                       bch2_data_types[old.data_type],
-                       bch2_data_types[new.data_type]);
-       }
+       bch2_fs_inconsistent_on(old.data_type && new.data_type &&
+                               old.data_type != new.data_type, c,
+               "different types of data in same bucket: %s, %s",
+               bch2_data_types[old.data_type],
+               bch2_data_types[new.data_type]);
 
        stats->buckets[bucket_type(old)] -= ca->mi.bucket_size;
        stats->buckets[bucket_type(new)] += ca->mi.bucket_size;
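
bch2_fs_inconsistent_on() folds the condition into the call the way
WARN_ON() does. A sketch of the likely wrapper shape (an assumption -
the real macro lives in bcachefs's error-handling headers and may
differ):

	#define bch2_fs_inconsistent_on(cond, c, ...)			\
	({								\
		int _ret = !!(cond);					\
									\
		if (_ret)						\
			bch2_fs_inconsistent(c, __VA_ARGS__);		\
		_ret;							\
	})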
@@ -448,6 +443,12 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
                new.gen++;
        }));
 
+       /*
+        * This isn't actually correct yet, since fs usage is still
+        * uncompressed sectors:
+        */
+       stats->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors;
+
        if (!old->owned_by_allocator && old->cached_sectors)
                trace_invalidate(ca, bucket_to_sector(ca, b),
                                 old->cached_sectors);
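
Cached sectors always live in replicas[0] - the bch2_mark_pointer()
hunk below files cached pointers under index 0 and BCH_DATA_CACHED
unconditionally - so subtracting old->cached_sectors from that same
slot keeps invalidation symmetric with the original mark.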
@@ -501,26 +502,34 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
                    gc_will_visit(c, pos))
                        return;
-       }
 
-       preempt_disable();
-       stats = this_cpu_ptr(c->usage_percpu);
+               preempt_disable();
+               stats = this_cpu_ptr(c->usage_percpu);
 
-       g = bucket(ca, b);
-       old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
-               new.data_type = type;
-               checked_add(new.dirty_sectors, sectors);
-       }));
+               g = bucket(ca, b);
+               old = bucket_data_cmpxchg(c, ca, stats, g, new, ({
+                       new.data_type = type;
+                       checked_add(new.dirty_sectors, sectors);
+               }));
 
-       stats->replicas[0].data[type] += sectors;
-       preempt_enable();
+               stats->replicas[0].data[type] += sectors;
+               preempt_enable();
+       } else {
+               rcu_read_lock();
+
+               g = bucket(ca, b);
+               old = bucket_cmpxchg(g, new, ({
+                       new.data_type = type;
+                       checked_add(new.dirty_sectors, sectors);
+               }));
+
+               rcu_read_unlock();
+       }
 
        BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
               bucket_became_unavailable(c, old, new));
 }
 
-/* Reverting this until the copygc + compression issue is fixed: */
-
 static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
 {
        if (!sectors)
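
Both branches of bch2_mark_metadata_bucket() above rely on
checked_add() to trap sector-count overflow; a plausible definition (an
assumption - at the time this was an open-coded macro in buckets.c, and
the real one may differ):

	#define checked_add(a, b)					\
	do {								\
		unsigned _res = (unsigned) (a) + (b);			\
		BUG_ON((typeof(a)) _res != _res);			\
		(a) = _res;						\
	} while (0)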
@@ -540,12 +549,14 @@ static void bch2_mark_pointer(struct bch_fs *c,
                              const struct bch_extent_ptr *ptr,
                              struct bch_extent_crc_unpacked crc,
                              s64 sectors, enum bch_data_type data_type,
-                             struct bch_fs_usage *stats,
+                             unsigned replicas,
+                             struct bch_fs_usage *fs_usage,
                              u64 journal_seq, unsigned flags)
 {
        struct bucket_mark old, new;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
        struct bucket *g = PTR_BUCKET(ca, ptr);
+       s64 uncompressed_sectors = sectors;
        u64 v;
 
        if (crc.compression_type) {
@@ -563,6 +574,20 @@ static void bch2_mark_pointer(struct bch_fs *c,
                          +__disk_sectors(crc, new_sectors);
        }
 
+       /*
+        * fs level usage (which determines free space) is in uncompressed
+        * sectors, until copygc + compression is sorted out:
+        *
+        * note also that we always update @fs_usage, even when we otherwise
+        * wouldn't do anything because gc is running - this is because the
+        * caller still needs to account w.r.t. its disk reservation. It is the
+        * caller's responsibility to not apply @fs_usage if gc is in progress.
+        */
+       fs_usage->replicas
+               [!ptr->cached && replicas ? replicas - 1 : 0].data
+               [!ptr->cached ? data_type : BCH_DATA_CACHED] +=
+                       uncompressed_sectors;
+
        if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
                if (journal_seq)
                        bucket_cmpxchg(g, new, ({
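
The replicas[]/data[] index expression above is dense; unpacked into a
hypothetical helper (same logic, names invented here for clarity) it
reads:

	static void account_ptr(struct bch_fs_usage *u, bool cached,
				unsigned replicas,
				enum bch_data_type type, s64 sectors)
	{
		/*
		 * cached pointers always land in slot 0, under
		 * BCH_DATA_CACHED; dirty pointers land in the slot for
		 * their replication level, under their real data type:
		 */
		unsigned i		= !cached && replicas ? replicas - 1 : 0;
		enum bch_data_type t	= !cached ? type : BCH_DATA_CACHED;

		u->replicas[i].data[t] += sectors;
	}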
@@ -614,7 +639,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
                              old.v.counter,
                              new.v.counter)) != old.v.counter);
 
-       bch2_dev_usage_update(c, ca, stats, old, new);
+       bch2_dev_usage_update(c, ca, fs_usage, old, new);
 
        BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
               bucket_became_unavailable(c, old, new));
@@ -677,15 +702,13 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
 
                extent_for_each_ptr_crc(e, ptr, crc)
                        bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
-                                         stats, journal_seq, flags);
-
-               if (replicas)
-                       stats->replicas[replicas - 1].data[data_type] += sectors;
+                                         replicas, stats, journal_seq, flags);
                break;
        }
        case BCH_RESERVATION:
                if (replicas)
-                       stats->replicas[replicas - 1].persistent_reserved += sectors;
+                       stats->replicas[replicas - 1].persistent_reserved +=
+                               sectors * replicas;
                break;
        }
        percpu_up_read(&c->usage_lock);
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 9968570..49f3ab9 100644
@@ -62,7 +62,6 @@ struct bch_dev_usage {
 
 struct bch_fs_usage {
        /* all fields are in units of 512 byte sectors: */
-       /* _uncompressed_ sectors: */
        u64                     online_reserved;
        u64                     available_cache;
 
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index fe95b8b..e44bc95 100644
@@ -985,14 +985,6 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
        ca->disk_sb = *sb;
        memset(sb, 0, sizeof(*sb));
 
-       if (ca->fs)
-               mutex_lock(&ca->fs->sb_lock);
-
-       bch2_mark_dev_superblock(ca->fs, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
-
-       if (ca->fs)
-               mutex_unlock(&ca->fs->sb_lock);
-
        percpu_ref_reinit(&ca->io_ref);
 
        return 0;
@@ -1018,6 +1010,11 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
        if (ret)
                return ret;
 
+       mutex_lock(&c->sb_lock);
+       bch2_mark_dev_superblock(ca->fs, ca,
+                       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+       mutex_unlock(&c->sb_lock);
+
        bch2_dev_sysfs_online(c, ca);
 
        if (c->sb.nr_devices == 1)
@@ -1295,6 +1292,24 @@ err:
        return ret;
 }
 
+static void dev_usage_clear(struct bch_dev *ca)
+{
+       struct bucket_array *buckets;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct bch_dev_usage *p =
+                       per_cpu_ptr(ca->usage_percpu, cpu);
+               memset(p, 0, sizeof(*p));
+       }
+
+       down_read(&ca->bucket_lock);
+       buckets = bucket_array(ca);
+
+       memset(buckets->b, 0, sizeof(buckets->b[0]) * buckets->nbuckets);
+       up_read(&ca->bucket_lock);
+}
+
 /* Add new device to running filesystem: */
 int bch2_dev_add(struct bch_fs *c, const char *path)
 {
@@ -1333,11 +1348,28 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
                return ret;
        }
 
+       /*
+        * We want to allocate journal on the new device before adding the new
+        * device to the filesystem because allocating after we attach requires
+        * spinning up the allocator thread, and the allocator thread requires
+        * doing btree writes, which if the existing devices are RO isn't going
+        * to work.
+        *
+        * So we have to mark where the superblocks are, but marking allocated
+        * data normally updates the filesystem usage too, so we have to mark,
+        * allocate the journal, reset all the marks, then remark after we
+        * attach...
+        */
+       bch2_mark_dev_superblock(ca->fs, ca,
+                       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+
        err = "journal alloc failed";
        ret = bch2_dev_journal_alloc(ca);
        if (ret)
                goto err;
 
+       dev_usage_clear(ca);
+
        mutex_lock(&c->state_lock);
        mutex_lock(&c->sb_lock);
 
@@ -1388,6 +1420,9 @@ have_slot:
        ca->disk_sb.sb->dev_idx = dev_idx;
        bch2_dev_attach(c, ca, dev_idx);
 
+       bch2_mark_dev_superblock(c, ca,
+                       BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
+
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 
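The resulting call order in bch2_dev_add(), condensed (a sketch of the
sequence, not code from the patch):

	bch2_mark_dev_superblock(ca->fs, ca, flags);	/* so the allocator sees sb buckets */
	bch2_dev_journal_alloc(ca);			/* needs working allocation info */
	dev_usage_clear(ca);				/* throw away the provisional marks */
	/* ... attach ca to c ... */
	bch2_mark_dev_superblock(c, ca, flags);		/* remark against the real fs usage */

where flags is BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE in both calls.
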
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 4ce7168..582e281 100644
@@ -781,7 +781,7 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
                "    meta:               %llu\n"
                "    user:               %llu\n"
                "    cached:             %llu\n"
-               "    available:          %llu\n"
+               "    available:          %lli\n"
                "sectors:\n"
                "    sb:                 %llu\n"
                "    journal:            %llu\n"
@@ -802,7 +802,7 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
                stats.buckets[BCH_DATA_BTREE],
                stats.buckets[BCH_DATA_USER],
                stats.buckets[BCH_DATA_CACHED],
-               __dev_buckets_available(ca, stats),
+               ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
                stats.sectors[BCH_DATA_SB],
                stats.sectors[BCH_DATA_JOURNAL],
                stats.sectors[BCH_DATA_BTREE],
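
Printing the raw subtraction signed makes sense if it can transiently
dip below zero - e.g. after dev_usage_clear() and before the superblock
is remarked. If __dev_buckets_available() clamps at zero (an assumption
here), the clamped value would hide exactly that state; the open-coded
form shows it:

	s64 avail = (s64) (ca->mi.nbuckets - ca->mi.first_bucket)
		  - (s64) stats.buckets_unavailable;	/* may go negative */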