bcachefs: bch_sb_field_downgrade
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 29 Dec 2023 20:25:07 +0000 (15:25 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Jan 2024 16:47:07 +0000 (11:47 -0500)
Add a new superblock section that contains a list of
  { minor version, recovery passes, errors_to_fix }

that is - a list of recovery passes that must be run when downgrading
past a given version, and a list of errors to silently fix.

The upcoming disk accounting rewrite is not going to be fully
compatible: we're going to have to regenerate accounting both when
upgrading to the new version and when downgrading from the new
version, since the new method of doing disk space accounting is a
completely different architecture based on deltas, and synchronizing
the two for every journal entry write to maintain compatibility is going to
be too expensive and impractical.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/Makefile
fs/bcachefs/bcachefs_format.h
fs/bcachefs/errcode.h
fs/bcachefs/recovery.c
fs/bcachefs/sb-clean.c
fs/bcachefs/sb-downgrade.c [new file with mode: 0644]
fs/bcachefs/sb-downgrade.h [new file with mode: 0644]
fs/bcachefs/sb-errors.c
fs/bcachefs/super-io.c
fs/bcachefs/super-io.h

index ed550a4..b812684 100644 (file)
@@ -71,6 +71,7 @@ bcachefs-y            :=      \
        reflink.o               \
        replicas.o              \
        sb-clean.o              \
+       sb-downgrade.o          \
        sb-errors.o             \
        sb-members.o            \
        siphash.o               \
index bd5af51..fe78e87 100644 (file)
@@ -1220,7 +1220,8 @@ struct bch_sb_field {
        x(counters,                     10)     \
        x(members_v2,                   11)     \
        x(errors,                       12)     \
-       x(ext,                          13)
+       x(ext,                          13)     \
+       x(downgrade,                    14)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1638,6 +1639,18 @@ struct bch_sb_field_ext {
        __le64                  errors_silent[8];
 };
 
+struct bch_sb_field_downgrade_entry {
+       __le16                  version;
+       __le64                  recovery_passes[2];
+       __le16                  nr_errors;
+       __le16                  errors[] __counted_by(nr_errors);
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {
+       struct bch_sb_field     field;
+       struct bch_sb_field_downgrade_entry entries[];
+};
+
 /* Superblock: */
 
 /*
@@ -1651,6 +1664,11 @@ struct bch_sb_field_ext {
 
 #define RECOVERY_PASS_ALL_FSCK         (1ULL << 63)
 
+/*
+ * field 1:            version name
+ * field 2:            BCH_VERSION(major, minor)
+ * field 3:            recovery passes required on upgrade
+ */
 #define BCH_METADATA_VERSIONS()                                                \
        x(bkey_renumber,                BCH_VERSION(0, 10),             \
          RECOVERY_PASS_ALL_FSCK)                                       \
index 79327b5..9ce2968 100644 (file)
@@ -95,6 +95,7 @@
        x(ENOSPC,                       ENOSPC_sb_members)                      \
        x(ENOSPC,                       ENOSPC_sb_members_v2)                   \
        x(ENOSPC,                       ENOSPC_sb_crypt)                        \
+       x(ENOSPC,                       ENOSPC_sb_downgrade)                    \
        x(ENOSPC,                       ENOSPC_btree_slot)                      \
        x(ENOSPC,                       ENOSPC_snapshot_tree)                   \
        x(ENOENT,                       ENOENT_bkey_type_mismatch)              \
        x(BCH_ERR_invalid_sb,           invalid_sb_errors)                      \
        x(BCH_ERR_invalid_sb,           invalid_sb_opt_compression)             \
        x(BCH_ERR_invalid_sb,           invalid_sb_ext)                         \
+       x(BCH_ERR_invalid_sb,           invalid_sb_downgrade)                   \
        x(BCH_ERR_invalid,              invalid_bkey)                           \
        x(BCH_ERR_operation_blocked,    nocow_lock_blocked)                     \
        x(EIO,                          btree_node_read_err)                    \
index b9c84e8..5cf7d05 100644 (file)
@@ -27,6 +27,7 @@
 #include "recovery.h"
 #include "replicas.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "snapshot.h"
 #include "subvolume.h"
 #include "super-io.h"
@@ -744,6 +745,27 @@ int bch2_fs_recovery(struct bch_fs *c)
                        printbuf_exit(&buf);
                }
 
+               if (bch2_check_version_downgrade(c)) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "Version downgrade required:\n");
+
+                       __le64 passes = ext->recovery_passes_required[0];
+                       bch2_sb_set_downgrade(c,
+                                       BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+                                       BCH_VERSION_MINOR(c->sb.version));
+                       passes = ext->recovery_passes_required[0] & ~passes;
+                       if (passes) {
+                               prt_str(&buf, "  running recovery passes: ");
+                               prt_bitflags(&buf, bch2_recovery_passes,
+                                            bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+                       }
+
+                       bch_info(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       write_sb = true;
+               }
+
                if (check_version_upgrade(c))
                        write_sb = true;
 
@@ -1022,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
 
-       bch2_sb_maybe_downgrade(c);
+       bch2_check_version_downgrade(c);
 
        if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
                bch2_sb_upgrade(c, bcachefs_metadata_version_current);
index e151ada..c76ad8e 100644 (file)
@@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
        mutex_lock(&c->sb_lock);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
-       bch2_sb_maybe_downgrade(c);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
 
        ret = bch2_write_super(c);
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
new file mode 100644 (file)
index 0000000..4919237
--- /dev/null
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Superblock section that contains a list of recovery passes to run when
+ * downgrading past a given version
+ */
+
+#include "bcachefs.h"
+#include "darray.h"
+#include "recovery.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+/*
+ * Downgrade table:
+ * When downgrading past certain versions, we need to run certain recovery passes
+ * and fix certain errors:
+ *
+ * x(version, recovery_passes, errors...)
+ */
+
+#define DOWNGRADE_TABLE()
+
+struct downgrade_entry {
+       u64             recovery_passes;
+       u16             version;
+       u16             nr_errors;
+       const u16       *errors;
+};
+
+#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ };
+DOWNGRADE_TABLE()
+#undef x
+
+static const struct downgrade_entry downgrade_table[] = {
+#define x(ver, passes, ...) {                                  \
+       .recovery_passes        = passes,                       \
+       .version                = bcachefs_metadata_version_##ver,\
+       .nr_errors              = ARRAY_SIZE(ver_##errors),     \
+       .errors                 = ver_##errors,                 \
+},
+DOWNGRADE_TABLE()
+#undef x
+};
+
+static inline const struct bch_sb_field_downgrade_entry *
+downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
+{
+       return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
+}
+
+#define for_each_downgrade_entry(_d, _i)                                               \
+       for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;             \
+            (void *) _i        < vstruct_end(&(_d)->field) &&                          \
+            (void *) &_i->errors[0] < vstruct_end(&(_d)->field);                       \
+            _i = downgrade_entry_next_c(_i))
+
+static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
+                                     struct printbuf *err)
+{
+       struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+       for_each_downgrade_entry(e, i) {
+               if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
+                   BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
+                       prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
+                                  BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
+                                  BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
+                       return -BCH_ERR_invalid_sb_downgrade;
+               }
+       }
+
+       return 0;
+}
+
+static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
+                                     struct bch_sb_field *f)
+{
+       struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
+
+       if (out->nr_tabstops <= 1)
+               printbuf_tabstop_push(out, 16);
+
+       for_each_downgrade_entry(e, i) {
+               prt_str(out, "version:");
+               prt_tab(out);
+               bch2_version_to_text(out, le16_to_cpu(i->version));
+               prt_newline(out);
+
+               prt_str(out, "recovery passes:");
+               prt_tab(out);
+               prt_bitflags(out, bch2_recovery_passes,
+                            bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
+               prt_newline(out);
+
+               prt_str(out, "errors:");
+               prt_tab(out);
+               bool first = true;
+               for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+                       if (!first)
+                               prt_char(out, ',');
+                       first = false;
+                       unsigned e = le16_to_cpu(i->errors[j]);
+                       prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+               }
+               prt_newline(out);
+       }
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
+       .validate       = bch2_sb_downgrade_validate,
+       .to_text        = bch2_sb_downgrade_to_text,
+};
+
+int bch2_sb_downgrade_update(struct bch_fs *c)
+{
+       darray_char table = {};
+       int ret = 0;
+
+       for (const struct downgrade_entry *src = downgrade_table;
+            src < downgrade_table + ARRAY_SIZE(downgrade_table);
+            src++) {
+               if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+                       continue;
+
+               struct bch_sb_field_downgrade_entry *dst;
+               unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
+
+               ret = darray_make_room(&table, bytes);
+               if (ret)
+                       goto out;
+
+               dst = (void *) &darray_top(table);
+               dst->version = cpu_to_le16(src->version);
+               dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
+               dst->recovery_passes[1] = 0;
+               dst->nr_errors          = cpu_to_le16(src->nr_errors);
+               for (unsigned i = 0; i < src->nr_errors; i++)
+                       dst->errors[i] = cpu_to_le16(src->errors[i]);
+
+               table.nr += bytes;
+       }
+
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+
+       unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
+
+       if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
+               goto out;
+
+       d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
+       if (!d) {
+               ret = -BCH_ERR_ENOSPC_sb_downgrade;
+               goto out;
+       }
+
+       memcpy(d->entries, table.data, table.nr);
+       memset_u64s_tail(d->entries, 0, table.nr);
+out:
+       darray_exit(&table);
+       return ret;
+}
+
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+{
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+       if (!d)
+               return;
+
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       for_each_downgrade_entry(d, i) {
+               unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
+               if (new_minor < minor && minor <= old_minor) {
+                       ext->recovery_passes_required[0] |= i->recovery_passes[0];
+                       ext->recovery_passes_required[1] |= i->recovery_passes[1];
+
+                       for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
+                               unsigned e = le16_to_cpu(i->errors[j]);
+                               if (e < BCH_SB_ERR_MAX)
+                                       __set_bit(e, c->sb.errors_silent);
+                               if (e < sizeof(ext->errors_silent) * 8)
+                                       ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
+                       }
+               }
+       }
+}
diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h
new file mode 100644 (file)
index 0000000..bc48fd2
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_H
+#define _BCACHEFS_SB_DOWNGRADE_H
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
+
+int bch2_sb_downgrade_update(struct bch_fs *);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_H */
index caf7669..5f5bcae 100644 (file)
@@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
 
 static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
 {
-       return e
-               ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
-               : 0;
+       return bch2_sb_field_nr_entries(e);
 }
 
 static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
index e085d3b..4c98d8c 100644 (file)
@@ -13,6 +13,7 @@
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "sb-members.h"
 #include "super-io.h"
@@ -939,6 +940,7 @@ int bch2_write_super(struct bch_fs *c)
        bch2_sb_members_from_cpu(c);
        bch2_sb_members_cpy_v2_v1(&c->disk_sb);
        bch2_sb_errors_from_cpu(c);
+       bch2_sb_downgrade_update(c);
 
        for_each_online_member(ca, c, i)
                bch2_sb_from_fs(c, ca);
@@ -1062,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
 }
 
 /* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
 {
+       bool ret = bcachefs_metadata_version_current < c->sb.version;
+
        lockdep_assert_held(&c->sb_lock);
 
        /*
@@ -1077,12 +1081,17 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
        if (c->sb.version_min > bcachefs_metadata_version_current)
                c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
        c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+       return ret;
 }
 
 void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
 {
        lockdep_assert_held(&c->sb_lock);
 
+       if (BCH_VERSION_MAJOR(new_version) >
+           BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+               bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
        c->disk_sb.sb->version = cpu_to_le16(new_version);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
 }
index 589509e..e41e5de 100644 (file)
@@ -93,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
                __bch2_check_set_feature(c, feat);
 }
 
-void bch2_sb_maybe_downgrade(struct bch_fs *);
+bool bch2_check_version_downgrade(struct bch_fs *);
 void bch2_sb_upgrade(struct bch_fs *, unsigned);
 
 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,