bcachefs; extents_format.h
authorKent Overstreet <kent.overstreet@linux.dev>
Sun, 21 Jan 2024 07:51:56 +0000 (02:51 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 21 Jan 2024 18:27:11 +0000 (13:27 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/extents_format.h [new file with mode: 0644]

index 5327514..2921ecd 100644 (file)
@@ -417,272 +417,12 @@ struct bch_set {
        struct bch_val          v;
 };
 
-/* Extents */
-
-/*
- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
- * preceded by checksum/compression information (bch_extent_crc32 or
- * bch_extent_crc64).
- *
- * One major determining factor in the format of extents is how we handle and
- * represent extents that have been partially overwritten and thus trimmed:
- *
- * If an extent is not checksummed or compressed, when the extent is trimmed we
- * don't have to remember the extent we originally allocated and wrote: we can
- * merely adjust ptr->offset to point to the start of the data that is currently
- * live. The size field in struct bkey records the current (live) size of the
- * extent, and is also used to mean "size of region on disk that we point to" in
- * this case.
- *
- * Thus an extent that is not checksummed or compressed will consist only of a
- * list of bch_extent_ptrs, with none of the fields in
- * bch_extent_crc32/bch_extent_crc64.
- *
- * When an extent is checksummed or compressed, it's not possible to read only
- * the data that is currently live: we have to read the entire extent that was
- * originally written, and then return only the part of the extent that is
- * currently live.
- *
- * Thus, in addition to the current size of the extent in struct bkey, we need
- * to store the size of the originally allocated space - this is the
- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
- * when the extent is trimmed, instead of modifying the offset field of the
- * pointer, we keep a second smaller offset field - "offset into the original
- * extent of the currently live region".
- *
- * The other major determining factor is replication and data migration:
- *
- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
- * write, we will initially write all the replicas in the same format, with the
- * same checksum type and compression format - however, when copygc runs later (or
- * tiering/cache promotion, anything that moves data), it is not in general
- * going to rewrite all the pointers at once - one of the replicas may be in a
- * bucket on one device that has very little fragmentation while another lives
- * in a bucket that has become heavily fragmented, and thus is being rewritten
- * sooner than the rest.
- *
- * Thus it will only move a subset of the pointers (or in the case of
- * tiering/cache promotion perhaps add a single pointer without dropping any
- * current pointers), and if the extent has been partially overwritten it must
- * write only the currently live portion (or copygc would not be able to reduce
- * fragmentation!) - which necessitates a different bch_extent_crc format for
- * the new pointer.
- *
- * But in the interests of space efficiency, we don't want to store one
- * bch_extent_crc for each pointer if we don't have to.
- *
- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
- * type of a given entry with a scheme similar to utf8 (except we're encoding a
- * type, not a size), encoding the type in the position of the first set bit:
- *
- * bch_extent_crc32    - 0b1
- * bch_extent_ptr      - 0b10
- * bch_extent_crc64    - 0b100
- *
- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
- * bch_extent_crc64 is the least constrained).
- *
- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
- * until the next bch_extent_crc32/64.
- *
- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
- * is neither checksummed nor compressed.
- */
-
 /* 128 bits, sufficient for cryptographic MACs: */
 struct bch_csum {
        __le64                  lo;
        __le64                  hi;
 } __packed __aligned(8);
 
-#define BCH_EXTENT_ENTRY_TYPES()               \
-       x(ptr,                  0)              \
-       x(crc32,                1)              \
-       x(crc64,                2)              \
-       x(crc128,               3)              \
-       x(stripe_ptr,           4)              \
-       x(rebalance,            5)
-#define BCH_EXTENT_ENTRY_MAX   6
-
-enum bch_extent_entry_type {
-#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
-       BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-/* Compressed/uncompressed size are stored biased by 1: */
-struct bch_extent_crc32 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u32                   type:2,
-                               _compressed_size:7,
-                               _uncompressed_size:7,
-                               offset:7,
-                               _unused:1,
-                               csum_type:4,
-                               compression_type:4;
-       __u32                   csum;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u32                   csum;
-       __u32                   compression_type:4,
-                               csum_type:4,
-                               _unused:1,
-                               offset:7,
-                               _uncompressed_size:7,
-                               _compressed_size:7,
-                               type:2;
-#endif
-} __packed __aligned(8);
-
-#define CRC32_SIZE_MAX         (1U << 7)
-#define CRC32_NONCE_MAX                0
-
-struct bch_extent_crc64 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u64                   type:3,
-                               _compressed_size:9,
-                               _uncompressed_size:9,
-                               offset:9,
-                               nonce:10,
-                               csum_type:4,
-                               compression_type:4,
-                               csum_hi:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   csum_hi:16,
-                               compression_type:4,
-                               csum_type:4,
-                               nonce:10,
-                               offset:9,
-                               _uncompressed_size:9,
-                               _compressed_size:9,
-                               type:3;
-#endif
-       __u64                   csum_lo;
-} __packed __aligned(8);
-
-#define CRC64_SIZE_MAX         (1U << 9)
-#define CRC64_NONCE_MAX                ((1U << 10) - 1)
-
-struct bch_extent_crc128 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u64                   type:4,
-                               _compressed_size:13,
-                               _uncompressed_size:13,
-                               offset:13,
-                               nonce:13,
-                               csum_type:4,
-                               compression_type:4;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   compression_type:4,
-                               csum_type:4,
-                               nonce:13,
-                               offset:13,
-                               _uncompressed_size:13,
-                               _compressed_size:13,
-                               type:4;
-#endif
-       struct bch_csum         csum;
-} __packed __aligned(8);
-
-#define CRC128_SIZE_MAX                (1U << 13)
-#define CRC128_NONCE_MAX       ((1U << 13) - 1)
-
-/*
- * @reservation - pointer hasn't been written to, just reserved
- */
-struct bch_extent_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u64                   type:1,
-                               cached:1,
-                               unused:1,
-                               unwritten:1,
-                               offset:44, /* 8 petabytes */
-                               dev:8,
-                               gen:8;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   gen:8,
-                               dev:8,
-                               offset:44,
-                               unwritten:1,
-                               unused:1,
-                               cached:1,
-                               type:1;
-#endif
-} __packed __aligned(8);
-
-struct bch_extent_stripe_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u64                   type:5,
-                               block:8,
-                               redundancy:4,
-                               idx:47;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   idx:47,
-                               redundancy:4,
-                               block:8,
-                               type:5;
-#endif
-};
-
-struct bch_extent_rebalance {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u64                   type:6,
-                               unused:34,
-                               compression:8, /* enum bch_compression_opt */
-                               target:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   target:16,
-                               compression:8,
-                               unused:34,
-                               type:6;
-#endif
-};
-
-union bch_extent_entry {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
-       unsigned long                   type;
-#elif __BITS_PER_LONG == 32
-       struct {
-               unsigned long           pad;
-               unsigned long           type;
-       };
-#else
-#error edit for your odd byteorder.
-#endif
-
-#define x(f, n) struct bch_extent_##f  f;
-       BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-struct bch_btree_ptr {
-       struct bch_val          v;
-
-       __u64                   _data[0];
-       struct bch_extent_ptr   start[];
-} __packed __aligned(8);
-
-struct bch_btree_ptr_v2 {
-       struct bch_val          v;
-
-       __u64                   mem_ptr;
-       __le64                  seq;
-       __le16                  sectors_written;
-       __le16                  flags;
-       struct bpos             min_key;
-       __u64                   _data[0];
-       struct bch_extent_ptr   start[];
-} __packed __aligned(8);
-
-LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,  struct bch_btree_ptr_v2, flags, 0, 1);
-
-struct bch_extent {
-       struct bch_val          v;
-
-       __u64                   _data[0];
-       union bch_extent_entry  start[];
-} __packed __aligned(8);
-
 struct bch_reservation {
        struct bch_val          v;
 
@@ -691,25 +431,6 @@ struct bch_reservation {
        __u8                    pad[3];
 } __packed __aligned(8);
 
-/* Maximum size (in u64s) a single pointer could be: */
-#define BKEY_EXTENT_PTR_U64s_MAX\
-       ((sizeof(struct bch_extent_crc128) +                    \
-         sizeof(struct bch_extent_ptr)) / sizeof(__u64))
-
-/* Maximum possible size of an entire extent value: */
-#define BKEY_EXTENT_VAL_U64s_MAX                               \
-       (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
-
-/* * Maximum possible size of an entire extent, key + value: */
-#define BKEY_EXTENT_U64s_MAX           (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-
-/* Btree pointers don't carry around checksums: */
-#define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
-       ((sizeof(struct bch_btree_ptr_v2) +                     \
-         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
-#define BKEY_BTREE_PTR_U64s_MAX                                        \
-       (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
-
 struct bch_backpointer {
        struct bch_val          v;
        __u8                    btree_id;
@@ -720,6 +441,8 @@ struct bch_backpointer {
        struct bpos             pos;
 } __packed __aligned(8);
 
+#include "extents_format.h"
+
 /* Reflink: */
 
 struct bch_reflink_p {
diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h
new file mode 100644 (file)
index 0000000..939d09f
--- /dev/null
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_FORMAT_H
+#define _BCACHEFS_EXTENTS_FORMAT_H
+
+/*
+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
+ * preceded by checksum/compression information (bch_extent_crc32 or
+ * bch_extent_crc64).
+ *
+ * One major determining factor in the format of extents is how we handle and
+ * represent extents that have been partially overwritten and thus trimmed:
+ *
+ * If an extent is not checksummed or compressed, when the extent is trimmed we
+ * don't have to remember the extent we originally allocated and wrote: we can
+ * merely adjust ptr->offset to point to the start of the data that is currently
+ * live. The size field in struct bkey records the current (live) size of the
+ * extent, and is also used to mean "size of region on disk that we point to" in
+ * this case.
+ *
+ * Thus an extent that is not checksummed or compressed will consist only of a
+ * list of bch_extent_ptrs, with none of the fields in
+ * bch_extent_crc32/bch_extent_crc64.
+ *
+ * When an extent is checksummed or compressed, it's not possible to read only
+ * the data that is currently live: we have to read the entire extent that was
+ * originally written, and then return only the part of the extent that is
+ * currently live.
+ *
+ * Thus, in addition to the current size of the extent in struct bkey, we need
+ * to store the size of the originally allocated space - this is the
+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
+ * when the extent is trimmed, instead of modifying the offset field of the
+ * pointer, we keep a second smaller offset field - "offset into the original
+ * extent of the currently live region".
+ *
+ * The other major determining factor is replication and data migration:
+ *
+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
+ * write, we will initially write all the replicas in the same format, with the
+ * same checksum type and compression format - however, when copygc runs later (or
+ * tiering/cache promotion, anything that moves data), it is not in general
+ * going to rewrite all the pointers at once - one of the replicas may be in a
+ * bucket on one device that has very little fragmentation while another lives
+ * in a bucket that has become heavily fragmented, and thus is being rewritten
+ * sooner than the rest.
+ *
+ * Thus it will only move a subset of the pointers (or in the case of
+ * tiering/cache promotion perhaps add a single pointer without dropping any
+ * current pointers), and if the extent has been partially overwritten it must
+ * write only the currently live portion (or copygc would not be able to reduce
+ * fragmentation!) - which necessitates a different bch_extent_crc format for
+ * the new pointer.
+ *
+ * But in the interests of space efficiency, we don't want to store one
+ * bch_extent_crc for each pointer if we don't have to.
+ *
+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the
+ * type of a given entry with a scheme similar to utf8 (except we're encoding a
+ * type, not a size), encoding the type in the position of the first set bit:
+ *
+ * bch_extent_crc32    - 0b1
+ * bch_extent_ptr      - 0b10
+ * bch_extent_crc64    - 0b100
+ *
+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
+ * bch_extent_crc64 is the least constrained).
+ *
+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
+ * until the next bch_extent_crc32/64.
+ *
+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
+ * is neither checksummed nor compressed.
+ */
+
+#define BCH_EXTENT_ENTRY_TYPES()               \
+       x(ptr,                  0)              \
+       x(crc32,                1)              \
+       x(crc64,                2)              \
+       x(crc128,               3)              \
+       x(stripe_ptr,           4)              \
+       x(rebalance,            5)
+#define BCH_EXTENT_ENTRY_MAX   6
+
+enum bch_extent_entry_type {
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+/* Compressed/uncompressed size are stored biased by 1: */
+struct bch_extent_crc32 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u32                   type:2,
+                               _compressed_size:7,
+                               _uncompressed_size:7,
+                               offset:7,
+                               _unused:1,
+                               csum_type:4,
+                               compression_type:4;
+       __u32                   csum;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u32                   csum;
+       __u32                   compression_type:4,
+                               csum_type:4,
+                               _unused:1,
+                               offset:7,
+                               _uncompressed_size:7,
+                               _compressed_size:7,
+                               type:2;
+#endif
+} __packed __aligned(8);
+
+#define CRC32_SIZE_MAX         (1U << 7)
+#define CRC32_NONCE_MAX                0
+
+struct bch_extent_crc64 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:3,
+                               _compressed_size:9,
+                               _uncompressed_size:9,
+                               offset:9,
+                               nonce:10,
+                               csum_type:4,
+                               compression_type:4,
+                               csum_hi:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   csum_hi:16,
+                               compression_type:4,
+                               csum_type:4,
+                               nonce:10,
+                               offset:9,
+                               _uncompressed_size:9,
+                               _compressed_size:9,
+                               type:3;
+#endif
+       __u64                   csum_lo;
+} __packed __aligned(8);
+
+#define CRC64_SIZE_MAX         (1U << 9)
+#define CRC64_NONCE_MAX                ((1U << 10) - 1)
+
+struct bch_extent_crc128 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:4,
+                               _compressed_size:13,
+                               _uncompressed_size:13,
+                               offset:13,
+                               nonce:13,
+                               csum_type:4,
+                               compression_type:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   compression_type:4,
+                               csum_type:4,
+                               nonce:13,
+                               offset:13,
+                               _uncompressed_size:13,
+                               _compressed_size:13,
+                               type:4;
+#endif
+       struct bch_csum         csum;
+} __packed __aligned(8);
+
+#define CRC128_SIZE_MAX                (1U << 13)
+#define CRC128_NONCE_MAX       ((1U << 13) - 1)
+
+/*
+ * @reservation - pointer hasn't been written to, just reserved
+ */
+struct bch_extent_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:1,
+                               cached:1,
+                               unused:1,
+                               unwritten:1,
+                               offset:44, /* 8 petabytes */
+                               dev:8,
+                               gen:8;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   gen:8,
+                               dev:8,
+                               offset:44,
+                               unwritten:1,
+                               unused:1,
+                               cached:1,
+                               type:1;
+#endif
+} __packed __aligned(8);
+
+struct bch_extent_stripe_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:5,
+                               block:8,
+                               redundancy:4,
+                               idx:47;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   idx:47,
+                               redundancy:4,
+                               block:8,
+                               type:5;
+#endif
+};
+
+struct bch_extent_rebalance {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u64                   type:6,
+                               unused:34,
+                               compression:8, /* enum bch_compression_opt */
+                               target:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+       __u64                   target:16,
+                               compression:8,
+                               unused:34,
+                               type:6;
+#endif
+};
+
+union bch_extent_entry {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
+       unsigned long                   type;
+#elif __BITS_PER_LONG == 32
+       struct {
+               unsigned long           pad;
+               unsigned long           type;
+       };
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define x(f, n) struct bch_extent_##f  f;
+       BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+struct bch_btree_ptr {
+       struct bch_val          v;
+
+       __u64                   _data[0];
+       struct bch_extent_ptr   start[];
+} __packed __aligned(8);
+
+struct bch_btree_ptr_v2 {
+       struct bch_val          v;
+
+       __u64                   mem_ptr;
+       __le64                  seq;
+       __le16                  sectors_written;
+       __le16                  flags;
+       struct bpos             min_key;
+       __u64                   _data[0];
+       struct bch_extent_ptr   start[];
+} __packed __aligned(8);
+
+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,  struct bch_btree_ptr_v2, flags, 0, 1);
+
+struct bch_extent {
+       struct bch_val          v;
+
+       __u64                   _data[0];
+       union bch_extent_entry  start[];
+} __packed __aligned(8);
+
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+       ((sizeof(struct bch_extent_crc128) +                    \
+         sizeof(struct bch_extent_ptr)) / sizeof(__u64))
+
+/* Maximum possible size of an entire extent value: */
+#define BKEY_EXTENT_VAL_U64s_MAX                               \
+       (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+
+/* * Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX           (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+/* Btree pointers don't carry around checksums: */
+#define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
+       ((sizeof(struct bch_btree_ptr_v2) +                     \
+         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
+#define BKEY_BTREE_PTR_U64s_MAX                                        \
+       (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
+
+#endif /* _BCACHEFS_EXTENTS_FORMAT_H */