bcachefs: Start moving debug info from sysfs to debugfs
author: Kent Overstreet <kent.overstreet@gmail.com>
Sat, 26 Feb 2022 16:48:34 +0000 (11:48 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:26 +0000 (17:09 -0400)
In sysfs, files can only output at most PAGE_SIZE. This is a problem for
debug info that needs to list an arbitrary number of items, and because
of this limit some of our debug info has been terser and harder to read
than we'd like.

This patch moves info about journal pins and cached btree nodes to
debugfs, and greatly expands and improves the output we return.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_io.h
fs/bcachefs/debug.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/sysfs.c

index e5bc098..6cda77a 100644 (file)
@@ -538,9 +538,6 @@ enum {
 
 struct btree_debug {
        unsigned                id;
-       struct dentry           *btree;
-       struct dentry           *btree_format;
-       struct dentry           *failed;
 };
 
 struct bch_fs_pcpu {
@@ -885,7 +882,8 @@ mempool_t           bio_bounce_pages;
        struct bch_memquota_type quotas[QTYP_NR];
 
        /* DEBUG JUNK */
-       struct dentry           *debug;
+       struct dentry           *fs_debug_dir;
+       struct dentry           *btree_debug_dir;
        struct btree_debug      btree_debug[BTREE_ID_NR];
        struct btree            *verify_data;
        struct btree_node       *verify_ondisk;
index 0670429..fd7f2a7 100644 (file)
@@ -2106,30 +2106,3 @@ void bch2_btree_flush_all_writes(struct bch_fs *c)
 {
        __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
 }
-
-void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c)
-{
-       struct bucket_table *tbl;
-       struct rhash_head *pos;
-       struct btree *b;
-       unsigned i;
-
-       rcu_read_lock();
-       for_each_cached_btree(b, c, tbl, i, pos) {
-               unsigned long flags = READ_ONCE(b->flags);
-
-               if (!(flags & (1 << BTREE_NODE_dirty)))
-                       continue;
-
-               pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
-                      b,
-                      (flags & (1 << BTREE_NODE_dirty)) != 0,
-                      (flags & (1 << BTREE_NODE_need_write)) != 0,
-                      b->c.level,
-                      b->written,
-                      !list_empty_careful(&b->write_blocked),
-                      b->will_make_reachable != 0,
-                      b->will_make_reachable & 1);
-       }
-       rcu_read_unlock();
-}
index a1dea8e..638a9b3 100644 (file)
@@ -177,7 +177,6 @@ do {                                                                        \
 
 void bch2_btree_flush_all_reads(struct bch_fs *);
 void bch2_btree_flush_all_writes(struct bch_fs *);
-void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
 
 static inline void compat_bformat(unsigned level, enum btree_id btree_id,
                                  unsigned version, unsigned big_endian,
index c3bfa7f..1fff03d 100644 (file)
@@ -185,9 +185,10 @@ out:
 /* XXX: bch_fs refcounting */
 
 struct dump_iter {
-       struct bpos             from;
-       struct bch_fs   *c;
+       struct bch_fs           *c;
        enum btree_id           id;
+       struct bpos             from;
+       u64                     iter;
 
        struct printbuf         buf;
 
@@ -226,6 +227,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file)
 
        file->private_data = i;
        i->from = POS_MIN;
+       i->iter = 0;
        i->c    = container_of(bd, struct bch_fs, btree_debug[bd->id]);
        i->id   = bd->id;
        i->buf  = PRINTBUF;
@@ -420,10 +422,148 @@ static const struct file_operations bfloat_failed_debug_ops = {
        .read           = bch2_read_bfloat_failed,
 };
 
+/*
+ * Print a multi-line description of one cached btree node to @out.
+ *
+ * The first line identifies the node: its address, the name of the btree
+ * it belongs to and its level.  The following lines are indented by two
+ * columns and show the node's key, its flags, b->written, whether its
+ * write_blocked list is non-empty, will_make_reachable (in hex), and the
+ * address and sequence number of each of the node's two journal pins.
+ */
+static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
+                                          struct btree *b)
+{
+       /*
+        * Tabstop used by the pr_tab() calls below - presumably the column
+        * the values get aligned to; confirm against the printbuf helpers.
+        */
+       out->tabstops[0] = 32;
+
+       /* Header line; no trailing whitespace before the newline */
+       pr_buf(out, "%px btree=%s l=%u",
+              b,
+              bch2_btree_ids[b->c.btree_id],
+              b->c.level);
+       pr_newline(out);
+
+       pr_indent_push(out, 2);
+
+       bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+       pr_newline(out);
+
+       /* Labels are written without trailing spaces; pr_tab() does the spacing */
+       pr_buf(out, "flags:");
+       pr_tab(out);
+       bch2_flags_to_text(out, bch2_btree_node_flags, b->flags);
+       pr_newline(out);
+
+       pr_buf(out, "written:");
+       pr_tab(out);
+       pr_buf(out, "%u", b->written);
+       pr_newline(out);
+
+       /* 1 if anything is queued on the node's write_blocked list, else 0 */
+       pr_buf(out, "writes blocked:");
+       pr_tab(out);
+       pr_buf(out, "%u", !list_empty_careful(&b->write_blocked));
+       pr_newline(out);
+
+       pr_buf(out, "will make reachable:");
+       pr_tab(out);
+       pr_buf(out, "%lx", b->will_make_reachable);
+       pr_newline(out);
+
+       /* One line per write slot: the pin's address and its journal seq */
+       pr_buf(out, "journal pin %px:", &b->writes[0].journal);
+       pr_tab(out);
+       pr_buf(out, "%llu", b->writes[0].journal.seq);
+       pr_newline(out);
+
+       pr_buf(out, "journal pin %px:", &b->writes[1].journal);
+       pr_tab(out);
+       pr_buf(out, "%llu", b->writes[1].journal.seq);
+       pr_newline(out);
+
+       pr_indent_pop(out, 2);
+}
+
+/*
+ * read() handler for the debugfs "cached_btree_nodes" file.
+ *
+ * Walks the btree node cache hash table one bucket per loop iteration and
+ * formats every node chained in that bucket.  The bucket index lives in
+ * i->iter, which is only reset in bch2_dump_open(), so a later read() on
+ * the same open file continues with the next bucket.
+ *
+ * Returns i->ret, the error reported by flush_buf(), or -ENOMEM if the
+ * printbuf recorded an allocation failure.
+ */
+static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
+                                           size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct bch_fs *c = i->c;
+       bool done = false;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       do {
+               struct bucket_table *tbl;
+               struct rhash_head *pos;
+               struct btree *b;
+
+               /*
+                * Hand off whatever has been formatted so far before
+                * producing more.  NOTE(review): flush_buf() is defined
+                * outside this hunk; presumably it copies to i->ubuf and
+                * updates i->size and i->ret - confirm.
+                */
+               err = flush_buf(i);
+               if (err)
+                       return err;
+
+               /* No room left in the caller's buffer */
+               if (!i->size)
+                       break;
+
+               /*
+                * The bucket chain is walked under RCU.  buf.atomic is raised
+                * for the duration - presumably so that growing the printbuf
+                * does not sleep; confirm against the printbuf code.
+                */
+               rcu_read_lock();
+               i->buf.atomic++;
+               tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
+                                         &c->btree_cache.table);
+               if (i->iter < tbl->size) {
+                       rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
+                               bch2_cached_btree_node_to_text(&i->buf, c, b);
+                       i->iter++;
+               } else {
+                       /* Walked past the last bucket */
+                       done = true;
+               }
+               --i->buf.atomic;
+               rcu_read_unlock();
+       } while (!done);
+
+       if (i->buf.allocation_failure)
+               return -ENOMEM;
+
+       return i->ret;
+}
+
+/*
+ * File operations for the debugfs "cached_btree_nodes" file: open and
+ * release come from the bch2_dump_* helpers, read is specific to this file.
+ */
+static const struct file_operations cached_btree_nodes_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_cached_btree_nodes_read,
+};
+
+/*
+ * read() handler for the debugfs "journal_pins" file.
+ *
+ * Formats the pins of one journal sequence number per loop iteration.
+ * The sequence number to print next is kept in i->iter, so it carries
+ * over between read() calls on the same open file.
+ *
+ * Returns i->ret, the error reported by flush_buf(), or -ENOMEM if the
+ * printbuf recorded an allocation failure.
+ */
+static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
+                                     size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct journal *j = &i->c->journal;
+       bool finished = false;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       while (!finished) {
+               /* Hand off what has been formatted so far before adding more */
+               int rc = flush_buf(i);
+
+               if (rc)
+                       return rc;
+
+               /* No room left in the caller's buffer */
+               if (!i->size)
+                       break;
+
+               /* Returns true once i->iter has reached the end of the pin fifo */
+               finished = bch2_journal_seq_pins_to_text(&i->buf, j, &i->iter);
+               i->iter++;
+       }
+
+       return i->buf.allocation_failure ? -ENOMEM : i->ret;
+}
+
+/*
+ * File operations for the debugfs "journal_pins" file: open and release
+ * come from the bch2_dump_* helpers, read is specific to this file.
+ */
+static const struct file_operations journal_pins_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_journal_pins_read,
+};
+
+/* Remove the filesystem's debugfs directory, and everything below it, if it exists */
 void bch2_fs_debug_exit(struct bch_fs *c)
 {
-       if (!IS_ERR_OR_NULL(c->debug))
-               debugfs_remove_recursive(c->debug);
+       if (!IS_ERR_OR_NULL(c->fs_debug_dir))
+               debugfs_remove_recursive(c->fs_debug_dir);
 }
 
 void bch2_fs_debug_init(struct bch_fs *c)
@@ -435,29 +575,39 @@ void bch2_fs_debug_init(struct bch_fs *c)
                return;
 
        snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
-       c->debug = debugfs_create_dir(name, bch_debug);
-       if (IS_ERR_OR_NULL(c->debug))
+       c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
+       if (IS_ERR_OR_NULL(c->fs_debug_dir))
+               return;
+
+       debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
+                           c->btree_debug, &cached_btree_nodes_ops);
+
+       debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
+                           c->btree_debug, &journal_pins_ops);
+
+       c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
+       if (IS_ERR_OR_NULL(c->btree_debug_dir))
                return;
 
        for (bd = c->btree_debug;
             bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
             bd++) {
                bd->id = bd - c->btree_debug;
-               bd->btree = debugfs_create_file(bch2_btree_ids[bd->id],
-                                               0400, c->debug, bd,
-                                               &btree_debug_ops);
+               debugfs_create_file(bch2_btree_ids[bd->id],
+                                   0400, c->btree_debug_dir, bd,
+                                   &btree_debug_ops);
 
                snprintf(name, sizeof(name), "%s-formats",
                         bch2_btree_ids[bd->id]);
 
-               bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd,
-                                                      &btree_format_debug_ops);
+               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+                                   &btree_format_debug_ops);
 
                snprintf(name, sizeof(name), "%s-bfloat-failed",
                         bch2_btree_ids[bd->id]);
 
-               bd->failed = debugfs_create_file(name, 0400, c->debug, bd,
-                                                &bfloat_failed_debug_ops);
+               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
+                                   &bfloat_failed_debug_ops);
        }
 }
 
index a579e64..0cbd86d 100644 (file)
@@ -1283,35 +1283,59 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        spin_unlock(&j->lock);
 }
 
-void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+/*
+ * Print the pins held on a single journal sequence number.
+ *
+ * @seq is both input and output: it is first raised to j->pin.front if it
+ * is below that.  If the result is at or past j->pin.back nothing is
+ * printed and true is returned.  Otherwise the pin count for *seq and the
+ * entries on its list, key_cache_list and flushed lists are written to
+ * @out and false is returned; advancing *seq is left to the caller.
+ *
+ * Takes j->lock for the duration of the call.
+ */
+bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
 {
        struct journal_entry_pin_list *pin_list;
        struct journal_entry_pin *pin;
-       u64 i;
 
        spin_lock(&j->lock);
+       /* Clamp *seq up to j->pin.front */
+       *seq = max(*seq, j->pin.front);
+
+       /* At or past j->pin.back: nothing to print */
+       if (*seq >= j->pin.back) {
+               spin_unlock(&j->lock);
+               return true;
+       }
+
        out->atomic++;
 
-       fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
-               pr_buf(out, "%llu: count %u\n",
-                      i, atomic_read(&pin_list->count));
+       pin_list = journal_seq_pin(j, *seq);
 
-               list_for_each_entry(pin, &pin_list->key_cache_list, list)
-                       pr_buf(out, "\t%px %ps\n",
-                              pin, pin->flush);
+       pr_buf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count));
+       pr_newline(out);
+       pr_indent_push(out, 2);
 
-               list_for_each_entry(pin, &pin_list->list, list)
-                       pr_buf(out, "\t%px %ps\n",
-                              pin, pin->flush);
+       list_for_each_entry(pin, &pin_list->list, list) {
+               pr_buf(out, "\t%px %ps", pin, pin->flush);
+               pr_newline(out);
+       }
+
+       list_for_each_entry(pin, &pin_list->key_cache_list, list) {
+               pr_buf(out, "\t%px %ps", pin, pin->flush);
+               pr_newline(out);
+       }
 
-               if (!list_empty(&pin_list->flushed))
-                       pr_buf(out, "flushed:\n");
+       /* The "flushed:" heading is only printed when that list has entries */
+       if (!list_empty(&pin_list->flushed)) {
+               pr_buf(out, "flushed:");
+               pr_newline(out);
+       }
 
-               list_for_each_entry(pin, &pin_list->flushed, list)
-                       pr_buf(out, "\t%px %ps\n",
-                              pin, pin->flush);
+       list_for_each_entry(pin, &pin_list->flushed, list) {
+               pr_buf(out, "\t%px %ps", pin, pin->flush);
+               pr_newline(out);
        }
 
+       pr_indent_pop(out, 2);
+
        --out->atomic;
        spin_unlock(&j->lock);
+
+       return false;
+}
+
+/*
+ * Print the pins of every journal sequence number to @out by calling
+ * bch2_journal_seq_pins_to_text() for successive sequence numbers until
+ * it reports that there is nothing left.
+ */
+void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
+{
+       u64 cur;
+
+       /* The callee may raise cur itself (to j->pin.front) via the pointer */
+       for (cur = 0; !bch2_journal_seq_pins_to_text(out, j, &cur); cur++)
+               ;
 }
index 5d263a5..6c7a38a 100644 (file)
@@ -499,6 +499,7 @@ void bch2_journal_block(struct journal *);
 void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
+bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
 
 int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
                                unsigned nr);
index ce32b90..3018250 100644 (file)
@@ -174,9 +174,7 @@ read_attribute(reserve_stats);
 read_attribute(btree_cache_size);
 read_attribute(compression_stats);
 read_attribute(journal_debug);
-read_attribute(journal_pins);
 read_attribute(btree_updates);
-read_attribute(dirty_btree_nodes);
 read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(btree_transactions);
@@ -402,15 +400,9 @@ SHOW(bch2_fs)
        if (attr == &sysfs_journal_debug)
                bch2_journal_debug_to_text(out, &c->journal);
 
-       if (attr == &sysfs_journal_pins)
-               bch2_journal_pins_to_text(out, &c->journal);
-
        if (attr == &sysfs_btree_updates)
                bch2_btree_updates_to_text(out, c);
 
-       if (attr == &sysfs_dirty_btree_nodes)
-               bch2_dirty_btree_nodes_to_text(out, c);
-
        if (attr == &sysfs_btree_cache)
                bch2_btree_cache_to_text(out, c);
 
@@ -564,9 +556,7 @@ SYSFS_OPS(bch2_fs_internal);
 
 struct attribute *bch2_fs_internal_files[] = {
        &sysfs_journal_debug,
-       &sysfs_journal_pins,
        &sysfs_btree_updates,
-       &sysfs_dirty_btree_nodes,
        &sysfs_btree_cache,
        &sysfs_btree_key_cache,
        &sysfs_btree_transactions,