bcachefs: Improve some IO error messages
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 3 Dec 2020 18:57:22 +0000 (13:57 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:49 +0000 (17:08 -0400)
it's useful to know whether an error was for a read or a write - this
also standardizes error messages a bit more.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_io.c
fs/bcachefs/ec.c
fs/bcachefs/error.h
fs/bcachefs/fs-io.c
fs/bcachefs/io.c
fs/bcachefs/journal_io.c
fs/bcachefs/super-io.c

index 6db04dc..9645a4e 100644 (file)
         dynamic_fault("bcachefs:meta:write:" name)
 
 #ifdef __KERNEL__
-#define bch2_fmt(_c, fmt)      "bcachefs (%s): " fmt "\n", ((_c)->name)
+#define bch2_fmt(_c, fmt)              "bcachefs (%s): " fmt "\n", ((_c)->name)
+#define bch2_fmt_inum(_c, _inum, fmt)  "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum)
 #else
-#define bch2_fmt(_c, fmt)      fmt "\n"
+#define bch2_fmt(_c, fmt)              fmt "\n"
+#define bch2_fmt_inum(_c, _inum, fmt)  "inum %llu: " fmt "\n", (_inum)
 #endif
 
 #define bch_info(c, fmt, ...) \
        printk_ratelimited(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err(c, fmt, ...) \
        printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+
 #define bch_err_ratelimited(c, fmt, ...) \
        printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_inum_ratelimited(c, _inum, fmt, ...) \
+       printk_ratelimited(KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__)
 
 #define bch_verbose(c, fmt, ...)                                       \
 do {                                                                   \
index af3b39b..520eef5 100644 (file)
@@ -597,18 +597,25 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
                bch2_btree_iter_reinit_node(iter, b);
 }
 
+static void btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
+                         struct btree *b)
+{
+       pr_buf(out, "%s level %u/%u\n  ",
+              bch2_btree_ids[b->c.btree_id],
+              b->c.level,
+              c->btree_roots[b->c.btree_id].level);
+       bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+}
+
 static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
                          struct btree *b, struct bset *i,
                          unsigned offset, int write)
 {
-       pr_buf(out, "error validating btree node %sat btree %u level %u/%u\n"
-              "pos ",
-              write ? "before write " : "",
-              b->c.btree_id, b->c.level,
-              c->btree_roots[b->c.btree_id].level);
-       bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
+       pr_buf(out, "error validating btree node %sat btree ",
+              write ? "before write " : "");
+       btree_pos_to_text(out, c, b);
 
-       pr_buf(out, " node offset %u", b->written);
+       pr_buf(out, "\n  node offset %u", b->written);
        if (i)
                pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s));
 }
@@ -1104,6 +1111,8 @@ static void btree_node_read_work(struct work_struct *work)
        struct btree *b         = rb->bio.bi_private;
        struct bio *bio         = &rb->bio;
        struct bch_io_failures failed = { .nr = 0 };
+       char buf[200];
+       struct printbuf out;
        bool can_retry;
 
        goto start;
@@ -1122,8 +1131,10 @@ static void btree_node_read_work(struct work_struct *work)
                        bio->bi_status = BLK_STS_REMOVED;
                }
 start:
-               bch2_dev_io_err_on(bio->bi_status, ca, "btree read: %s",
-                                  bch2_blk_status_to_str(bio->bi_status));
+               out = PBUF(buf);
+               btree_pos_to_text(&out, c, b);
+               bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s",
+                                  bch2_blk_status_to_str(bio->bi_status), buf);
                if (rb->have_ioref)
                        percpu_ref_put(&ca->io_ref);
                rb->have_ioref = false;
@@ -1408,7 +1419,7 @@ static void btree_node_write_endio(struct bio *bio)
        if (wbio->have_ioref)
                bch2_latency_acct(ca, wbio->submit_time, WRITE);
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write: %s",
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "btree write error: %s",
                               bch2_blk_status_to_str(bio->bi_status)) ||
            bch2_meta_write_fault("btree")) {
                spin_lock_irqsave(&c->btree_write_error_lock, flags);
index 42331f0..6c9259e 100644 (file)
@@ -264,7 +264,7 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
                                             len << 9);
 
                        if (memcmp(stripe_csum(v, i, j), &csum, csum_bytes)) {
-                               __bcache_io_error(c,
+                               bch_err_ratelimited(c,
                                        "checksum error while doing reconstruct read (%u:%u)",
                                        i, j);
                                clear_bit(i, buf->valid);
@@ -305,7 +305,7 @@ static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
        unsigned bytes = buf->size << 9;
 
        if (ec_nr_failed(buf) > v->nr_redundant) {
-               __bcache_io_error(c,
+               bch_err_ratelimited(c,
                        "error doing reconstruct read: unable to read enough blocks");
                return -1;
        }
@@ -326,7 +326,7 @@ static void ec_block_endio(struct bio *bio)
        struct bch_dev *ca = ec_bio->ca;
        struct closure *cl = bio->bi_private;
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s: %s",
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "erasure coding %s error: %s",
                               bio_data_dir(bio) ? "write" : "read",
                               bch2_blk_status_to_str(bio->bi_status)))
                clear_bit(ec_bio->idx, ec_bio->buf->valid);
@@ -420,7 +420,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
                                   BTREE_ITER_SLOTS);
        k = bch2_btree_iter_peek_slot(iter);
        if (bkey_err(k) || k.k->type != KEY_TYPE_stripe) {
-               __bcache_io_error(c,
+               bch_err_ratelimited(c,
                        "error doing reconstruct read: stripe not found");
                kfree(buf);
                return bch2_trans_exit(&trans) ?: -EIO;
@@ -462,7 +462,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 
                if (ptr_stale(ca, ptr)) {
-                       __bcache_io_error(c,
+                       bch_err_ratelimited(c,
                                          "error doing reconstruct read: stale pointer");
                        clear_bit(i, buf->valid);
                        continue;
@@ -474,7 +474,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio)
        closure_sync(&cl);
 
        if (ec_nr_failed(buf) > v->nr_redundant) {
-               __bcache_io_error(c,
+               bch_err_ratelimited(c,
                        "error doing reconstruct read: unable to read enough blocks");
                ret = -EIO;
                goto err;
index 94b5331..0e49fd7 100644 (file)
@@ -181,12 +181,18 @@ void bch2_io_error(struct bch_dev *);
 /* Logs message and handles the error: */
 #define bch2_dev_io_error(ca, fmt, ...)                                        \
 do {                                                                   \
-       printk_ratelimited(KERN_ERR bch2_fmt((ca)->fs,                  \
-               "IO error on %s for " fmt),                             \
+       printk_ratelimited(KERN_ERR "bcachefs (%s): " fmt,              \
                (ca)->name, ##__VA_ARGS__);                             \
        bch2_io_error(ca);                                              \
 } while (0)
 
+#define bch2_dev_inum_io_error(ca, _inum, _offset, fmt, ...)           \
+do {                                                                   \
+       printk_ratelimited(KERN_ERR "bcachefs (%s inum %llu offset %llu): " fmt,\
+               (ca)->name, (_inum), (_offset), ##__VA_ARGS__);         \
+       bch2_io_error(ca);                                              \
+} while (0)
+
 #define bch2_dev_io_err_on(cond, ca, ...)                              \
 ({                                                                     \
        bool _ret = (cond);                                             \
@@ -196,16 +202,13 @@ do {                                                                      \
        _ret;                                                           \
 })
 
-/* kill? */
-
-#define __bcache_io_error(c, fmt, ...)                                 \
-       printk_ratelimited(KERN_ERR bch2_fmt(c,                         \
-                       "IO error: " fmt), ##__VA_ARGS__)
-
-#define bcache_io_error(c, bio, fmt, ...)                              \
-do {                                                                   \
-       __bcache_io_error(c, fmt, ##__VA_ARGS__);                       \
-       (bio)->bi_status = BLK_STS_IOERR;                                       \
-} while (0)
+#define bch2_dev_inum_io_err_on(cond, ca, _inum, _offset, ...)         \
+({                                                                     \
+       bool _ret = (cond);                                             \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_dev_inum_io_error(ca, _inum, _offset, __VA_ARGS__);\
+       _ret;                                                           \
+})
 
 #endif /* _BCACHEFS_ERROR_H */
index 4dafe2b..c10192e 100644 (file)
@@ -839,7 +839,9 @@ retry:
                goto retry;
 
        if (ret) {
-               bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+               bch_err_inum_ratelimited(c, inum,
+                               "read error %i from btree lookup", ret);
+               rbio->bio.bi_status = BLK_STS_IOERR;
                bio_endio(&rbio->bio);
        }
 
index 8125642..9603381 100644 (file)
@@ -588,7 +588,8 @@ static void __bch2_write_index(struct bch_write_op *op)
                op->written += sectors_start - keylist_sectors(keys);
 
                if (ret) {
-                       __bcache_io_error(c, "btree IO error %i", ret);
+                       bch_err_inum_ratelimited(c, op->pos.inode,
+                               "write error %i from btree update", ret);
                        op->error = ret;
                }
        }
@@ -633,7 +634,10 @@ static void bch2_write_endio(struct bio *bio)
        struct bch_fs *c                = wbio->c;
        struct bch_dev *ca              = bch_dev_bkey_exists(c, wbio->dev);
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "data write: %s",
+       if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
+                                   op->pos.inode,
+                                   op->pos.offset - bio_sectors(bio), /* XXX definitely wrong */
+                                   "data write error: %s",
                               bch2_blk_status_to_str(bio->bi_status)))
                set_bit(wbio->dev, op->failed.d);
 
@@ -1276,15 +1280,14 @@ void bch2_write(struct closure *cl)
        wbio_init(bio)->put_bio = false;
 
        if (bio_sectors(bio) & (c->opts.block_size - 1)) {
-               __bcache_io_error(c, "misaligned write");
+               bch_err_inum_ratelimited(c, op->pos.inode,
+                                        "misaligned write");
                op->error = -EIO;
                goto err;
        }
 
        if (c->opts.nochanges ||
            !percpu_ref_tryget(&c->writes)) {
-               if (!(op->flags & BCH_WRITE_FROM_INTERNAL))
-                       __bcache_io_error(c, "read only");
                op->error = -EROFS;
                goto err;
        }
@@ -1707,7 +1710,8 @@ retry:
         * reading a btree node
         */
        BUG_ON(!ret);
-       __bcache_io_error(c, "btree IO error: %i", ret);
+       bch_err_inum_ratelimited(c, inode,
+                       "read error %i from btree lookup", ret);
 err:
        rbio->bio.bi_status = BLK_STS_IOERR;
 out:
@@ -1911,17 +1915,15 @@ csum_err:
                return;
        }
 
-       bch2_dev_io_error(ca,
-               "data checksum error, inode %llu offset %llu: expected %0llx:%0llx got %0llx:%0llx (type %u)",
-               rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
+       bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector,
+               "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)",
                rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo,
                csum.hi, csum.lo, crc.csum_type);
        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
        return;
 decompression_err:
-       __bcache_io_error(c, "decompression error, inode %llu offset %llu",
-                         rbio->pos.inode,
-                         (u64) rbio->bvec_iter.bi_sector);
+       bch_err_inum_ratelimited(c, rbio->pos.inode,
+                                "decompression error");
        bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
        return;
 }
@@ -1943,7 +1945,14 @@ static void bch2_read_endio(struct bio *bio)
        if (!rbio->split)
                rbio->bio.bi_end_io = rbio->end_io;
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "data read; %s",
+       /*
+        * XXX: rbio->pos is not what we want here when reading from indirect
+        * extents
+        */
+       if (bch2_dev_inum_io_err_on(bio->bi_status, ca,
+                                   rbio->pos.inode,
+                                   rbio->pos.offset,
+                                   "data read error: %s",
                               bch2_blk_status_to_str(bio->bi_status))) {
                bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
                return;
@@ -1993,7 +2002,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 
        if (k.k->type != KEY_TYPE_reflink_v &&
            k.k->type != KEY_TYPE_indirect_inline_data) {
-               __bcache_io_error(trans->c,
+               bch_err_inum_ratelimited(trans->c, orig_k->k->k.p.inode,
                                "pointer to nonexistent indirect extent");
                ret = -EIO;
                goto err;
@@ -2038,7 +2047,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                goto hole;
 
        if (pick_ret < 0) {
-               __bcache_io_error(c, "no device to read from");
+               bch_err_inum_ratelimited(c, k.k->p.inode,
+                                        "no device to read from");
                goto err;
        }
 
@@ -2190,7 +2200,8 @@ get_bio:
 
        if (!rbio->pick.idx) {
                if (!rbio->have_ioref) {
-                       __bcache_io_error(c, "no device to read from");
+                       bch_err_inum_ratelimited(c, k.k->p.inode,
+                                                "no device to read from");
                        bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
                        goto out;
                }
@@ -2345,7 +2356,9 @@ err:
        if (ret == -EINTR)
                goto retry;
 
-       bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
+       bch_err_inum_ratelimited(c, inode,
+                                "read error %i from btree lookup", ret);
+       rbio->bio.bi_status = BLK_STS_IOERR;
        bch2_rbio_done(rbio);
        goto out;
 }
index 79d5d89..fc2fdcc 100644 (file)
@@ -557,7 +557,7 @@ reread:
                        kfree(bio);
 
                        if (bch2_dev_io_err_on(ret, ca,
-                                              "journal read from sector %llu",
+                                              "journal read error: sector %llu",
                                               offset) ||
                            bch2_meta_read_fault("journal"))
                                return -EIO;
@@ -1015,7 +1015,7 @@ static void journal_write_endio(struct bio *bio)
        struct bch_dev *ca = bio->bi_private;
        struct journal *j = &ca->fs->journal;
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write: %s",
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write error: %s",
                               bch2_blk_status_to_str(bio->bi_status)) ||
            bch2_meta_write_fault("journal")) {
                struct journal_buf *w = journal_prev_buf(j);
index e25ff75..e1b4e6f 100644 (file)
@@ -640,7 +640,7 @@ static void write_super_endio(struct bio *bio)
 
        /* XXX: return errors directly */
 
-       if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write: %s",
+       if (bch2_dev_io_err_on(bio->bi_status, ca, "superblock write error: %s",
                               bch2_blk_status_to_str(bio->bi_status)))
                ca->sb_write_error = 1;