bcachefs: Fix failure to allocate btree node in cache
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 18 Feb 2022 05:47:45 +0000 (00:47 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:24 +0000 (17:09 -0400)
The error code when we fail to allocate a node in the btree node cache
doesn't make it to bch2_btree_path_traverse_all(). Instead, we need to
stash a flag in btree_trans so we know we have to take the cannibalize
lock.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_types.h
fs/bcachefs/trace.h

index 36b82df..c17db1d 100644 (file)
@@ -672,6 +672,15 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
        }
 
        b = bch2_btree_node_mem_alloc(c);
+
+       if (trans && b == ERR_PTR(-ENOMEM)) {
+               trans->memory_allocation_failure = true;
+               trace_trans_restart_memory_allocation_failure(trans->fn,
+                               _THIS_IP_, btree_id, &path->pos);
+               btree_trans_restart(trans);
+               return ERR_PTR(-EINTR);
+       }
+
        if (IS_ERR(b))
                return b;
 
index c6c1c9d..1015e89 100644 (file)
@@ -1407,12 +1407,12 @@ err:
 static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
                                   unsigned, unsigned long);
 
-static int __btree_path_traverse_all(struct btree_trans *trans, int ret,
-                                    unsigned long trace_ip)
+static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
        struct btree_path *path, *prev = NULL;
-       int i;
+       unsigned long trace_ip = _RET_IP_;
+       int i, ret = 0;
 
        if (trans->in_traverse_all)
                return -EINTR;
@@ -1441,7 +1441,7 @@ retry_all:
        bch2_trans_unlock(trans);
        cond_resched();
 
-       if (unlikely(ret == -ENOMEM)) {
+       if (unlikely(trans->memory_allocation_failure)) {
                struct closure cl;
 
                closure_init_stack(&cl);
@@ -1452,11 +1452,6 @@ retry_all:
                } while (ret);
        }
 
-       if (unlikely(ret == -EIO))
-               goto out;
-
-       BUG_ON(ret && ret != -EINTR);
-
        /* Now, redo traversals in correct order: */
        i = 0;
        while (i < trans->nr_sorted) {
@@ -1482,7 +1477,7 @@ retry_all:
         */
        trans_for_each_path(trans, path)
                BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
-out:
+
        bch2_btree_cache_cannibalize_unlock(c);
 
        trans->in_traverse_all = false;
@@ -1491,11 +1486,6 @@ out:
        return ret;
 }
 
-static int bch2_btree_path_traverse_all(struct btree_trans *trans)
-{
-       return __btree_path_traverse_all(trans, 0, _RET_IP_);
-}
-
 static inline bool btree_path_good_node(struct btree_trans *trans,
                                        struct btree_path *path,
                                        unsigned l, int check_pos)
@@ -1619,8 +1609,6 @@ out:
        return ret;
 }
 
-static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
-
 int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
                                          struct btree_path *path, unsigned flags)
 {
index 7e5b70f..89c0d22 100644 (file)
@@ -393,6 +393,7 @@ struct btree_trans {
        bool                    in_traverse_all:1;
        bool                    restarted:1;
        bool                    paths_sorted:1;
+       bool                    memory_allocation_failure:1;
        bool                    journal_transaction_names:1;
        bool                    journal_replay_not_finished:1;
        /*
index 64b7d93..b35022d 100644 (file)
@@ -802,6 +802,14 @@ DEFINE_EVENT(transaction_restart_iter,     trans_restart_traverse,
        TP_ARGS(trans_fn, caller_ip, btree_id, pos)
 );
 
+DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
+       TP_PROTO(const char *trans_fn,
+                unsigned long caller_ip,
+                enum btree_id btree_id,
+                struct bpos *pos),
+       TP_ARGS(trans_fn, caller_ip, btree_id, pos)
+);
+
 TRACE_EVENT(trans_restart_would_deadlock,
        TP_PROTO(const char *trans_fn,
                 unsigned long  caller_ip,