bcachefs: Avoid using btree_node_lock_nopath()
author    Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 3 Sep 2022 02:59:39 +0000 (22:59 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Sun, 22 Oct 2023 21:09:40 +0000 (17:09 -0400)
With the upcoming cycle detector, we have to be careful about using
btree_node_lock_nopath() - in particular, using it to take write locks
can cause deadlocks.

All held locks need to be tracked in a btree_path, so that the cycle
detector knows about them - unless we know that we cannot cause
deadlocks for other reasons: e.g. we are only taking read locks, or
we're in very early fsck (topology repair). See the sketch below.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
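
The rule being enforced, as a minimal sketch: the wrapper function below
is hypothetical, but btree_node_lock_nopath_nofail() and
bch2_btree_node_lock_write_nofail() are the real helpers this patch
swaps between.

  /*
   * Illustrative only: under the cycle detector, write locks must be
   * taken through a btree_path so other transactions can see them.
   */
  static void write_lock_example(struct btree_trans *trans,
                                 struct btree_path *path, struct btree *b)
  {
          /*
           * Unsafe: a nopath lock isn't recorded anywhere the cycle
           * detector can see, so a deadlock through it goes undetected:
           *
           *      btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
           *
           * Safe: path already holds a tracked intent lock on b, and the
           * write lock is taken - and tracked - through it:
           */
          bch2_btree_node_lock_write_nofail(trans, path, &b->c);

          /* ... modify b ... */

          six_unlock_write(&b->c.lock);
  }
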
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_update_interior.c

diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 9c39027..b1c8127 100644
@@ -716,13 +716,13 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
        struct btree_path *path;
 
        trans_for_each_path(trans, path)
-               if (!path->cached &&
+               if (path->uptodate == BTREE_ITER_UPTODATE &&
+                   !path->cached &&
                    btree_path_pos_in_node(path, b)) {
                        enum btree_node_locked_type t =
                                btree_lock_want(path, b->c.level);
 
-                       if (path->nodes_locked &&
-                           t != BTREE_NODE_UNLOCKED) {
+                       if (t != BTREE_NODE_UNLOCKED) {
                                btree_node_unlock(trans, path, b->c.level);
                                six_lock_increment(&b->c.lock, (enum six_lock_type) t);
                                mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
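
The unlock/increment/mark sequence above is the idiom for handing an
already-held six lock over to a path, making the lock tracked. Condensed
into a hypothetical helper (the three calls are the real ones; the
wrapper is not in the patch):

  static void adopt_node_lock(struct btree_trans *trans,
                              struct btree_path *path, struct btree *b,
                              enum six_lock_type t)
  {
          /* drop whatever the path held at this level (the old node): */
          btree_node_unlock(trans, path, b->c.level);
          /* take an extra count on b's lock, which the caller holds: */
          six_lock_increment(&b->c.lock, t);
          /* record it in the path - now the cycle detector can see it: */
          mark_btree_node_locked(trans, path, b->c.level, t);
  }
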
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 89e540c..0f54db0 100644
@@ -568,26 +568,21 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                        atomic_long_dec(&c->btree_key_cache.nr_dirty);
                }
        } else {
+               struct btree_path *path2;
 evict:
-               BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
+               trans_for_each_path(trans, path2)
+                       if (path2 != c_iter.path)
+                               __bch2_btree_path_unlock(trans, path2);
 
-               /*
-                * XXX: holding a lock that is not marked in btree_trans, not
-                * ideal:
-                */
-               six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
-               bch2_trans_unlock(trans);
-
-               /* Will not fail because we are holding no other locks: */
-               btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);
+               bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c);
 
                if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
                        clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
                        atomic_long_dec(&c->btree_key_cache.nr_dirty);
                }
 
+               mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED);
                bkey_cached_evict(&c->btree_key_cache, ck);
-
                bkey_cached_free_fast(&c->btree_key_cache, ck);
        }
 out:
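
The evict path now keeps every lock visible to the cycle detector: all
other paths are unlocked first, the write lock is taken through
c_iter.path, and the path is re-marked unlocked before the cached key is
freed, since bkey_cached_free_fast() drops ck's locks itself. The new
sequence, condensed (error handling and dirty accounting elided):

  trans_for_each_path(trans, path2)
          if (path2 != c_iter.path)
                  __bch2_btree_path_unlock(trans, path2);

  /* tracked write lock, via the path holding the intent lock: */
  bch2_btree_node_lock_write_nofail(trans, c_iter.path, &ck->c);

  /* the path must forget the lock before ck's locks are dropped: */
  mark_btree_node_locked_noreset(c_iter.path, 0, BTREE_NODE_UNLOCKED);
  bkey_cached_evict(&c->btree_key_cache, ck);
  bkey_cached_free_fast(&c->btree_key_cache, ck);
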
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index d4e2ebe..d3e3f94 100644
@@ -160,22 +160,23 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
 }
 
 static void bch2_btree_node_free_inmem(struct btree_trans *trans,
+                                      struct btree_path *path,
                                       struct btree *b)
 {
        struct bch_fs *c = trans->c;
-       struct btree_path *path;
-
-       trans_for_each_path(trans, path)
-               BUG_ON(path->l[b->c.level].b == b &&
-                      path->l[b->c.level].lock_seq == b->c.lock.state.seq);
-
-       btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+       unsigned level = b->c.level;
 
+       bch2_btree_node_lock_write_nofail(trans, path, &b->c);
        bch2_btree_node_hash_remove(&c->btree_cache, b);
        __btree_node_free(c, b);
-
        six_unlock_write(&b->c.lock);
-       six_unlock_intent(&b->c.lock);
+       mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent);
+
+       trans_for_each_path(trans, path)
+               if (path->l[level].b == b) {
+                       btree_node_unlock(trans, path, level);
+                       path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+               }
 }
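
Annotated outline of the rewritten helper, for exposition (the code is
the hunk above): every lock it touches is now tracked, and the
six_unlock_intent() of the old version is gone - the intent lock is
owned by the path and released through btree_node_unlock() in the loop.

  bch2_btree_node_lock_write_nofail(trans, path, &b->c); /* tracked */
  bch2_btree_node_hash_remove(&c->btree_cache, b);  /* no new lookups */
  __btree_node_free(c, b);
  six_unlock_write(&b->c.lock);
  /* the write lock is gone but the intent lock remains; record that: */
  mark_btree_node_locked_noreset(path, level, SIX_LOCK_intent);

  /* drop and poison every path still pointing at the freed node
   * (including, via the iterator reuse, the one passed in): */
  trans_for_each_path(trans, path)
          if (path->l[level].b == b) {
                  btree_node_unlock(trans, path, level);
                  path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
          }
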
 
 static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
@@ -1507,22 +1508,19 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
        if (n3)
                bch2_btree_update_get_open_buckets(as, n3);
 
-       /* Successful split, update the path to point to the new nodes: */
-
-       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
-       if (n3)
-               bch2_trans_node_add(trans, n3);
-       if (n2)
-               bch2_trans_node_add(trans, n2);
-       bch2_trans_node_add(trans, n1);
-
        /*
         * The old node must be freed (in memory) _before_ unlocking the new
         * nodes - else another thread could re-acquire a read lock on the old
         * node after another thread has locked and updated the new node, thus
         * seeing stale data:
         */
-       bch2_btree_node_free_inmem(trans, b);
+       bch2_btree_node_free_inmem(trans, path, b);
+
+       if (n3)
+               bch2_trans_node_add(trans, n3);
+       if (n2)
+               bch2_trans_node_add(trans, n2);
+       bch2_trans_node_add(trans, n1);
 
        if (n3)
                six_unlock_intent(&n3->c.lock);
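
Note the reordering: bch2_btree_node_free_inmem() must run while the
trans paths - and their tracked intent locks - still point at the old
node b; only then is it safe to re-point them at n1/n2/n3 with
bch2_trans_node_add(). The retained comment still holds: the old node is
freed in memory before the new nodes' locks are dropped. The required
order, sketched for the single-node case:

  bch2_btree_node_free_inmem(trans, path, b); /* path still points at b */
  bch2_trans_node_add(trans, n1);             /* re-point paths at n1 */
  six_unlock_intent(&n1->c.lock);             /* drop our own ref */
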
@@ -1785,16 +1783,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
        bch2_btree_update_get_open_buckets(as, n);
 
-       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
-       six_lock_increment(&m->c.lock, SIX_LOCK_intent);
+       bch2_btree_node_free_inmem(trans, path, b);
+       bch2_btree_node_free_inmem(trans, sib_path, m);
 
        bch2_trans_node_add(trans, n);
 
        bch2_trans_verify_paths(trans);
 
-       bch2_btree_node_free_inmem(trans, b);
-       bch2_btree_node_free_inmem(trans, m);
-
        six_unlock_intent(&n->c.lock);
 
        bch2_btree_update_done(as, trans);
@@ -1851,9 +1846,9 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        bch2_btree_update_get_open_buckets(as, n);
 
-       six_lock_increment(&b->c.lock, SIX_LOCK_intent);
+       bch2_btree_node_free_inmem(trans, iter->path, b);
+
        bch2_trans_node_add(trans, n);
-       bch2_btree_node_free_inmem(trans, b);
        six_unlock_intent(&n->c.lock);
 
        bch2_btree_update_done(as, trans);
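
With split, merge, and rewrite converted, the six_lock_increment() dance
on the old node is gone everywhere: the path owns the old node's intent
lock, bch2_btree_node_free_inmem() consumes it, and bch2_trans_node_add()
re-points the paths at the replacement. The shape all three sites now
share, sketched:

  bch2_btree_update_get_open_buckets(as, n);
  bch2_btree_node_free_inmem(trans, path, b); /* old node, via its path */
  bch2_trans_node_add(trans, n);              /* re-point paths at n */
  six_unlock_intent(&n->c.lock);
  bch2_btree_update_done(as, trans);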