bcachefs: Fix race leading to btree node write getting stuck
authorKent Overstreet <kent.overstreet@gmail.com>
Sun, 27 Feb 2022 14:56:33 +0000 (09:56 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:26 +0000 (17:09 -0400)
Checking btree_node_may_write() isn't atomic with the other btree flags,
dirty and need_write in particular. There was a rare race where we'd
unblock a node from writing while __btree_node_flush() was setting
need_write, and no thread would notice that the node was now both able
to write and needed to be written.

Fix this by adding btree node flags for will_make_reachable and
write_blocked that can be checked in the cmpxchg loop in
__bch2_btree_node_write.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_io.h
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c

index 7b26461..5f96c5d 100644 (file)
@@ -223,10 +223,9 @@ wait_on_io:
                goto wait_on_io;
        }
 
-       if (btree_node_noevict(b))
-               goto out_unlock;
-
-       if (!btree_node_may_write(b))
+       if (btree_node_noevict(b) ||
+           btree_node_write_blocked(b) ||
+           btree_node_will_make_reachable(b))
                goto out_unlock;
 
        if (btree_node_dirty(b)) {
index 540bfe0..53f8334 100644 (file)
@@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
                if ((old & (1U << BTREE_NODE_dirty)) &&
                    (old & (1U << BTREE_NODE_need_write)) &&
                    !(old & (1U << BTREE_NODE_never_write)) &&
-                   btree_node_may_write(b)) {
+                   !(old & (1U << BTREE_NODE_write_blocked)) &&
+                   !(old & (1U << BTREE_NODE_will_make_reachable))) {
                        new &= ~(1U << BTREE_NODE_dirty);
                        new &= ~(1U << BTREE_NODE_need_write);
                        new |=  (1U << BTREE_NODE_write_in_flight);
@@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
                    !(old & (1 << BTREE_NODE_need_write)))
                        return;
 
-               if (!btree_node_may_write(b))
+               if (old &
+                   ((1 << BTREE_NODE_never_write)|
+                    (1 << BTREE_NODE_write_blocked)))
                        return;
 
-               if (old & (1 << BTREE_NODE_never_write))
+               if (b->written &&
+                   (old & (1 << BTREE_NODE_will_make_reachable)))
                        return;
 
                if (old & (1 << BTREE_NODE_write_in_flight))
index 7ed8808..d818d87 100644 (file)
@@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
 void bch2_btree_node_wait_on_read(struct btree *);
 void bch2_btree_node_wait_on_write(struct btree *);
 
-static inline bool btree_node_may_write(struct btree *b)
-{
-       return list_empty_careful(&b->write_blocked) &&
-               (!b->written || !b->will_make_reachable);
-}
-
 enum compact_mode {
        COMPACT_LAZY,
        COMPACT_ALL,
index 165466d..561406b 100644 (file)
@@ -434,6 +434,8 @@ struct btree_trans {
        x(read_error)                                                   \
        x(dirty)                                                        \
        x(need_write)                                                   \
+       x(write_blocked)                                                \
+       x(will_make_reachable)                                          \
        x(noevict)                                                      \
        x(write_idx)                                                    \
        x(accessed)                                                     \
index fe0fc5f..17d65c9 100644 (file)
@@ -606,6 +606,8 @@ err:
                mutex_lock(&c->btree_interior_update_lock);
 
                list_del(&as->write_blocked_list);
+               if (list_empty(&b->write_blocked))
+                       clear_btree_node_write_blocked(b);
 
                /*
                 * Node might have been freed, recheck under
@@ -650,6 +652,7 @@ err:
 
                BUG_ON(b->will_make_reachable != (unsigned long) as);
                b->will_make_reachable = 0;
+               clear_btree_node_will_make_reachable(b);
        }
        mutex_unlock(&c->btree_interior_update_lock);
 
@@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
 
        as->mode        = BTREE_INTERIOR_UPDATING_NODE;
        as->b           = b;
+
+       set_btree_node_write_blocked(b);
        list_add(&as->write_blocked_list, &b->write_blocked);
 
        mutex_unlock(&c->btree_interior_update_lock);
@@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
 
        as->new_nodes[as->nr_new_nodes++] = b;
        b->will_make_reachable = 1UL|(unsigned long) as;
+       set_btree_node_will_make_reachable(b);
 
        mutex_unlock(&c->btree_interior_update_lock);
 
@@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
         * xchg() is for synchronization with bch2_btree_complete_write:
         */
        v = xchg(&b->will_make_reachable, 0);
+       clear_btree_node_will_make_reachable(b);
        as = (struct btree_update *) (v & ~1UL);
 
        if (!as) {