bcache: fix race in btree_flush_write()

author Coly Li <colyli@suse.de>

Fri, 28 Jun 2019 11:59:58 +0000 (19:59 +0800)

committer Jens Axboe <axboe@kernel.dk>

Fri, 28 Jun 2019 13:39:18 +0000 (07:39 -0600)
author Coly Li <colyli@suse.de>
Fri, 28 Jun 2019 11:59:58 +0000 (19:59 +0800)
committer Jens Axboe <axboe@kernel.dk>
Fri, 28 Jun 2019 13:39:18 +0000 (07:39 -0600)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c

index 846306c..ba434d9 100644 (file)
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -35,7 +35,7 @@
  #include <linux/rcupdate.h>
  #include <linux/sched/clock.h>
  #include <linux/rculist.h>
-
+#include <linux/delay.h>
  #include <trace/events/bcache.h>
  
  /*
@@ -659,12 +659,25 @@ static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
                 up(&b->io_mutex);
         }
  
+retry:
         /*
          * BTREE_NODE_dirty might be cleared in btree_flush_write() by
          * __bch_btree_node_write(). To avoid an extra flush, acquire
          * b->write_lock before checking BTREE_NODE_dirty bit.
          */
         mutex_lock(&b->write_lock);
+       /*
+        * If the journal code has selected this btree node in
+        * btree_flush_write(), delay and retry until the node has been flushed
+        * and btree_flush_write() has cleared BTREE_NODE_journal_flush.
+        */
+       if (btree_node_journal_flush(b)) {
+               pr_debug("bnode %p is flushing by journal, retry", b);
+               mutex_unlock(&b->write_lock);
+               udelay(1);
+               goto retry;
+       }
+
         if (btree_node_dirty(b))
                 __bch_btree_node_write(b, &cl);
         mutex_unlock(&b->write_lock);
@@ -1081,7 +1094,20 @@ static void btree_node_free(struct btree *b)
  
         BUG_ON(b == b->c->root);
  
+retry:
         mutex_lock(&b->write_lock);
+       /*
+        * If btree_flush_write() has selected this node for flushing,
+        * delay and retry until the BTREE_NODE_journal_flush bit is cleared;
+        * only then is it safe to free the btree node here. Otherwise the
+        * free would race with the journal flush.
+        */
+       if (btree_node_journal_flush(b)) {
+               mutex_unlock(&b->write_lock);
+               pr_debug("bnode %p journal_flush set, retry", b);
+               udelay(1);
+               goto retry;
+       }
  
         if (btree_node_dirty(b)) {
                 btree_complete_write(b, btree_current_write(b));
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h

index d1c72ef..76cfd12 100644 (file)
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -158,11 +158,13 @@ enum btree_flags {
         BTREE_NODE_io_error,
         BTREE_NODE_dirty,
         BTREE_NODE_write_idx,
+       BTREE_NODE_journal_flush,
  };
  
  BTREE_FLAG(io_error);
  BTREE_FLAG(dirty);
  BTREE_FLAG(write_idx);
+BTREE_FLAG(journal_flush);
  
  static inline struct btree_write *btree_current_write(struct btree *b)
  {
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c

index 1218e3c..a1e3e1f 100644 (file)
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -430,6 +430,7 @@ static void btree_flush_write(struct cache_set *c)
  retry:
         best = NULL;
  
+       mutex_lock(&c->bucket_lock);
         for_each_cached_btree(b, c, i)
                 if (btree_current_write(b)->journal) {
                         if (!best)
@@ -442,15 +443,21 @@ retry:
                 }
  
         b = best;
+       if (b)
+               set_btree_node_journal_flush(b);
+       mutex_unlock(&c->bucket_lock);
+
         if (b) {
                 mutex_lock(&b->write_lock);
                 if (!btree_current_write(b)->journal) {
+                       clear_bit(BTREE_NODE_journal_flush, &b->flags);
                         mutex_unlock(&b->write_lock);
                         /* We raced */
                         goto retry;
                 }
  
                 __bch_btree_node_write(b, NULL);
+               clear_bit(BTREE_NODE_journal_flush, &b->flags);
                 mutex_unlock(&b->write_lock);
         }
  }
author	Coly Li <colyli@suse.de>
	Fri, 28 Jun 2019 11:59:58 +0000 (19:59 +0800)
committer	Jens Axboe <axboe@kernel.dk>
	Fri, 28 Jun 2019 13:39:18 +0000 (07:39 -0600)
drivers/md/bcache/btree.c		patch \| blob \| history
drivers/md/bcache/btree.h		patch \| blob \| history
drivers/md/bcache/journal.c		patch \| blob \| history