bcachefs: Be more conservative about journal pre-reservations
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 5 Dec 2020 21:25:05 +0000 (16:25 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:49 +0000 (17:08 -0400)
 - Try to always keep 1/8th of the journal free, on top of
   pre-reservations
 - Move the check for whether the journal is stuck to
   bch2_journal_space_available, and make it only fire when there aren't
   any journal writes in flight (that might free up space by updating
   last_seq)

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h

index 3116875..9c0de18 100644 (file)
@@ -442,20 +442,6 @@ unlock:
        if (!ret)
                goto retry;
 
-       if (WARN_ONCE(ret == cur_entry_journal_full &&
-                     !can_discard &&
-                     (flags & JOURNAL_RES_GET_RESERVED),
-                     "JOURNAL_RES_GET_RESERVED set but journal full")) {
-               char *buf;
-
-               buf = kmalloc(4096, GFP_NOFS);
-               if (buf) {
-                       bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
-                       pr_err("\n%s", buf);
-                       kfree(buf);
-               }
-       }
-
        /*
         * Journal is full - can't rely on reclaim from work item due to
         * freezing:
@@ -1139,7 +1125,7 @@ out:
 
 /* debug: */
 
-void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        union journal_res_state s;
@@ -1147,7 +1133,6 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        unsigned i;
 
        rcu_read_lock();
-       spin_lock(&j->lock);
        s = READ_ONCE(j->reservations);
 
        pr_buf(out,
@@ -1247,10 +1232,16 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
                       ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
        }
 
-       spin_unlock(&j->lock);
        rcu_read_unlock();
 }
 
+void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
+{
+       spin_lock(&j->lock);
+       __bch2_journal_debug_to_text(out, j);
+       spin_unlock(&j->lock);
+}
+
 void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
 {
        struct journal_entry_pin_list *pin_list;
index 2c0014c..df353a1 100644 (file)
@@ -386,7 +386,7 @@ out:
 static inline bool journal_check_may_get_unreserved(struct journal *j)
 {
        union journal_preres_state s = READ_ONCE(j->prereserved);
-       bool ret = s.reserved <= s.remaining &&
+       bool ret = s.reserved < s.remaining &&
                fifo_free(&j->pin) > 8;
 
        lockdep_assert_held(&j->lock);
@@ -510,6 +510,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
 void bch2_journal_unblock(struct journal *);
 void bch2_journal_block(struct journal *);
 
+void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
 void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
 
index 26556bb..cb2cfbb 100644 (file)
@@ -1098,7 +1098,6 @@ static void journal_write_done(struct closure *cl)
        if (!w->noflush) {
                j->flushed_seq_ondisk = seq;
                j->last_seq_ondisk = last_seq;
-               bch2_journal_space_available(j);
        }
 
        /*
@@ -1122,6 +1121,8 @@ static void journal_write_done(struct closure *cl)
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
+       bch2_journal_space_available(j);
+
        closure_wake_up(&w->wait);
        journal_wake(j);
 
index a3d5405..0fba832 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "btree_key_cache.h"
+#include "error.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
@@ -159,7 +160,7 @@ void bch2_journal_space_available(struct journal *j)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        unsigned clean, clean_ondisk, total;
-       unsigned overhead, u64s_remaining = 0;
+       s64 u64s_remaining = 0;
        unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
                                       j->buf[1].buf_size >> 9);
        unsigned i, nr_online = 0, nr_devs_want;
@@ -208,22 +209,37 @@ void bch2_journal_space_available(struct journal *j)
        clean           = j->space[journal_space_clean].total;
        total           = j->space[journal_space_total].total;
 
-       if (!j->space[journal_space_discarded].next_entry)
+       if (!clean_ondisk &&
+           j->reservations.idx ==
+           j->reservations.unwritten_idx) {
+               char *buf = kmalloc(4096, GFP_ATOMIC);
+
+               bch_err(c, "journal stuck");
+               if (buf) {
+                       __bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
+                       pr_err("\n%s", buf);
+                       kfree(buf);
+               }
+
+               bch2_fatal_error(c);
+               ret = cur_entry_journal_stuck;
+       } else if (!j->space[journal_space_discarded].next_entry)
                ret = cur_entry_journal_full;
        else if (!fifo_free(&j->pin))
                ret = cur_entry_journal_pin_full;
 
-       if ((clean - clean_ondisk <= total / 8) &&
+       if ((j->space[journal_space_clean_ondisk].next_entry <
+            j->space[journal_space_clean_ondisk].total) &&
+           (clean - clean_ondisk <= total / 8) &&
            (clean_ondisk * 2 > clean ))
                set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-       overhead = DIV_ROUND_UP(clean, max_entry_size) *
-               journal_entry_overhead(j);
-       u64s_remaining = clean << 6;
-       u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
-       u64s_remaining /= 4;
+       u64s_remaining  = (u64) clean << 6;
+       u64s_remaining -= (u64) total << 3;
+       u64s_remaining = max(0LL, u64s_remaining);
+       u64s_remaining /= 2;
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
@@ -572,6 +588,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
                    c->btree_cache.used  * 3)
                        min_nr = 1;
 
+               if (fifo_free(&j->pin) <= 32)
+                       min_nr = 1;
+
                min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
 
                trace_journal_reclaim_start(c,
index cf96753..1b13054 100644 (file)
@@ -172,6 +172,7 @@ struct journal {
                cur_entry_blocked,
                cur_entry_journal_full,
                cur_entry_journal_pin_full,
+               cur_entry_journal_stuck,
                cur_entry_insufficient_devices,
        }                       cur_entry_error;