bcachefs: Run btree key cache shrinker less aggressively
author     Kent Overstreet <kent.overstreet@linux.dev>
           Mon, 6 Nov 2023 14:53:14 +0000 (09:53 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Tue, 14 Nov 2023 02:45:01 +0000 (21:45 -0500)
The btree key cache maintains lists of items that have been freed, but
can't yet be reclaimed because a bch2_trans_relock() call might still
find them - we're waiting for outstanding SRCU readers to finish with them.

Previously, we wouldn't count these items against the number of entries
we're attempting to scan, which meant we'd evict more live key cache
entries to make up the difference - doing quite a bit of potentially
unnecessary work.

With recent work to make sure we don't hold SRCU locks for too long, it
should be safe to count all the items on the freelists against the number
to scan - even if we can't reclaim them yet, we will be able to soon.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h

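A minimal userspace sketch of the accounting change (not bcachefs code -
the struct cache, its nr_freed/nr_live fields and the scan() helper below
are hypothetical stand-ins): entries parked on a freed list, which in the
real code are waiting out an SRCU grace period, now count toward the
shrinker's scan target up front, so fewer live entries get evicted per
scan call.

/* Compile with: cc -Wall sketch.c -o sketch */
#include <stdio.h>
#include <stddef.h>

struct cache {
	size_t nr_freed;	/* freed, but not yet reclaimable */
	size_t nr_live;		/* live, evictable entries */
};

/* Evict live entries until @nr objects have been scanned. */
static size_t scan(struct cache *bc, size_t nr)
{
	size_t scanned = 0, evicted = 0;

	/* Count the whole freed backlog against the target first. */
	scanned += bc->nr_freed;

	while (scanned < nr && bc->nr_live) {
		bc->nr_live--;
		evicted++;
		scanned++;
	}
	return evicted;
}

int main(void)
{
	struct cache bc = { .nr_freed = 64, .nr_live = 1000 };

	/*
	 * Without the "scanned += bc->nr_freed" line, a target of 100
	 * would evict 100 live entries; with it, only 36 are evicted.
	 */
	printf("evicted %zu live entries\n", scan(&bc, 100));
	return 0;
}
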
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 9b78f78..b3305a0 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        ck->btree_trans_barrier_seq =
                start_poll_synchronize_srcu(&c->btree_trans_barrier);
 
-       if (ck->c.lock.readers)
+       if (ck->c.lock.readers) {
                list_move_tail(&ck->list, &bc->freed_pcpu);
-       else
+               bc->nr_freed_pcpu++;
+       } else {
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
+       }
        atomic_long_inc(&bc->nr_freed);
 
        kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
 {
        struct bkey_cached *pos;
 
+       bc->nr_freed_nonpcpu++;
+
        list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
                if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
                                 pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 #else
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
                mutex_unlock(&bc->lock);
 #endif
        } else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                               f->nr < ARRAY_SIZE(f->objs) / 2) {
                                ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                                list_del_init(&ck->list);
+                               bc->nr_freed_nonpcpu--;
                                f->objs[f->nr++] = ck;
                        }
 
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_nonpcpu)) {
                        ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_nonpcpu--;
                }
                mutex_unlock(&bc->lock);
 #endif
@@ -850,6 +858,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
         * Newest freed entries are at the end of the list - once we hit one
         * that's too new to be freed, we can bail out:
         */
+       scanned += bc->nr_freed_nonpcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -859,13 +869,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_nonpcpu--;
        }
 
        if (scanned >= nr)
                goto out;
 
+       scanned += bc->nr_freed_pcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -875,8 +887,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_pcpu--;
        }
 
        if (scanned >= nr)
@@ -982,6 +994,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        }
 #endif
 
+       BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+       BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
        list_splice(&bc->freed_pcpu,    &items);
        list_splice(&bc->freed_nonpcpu, &items);
 
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
index 0f96780..290e4e5 100644
--- a/fs/bcachefs/btree_key_cache_types.h
+++ b/fs/bcachefs/btree_key_cache_types.h
@@ -11,8 +11,12 @@ struct btree_key_cache {
        struct mutex            lock;
        struct rhashtable       table;
        bool                    table_init_done;
+
        struct list_head        freed_pcpu;
+       size_t                  nr_freed_pcpu;
        struct list_head        freed_nonpcpu;
+       size_t                  nr_freed_nonpcpu;
+
        struct shrinker         *shrink;
        unsigned                shrink_iter;
        struct btree_key_cache_freelist __percpu *pcpu_freed;