From 3398124444b90079a09374ab10444cd937b72ae1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 26 May 2023 16:59:07 -0400 Subject: [PATCH] bcachefs: Improve trace_trans_restart_would_deadlock In the CI, we're seeing tests failing due to excessive would_deadlock transaction restarts - the tracepoint now includes the lock cycle that occured. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 3 ++- fs/bcachefs/btree_locking.c | 25 ++++++++++++++++++++++--- fs/bcachefs/trace.h | 7 ++++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index f83fab9e62fc..b304c7fc58b1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3054,6 +3054,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) struct btree_path *path; struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; + struct task_struct *task = READ_ONCE(trans->locking_wait.task); unsigned l, idx; if (!out->nr_tabstops) { @@ -3061,7 +3062,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) printbuf_tabstop_push(out, 32); } - prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn); + prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); trans_for_each_path_safe(trans, path, idx) { if (!path->nodes_locked) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 6039278121dc..1eca320e7574 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -142,10 +142,28 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g) return false; } +static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans, + unsigned long ip) +{ + struct bch_fs *c = trans->c; + + count_event(c, trans_restart_would_deadlock); + + if (trace_trans_restart_would_deadlock_enabled()) { + struct printbuf buf = PRINTBUF; + + buf.atomic++; + print_cycle(&buf, g); + + trace_trans_restart_would_deadlock(trans, ip, buf.buf); + printbuf_exit(&buf); + } +} + static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { if (i == g->g) { - trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_); + trace_would_deadlock(g, i->trans, _RET_IP_); return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); } else { i->trans->lock_must_abort = true; @@ -266,15 +284,16 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) unsigned path_idx; int ret; + g.nr = 0; + if (trans->lock_must_abort) { if (cycle) return -1; - trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_); + trace_would_deadlock(&g, trans, _RET_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } - g.nr = 0; lock_graph_down(&g, trans); next: if (!g.nr) diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index e0c8db352bff..6e2ad6f3db98 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -1205,10 +1205,11 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_event, trans_restart_would_deadlock, +DEFINE_EVENT(trans_str, trans_restart_would_deadlock, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + const char *cycle), + TP_ARGS(trans, caller_ip, cycle) ); DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, -- 2.20.1