gfs2: instrumentation wrt ail1 stuck
author Bob Peterson <rpeterso@redhat.com>
Thu, 26 Mar 2020 17:19:54 +0000 (12:19 -0500)
committer Bob Peterson <rpeterso@redhat.com>
Fri, 27 Mar 2020 19:08:05 +0000 (14:08 -0500)
Before this patch, if the ail1 flush got stuck for some reason, there
were no clues as to why. This patch introduces a check for getting
stuck for more than ten minutes, and if it happens, it dumps the items
still remaining on the ail1 list.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
fs/gfs2/log.c

index 8729f5f..82f356f 100644 (file)
@@ -139,6 +139,40 @@ __acquires(&sdp->sd_ail_lock)
        return ret;
 }
 
+static void dump_ail_list(struct gfs2_sbd *sdp) /* report, via fs_err(), every bufdata found on sdp's ail1 lists */
+{
+       struct gfs2_trans *tr;
+       struct gfs2_bufdata *bd;
+       struct buffer_head *bh;
+
+       fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
+              current->journal_info ? 1 : 0); /* t is 1 when current->journal_info is non-NULL, else 0 */
+
+       list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { /* every transaction on sd_ail1_list, in reverse list order */
+               list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
+                                           bd_ail_st_list) { /* every bufdata on that transaction's tr_ail1_list */
+                       bh = bd->bd_bh;
+                       fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
+                              (unsigned long long)bd->bd_blkno, bh);
+                       if (!bh) { /* no buffer head attached: end the line and move to the next entry */
+                               fs_err(sdp, "\n");
+                               continue;
+                       }
+                       fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d " /* second call: bh->b_blocknr, then one field per buffer_*() test below */
+                              "map:%d new:%d ar:%d aw:%d delay:%d "
+                              "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
+                              (unsigned long long)bh->b_blocknr,
+                              buffer_uptodate(bh), buffer_dirty(bh),
+                              buffer_locked(bh), buffer_req(bh),
+                              buffer_mapped(bh), buffer_new(bh),
+                              buffer_async_read(bh), buffer_async_write(bh),
+                              buffer_delay(bh), buffer_write_io_error(bh),
+                              buffer_unwritten(bh),
+                              buffer_defer_completion(bh),
+                              buffer_pinned(bh), buffer_escaped(bh));
+               }
+       }
+}
 
 /**
  * gfs2_ail1_flush - start writeback of some ail1 entries 
@@ -155,11 +189,16 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
        struct gfs2_trans *tr;
        struct blk_plug plug;
        int ret = 0;
+       unsigned long flush_start = jiffies;
 
        trace_gfs2_ail_flush(sdp, wbc, 1);
        blk_start_plug(&plug);
        spin_lock(&sdp->sd_ail_lock);
 restart:
+       if (time_after(jiffies, flush_start + (HZ * 600))) {
+               dump_ail_list(sdp);
+               goto out;
+       }
        list_for_each_entry_reverse(tr, head, tr_list) {
                if (wbc->nr_to_write <= 0)
                        break;
@@ -170,6 +209,7 @@ restart:
                        break;
                }
        }
+out:
        spin_unlock(&sdp->sd_ail_lock);
        blk_finish_plug(&plug);
        if (ret) {