writeback: Add tracing to balance_dirty_pages
author		Dave Chinner <dchinner@redhat.com>	Wed, 7 Jul 2010 03:24:07 +0000 (13:24 +1000)
committer	Jens Axboe <jaxboe@fusionio.com>	Sat, 7 Aug 2010 16:24:25 +0000 (18:24 +0200)
Tracing high level background writeback events is good, but it doesn't
give the entire picture. Add visibility into write throttling to catch IO
dispatched by foreground throttling of processes dirtying lots of pages.
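
As a usage sketch (not part of this patch): once applied, the new
tracepoints can be switched on from userspace through the tracefs
control files. A minimal C example, assuming debugfs is mounted at
/sys/kernel/debug:

	/* enable the whole writeback event group, including the six
	 * new wbc_* events; writing "0" disables them again */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/events/writeback/enable", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("1", f);
		fclose(f);
		return 0;
	}

Records can then be read from /sys/kernel/debug/tracing/trace_pipe.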

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
fs/fs-writeback.c
include/trace/events/writeback.h
mm/page-writeback.c

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73acab4..bf10cbf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -656,10 +656,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.more_io = 0;
                wbc.nr_to_write = MAX_WRITEBACK_PAGES;
                wbc.pages_skipped = 0;
+
+               trace_wbc_writeback_start(&wbc, wb->bdi);
                if (work->sb)
                        __writeback_inodes_sb(work->sb, wb, &wbc);
                else
                        writeback_inodes_wb(wb, &wbc);
+               trace_wbc_writeback_written(&wbc, wb->bdi);
+
                work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
                wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -687,6 +691,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (!list_empty(&wb->b_more_io))  {
                        inode = list_entry(wb->b_more_io.prev,
                                                struct inode, i_list);
+                       trace_wbc_writeback_wait(&wbc, wb->bdi);
                        inode_wait_for_writeback(inode);
                }
                spin_unlock(&inode_lock);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 562fcae..0be26ac 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -85,6 +85,70 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
 DEFINE_WRITEBACK_EVENT(writeback_thread_start);
 DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
 
+DECLARE_EVENT_CLASS(wbc_class,
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
+       TP_ARGS(wbc, bdi),
+       TP_STRUCT__entry(
+               __array(char, name, 32)
+               __field(long, nr_to_write)
+               __field(long, pages_skipped)
+               __field(int, sync_mode)
+               __field(int, nonblocking)
+               __field(int, encountered_congestion)
+               __field(int, for_kupdate)
+               __field(int, for_background)
+               __field(int, for_reclaim)
+               __field(int, range_cyclic)
+               __field(int, more_io)
+               __field(unsigned long, older_than_this)
+               __field(long, range_start)
+               __field(long, range_end)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->name, dev_name(bdi->dev), 32);
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->pages_skipped  = wbc->pages_skipped;
+               __entry->sync_mode      = wbc->sync_mode;
+               __entry->for_kupdate    = wbc->for_kupdate;
+               __entry->for_background = wbc->for_background;
+               __entry->for_reclaim    = wbc->for_reclaim;
+               __entry->range_cyclic   = wbc->range_cyclic;
+               __entry->more_io        = wbc->more_io;
+               __entry->older_than_this = wbc->older_than_this ?
+                                               *wbc->older_than_this : 0;
+               __entry->range_start    = (long)wbc->range_start;
+               __entry->range_end      = (long)wbc->range_end;
+       ),
+
+       TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+               "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+               "start=0x%lx end=0x%lx",
+               __entry->name,
+               __entry->nr_to_write,
+               __entry->pages_skipped,
+               __entry->sync_mode,
+               __entry->for_kupdate,
+               __entry->for_background,
+               __entry->for_reclaim,
+               __entry->range_cyclic,
+               __entry->more_io,
+               __entry->older_than_this,
+               __entry->range_start,
+               __entry->range_end)
+)
+
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+       TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
+       TP_ARGS(wbc, bdi))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+
 #endif /* _TRACE_WRITEBACK_H */
 
 /* This part must be outside protection */
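
A side note on the DEFINE_WBC_EVENT() wrapper above: any further
tracepoint sharing this field set costs a single line. A hypothetical
event (shown for illustration only, not defined by this patch) would
read:

	/* hypothetical event, illustration only */
	DEFINE_WBC_EVENT(wbc_writeback_example);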
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 37498ef..d556cd8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping,
                 * threshold otherwise wait until the disk writes catch
                 * up.
                 */
+               trace_wbc_balance_dirty_start(&wbc, bdi);
                if (bdi_nr_reclaimable > bdi_thresh) {
                        writeback_inodes_wb(&bdi->wb, &wbc);
                        pages_written += write_chunk - wbc.nr_to_write;
                        get_dirty_limits(&background_thresh, &dirty_thresh,
                                       &bdi_thresh, bdi);
+                       trace_wbc_balance_dirty_written(&wbc, bdi);
                }
 
                /*
@@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                if (pages_written >= write_chunk)
                        break;          /* We've done our duty */
 
+               trace_wbc_balance_dirty_wait(&wbc, bdi);
                __set_current_state(TASK_INTERRUPTIBLE);
                io_schedule_timeout(pause);
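
With the events enabled, each record is rendered by the wbc_class
TP_printk() format above. An illustrative trace line for a throttled
process (field values made up, ftrace task/timestamp prefix omitted)
might look like:

	wbc_balance_dirty_wait: bdi 8:0: towrt=1536 skip=0 mode=0 kupd=0 bgrd=0 reclm=0 cyclic=1 more=0 older=0x0 start=0x0 end=0x0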