md/raid1, raid10: add blktrace records when IO is delayed
author NeilBrown <neilb@suse.com>
Mon, 14 Nov 2016 05:30:21 +0000 (16:30 +1100)
committer Shaohua Li <shli@fb.com>
Fri, 18 Nov 2016 17:35:37 +0000 (09:35 -0800)
Both raid1 and raid10 will sometimes delay handling an IO request,
such as when resync is happening or there are too many requests queued.

Add some blktrace messages so we can see when these delays are happening
when looking for performance artefacts.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
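
The new messages go through blk_add_trace_msg(), so they appear in the
normal blktrace stream for the array.  As a rough usage sketch (not part
of this patch; /dev/md0 is just an assumed device name), the delay
messages can be captured and filtered with:

    # trace the md device; new messages are prefixed "raid1 " / "raid10 "
    blktrace -d /dev/md0 -o - | blkparse -i - | grep raid1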
drivers/md/raid1.c
drivers/md/raid10.c

drivers/md/raid1.c
index 2dc1934..d24adc5 100644
@@ -71,6 +71,9 @@ static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
                          sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
+#define raid1_log(md, fmt, args...)                            \
+       do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
+
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        struct pool_info *pi = data;
@@ -861,6 +864,7 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
                 * that queue to allow conf->start_next_window
                 * to increase.
                 */
+               raid1_log(conf->mddev, "wait barrier");
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->array_frozen &&
                                    (!conf->barrier ||
@@ -940,6 +944,7 @@ static void freeze_array(struct r1conf *conf, int extra)
         */
        spin_lock_irq(&conf->resync_lock);
        conf->array_frozen = 1;
+       raid1_log(conf->mddev, "wait freeze");
        wait_event_lock_irq_cmd(conf->wait_barrier,
                                conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
@@ -1144,6 +1149,7 @@ read_again:
                         * take care not to over-take any writes
                         * that are 'behind'
                         */
+                       raid1_log(mddev, "wait behind writes");
                        wait_event(bitmap->behind_wait,
                                   atomic_read(&bitmap->behind_writes) == 0);
                }
@@ -1208,6 +1214,7 @@ read_again:
         */
        if (conf->pending_count >= max_queued_requests) {
                md_wakeup_thread(mddev->thread);
+               raid1_log(mddev, "wait queued");
                wait_event(conf->wait_barrier,
                           conf->pending_count < max_queued_requests);
        }
@@ -1299,6 +1306,7 @@ read_again:
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                r1_bio->state = 0;
                allow_barrier(conf, start_next_window, bio->bi_iter.bi_sector);
+               raid1_log(mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
                start_next_window = wait_barrier(conf, bio);
                /*
drivers/md/raid10.c
index 67f0034..bd8c884 100644
@@ -106,6 +106,9 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
 static void end_reshape_write(struct bio *bio);
 static void end_reshape(struct r10conf *conf);
 
+#define raid10_log(md, fmt, args...)                           \
+       do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        struct r10conf *conf = data;
@@ -942,6 +945,7 @@ static void wait_barrier(struct r10conf *conf)
                 * that queue to get the nr_pending
                 * count down.
                 */
+               raid10_log(conf->mddev, "wait barrier");
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->barrier ||
                                    (atomic_read(&conf->nr_pending) &&
@@ -1093,6 +1097,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                /* IO spans the reshape position.  Need to wait for
                 * reshape to pass
                 */
+               raid10_log(conf->mddev, "wait reshape");
                allow_barrier(conf);
                wait_event(conf->wait_barrier,
                           conf->reshape_progress <= bio->bi_iter.bi_sector ||
@@ -1112,6 +1117,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
                set_mask_bits(&mddev->flags, 0,
                              BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
                md_wakeup_thread(mddev->thread);
+               raid10_log(conf->mddev, "wait reshape metadata");
                wait_event(mddev->sb_wait,
                           !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 
@@ -1209,6 +1215,7 @@ read_again:
         */
        if (conf->pending_count >= max_queued_requests) {
                md_wakeup_thread(mddev->thread);
+               raid10_log(mddev, "wait queued");
                wait_event(conf->wait_barrier,
                           conf->pending_count < max_queued_requests);
        }
@@ -1336,6 +1343,7 @@ retry_write:
                        }
                }
                allow_barrier(conf);
+               raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
                wait_barrier(conf);
                goto retry_write;