Revert "md/raid10: pull codes that wait for blocked dev into one function"
author Song Liu <songliubraving@fb.com>
Wed, 9 Dec 2020 19:43:43 +0000 (11:43 -0800)
committer Song Liu <songliubraving@fb.com>
Thu, 10 Dec 2020 04:46:01 +0000 (20:46 -0800)
This reverts commit f046f5d0d79cdb968f219ce249e497fd1accf484.

Matthew Ruffell reported data corruption in raid10 due to the changes
in discard handling [1]. Revert these changes until we find a proper fix.

[1] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1907262/
Cc: Matthew Ruffell <matthew.ruffell@canonical.com>
Cc: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
drivers/md/raid10.c

index 4ad8447..f2ec44f 100644 (file)
@@ -1275,75 +1275,12 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
        }
 }
 
-static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
-{
-       int i;
-       struct r10conf *conf = mddev->private;
-       struct md_rdev *blocked_rdev;
-
-retry_wait:
-       blocked_rdev = NULL;
-       rcu_read_lock();
-       for (i = 0; i < conf->copies; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               struct md_rdev *rrdev = rcu_dereference(
-                       conf->mirrors[i].replacement);
-               if (rdev == rrdev)
-                       rrdev = NULL;
-               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                       atomic_inc(&rdev->nr_pending);
-                       blocked_rdev = rdev;
-                       break;
-               }
-               if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-                       atomic_inc(&rrdev->nr_pending);
-                       blocked_rdev = rrdev;
-                       break;
-               }
-
-               if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
-                       sector_t first_bad;
-                       sector_t dev_sector = r10_bio->devs[i].addr;
-                       int bad_sectors;
-                       int is_bad;
-
-                       /* Discard request doesn't care the write result
-                        * so it doesn't need to wait blocked disk here.
-                        */
-                       if (!r10_bio->sectors)
-                               continue;
-
-                       is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
-                                            &first_bad, &bad_sectors);
-                       if (is_bad < 0) {
-                               /* Mustn't write here until the bad block
-                                * is acknowledged
-                                */
-                               atomic_inc(&rdev->nr_pending);
-                               set_bit(BlockedBadBlocks, &rdev->flags);
-                               blocked_rdev = rdev;
-                               break;
-                       }
-               }
-       }
-       rcu_read_unlock();
-
-       if (unlikely(blocked_rdev)) {
-               /* Have to wait for this device to get unblocked, then retry */
-               allow_barrier(conf);
-               raid10_log(conf->mddev, "%s wait rdev %d blocked",
-                               __func__, blocked_rdev->raid_disk);
-               md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
-               goto retry_wait;
-       }
-}
-
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                                 struct r10bio *r10_bio)
 {
        struct r10conf *conf = mddev->private;
        int i;
+       struct md_rdev *blocked_rdev;
        sector_t sectors;
        int max_sectors;
 
@@ -1401,9 +1338,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
        r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
        raid10_find_phys(conf, r10_bio);
-
-       wait_blocked_dev(mddev, r10_bio);
-
+retry_write:
+       blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r10_bio->sectors;
 
@@ -1414,6 +1350,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                        conf->mirrors[d].replacement);
                if (rdev == rrdev)
                        rrdev = NULL;
+               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                       atomic_inc(&rdev->nr_pending);
+                       blocked_rdev = rdev;
+                       break;
+               }
+               if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+                       atomic_inc(&rrdev->nr_pending);
+                       blocked_rdev = rrdev;
+                       break;
+               }
                if (rdev && (test_bit(Faulty, &rdev->flags)))
                        rdev = NULL;
                if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1434,6 +1380,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
                        is_bad = is_badblock(rdev, dev_sector, max_sectors,
                                             &first_bad, &bad_sectors);
+                       if (is_bad < 0) {
+                               /* Mustn't write here until the bad block
+                                * is acknowledged
+                                */
+                               atomic_inc(&rdev->nr_pending);
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               blocked_rdev = rdev;
+                               break;
+                       }
                        if (is_bad && first_bad <= dev_sector) {
                                /* Cannot write here at all */
                                bad_sectors -= (dev_sector - first_bad);
@@ -1469,6 +1424,35 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
        }
        rcu_read_unlock();
 
+       if (unlikely(blocked_rdev)) {
+               /* Have to wait for this device to get unblocked, then retry */
+               int j;
+               int d;
+
+               for (j = 0; j < i; j++) {
+                       if (r10_bio->devs[j].bio) {
+                               d = r10_bio->devs[j].devnum;
+                               rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+                       }
+                       if (r10_bio->devs[j].repl_bio) {
+                               struct md_rdev *rdev;
+                               d = r10_bio->devs[j].devnum;
+                               rdev = conf->mirrors[d].replacement;
+                               if (!rdev) {
+                                       /* Race with remove_disk */
+                                       smp_mb();
+                                       rdev = conf->mirrors[d].rdev;
+                               }
+                               rdev_dec_pending(rdev, mddev);
+                       }
+               }
+               allow_barrier(conf);
+               raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+               md_wait_for_blocked_rdev(blocked_rdev, mddev);
+               wait_barrier(conf);
+               goto retry_write;
+       }
+
        if (max_sectors < r10_bio->sectors)
                r10_bio->sectors = max_sectors;