Merge linux-block/for-4.3/core into md/for-linus
author	NeilBrown <neilb@suse.com>
	Sat, 5 Sep 2015 09:07:04 +0000 (11:07 +0200)
committer	NeilBrown <neilb@suse.com>
	Sat, 5 Sep 2015 09:08:32 +0000 (11:08 +0200)
There were a few conflicts that were fairly easy to resolve.

Signed-off-by: NeilBrown <neilb@suse.com>
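
Most of the conflicts track two block-layer changes from for-4.3/core: bio_endio() lost its error argument (completion status now lives in bio->bi_error), and the merge_bvec_fn infrastructure was removed in favour of drivers splitting over-sized bios themselves with blk_queue_split(). A minimal, illustrative sketch of the resulting shape (the example_* names are hypothetical, not from this tree):

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Completion handler: no error argument any more; the status is
	 * read from bio->bi_error instead of the BIO_UPTODATE flag. */
	static void example_end_io(struct bio *bio)
	{
		if (bio->bi_error)
			pr_err("example: I/O error %d\n", bio->bi_error);
		bio_put(bio);
	}

	/* Request entry point: over-sized bios are split by the driver
	 * itself; the merge_bvec_fn hook this replaces is gone. */
	static void example_make_request(struct request_queue *q, struct bio *bio)
	{
		blk_queue_split(q, &bio, q->bio_split);

		if (bio_sectors(bio) == 0) {
			bio_endio(bio);		/* was bio_endio(bio, 0) */
			return;
		}
		/* ... remap and resubmit the bio, or fail it: */
		bio->bi_error = -EIO;
		bio_endio(bio);
	}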
drivers/md/md.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c

diff --combined drivers/md/md.c
@@@ -257,13 -257,17 +257,17 @@@ static void md_make_request(struct requ
        unsigned int sectors;
        int cpu;
  
+       blk_queue_split(q, &bio, q->bio_split);
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
                bio_io_error(bio);
                return;
        }
        if (mddev->ro == 1 && unlikely(rw == WRITE)) {
-               bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
+               if (bio_sectors(bio) != 0)
+                       bio->bi_error = -EROFS;
+               bio_endio(bio);
                return;
        }
        smp_rmb(); /* Ensure implications of  'active' are visible */
@@@ -350,34 -354,11 +354,11 @@@ static int md_congested(void *data, in
        return mddev_congested(mddev, bits);
  }
  
- static int md_mergeable_bvec(struct request_queue *q,
-                            struct bvec_merge_data *bvm,
-                            struct bio_vec *biovec)
- {
-       struct mddev *mddev = q->queuedata;
-       int ret;
-       rcu_read_lock();
-       if (mddev->suspended) {
-               /* Must always allow one vec */
-               if (bvm->bi_size == 0)
-                       ret = biovec->bv_len;
-               else
-                       ret = 0;
-       } else {
-               struct md_personality *pers = mddev->pers;
-               if (pers && pers->mergeable_bvec)
-                       ret = pers->mergeable_bvec(mddev, bvm, biovec);
-               else
-                       ret = biovec->bv_len;
-       }
-       rcu_read_unlock();
-       return ret;
- }
  /*
   * Generic flush handling for md
   */
  
- static void md_end_flush(struct bio *bio, int err)
+ static void md_end_flush(struct bio *bio)
  {
        struct md_rdev *rdev = bio->bi_private;
        struct mddev *mddev = rdev->mddev;
@@@ -433,7 -414,7 +414,7 @@@ static void md_submit_flush_data(struc
  
        if (bio->bi_iter.bi_size == 0)
                /* an empty barrier - all done */
-               bio_endio(bio, 0);
+               bio_endio(bio);
        else {
                bio->bi_rw &= ~REQ_FLUSH;
                mddev->pers->make_request(mddev, bio);
@@@ -502,8 -483,6 +483,8 @@@ static void mddev_put(struct mddev *mdd
                bioset_free(bs);
  }
  
 +static void md_safemode_timeout(unsigned long data);
 +
  void mddev_init(struct mddev *mddev)
  {
        mutex_init(&mddev->open_mutex);
        mutex_init(&mddev->bitmap_info.mutex);
        INIT_LIST_HEAD(&mddev->disks);
        INIT_LIST_HEAD(&mddev->all_mddevs);
 -      init_timer(&mddev->safemode_timer);
 +      setup_timer(&mddev->safemode_timer, md_safemode_timeout,
 +                  (unsigned long) mddev);
        atomic_set(&mddev->active, 1);
        atomic_set(&mddev->openers, 0);
        atomic_set(&mddev->active_io, 0);
@@@ -731,15 -709,13 +712,13 @@@ void md_rdev_clear(struct md_rdev *rdev
  }
  EXPORT_SYMBOL_GPL(md_rdev_clear);
  
- static void super_written(struct bio *bio, int error)
+ static void super_written(struct bio *bio)
  {
        struct md_rdev *rdev = bio->bi_private;
        struct mddev *mddev = rdev->mddev;
  
-       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-               printk("md: super_written gets error=%d, uptodate=%d\n",
-                      error, test_bit(BIO_UPTODATE, &bio->bi_flags));
-               WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
+       if (bio->bi_error) {
+               printk("md: super_written gets error=%d\n", bio->bi_error);
                md_error(mddev, rdev);
        }
  
@@@ -794,7 -770,7 +773,7 @@@ int sync_page_io(struct md_rdev *rdev, 
        bio_add_page(bio, page, size, 0);
        submit_bio_wait(rw, bio);
  
-       ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       ret = !bio->bi_error;
        bio_put(bio);
        return ret;
  }
@@@ -3279,6 -3255,8 +3258,6 @@@ int strict_strtoul_scaled(const char *c
        return 0;
  }
  
 -static void md_safemode_timeout(unsigned long data);
 -
  static ssize_t
  safe_delay_show(struct mddev *mddev, char *page)
  {
@@@ -4211,8 -4189,6 +4190,8 @@@ action_show(struct mddev *mddev, char *
                                type = "repair";
                } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
                        type = "recover";
 +              else if (mddev->reshape_position != MaxSector)
 +                      type = "reshape";
        }
        return sprintf(page, "%s\n", type);
  }
@@@ -5189,7 -5165,6 +5168,6 @@@ int md_run(struct mddev *mddev
        if (mddev->queue) {
                mddev->queue->backing_dev_info.congested_data = mddev;
                mddev->queue->backing_dev_info.congested_fn = md_congested;
-               blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
        }
        if (pers->sync_request) {
                if (mddev->kobj.sd &&
        atomic_set(&mddev->max_corr_read_errors,
                   MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
        mddev->safemode = 0;
 -      mddev->safemode_timer.function = md_safemode_timeout;
 -      mddev->safemode_timer.data = (unsigned long) mddev;
        mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
        mddev->in_sync = 1;
        smp_wmb();
                        if (sysfs_link_rdev(mddev, rdev))
                                /* failure here is OK */;
  
 +      if (mddev->degraded && !mddev->ro)
 +              /* This ensures that recovering status is reported immediately
 +               * via sysfs - until a lack of spares is confirmed.
 +               */
 +              set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
  
        if (mddev->flags & MD_UPDATE_SB_FLAGS)
@@@ -5321,7 -5293,6 +5299,6 @@@ static void md_clean(struct mddev *mdde
        mddev->degraded = 0;
        mddev->safemode = 0;
        mddev->private = NULL;
-       mddev->merge_check_needed = 0;
        mddev->bitmap_info.offset = 0;
        mddev->bitmap_info.default_offset = 0;
        mddev->bitmap_info.default_space = 0;
@@@ -5520,7 -5491,6 +5497,6 @@@ static int do_md_stop(struct mddev *mdd
  
                __md_stop_writes(mddev);
                __md_stop(mddev);
-               mddev->queue->merge_bvec_fn = NULL;
                mddev->queue->backing_dev_info.congested_fn = NULL;
  
                /* tell userspace to handle 'inactive' */
@@@ -5771,16 -5741,16 +5747,16 @@@ static int get_bitmap_file(struct mdde
  
        err = 0;
        spin_lock(&mddev->lock);
 -      /* bitmap disabled, zero the first byte and copy out */
 -      if (!mddev->bitmap_info.file)
 -              file->pathname[0] = '\0';
 -      else if ((ptr = file_path(mddev->bitmap_info.file,
 -                             file->pathname, sizeof(file->pathname))),
 -               IS_ERR(ptr))
 -              err = PTR_ERR(ptr);
 -      else
 -              memmove(file->pathname, ptr,
 -                      sizeof(file->pathname)-(ptr-file->pathname));
 +      /* bitmap enabled */
 +      if (mddev->bitmap_info.file) {
 +              ptr = file_path(mddev->bitmap_info.file, file->pathname,
 +                              sizeof(file->pathname));
 +              if (IS_ERR(ptr))
 +                      err = PTR_ERR(ptr);
 +              else
 +                      memmove(file->pathname, ptr,
 +                              sizeof(file->pathname)-(ptr-file->pathname));
 +      }
        spin_unlock(&mddev->lock);
  
        if (err == 0 &&
@@@ -7099,7 -7069,7 +7075,7 @@@ static void status_unused(struct seq_fi
        seq_printf(seq, "\n");
  }
  
 -static void status_resync(struct seq_file *seq, struct mddev *mddev)
 +static int status_resync(struct seq_file *seq, struct mddev *mddev)
  {
        sector_t max_sectors, resync, res;
        unsigned long dt, db;
        int scale;
        unsigned int per_milli;
  
 -      if (mddev->curr_resync <= 3)
 -              resync = 0;
 -      else
 -              resync = mddev->curr_resync
 -                      - atomic_read(&mddev->recovery_active);
 -
        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
            test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                max_sectors = mddev->resync_max_sectors;
        else
                max_sectors = mddev->dev_sectors;
  
 +      resync = mddev->curr_resync;
 +      if (resync <= 3) {
 +              if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 +                      /* Still cleaning up */
 +                      resync = max_sectors;
 +      } else
 +              resync -= atomic_read(&mddev->recovery_active);
 +
 +      if (resync == 0) {
 +              if (mddev->recovery_cp < MaxSector) {
 +                      seq_printf(seq, "\tresync=PENDING");
 +                      return 1;
 +              }
 +              return 0;
 +      }
 +      if (resync < 3) {
 +              seq_printf(seq, "\tresync=DELAYED");
 +              return 1;
 +      }
 +
        WARN_ON(max_sectors == 0);
        /* Pick 'scale' such that (resync>>scale)*1000 will fit
         * in a sector_t, and (max_sectors>>scale) will fit in a
                   ((unsigned long)rt % 60)/6);
  
        seq_printf(seq, " speed=%ldK/sec", db/2/dt);
 +      return 1;
  }
  
  static void *md_seq_start(struct seq_file *seq, loff_t *pos)
@@@ -7343,8 -7298,13 +7319,8 @@@ static int md_seq_show(struct seq_file 
                        mddev->pers->status(seq, mddev);
                        seq_printf(seq, "\n      ");
                        if (mddev->pers->sync_request) {
 -                              if (mddev->curr_resync > 2) {
 -                                      status_resync(seq, mddev);
 +                              if (status_resync(seq, mddev))
                                        seq_printf(seq, "\n      ");
 -                              } else if (mddev->curr_resync >= 1)
 -                                      seq_printf(seq, "\tresync=DELAYED\n      ");
 -                              else if (mddev->recovery_cp < MaxSector)
 -                                      seq_printf(seq, "\tresync=PENDING\n      ");
                        }
                } else
                        seq_printf(seq, "\n       ");
@@@ -7427,19 -7387,15 +7403,19 @@@ int unregister_md_personality(struct md
  }
  EXPORT_SYMBOL(unregister_md_personality);
  
 -int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
 +int register_md_cluster_operations(struct md_cluster_operations *ops,
 +                                 struct module *module)
  {
 -      if (md_cluster_ops != NULL)
 -              return -EALREADY;
 +      int ret = 0;
        spin_lock(&pers_lock);
 -      md_cluster_ops = ops;
 -      md_cluster_mod = module;
 +      if (md_cluster_ops != NULL)
 +              ret = -EALREADY;
 +      else {
 +              md_cluster_ops = ops;
 +              md_cluster_mod = module;
 +      }
        spin_unlock(&pers_lock);
 -      return 0;
 +      return ret;
  }
  EXPORT_SYMBOL(register_md_cluster_operations);
  
@@@ -7837,8 -7793,7 +7813,8 @@@ void md_do_sync(struct md_thread *threa
                      > (max_sectors >> 4)) ||
                     time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
                     (j - mddev->curr_resync_completed)*2
 -                   >= mddev->resync_max - mddev->curr_resync_completed
 +                   >= mddev->resync_max - mddev->curr_resync_completed ||
 +                   mddev->curr_resync_completed > mddev->resync_max
                            )) {
                        /* time to update curr_resync_completed */
                        wait_event(mddev->recovery_wait,
                        break;
  
                j += sectors;
 +              if (j > max_sectors)
 +                      /* when skipping, extra large numbers can be returned. */
 +                      j = max_sectors;
                if (j > 2)
                        mddev->curr_resync = j;
                if (mddev_is_clustered(mddev))
        blk_finish_plug(&plug);
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
  
 +      if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 +          !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 +          mddev->curr_resync > 2) {
 +              mddev->curr_resync_completed = mddev->curr_resync;
 +              sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 +      }
        /* tell personality that we are finished */
        mddev->pers->sync_request(mddev, max_sectors, &skipped);
  
 -      if (mddev_is_clustered(mddev))
 -              md_cluster_ops->resync_finish(mddev);
 -
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
            mddev->curr_resync > 2) {
                if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                }
        }
   skip:
 +      if (mddev_is_clustered(mddev))
 +              md_cluster_ops->resync_finish(mddev);
 +
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
  
        spin_lock(&mddev->lock);
                mddev->resync_max = MaxSector;
        } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
                mddev->resync_min = mddev->curr_resync_completed;
 +      set_bit(MD_RECOVERY_DONE, &mddev->recovery);
        mddev->curr_resync = 0;
        spin_unlock(&mddev->lock);
  
        wake_up(&resync_wait);
 -      set_bit(MD_RECOVERY_DONE, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        return;
  }
@@@ -8182,7 -8128,6 +8158,7 @@@ void md_check_recovery(struct mddev *md
                         */
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                        md_reap_sync_thread(mddev);
 +                      clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                        clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                        goto unlock;
                }
@@@ -8629,7 -8574,6 +8605,7 @@@ int rdev_set_badblocks(struct md_rdev *
                /* Make sure they get written out promptly */
                sysfs_notify_dirent_safe(rdev->sysfs_state);
                set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
 +              set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
                md_wakeup_thread(rdev->mddev->thread);
        }
        return rv;
diff --combined drivers/md/raid0.c
@@@ -83,7 -83,7 +83,7 @@@ static int create_strip_zones(struct md
        char b[BDEVNAME_SIZE];
        char b2[BDEVNAME_SIZE];
        struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
 -      bool discard_supported = false;
 +      unsigned short blksize = 512;
  
        if (!conf)
                return -ENOMEM;
@@@ -98,9 -98,6 +98,9 @@@
                sector_div(sectors, mddev->chunk_sectors);
                rdev1->sectors = sectors * mddev->chunk_sectors;
  
 +              blksize = max(blksize, queue_logical_block_size(
 +                                    rdev1->bdev->bd_disk->queue));
 +
                rdev_for_each(rdev2, mddev) {
                        pr_debug("md/raid0:%s:   comparing %s(%llu)"
                                 " with %s(%llu)\n",
        }
        pr_debug("md/raid0:%s: FINAL %d zones\n",
                 mdname(mddev), conf->nr_strip_zones);
 +      /*
 +       * now since we have the hard sector sizes, we can make sure
 +       * chunk size is a multiple of that sector size
 +       */
 +      if ((mddev->chunk_sectors << 9) % blksize) {
 +              printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
 +                     mdname(mddev),
 +                     mddev->chunk_sectors << 9, blksize);
 +              err = -EINVAL;
 +              goto abort;
 +      }
 +
        err = -ENOMEM;
        conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
                                conf->nr_strip_zones, GFP_KERNEL);
                }
                dev[j] = rdev1;
  
-               if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
-                       conf->has_merge_bvec = 1;
 -              if (mddev->queue)
 -                      disk_stack_limits(mddev->gendisk, rdev1->bdev,
 -                                        rdev1->data_offset << 9);
--
                if (!smallest || (rdev1->sectors < smallest->sectors))
                        smallest = rdev1;
                cnt++;
 -
 -              if (blk_queue_discard(bdev_get_queue(rdev1->bdev)))
 -                      discard_supported = true;
        }
        if (cnt != mddev->raid_disks) {
                printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
                         (unsigned long long)smallest->sectors);
        }
  
 -      /*
 -       * now since we have the hard sector sizes, we can make sure
 -       * chunk size is a multiple of that sector size
 -       */
 -      if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
 -              printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
 -                     mdname(mddev),
 -                     mddev->chunk_sectors << 9);
 -              goto abort;
 -      }
 -
 -      if (mddev->queue) {
 -              blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 -              blk_queue_io_opt(mddev->queue,
 -                               (mddev->chunk_sectors << 9) * mddev->raid_disks);
 -
 -              if (!discard_supported)
 -                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 -              else
 -                      queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 -      }
 -
        pr_debug("md/raid0:%s: done.\n", mdname(mddev));
        *private_conf = conf;
  
@@@ -337,58 -348,6 +334,6 @@@ static struct md_rdev *map_sector(struc
                             + sector_div(sector, zone->nb_dev)];
  }
  
- /**
-  *    raid0_mergeable_bvec -- tell bio layer if two requests can be merged
-  *    @mddev: the md device
-  *    @bvm: properties of new bio
-  *    @biovec: the request that could be merged to it.
-  *
-  *    Return amount of bytes we can accept at this offset
-  */
- static int raid0_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       struct r0conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       sector_t sector_offset = sector;
-       int max;
-       unsigned int chunk_sectors = mddev->chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       struct strip_zone *zone;
-       struct md_rdev *rdev;
-       struct request_queue *subq;
-       if (is_power_of_2(chunk_sectors))
-               max =  (chunk_sectors - ((sector & (chunk_sectors-1))
-                                               + bio_sectors)) << 9;
-       else
-               max =  (chunk_sectors - (sector_div(sector, chunk_sectors)
-                                               + bio_sectors)) << 9;
-       if (max < 0)
-               max = 0; /* bio_add cannot handle a negative return */
-       if (max <= biovec->bv_len && bio_sectors == 0)
-               return biovec->bv_len;
-       if (max < biovec->bv_len)
-               /* too small already, no need to check further */
-               return max;
-       if (!conf->has_merge_bvec)
-               return max;
-       /* May need to check subordinate device */
-       sector = sector_offset;
-       zone = find_zone(mddev->private, &sector_offset);
-       rdev = map_sector(mddev, zone, sector, &sector_offset);
-       subq = bdev_get_queue(rdev->bdev);
-       if (subq->merge_bvec_fn) {
-               bvm->bi_bdev = rdev->bdev;
-               bvm->bi_sector = sector_offset + zone->dev_start +
-                       rdev->data_offset;
-               return min(max, subq->merge_bvec_fn(subq, bvm, biovec));
-       } else
-               return max;
- }
  static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks)
  {
        sector_t array_sectors = 0;
@@@ -419,6 -378,12 +364,6 @@@ static int raid0_run(struct mddev *mdde
        if (md_check_no_bitmap(mddev))
                return -EINVAL;
  
 -      if (mddev->queue) {
 -              blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 -              blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 -              blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 -      }
 -
        /* if private is not null, we are here after takeover */
        if (mddev->private == NULL) {
                ret = create_strip_zones(mddev, &conf);
                mddev->private = conf;
        }
        conf = mddev->private;
 +      if (mddev->queue) {
 +              struct md_rdev *rdev;
 +              bool discard_supported = false;
 +
 +              rdev_for_each(rdev, mddev) {
 +                      disk_stack_limits(mddev->gendisk, rdev->bdev,
 +                                        rdev->data_offset << 9);
 +                      if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
 +                              discard_supported = true;
 +              }
 +              blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 +              blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 +              blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
 +
 +              blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 +              blk_queue_io_opt(mddev->queue,
 +                               (mddev->chunk_sectors << 9) * mddev->raid_disks);
 +
 +              if (!discard_supported)
 +                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 +              else
 +                      queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 +      }
  
        /* calculate array device size */
        md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
@@@ -546,7 -488,7 +491,7 @@@ static void raid0_make_request(struct m
                if (unlikely((split->bi_rw & REQ_DISCARD) &&
                         !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
                        /* Just ignore it */
-                       bio_endio(split, 0);
+                       bio_endio(split);
                } else
                        generic_make_request(split);
        } while (split != bio);
@@@ -730,7 -672,6 +675,6 @@@ static struct md_personality raid0_pers
        .takeover       = raid0_takeover,
        .quiesce        = raid0_quiesce,
        .congested      = raid0_congested,
-       .mergeable_bvec = raid0_mergeable_bvec,
  };
  
  static int __init raid0_init (void)
diff --combined drivers/md/raid1.c
@@@ -255,9 -255,10 +255,10 @@@ static void call_bio_endio(struct r1bi
                done = 1;
  
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
        if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                /*
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
@@@ -312,9 -313,9 +313,9 @@@ static int find_bio_disk(struct r1bio *
        return mirror;
  }
  
- static void raid1_end_read_request(struct bio *bio, int error)
+ static void raid1_end_read_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        int mirror;
        struct r1conf *conf = r1_bio->mddev->private;
@@@ -397,9 -398,8 +398,8 @@@ static void r1_bio_write_done(struct r1
        }
  }
  
- static void raid1_end_write_request(struct bio *bio, int error)
+ static void raid1_end_write_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r1bio *r1_bio = bio->bi_private;
        int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
        struct r1conf *conf = r1_bio->mddev->private;
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (!uptodate) {
+       if (bio->bi_error) {
                set_bit(WriteErrorSeen,
                        &conf->mirrors[mirror].rdev->flags);
                if (!test_and_set_bit(WantReplacement,
@@@ -557,7 -557,6 +557,6 @@@ static int read_balance(struct r1conf *
                rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
-                   || test_bit(Unmerged, &rdev->flags)
                    || test_bit(Faulty, &rdev->flags))
                        continue;
                if (!test_bit(In_sync, &rdev->flags) &&
        return best_disk;
  }
  
- static int raid1_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       struct r1conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max = biovec->bv_len;
-       if (mddev->merge_check_needed) {
-               int disk;
-               rcu_read_lock();
-               for (disk = 0; disk < conf->raid_disks * 2; disk++) {
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = sector +
-                                               rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- }
  static int raid1_congested(struct mddev *mddev, int bits)
  {
        struct r1conf *conf = mddev->private;
@@@ -793,7 -760,7 +760,7 @@@ static void flush_pending_writes(struc
                        if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        else
                                generic_make_request(bio);
                        bio = next;
@@@ -1068,7 -1035,7 +1035,7 @@@ static void raid1_unplug(struct blk_plu
                if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                        /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                else
                        generic_make_request(bio);
                bio = next;
@@@ -1158,7 -1125,7 +1125,7 @@@ static void make_request(struct mddev *
         * non-zero, then it is the number of not-completed requests.
         */
        bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
  
        if (rw == READ) {
                /*
@@@ -1269,8 -1236,7 +1236,7 @@@ read_again
                        break;
                }
                r1_bio->bios[i] = NULL;
-               if (!rdev || test_bit(Faulty, &rdev->flags)
-                   || test_bit(Unmerged, &rdev->flags)) {
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
                        if (i < conf->raid_disks)
                                set_bit(R1BIO_Degraded, &r1_bio->state);
                        continue;
@@@ -1508,7 -1474,6 +1474,7 @@@ static void error(struct mddev *mddev, 
         */
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
 +      set_bit(MD_CHANGE_PENDING, &mddev->flags);
        printk(KERN_ALERT
               "md/raid1:%s: Disk failure on %s, disabling device.\n"
               "md/raid1:%s: Operation continuing on %d devices.\n",
@@@ -1618,7 -1583,6 +1584,6 @@@ static int raid1_add_disk(struct mddev 
        struct raid1_info *p;
        int first = 0;
        int last = conf->raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
  
        if (mddev->recovery_disabled == conf->recovery_disabled)
                return -EBUSY;
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
  
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
        for (mirror = first; mirror <= last; mirror++) {
                p = conf->mirrors+mirror;
                if (!p->rdev) {
                        break;
                }
        }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1738,7 -1684,7 +1685,7 @@@ abort
        return err;
  }
  
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
  {
        struct r1bio *r1_bio = bio->bi_private;
  
         * or re-read if the read failed.
         * We don't do much here, just schedule handling by raid1d
         */
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                set_bit(R1BIO_Uptodate, &r1_bio->state);
  
        if (atomic_dec_and_test(&r1_bio->remaining))
                reschedule_retry(r1_bio);
  }
  
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
@@@ -1945,7 -1891,7 +1892,7 @@@ static int fix_sync_read_error(struct r
                idx ++;
        }
        set_bit(R1BIO_Uptodate, &r1_bio->state);
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       bio->bi_error = 0;
        return 1;
  }
  
@@@ -1969,15 -1915,14 +1916,14 @@@ static void process_checks(struct r1bi
        for (i = 0; i < conf->raid_disks * 2; i++) {
                int j;
                int size;
-               int uptodate;
+               int error;
                struct bio *b = r1_bio->bios[i];
                if (b->bi_end_io != end_sync_read)
                        continue;
-               /* fixup the bio for reuse, but preserve BIO_UPTODATE */
-               uptodate = test_bit(BIO_UPTODATE, &b->bi_flags);
+               /* fixup the bio for reuse, but preserve errno */
+               error = b->bi_error;
                bio_reset(b);
-               if (!uptodate)
-                       clear_bit(BIO_UPTODATE, &b->bi_flags);
+               b->bi_error = error;
                b->bi_vcnt = vcnt;
                b->bi_iter.bi_size = r1_bio->sectors << 9;
                b->bi_iter.bi_sector = r1_bio->sector +
        }
        for (primary = 0; primary < conf->raid_disks * 2; primary++)
                if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                   test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+                   !r1_bio->bios[primary]->bi_error) {
                        r1_bio->bios[primary]->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
                        break;
                int j;
                struct bio *pbio = r1_bio->bios[primary];
                struct bio *sbio = r1_bio->bios[i];
-               int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
+               int error = sbio->bi_error;
  
                if (sbio->bi_end_io != end_sync_read)
                        continue;
-               /* Now we can 'fixup' the BIO_UPTODATE flag */
-               set_bit(BIO_UPTODATE, &sbio->bi_flags);
+               /* Now we can 'fixup' the error value */
+               sbio->bi_error = 0;
  
-               if (uptodate) {
+               if (!error) {
                        for (j = vcnt; j-- ; ) {
                                struct page *p, *s;
                                p = pbio->bi_io_vec[j].bv_page;
                if (j >= 0)
                        atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                             && uptodate)) {
+                             && !error)) {
                        /* No need to write to this device. */
                        sbio->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@@ -2273,11 -2218,11 +2219,11 @@@ static void handle_sync_write_finished(
                struct bio *bio = r1_bio->bios[m];
                if (bio->bi_end_io == NULL)
                        continue;
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (!bio->bi_error &&
                    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
                        rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
                }
-               if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (bio->bi_error &&
                    test_bit(R1BIO_WriteError, &r1_bio->state)) {
                        if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
                                md_error(conf->mddev, rdev);
  static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
  {
        int m;
 +      bool fail = false;
        for (m = 0; m < conf->raid_disks * 2 ; m++)
                if (r1_bio->bios[m] == IO_MADE_GOOD) {
                        struct md_rdev *rdev = conf->mirrors[m].rdev;
                         * narrow down and record precise write
                         * errors.
                         */
 +                      fail = true;
                        if (!narrow_write_error(r1_bio, m)) {
                                md_error(conf->mddev,
                                         conf->mirrors[m].rdev);
                }
        if (test_bit(R1BIO_WriteError, &r1_bio->state))
                close_write(r1_bio);
 -      raid_end_bio_io(r1_bio);
 +      if (fail) {
 +              spin_lock_irq(&conf->device_lock);
 +              list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
 +              spin_unlock_irq(&conf->device_lock);
 +              md_wakeup_thread(conf->mddev->thread);
 +      } else
 +              raid_end_bio_io(r1_bio);
  }
  
  static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
@@@ -2427,23 -2364,6 +2373,23 @@@ static void raid1d(struct md_thread *th
  
        md_check_recovery(mddev);
  
 +      if (!list_empty_careful(&conf->bio_end_io_list) &&
 +          !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +              LIST_HEAD(tmp);
 +              spin_lock_irqsave(&conf->device_lock, flags);
 +              if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +                      list_add(&tmp, &conf->bio_end_io_list);
 +                      list_del_init(&conf->bio_end_io_list);
 +              }
 +              spin_unlock_irqrestore(&conf->device_lock, flags);
 +              while (!list_empty(&tmp)) {
 +                      r1_bio = list_first_entry(&conf->bio_end_io_list,
 +                                                struct r1bio, retry_list);
 +                      list_del(&r1_bio->retry_list);
 +                      raid_end_bio_io(r1_bio);
 +              }
 +      }
 +
        blk_start_plug(&plug);
        for (;;) {
  
@@@ -2741,7 -2661,7 +2687,7 @@@ static sector_t sync_request(struct mdd
                                                /* remove last page from this bio */
                                                bio->bi_vcnt--;
                                                bio->bi_iter.bi_size -= len;
-                                               __clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+                                               bio_clear_flag(bio, BIO_SEG_VALID);
                                        }
                                        goto bio_full;
                                }
@@@ -2836,8 -2756,6 +2782,6 @@@ static struct r1conf *setup_conf(struc
                        goto abort;
                disk->rdev = rdev;
                q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
  
                disk->head_position = 0;
                disk->seq_start = MaxSector;
        conf->raid_disks = mddev->raid_disks;
        conf->mddev = mddev;
        INIT_LIST_HEAD(&conf->retry_list);
 +      INIT_LIST_HEAD(&conf->bio_end_io_list);
  
        spin_lock_init(&conf->resync_lock);
        init_waitqueue_head(&conf->wait_barrier);
@@@ -3140,7 -3057,6 +3084,7 @@@ static int raid1_reshape(struct mddev *
  
        unfreeze_array(conf);
  
 +      set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
  
@@@ -3204,7 -3120,6 +3148,6 @@@ static struct md_personality raid1_pers
        .quiesce        = raid1_quiesce,
        .takeover       = raid1_takeover,
        .congested      = raid1_congested,
-       .mergeable_bvec = raid1_mergeable_bvec,
  };
  
  static int __init raid_init(void)
diff --combined drivers/md/raid10.c
@@@ -101,7 -101,7 +101,7 @@@ static int _enough(struct r10conf *conf
  static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                                int *skipped);
  static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
- static void end_reshape_write(struct bio *bio, int error);
+ static void end_reshape_write(struct bio *bio);
  static void end_reshape(struct r10conf *conf);
  
  static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@@ -307,9 -307,9 +307,9 @@@ static void raid_end_bio_io(struct r10b
        } else
                done = 1;
        if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
        if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                /*
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
@@@ -358,9 -358,9 +358,9 @@@ static int find_bio_disk(struct r10con
        return r10_bio->devs[slot].devnum;
  }
  
- static void raid10_end_read_request(struct bio *bio, int error)
+ static void raid10_end_read_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r10bio *r10_bio = bio->bi_private;
        int slot, dev;
        struct md_rdev *rdev;
@@@ -438,9 -438,8 +438,8 @@@ static void one_write_done(struct r10bi
        }
  }
  
- static void raid10_end_write_request(struct bio *bio, int error)
+ static void raid10_end_write_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        int dev;
        int dec_rdev = 1;
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
-       if (!uptodate) {
+       if (bio->bi_error) {
                if (repl)
                        /* Never record new bad blocks to replacement,
                         * just fail it.
@@@ -672,93 -671,6 +671,6 @@@ static sector_t raid10_find_virt(struc
        return (vchunk << geo->chunk_shift) + offset;
  }
  
- /**
-  *    raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
-  *    @mddev: the md device
-  *    @bvm: properties of new bio
-  *    @biovec: the request that could be merged to it.
-  *
-  *    Return amount of bytes we can accept at this offset
-  *    This requires checking for end-of-chunk if near_copies != raid_disks,
-  *    and for subordinate merge_bvec_fns if merge_check_needed.
-  */
- static int raid10_mergeable_bvec(struct mddev *mddev,
-                                struct bvec_merge_data *bvm,
-                                struct bio_vec *biovec)
- {
-       struct r10conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       struct geom *geo = &conf->geo;
-       chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1;
-       if (conf->reshape_progress != MaxSector &&
-           ((sector >= conf->reshape_progress) !=
-            conf->mddev->reshape_backwards))
-               geo = &conf->prev;
-       if (geo->near_copies < geo->raid_disks) {
-               max = (chunk_sectors - ((sector & (chunk_sectors - 1))
-                                       + bio_sectors)) << 9;
-               if (max < 0)
-                       /* bio_add cannot handle a negative return */
-                       max = 0;
-               if (max <= biovec->bv_len && bio_sectors == 0)
-                       return biovec->bv_len;
-       } else
-               max = biovec->bv_len;
-       if (mddev->merge_check_needed) {
-               struct {
-                       struct r10bio r10_bio;
-                       struct r10dev devs[conf->copies];
-               } on_stack;
-               struct r10bio *r10_bio = &on_stack.r10_bio;
-               int s;
-               if (conf->reshape_progress != MaxSector) {
-                       /* Cannot give any guidance during reshape */
-                       if (max <= biovec->bv_len && bio_sectors == 0)
-                               return biovec->bv_len;
-                       return 0;
-               }
-               r10_bio->sector = sector;
-               raid10_find_phys(conf, r10_bio);
-               rcu_read_lock();
-               for (s = 0; s < conf->copies; s++) {
-                       int disk = r10_bio->devs[s].devnum;
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-                       rdev = rcu_dereference(conf->mirrors[disk].replacement);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- }
  /*
   * This routine returns the disk from which the requested read should
   * be done. There is a per-array 'next expected sequential IO' sector
@@@ -821,12 -733,10 +733,10 @@@ retry
                disk = r10_bio->devs[slot].devnum;
                rdev = rcu_dereference(conf->mirrors[disk].replacement);
                if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags) ||
                    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
                        rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (rdev == NULL ||
-                   test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags))
+                   test_bit(Faulty, &rdev->flags))
                        continue;
                if (!test_bit(In_sync, &rdev->flags) &&
                    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
@@@ -957,7 -867,7 +867,7 @@@ static void flush_pending_writes(struc
                        if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        else
                                generic_make_request(bio);
                        bio = next;
@@@ -1133,7 -1043,7 +1043,7 @@@ static void raid10_unplug(struct blk_pl
                if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                        /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                else
                        generic_make_request(bio);
                bio = next;
@@@ -1217,7 -1127,7 +1127,7 @@@ static void __make_request(struct mdde
         * non-zero, then it is the number of not-completed requests.
         */
        bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
  
        if (rw == READ) {
                /*
@@@ -1326,11 -1236,9 +1236,9 @@@ retry_write
                        blocked_rdev = rrdev;
                        break;
                }
-               if (rdev && (test_bit(Faulty, &rdev->flags)
-                            || test_bit(Unmerged, &rdev->flags)))
+               if (rdev && (test_bit(Faulty, &rdev->flags)))
                        rdev = NULL;
-               if (rrdev && (test_bit(Faulty, &rrdev->flags)
-                             || test_bit(Unmerged, &rrdev->flags)))
+               if (rrdev && (test_bit(Faulty, &rrdev->flags)))
                        rrdev = NULL;
  
                r10_bio->devs[i].bio = NULL;
@@@ -1681,7 -1589,6 +1589,7 @@@ static void error(struct mddev *mddev, 
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
 +      set_bit(MD_CHANGE_PENDING, &mddev->flags);
        spin_unlock_irqrestore(&conf->device_lock, flags);
        printk(KERN_ALERT
               "md/raid10:%s: Disk failure on %s, disabling device.\n"
@@@ -1778,7 -1685,6 +1686,6 @@@ static int raid10_add_disk(struct mdde
        int mirror;
        int first = 0;
        int last = conf->geo.raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
  
        if (mddev->recovery_cp < MaxSector)
                /* only hot-add to in-sync arrays, as recovery is
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
  
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
        if (rdev->saved_raid_disk >= first &&
            conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                mirror = rdev->saved_raid_disk;
                rcu_assign_pointer(p->rdev, rdev);
                break;
        }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1917,7 -1805,7 +1806,7 @@@ abort
        return err;
  }
  
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
  {
        struct r10bio *r10_bio = bio->bi_private;
        struct r10conf *conf = r10_bio->mddev->private;
        } else
                d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
  
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                set_bit(R10BIO_Uptodate, &r10_bio->state);
        else
                /* The write handler will notice the lack of
@@@ -1978,9 -1866,8 +1867,8 @@@ static void end_sync_request(struct r10
        }
  }
  
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        struct mddev *mddev = r10_bio->mddev;
        struct r10conf *conf = mddev->private;
        else
                rdev = conf->mirrors[d].rdev;
  
-       if (!uptodate) {
+       if (bio->bi_error) {
                if (repl)
                        md_error(mddev, rdev);
                else {
@@@ -2045,7 -1932,7 +1933,7 @@@ static void sync_request_write(struct m
  
        /* find the first device with a block */
        for (i=0; i<conf->copies; i++)
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
+               if (!r10_bio->devs[i].bio->bi_error)
                        break;
  
        if (i == conf->copies)
                        continue;
                if (i == first)
                        continue;
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+               if (!r10_bio->devs[i].bio->bi_error) {
                        /* We know that the bi_io_vec layout is the same for
                         * both 'first' and 'i', so we just compare them.
                         * All vec entries are PAGE_SIZE;
@@@ -2395,7 -2282,6 +2283,6 @@@ static void fix_read_error(struct r10co
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev &&
-                           !test_bit(Unmerged, &rdev->flags) &&
                            test_bit(In_sync, &rdev->flags) &&
                            is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
                                        &first_bad, &bad_sectors) == 0) {
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (!rdev ||
-                           test_bit(Unmerged, &rdev->flags) ||
                            !test_bit(In_sync, &rdev->flags))
                                continue;
  
@@@ -2707,8 -2592,7 +2593,7 @@@ static void handle_write_completed(stru
                        rdev = conf->mirrors[dev].rdev;
                        if (r10_bio->devs[m].bio == NULL)
                                continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].bio->bi_flags)) {
+                       if (!r10_bio->devs[m].bio->bi_error) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
                        rdev = conf->mirrors[dev].replacement;
                        if (r10_bio->devs[m].repl_bio == NULL)
                                continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].repl_bio->bi_flags)) {
+                       if (!r10_bio->devs[m].repl_bio->bi_error) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
                }
                put_buf(r10_bio);
        } else {
 +              bool fail = false;
                for (m = 0; m < conf->copies; m++) {
                        int dev = r10_bio->devs[m].devnum;
                        struct bio *bio = r10_bio->devs[m].bio;
                                        r10_bio->devs[m].addr,
                                        r10_bio->sectors, 0);
                                rdev_dec_pending(rdev, conf->mddev);
-                       } else if (bio != NULL &&
-                                  !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                       } else if (bio != NULL && bio->bi_error) {
 +                              fail = true;
                                if (!narrow_write_error(r10_bio, m)) {
                                        md_error(conf->mddev, rdev);
                                        set_bit(R10BIO_Degraded,
                if (test_bit(R10BIO_WriteError,
                             &r10_bio->state))
                        close_write(r10_bio);
 -              raid_end_bio_io(r10_bio);
 +              if (fail) {
 +                      spin_lock_irq(&conf->device_lock);
 +                      list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
 +                      spin_unlock_irq(&conf->device_lock);
 +                      md_wakeup_thread(conf->mddev->thread);
 +              } else
 +                      raid_end_bio_io(r10_bio);
        }
  }
  
@@@ -2794,23 -2669,6 +2678,23 @@@ static void raid10d(struct md_thread *t
  
        md_check_recovery(mddev);
  
 +      if (!list_empty_careful(&conf->bio_end_io_list) &&
 +          !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +              LIST_HEAD(tmp);
 +              spin_lock_irqsave(&conf->device_lock, flags);
 +              if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +                      list_add(&tmp, &conf->bio_end_io_list);
 +                      list_del_init(&conf->bio_end_io_list);
 +              }
 +              spin_unlock_irqrestore(&conf->device_lock, flags);
 +              while (!list_empty(&tmp)) {
 +                      r10_bio = list_first_entry(&conf->bio_end_io_list,
 +                                                struct r10bio, retry_list);
 +                      list_del(&r10_bio->retry_list);
 +                      raid_end_bio_io(r10_bio);
 +              }
 +      }
 +
        blk_start_plug(&plug);
        for (;;) {
  
@@@ -3289,7 -3147,7 +3173,7 @@@ static sector_t sync_request(struct mdd
  
                        bio = r10_bio->devs[i].bio;
                        bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
                        if (conf->mirrors[d].rdev == NULL ||
                            test_bit(Faulty, &conf->mirrors[d].rdev->flags))
                                continue;
                        /* Need to set up for writing to the replacement */
                        bio = r10_bio->devs[i].repl_bio;
                        bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
  
                        sector = r10_bio->devs[i].addr;
                        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                                /* remove last page from this bio */
                                bio2->bi_vcnt--;
                                bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                        }
                        goto bio_full;
                }
  
                if (bio->bi_end_io == end_sync_read) {
                        md_sync_acct(bio->bi_bdev, nr_sectors);
-                       set_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = 0;
                        generic_make_request(bio);
                }
        }
@@@ -3585,7 -3443,6 +3469,7 @@@ static struct r10conf *setup_conf(struc
        conf->reshape_safe = conf->reshape_progress;
        spin_lock_init(&conf->device_lock);
        INIT_LIST_HEAD(&conf->retry_list);
 +      INIT_LIST_HEAD(&conf->bio_end_io_list);
  
        spin_lock_init(&conf->resync_lock);
        init_waitqueue_head(&conf->wait_barrier);
@@@ -3670,8 -3527,6 +3554,6 @@@ static int run(struct mddev *mddev
                        disk->rdev = rdev;
                }
                q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
                diff = (rdev->new_data_offset - rdev->data_offset);
                if (!mddev->reshape_backwards)
                        diff = -diff;
@@@ -4242,7 -4097,7 +4124,7 @@@ static sector_t reshape_request(struct 
         * at a time, possibly less if that exceeds RESYNC_PAGES,
         * or we hit a bad block or something.
         * This might mean we pause for normal IO in the middle of
 -       * a chunk, but that is not a problem was mddev->reshape_position
 +       * a chunk, but that is not a problem as mddev->reshape_position
         * can record any location.
         *
         * If we will want to write to a location that isn't
         *
         * In all this the minimum difference in data offsets
         * (conf->offset_diff - always positive) allows a bit of slack,
 -       * so next can be after 'safe', but not by more than offset_disk
 +       * so next can be after 'safe', but not by more than offset_diff
         *
         * We need to prepare all the bios here before we start any IO
         * to ensure the size we choose is acceptable to all devices.
@@@ -4409,7 -4264,7 +4291,7 @@@ read_more
        read_bio->bi_end_io = end_sync_read;
        read_bio->bi_rw = READ;
        read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-       __set_bit(BIO_UPTODATE, &read_bio->bi_flags);
+       read_bio->bi_error = 0;
        read_bio->bi_vcnt = 0;
        read_bio->bi_iter.bi_size = 0;
        r10_bio->master_bio = read_bio;
                                /* Remove last page from this bio */
                                bio2->bi_vcnt--;
                                bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                        }
                        goto bio_full;
                }
@@@ -4631,9 -4486,8 +4513,8 @@@ static int handle_reshape_read_error(st
        return 0;
  }
  
- static void end_reshape_write(struct bio *bio, int error)
+ static void end_reshape_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        struct mddev *mddev = r10_bio->mddev;
        struct r10conf *conf = mddev->private;
                rdev = conf->mirrors[d].rdev;
        }
  
-       if (!uptodate) {
+       if (bio->bi_error) {
                /* FIXME should record badblock */
                md_error(mddev, rdev);
        }
@@@ -4727,7 -4581,6 +4608,6 @@@ static struct md_personality raid10_per
        .start_reshape  = raid10_start_reshape,
        .finish_reshape = raid10_finish_reshape,
        .congested      = raid10_congested,
-       .mergeable_bvec = raid10_mergeable_bvec,
  };
  
  static int __init raid_init(void)
diff --combined drivers/md/raid5.c
@@@ -223,14 -223,18 +223,14 @@@ static int raid6_idx_to_slot(int idx, s
        return slot;
  }
  
 -static void return_io(struct bio *return_bi)
 +static void return_io(struct bio_list *return_bi)
  {
 -      struct bio *bi = return_bi;
 -      while (bi) {
 -
 -              return_bi = bi->bi_next;
 -              bi->bi_next = NULL;
 +      struct bio *bi;
 +      while ((bi = bio_list_pop(return_bi)) != NULL) {
                bi->bi_iter.bi_size = 0;
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
 -              bi = return_bi;
        }
  }
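return_io() now takes a struct bio_list instead of an open-coded bi_next chain. A rough userspace model of the queue semantics assumed here (a head/tail list where add appends and pop takes from the front); struct nlist is an invented stand-in, not the kernel type:

#include <stdio.h>
#include <stddef.h>

struct node { struct node *next; int id; };
struct nlist { struct node *head, *tail; };

static void nlist_add(struct nlist *l, struct node *n)
{
	n->next = NULL;
	if (l->tail)
		l->tail->next = n;
	else
		l->head = n;
	l->tail = n;
}

static struct node *nlist_pop(struct nlist *l)
{
	struct node *n = l->head;

	if (n) {
		l->head = n->next;
		if (!l->head)
			l->tail = NULL;
		n->next = NULL;
	}
	return n;
}

int main(void)
{
	struct nlist list = { NULL, NULL };
	struct node a = { NULL, 1 }, b = { NULL, 2 };
	struct node *n;

	nlist_add(&list, &a);
	nlist_add(&list, &b);
	while ((n = nlist_pop(&list)) != NULL)   /* mirrors the while-pop loop in return_io() */
		printf("completing bio %d\n", n->id);
	return 0;
}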
  
@@@ -883,9 -887,9 +883,9 @@@ static int use_new_offset(struct r5con
  }
  
  static void
- raid5_end_read_request(struct bio *bi, int error);
+ raid5_end_read_request(struct bio *bi);
  static void
- raid5_end_write_request(struct bio *bi, int error);
+ raid5_end_write_request(struct bio *bi);
  
  static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
  {
@@@ -1173,7 -1177,7 +1173,7 @@@ async_copy_data(int frombio, struct bi
  static void ops_complete_biofill(void *stripe_head_ref)
  {
        struct stripe_head *sh = stripe_head_ref;
 -      struct bio *return_bi = NULL;
 +      struct bio_list return_bi = BIO_EMPTY_LIST;
        int i;
  
        pr_debug("%s: stripe %llu\n", __func__,
                        while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                rbi2 = r5_next_bio(rbi, dev->sector);
 -                              if (!raid5_dec_bi_active_stripes(rbi)) {
 -                                      rbi->bi_next = return_bi;
 -                                      return_bi = rbi;
 -                              }
 +                              if (!raid5_dec_bi_active_stripes(rbi))
 +                                      bio_list_add(&return_bi, rbi);
                                rbi = rbi2;
                        }
                }
        }
        clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
  
 -      return_io(return_bi);
 +      return_io(&return_bi);
  
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
@@@ -2276,12 -2282,11 +2276,11 @@@ static void shrink_stripes(struct r5con
        conf->slab_cache = NULL;
  }
  
- static void raid5_end_read_request(struct bio * bi, int error)
+ static void raid5_end_read_request(struct bio * bi)
  {
        struct stripe_head *sh = bi->bi_private;
        struct r5conf *conf = sh->raid_conf;
        int disks = sh->disks, i;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        char b[BDEVNAME_SIZE];
        struct md_rdev *rdev = NULL;
        sector_t s;
                if (bi == &sh->dev[i].req)
                        break;
  
-       pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+       pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
        if (i == disks) {
                BUG();
                return;
                s = sh->sector + rdev->new_data_offset;
        else
                s = sh->sector + rdev->data_offset;
-       if (uptodate) {
+       if (!bi->bi_error) {
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                        /* Note that this cannot happen on a
        release_stripe(sh);
  }
  
- static void raid5_end_write_request(struct bio *bi, int error)
+ static void raid5_end_write_request(struct bio *bi)
  {
        struct stripe_head *sh = bi->bi_private;
        struct r5conf *conf = sh->raid_conf;
        int disks = sh->disks, i;
        struct md_rdev *uninitialized_var(rdev);
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        sector_t first_bad;
        int bad_sectors;
        int replacement = 0;
                        break;
                }
        }
-       pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+       pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
        if (i == disks) {
                BUG();
                return;
        }
  
        if (replacement) {
-               if (!uptodate)
+               if (bi->bi_error)
                        md_error(conf->mddev, rdev);
                else if (is_badblock(rdev, sh->sector,
                                     STRIPE_SECTORS,
                                     &first_bad, &bad_sectors))
                        set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
        } else {
-               if (!uptodate) {
+               if (bi->bi_error) {
                        set_bit(STRIPE_DEGRADED, &sh->state);
                        set_bit(WriteErrorSeen, &rdev->flags);
                        set_bit(R5_WriteError, &sh->dev[i].flags);
        }
        rdev_dec_pending(rdev, conf->mddev);
  
-       if (sh->batch_head && !uptodate && !replacement)
+       if (sh->batch_head && bi->bi_error && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
  
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@@ -2513,7 -2517,6 +2511,7 @@@ static void error(struct mddev *mddev, 
        set_bit(Blocked, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
 +      set_bit(MD_CHANGE_PENDING, &mddev->flags);
        printk(KERN_ALERT
               "md/raid:%s: Disk failure on %s, disabling device.\n"
               "md/raid:%s: Operation continuing on %d devices.\n",
@@@ -3066,7 -3069,7 +3064,7 @@@ static void stripe_set_idx(sector_t str
  static void
  handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                struct stripe_head_state *s, int disks,
 -                              struct bio **return_bi)
 +                              struct bio_list *return_bi)
  {
        int i;
        BUG_ON(sh->batch_head);
                while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
 -                              bi->bi_next = *return_bi;
 -                              *return_bi = bi;
 +                              bio_list_add(return_bi, bi);
                        }
                        bi = nextbi;
                }
                while (bi && bi->bi_iter.bi_sector <
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
 -                              bi->bi_next = *return_bi;
 -                              *return_bi = bi;
 +                              bio_list_add(return_bi, bi);
                        }
                        bi = bi2;
                }
                               sh->dev[i].sector + STRIPE_SECTORS) {
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
-                               clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                               bi->bi_error = -EIO;
 -                              if (!raid5_dec_bi_active_stripes(bi)) {
 -                                      bi->bi_next = *return_bi;
 -                                      *return_bi = bi;
 -                              }
 +                              if (!raid5_dec_bi_active_stripes(bi))
 +                                      bio_list_add(return_bi, bi);
                                bi = nextbi;
                        }
                }
@@@ -3435,7 -3445,7 +3436,7 @@@ static void break_stripe_batch_list(str
   * never LOCKED, so we don't need to test 'failed' directly.
   */
  static void handle_stripe_clean_event(struct r5conf *conf,
 -      struct stripe_head *sh, int disks, struct bio **return_bi)
 +      struct stripe_head *sh, int disks, struct bio_list *return_bi)
  {
        int i;
        struct r5dev *dev;
@@@ -3469,7 -3479,8 +3470,7 @@@ returnbi
                                        wbi2 = r5_next_bio(wbi, dev->sector);
                                        if (!raid5_dec_bi_active_stripes(wbi)) {
                                                md_write_end(conf->mddev);
 -                                              wbi->bi_next = *return_bi;
 -                                              *return_bi = wbi;
 +                                              bio_list_add(return_bi, wbi);
                                        }
                                        wbi = wbi2;
                                }
@@@ -4602,15 -4613,7 +4603,15 @@@ finish
                        md_wakeup_thread(conf->mddev->thread);
        }
  
 -      return_io(s.return_bi);
 +      if (!bio_list_empty(&s.return_bi)) {
 +              if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
 +                      spin_lock_irq(&conf->device_lock);
 +                      bio_list_merge(&conf->return_bi, &s.return_bi);
 +                      spin_unlock_irq(&conf->device_lock);
 +                      md_wakeup_thread(conf->mddev->thread);
 +              } else
 +                      return_io(&s.return_bi);
 +      }
  
        clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
  }
@@@ -4667,43 -4670,14 +4668,14 @@@ static int raid5_congested(struct mdde
        return 0;
  }
  
- /* We want read requests to align with chunks where possible,
-  * but write requests don't need to.
-  */
- static int raid5_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       struct r5conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       /*
-        * always allow writes to be mergeable, read as well if array
-        * is degraded as we'll go through stripe cache anyway.
-        */
-       if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
-               return biovec->bv_len;
-       chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
-       max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
-       if (max < 0) max = 0;
-       if (max <= biovec->bv_len && bio_sectors == 0)
-               return biovec->bv_len;
-       else
-               return max;
- }
  static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
  {
 +      struct r5conf *conf = mddev->private;
        sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
 -      unsigned int chunk_sectors = mddev->chunk_sectors;
 +      unsigned int chunk_sectors;
        unsigned int bio_sectors = bio_sectors(bio);
  
 -      if (mddev->new_chunk_sectors < mddev->chunk_sectors)
 -              chunk_sectors = mddev->new_chunk_sectors;
 +      chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
        return  chunk_sectors >=
                ((sector & (chunk_sectors - 1)) + bio_sectors);
  }
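in_chunk_boundary() now derives chunk_sectors from the conf (the smaller of current and previous chunk size) instead of the mddev, but the test itself is unchanged: the bio must start and end inside a single chunk. A worked standalone check of that arithmetic, assuming a power-of-two chunk size as md requires:

#include <stdio.h>

typedef unsigned long long sector_t;

static int fits_in_one_chunk(sector_t sector, unsigned bio_sectors,
			     unsigned chunk_sectors)
{
	/* offset of the bio's start within its chunk */
	unsigned offset = sector & (chunk_sectors - 1);

	return chunk_sectors >= offset + bio_sectors;
}

int main(void)
{
	/* 128-sector (64K) chunks: a 64-sector read at sector 100 crosses a boundary */
	printf("%d\n", fits_in_one_chunk(100, 64, 128));   /* 0: crosses */
	printf("%d\n", fits_in_one_chunk(128, 64, 128));   /* 1: fits */
	return 0;
}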
@@@ -4754,13 -4728,13 +4726,13 @@@ static struct bio *remove_bio_from_retr
   *  first).
   *  If the read failed..
   */
- static void raid5_align_endio(struct bio *bi, int error)
+ static void raid5_align_endio(struct bio *bi)
  {
        struct bio* raid_bi  = bi->bi_private;
        struct mddev *mddev;
        struct r5conf *conf;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        struct md_rdev *rdev;
+       int error = bi->bi_error;
  
        bio_put(bi);
  
  
        rdev_dec_pending(rdev, conf->mddev);
  
-       if (!error && uptodate) {
+       if (!error) {
                trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
                                         raid_bi, 0);
-               bio_endio(raid_bi, 0);
+               bio_endio(raid_bi);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
                        wake_up(&conf->wait_for_quiescent);
                return;
        add_bio_to_retry(raid_bi, conf);
  }
  
- static int bio_fits_rdev(struct bio *bi)
- {
-       struct request_queue *q = bdev_get_queue(bi->bi_bdev);
-       if (bio_sectors(bi) > queue_max_sectors(q))
-               return 0;
-       blk_recount_segments(q, bi);
-       if (bi->bi_phys_segments > queue_max_segments(q))
-               return 0;
-       if (q->merge_bvec_fn)
-               /* it's too hard to apply the merge_bvec_fn at this stage,
-                * just just give up
-                */
-               return 0;
-       return 1;
- }
- static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
  {
        struct r5conf *conf = mddev->private;
        int dd_idx;
        sector_t end_sector;
  
        if (!in_chunk_boundary(mddev, raid_bio)) {
-               pr_debug("chunk_aligned_read : non aligned\n");
+               pr_debug("%s: non aligned\n", __func__);
                return 0;
        }
        /*
                rcu_read_unlock();
                raid_bio->bi_next = (void*)rdev;
                align_bi->bi_bdev =  rdev->bdev;
-               __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags);
+               bio_clear_flag(align_bi, BIO_SEG_VALID);
  
-               if (!bio_fits_rdev(align_bi) ||
-                   is_badblock(rdev, align_bi->bi_iter.bi_sector,
+               if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
                                bio_sectors(align_bi),
                                &first_bad, &bad_sectors)) {
-                       /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
                        rdev_dec_pending(rdev, mddev);
                        return 0;
        }
  }
  
+ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+ {
+       struct bio *split;
+       do {
+               sector_t sector = raid_bio->bi_iter.bi_sector;
+               unsigned chunk_sects = mddev->chunk_sectors;
+               unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+               if (sectors < bio_sectors(raid_bio)) {
+                       split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+                       bio_chain(split, raid_bio);
+               } else
+                       split = raid_bio;
+               if (!raid5_read_one_chunk(mddev, split)) {
+                       if (split != raid_bio)
+                               generic_make_request(raid_bio);
+                       return split;
+               }
+       } while (split != raid_bio);
+       return NULL;
+ }
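The new chunk_aligned_read() wrapper replaces the old bio_fits_rdev()/merge_bvec checks: it repeatedly splits the incoming bio at the next chunk boundary, chains each piece, and hands it to raid5_read_one_chunk(); a piece that cannot be issued directly is returned so make_request() can push it through the stripe cache. A standalone sketch of just the split-size arithmetic; bio_split()/bio_chain() themselves are not modelled:

#include <stdio.h>

typedef unsigned long long sector_t;

int main(void)
{
	unsigned chunk_sects = 128;          /* 64K chunks */
	sector_t sector = 100;               /* bio start */
	unsigned remaining = 200;            /* bio_sectors(raid_bio) */

	while (remaining) {
		/* sectors from here to the next chunk boundary */
		unsigned to_boundary = chunk_sects - (sector & (chunk_sects - 1));
		unsigned this_split = to_boundary < remaining ? to_boundary : remaining;

		printf("submit %u sectors at %llu\n", this_split, sector);
		sector += this_split;
		remaining -= this_split;
	}
	return 0;
}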
  /* __get_priority_stripe - get the next stripe to process
   *
   * Full stripe writes are allowed to pass preread active stripes up until
@@@ -5138,7 -5116,7 +5114,7 @@@ static void make_discard_request(struc
        remaining = raid5_dec_bi_active_stripes(bi);
        if (remaining == 0) {
                md_write_end(mddev);
-               bio_endio(bi, 0);
+               bio_endio(bi);
        }
  }
  
@@@ -5167,9 -5145,11 +5143,11 @@@ static void make_request(struct mddev *
         * data on failed drives.
         */
        if (rw == READ && mddev->degraded == 0 &&
-            mddev->reshape_position == MaxSector &&
-            chunk_aligned_read(mddev,bi))
-               return;
+           mddev->reshape_position == MaxSector) {
+               bi = chunk_aligned_read(mddev, bi);
+               if (!bi)
+                       return;
+       }
  
        if (unlikely(bi->bi_rw & REQ_DISCARD)) {
                make_discard_request(mddev, bi);
                        release_stripe_plug(mddev, sh);
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        break;
                }
        }
  
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
        }
  }
  
@@@ -5345,7 -5325,6 +5323,7 @@@ static sector_t reshape_request(struct 
        sector_t stripe_addr;
        int reshape_sectors;
        struct list_head stripes;
 +      sector_t retn;
  
        if (sector_nr == 0) {
                /* If restarting in the middle, skip the initial sectors */
                    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                        sector_nr = raid5_size(mddev, 0, 0)
                                - conf->reshape_progress;
 +              } else if (mddev->reshape_backwards &&
 +                         conf->reshape_progress == MaxSector) {
 +                      /* shouldn't happen, but just in case, finish up. */
 +                      sector_nr = MaxSector;
                } else if (!mddev->reshape_backwards &&
                           conf->reshape_progress > 0)
                        sector_nr = conf->reshape_progress;
                        mddev->curr_resync_completed = sector_nr;
                        sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                        *skipped = 1;
 -                      return sector_nr;
 +                      retn = sector_nr;
 +                      goto finish;
                }
        }
  
         * If old and new chunk sizes differ, we need to process the
         * largest of these
         */
 -      if (mddev->new_chunk_sectors > mddev->chunk_sectors)
 -              reshape_sectors = mddev->new_chunk_sectors;
 -      else
 -              reshape_sectors = mddev->chunk_sectors;
 +
 +      reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors);
  
        /* We update the metadata at least every 10 seconds, or when
         * the data about to be copied would over-write the source of
        safepos = conf->reshape_safe;
        sector_div(safepos, data_disks);
        if (mddev->reshape_backwards) {
 -              writepos -= min_t(sector_t, reshape_sectors, writepos);
 +              BUG_ON(writepos < reshape_sectors);
 +              writepos -= reshape_sectors;
                readpos += reshape_sectors;
                safepos += reshape_sectors;
        } else {
                writepos += reshape_sectors;
 +              /* readpos and safepos are worst-case calculations.
 +               * A negative number is overly pessimistic, and causes
 +               * obvious problems for unsigned storage.  So clip to 0.
 +               */
                readpos -= min_t(sector_t, reshape_sectors, readpos);
                safepos -= min_t(sector_t, reshape_sectors, safepos);
        }
         * then we need to write out the superblock.
         */
        sector_nr += reshape_sectors;
 -      if ((sector_nr - mddev->curr_resync_completed) * 2
 +      retn = reshape_sectors;
 +finish:
 +      if (mddev->curr_resync_completed > mddev->resync_max ||
 +          (sector_nr - mddev->curr_resync_completed) * 2
            >= mddev->resync_max - mddev->curr_resync_completed) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
  ret:
 -      return reshape_sectors;
 +      return retn;
  }
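The comment added above about readpos and safepos explains why their decrements are clamped: an unsigned sector_t that "went negative" would wrap to a huge value. A tiny worked example of the clamp used there:

#include <stdio.h>

typedef unsigned long long sector_t;

/* pos -= min(dec, pos): never subtract more than is available */
static sector_t clip_sub(sector_t pos, sector_t dec)
{
	return pos - (dec < pos ? dec : pos);
}

int main(void)
{
	printf("%llu\n", clip_sub(1000, 256));  /* 744 */
	printf("%llu\n", clip_sub(100, 256));   /* 0, not a wrapped-around value */
	return 0;
}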
  
  static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
@@@ -5724,7 -5692,7 +5702,7 @@@ static int  retry_aligned_read(struct r
        if (remaining == 0) {
                trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
                                         raid_bio, 0);
-               bio_endio(raid_bio, 0);
+               bio_endio(raid_bio);
        }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
                wake_up(&conf->wait_for_quiescent);
@@@ -5826,18 -5794,6 +5804,18 @@@ static void raid5d(struct md_thread *th
  
        md_check_recovery(mddev);
  
 +      if (!bio_list_empty(&conf->return_bi) &&
 +          !test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +              struct bio_list tmp = BIO_EMPTY_LIST;
 +              spin_lock_irq(&conf->device_lock);
 +              if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
 +                      bio_list_merge(&tmp, &conf->return_bi);
 +                      bio_list_init(&conf->return_bi);
 +              }
 +              spin_unlock_irq(&conf->device_lock);
 +              return_io(&tmp);
 +      }
 +
        blk_start_plug(&plug);
        handled = 0;
        spin_lock_irq(&conf->device_lock);
@@@ -6278,8 -6234,8 +6256,8 @@@ raid5_size(struct mddev *mddev, sector_
                /* size is defined by the smallest of previous and new size */
                raid_disks = min(conf->raid_disks, conf->previous_raid_disks);
  
 -      sectors &= ~((sector_t)mddev->chunk_sectors - 1);
 -      sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
 +      sectors &= ~((sector_t)conf->chunk_sectors - 1);
 +      sectors &= ~((sector_t)conf->prev_chunk_sectors - 1);
        return sectors * (raid_disks - conf->max_degraded);
  }
  
@@@ -6497,7 -6453,6 +6475,7 @@@ static struct r5conf *setup_conf(struc
        INIT_LIST_HEAD(&conf->hold_list);
        INIT_LIST_HEAD(&conf->delayed_list);
        INIT_LIST_HEAD(&conf->bitmap_list);
 +      bio_list_init(&conf->return_bi);
        init_llist_head(&conf->released_stripes);
        atomic_set(&conf->active_stripes, 0);
        atomic_set(&conf->preread_active_stripes, 0);
        if (conf->reshape_progress != MaxSector) {
                conf->prev_chunk_sectors = mddev->chunk_sectors;
                conf->prev_algo = mddev->layout;
 +      } else {
 +              conf->prev_chunk_sectors = conf->chunk_sectors;
 +              conf->prev_algo = conf->algorithm;
        }
  
        conf->min_nr_stripes = NR_STRIPES;
@@@ -6709,8 -6661,6 +6687,8 @@@ static int run(struct mddev *mddev
                sector_t here_new, here_old;
                int old_disks;
                int max_degraded = (mddev->level == 6 ? 2 : 1);
 +              int chunk_sectors;
 +              int new_data_disks;
  
                if (mddev->new_level != mddev->level) {
                        printk(KERN_ERR "md/raid:%s: unsupported reshape "
                /* reshape_position must be on a new-stripe boundary, and one
                 * further up in new geometry must map after here in old
                 * geometry.
 +               * If the chunk sizes are different, then as we perform reshape
 +               * in units of the largest of the two, reshape_position needs to
 +               * be a multiple of the largest chunk size times new data disks.
                 */
                here_new = mddev->reshape_position;
 -              if (sector_div(here_new, mddev->new_chunk_sectors *
 -                             (mddev->raid_disks - max_degraded))) {
 +              chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors);
 +              new_data_disks = mddev->raid_disks - max_degraded;
 +              if (sector_div(here_new, chunk_sectors * new_data_disks)) {
                        printk(KERN_ERR "md/raid:%s: reshape_position not "
                               "on a stripe boundary\n", mdname(mddev));
                        return -EINVAL;
                }
 -              reshape_offset = here_new * mddev->new_chunk_sectors;
 +              reshape_offset = here_new * chunk_sectors;
                /* here_new is the stripe we will write to */
                here_old = mddev->reshape_position;
 -              sector_div(here_old, mddev->chunk_sectors *
 -                         (old_disks-max_degraded));
 +              sector_div(here_old, chunk_sectors * (old_disks-max_degraded));
                /* here_old is the first stripe that we might need to read
                 * from */
                if (mddev->delta_disks == 0) {
 -                      if ((here_new * mddev->new_chunk_sectors !=
 -                           here_old * mddev->chunk_sectors)) {
 -                              printk(KERN_ERR "md/raid:%s: reshape position is"
 -                                     " confused - aborting\n", mdname(mddev));
 -                              return -EINVAL;
 -                      }
                        /* We cannot be sure it is safe to start an in-place
                         * reshape.  It is only safe if user-space is monitoring
                         * and taking constant backups.
                                return -EINVAL;
                        }
                } else if (mddev->reshape_backwards
 -                  ? (here_new * mddev->new_chunk_sectors + min_offset_diff <=
 -                     here_old * mddev->chunk_sectors)
 -                  : (here_new * mddev->new_chunk_sectors >=
 -                     here_old * mddev->chunk_sectors + (-min_offset_diff))) {
 +                  ? (here_new * chunk_sectors + min_offset_diff <=
 +                     here_old * chunk_sectors)
 +                  : (here_new * chunk_sectors >=
 +                     here_old * chunk_sectors + (-min_offset_diff))) {
                        /* Reading from the same stripe as writing to - bad */
                        printk(KERN_ERR "md/raid:%s: reshape_position too early for "
                               "auto-recovery - aborting.\n",
@@@ -7014,7 -6967,7 +6992,7 @@@ static void status(struct seq_file *seq
        int i;
  
        seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
 -              mddev->chunk_sectors / 2, mddev->layout);
 +              conf->chunk_sectors / 2, mddev->layout);
        seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
        for (i = 0; i < conf->raid_disks; i++)
                seq_printf (seq, "%s",
@@@ -7220,9 -7173,7 +7198,9 @@@ static int raid5_resize(struct mddev *m
         * worth it.
         */
        sector_t newsize;
 -      sectors &= ~((sector_t)mddev->chunk_sectors - 1);
 +      struct r5conf *conf = mddev->private;
 +
 +      sectors &= ~((sector_t)conf->chunk_sectors - 1);
        newsize = raid5_size(mddev, sectors, mddev->raid_disks);
        if (mddev->external_size &&
            mddev->array_sectors > newsize)
@@@ -7461,7 -7412,6 +7439,7 @@@ static void end_reshape(struct r5conf *
                        rdev->data_offset = rdev->new_data_offset;
                smp_wmb();
                conf->reshape_progress = MaxSector;
 +              conf->mddev->reshape_position = MaxSector;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
  
@@@ -7807,7 -7757,6 +7785,6 @@@ static struct md_personality raid6_pers
        .quiesce        = raid5_quiesce,
        .takeover       = raid6_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  static struct md_personality raid5_personality =
  {
        .quiesce        = raid5_quiesce,
        .takeover       = raid5_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  
  static struct md_personality raid4_personality =
        .quiesce        = raid5_quiesce,
        .takeover       = raid4_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  
  static int __init raid5_init(void)