Merge tag 'phy-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy
[linux-2.6-microblaze.git] / drivers / md / md.c
index 78be781..0fe7ab6 100644 (file)
@@ -453,7 +453,6 @@ void mddev_suspend(struct mddev *mddev)
                mddev->pers->prepare_suspend(mddev);
 
        wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
-       mddev->pers->quiesce(mddev, 1);
        clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
        wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
 
@@ -465,14 +464,15 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
-       /* entred the memalloc scope from mddev_suspend() */
-       memalloc_noio_restore(mddev->noio_flag);
        lockdep_assert_held(&mddev->reconfig_mutex);
        if (--mddev->suspended)
                return;
+
+       /* entered the memalloc scope from mddev_suspend() */
+       memalloc_noio_restore(mddev->noio_flag);
+
        percpu_ref_resurrect(&mddev->active_io);
        wake_up(&mddev->sb_wait);
-       mddev->pers->quiesce(mddev, 0);
 
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
@@ -643,6 +643,7 @@ void mddev_init(struct mddev *mddev)
 {
        mutex_init(&mddev->open_mutex);
        mutex_init(&mddev->reconfig_mutex);
+       mutex_init(&mddev->sync_mutex);
        mutex_init(&mddev->bitmap_info.mutex);
        INIT_LIST_HEAD(&mddev->disks);
        INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -650,6 +651,7 @@ void mddev_init(struct mddev *mddev)
        timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
        atomic_set(&mddev->active, 1);
        atomic_set(&mddev->openers, 0);
+       atomic_set(&mddev->sync_seq, 0);
        spin_lock_init(&mddev->lock);
        atomic_set(&mddev->flush_pending, 0);
        init_waitqueue_head(&mddev->sb_wait);
@@ -2304,7 +2306,7 @@ int md_integrity_register(struct mddev *mddev)
        pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
        if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
            (mddev->level != 1 && mddev->level != 10 &&
-            bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
+            bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
                /*
                 * No need to handle the failure of bioset_integrity_create,
                 * because the function is called by md_run() -> pers->run(),
@@ -4747,6 +4749,62 @@ action_show(struct mddev *mddev, char *page)
        return sprintf(page, "%s\n", type);
 }
 
+static void stop_sync_thread(struct mddev *mddev)
+{
+       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+               return;
+
+       if (mddev_lock(mddev))
+               return;
+
+       /*
+        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
+        * held.
+        */
+       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+               mddev_unlock(mddev);
+               return;
+       }
+
+       if (work_pending(&mddev->del_work))
+               flush_workqueue(md_misc_wq);
+
+       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+       /*
+        * Thread might be blocked waiting for metadata update which will now
+        * never happen
+        */
+       md_wakeup_thread_directly(mddev->sync_thread);
+
+       mddev_unlock(mddev);
+}
+
+static void idle_sync_thread(struct mddev *mddev)
+{
+       int sync_seq = atomic_read(&mddev->sync_seq);
+
+       mutex_lock(&mddev->sync_mutex);
+       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       stop_sync_thread(mddev);
+
+       wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
+                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+
+       mutex_unlock(&mddev->sync_mutex);
+}
+
+static void frozen_sync_thread(struct mddev *mddev)
+{
+       mutex_lock(&mddev->sync_mutex);
+       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       stop_sync_thread(mddev);
+
+       wait_event(resync_wait, mddev->sync_thread == NULL &&
+                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+
+       mutex_unlock(&mddev->sync_mutex);
+}
+
 static ssize_t
 action_store(struct mddev *mddev, const char *page, size_t len)
 {
@@ -4754,35 +4812,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                return -EINVAL;
 
 
-       if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
-               if (cmd_match(page, "frozen"))
-                       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-               else
-                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-               if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
-                   mddev_lock(mddev) == 0) {
-                       if (work_pending(&mddev->del_work))
-                               flush_workqueue(md_misc_wq);
-                       if (mddev->sync_thread) {
-                               sector_t save_rp = mddev->reshape_position;
-
-                               mddev_unlock(mddev);
-                               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                               md_unregister_thread(&mddev->sync_thread);
-                               mddev_lock_nointr(mddev);
-                               /*
-                                * set RECOVERY_INTR again and restore reshape
-                                * position in case others changed them after
-                                * got lock, eg, reshape_position_store and
-                                * md_check_recovery.
-                                */
-                               mddev->reshape_position = save_rp;
-                               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                               md_reap_sync_thread(mddev);
-                       }
-                       mddev_unlock(mddev);
-               }
-       } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+       if (cmd_match(page, "idle"))
+               idle_sync_thread(mddev);
+       else if (cmd_match(page, "frozen"))
+               frozen_sync_thread(mddev);
+       else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                return -EBUSY;
        else if (cmd_match(page, "resync"))
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -5842,6 +5876,13 @@ int md_run(struct mddev *mddev)
                        goto exit_bio_set;
        }
 
+       if (!bioset_initialized(&mddev->io_clone_set)) {
+               err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
+                                 offsetof(struct md_io_clone, bio_clone), 0);
+               if (err)
+                       goto exit_sync_set;
+       }
+
        spin_lock(&pers_lock);
        pers = find_pers(mddev->level, mddev->clevel);
        if (!pers || !try_module_get(pers->owner)) {
@@ -6019,6 +6060,8 @@ bitmap_abort:
        module_put(pers->owner);
        md_bitmap_destroy(mddev);
 abort:
+       bioset_exit(&mddev->io_clone_set);
+exit_sync_set:
        bioset_exit(&mddev->sync_set);
 exit_bio_set:
        bioset_exit(&mddev->bio_set);
@@ -6176,7 +6219,6 @@ static void __md_stop_writes(struct mddev *mddev)
                flush_workqueue(md_misc_wq);
        if (mddev->sync_thread) {
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-               md_unregister_thread(&mddev->sync_thread);
                md_reap_sync_thread(mddev);
        }
 
@@ -6216,7 +6258,7 @@ static void mddev_detach(struct mddev *mddev)
                mddev->pers->quiesce(mddev, 1);
                mddev->pers->quiesce(mddev, 0);
        }
-       md_unregister_thread(&mddev->thread);
+       md_unregister_thread(mddev, &mddev->thread);
        if (mddev->queue)
                blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 }
@@ -6243,6 +6285,7 @@ static void __md_stop(struct mddev *mddev)
        percpu_ref_exit(&mddev->active_io);
        bioset_exit(&mddev->bio_set);
        bioset_exit(&mddev->sync_set);
+       bioset_exit(&mddev->io_clone_set);
 }
 
 void md_stop(struct mddev *mddev)
@@ -7012,6 +7055,15 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 
                if (mddev->bitmap || mddev->bitmap_info.file)
                        return -EEXIST; /* cannot add when bitmap is present */
+
+               if (!IS_ENABLED(CONFIG_MD_BITMAP_FILE)) {
+                       pr_warn("%s: bitmap files not supported by this kernel\n",
+                               mdname(mddev));
+                       return -EINVAL;
+               }
+               pr_warn("%s: using deprecated bitmap file support\n",
+                       mdname(mddev));
+
                f = fget(fd);
 
                if (f == NULL) {
@@ -7940,9 +7992,10 @@ struct md_thread *md_register_thread(void (*run) (struct md_thread *),
 }
 EXPORT_SYMBOL(md_register_thread);
 
-void md_unregister_thread(struct md_thread __rcu **threadp)
+void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
 {
-       struct md_thread *thread = rcu_dereference_protected(*threadp, true);
+       struct md_thread *thread = rcu_dereference_protected(*threadp,
+                                       lockdep_is_held(&mddev->reconfig_mutex));
 
        if (!thread)
                return;
@@ -8601,62 +8654,44 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
-int acct_bioset_init(struct mddev *mddev)
+static void md_end_clone_io(struct bio *bio)
 {
-       int err = 0;
-
-       if (!bioset_initialized(&mddev->io_acct_set))
-               err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
-                       offsetof(struct md_io_acct, bio_clone), 0);
-       return err;
-}
-EXPORT_SYMBOL_GPL(acct_bioset_init);
-
-void acct_bioset_exit(struct mddev *mddev)
-{
-       bioset_exit(&mddev->io_acct_set);
-}
-EXPORT_SYMBOL_GPL(acct_bioset_exit);
-
-static void md_end_io_acct(struct bio *bio)
-{
-       struct md_io_acct *md_io_acct = bio->bi_private;
-       struct bio *orig_bio = md_io_acct->orig_bio;
-       struct mddev *mddev = md_io_acct->mddev;
+       struct md_io_clone *md_io_clone = bio->bi_private;
+       struct bio *orig_bio = md_io_clone->orig_bio;
+       struct mddev *mddev = md_io_clone->mddev;
 
        orig_bio->bi_status = bio->bi_status;
 
-       bio_end_io_acct(orig_bio, md_io_acct->start_time);
+       if (md_io_clone->start_time)
+               bio_end_io_acct(orig_bio, md_io_clone->start_time);
+
        bio_put(bio);
        bio_endio(orig_bio);
-
        percpu_ref_put(&mddev->active_io);
 }
 
-/*
- * Used by personalities that don't already clone the bio and thus can't
- * easily add the timestamp to their extended bio structure.
- */
-void md_account_bio(struct mddev *mddev, struct bio **bio)
+static void md_clone_bio(struct mddev *mddev, struct bio **bio)
 {
        struct block_device *bdev = (*bio)->bi_bdev;
-       struct md_io_acct *md_io_acct;
-       struct bio *clone;
-
-       if (!blk_queue_io_stat(bdev->bd_disk->queue))
-               return;
+       struct md_io_clone *md_io_clone;
+       struct bio *clone =
+               bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
+
+       md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
+       md_io_clone->orig_bio = *bio;
+       md_io_clone->mddev = mddev;
+       if (blk_queue_io_stat(bdev->bd_disk->queue))
+               md_io_clone->start_time = bio_start_io_acct(*bio);
+
+       clone->bi_end_io = md_end_clone_io;
+       clone->bi_private = md_io_clone;
+       *bio = clone;
+}
 
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
        percpu_ref_get(&mddev->active_io);
-
-       clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
-       md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
-       md_io_acct->orig_bio = *bio;
-       md_io_acct->start_time = bio_start_io_acct(*bio);
-       md_io_acct->mddev = mddev;
-
-       clone->bi_end_io = md_end_io_acct;
-       clone->bi_private = md_io_acct;
-       *bio = clone;
+       md_clone_bio(mddev, bio);
 }
 EXPORT_SYMBOL_GPL(md_account_bio);
 
@@ -9329,7 +9364,6 @@ void md_check_recovery(struct mddev *mddev)
                         * ->spare_active and clear saved_raid_disk
                         */
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       md_unregister_thread(&mddev->sync_thread);
                        md_reap_sync_thread(mddev);
                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                        clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9358,17 +9392,24 @@ void md_check_recovery(struct mddev *mddev)
                if (mddev->sb_flags)
                        md_update_sb(mddev, 0);
 
-               if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
-                   !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
-                       /* resync/recovery still happening */
-                       clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-                       goto unlock;
-               }
-               if (mddev->sync_thread) {
-                       md_unregister_thread(&mddev->sync_thread);
+               /*
+                * Never start a new sync thread if MD_RECOVERY_RUNNING is
+                * still set.
+                */
+               if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+                       if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
+                               /* resync/recovery still happening */
+                               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                               goto unlock;
+                       }
+
+                       if (WARN_ON_ONCE(!mddev->sync_thread))
+                               goto unlock;
+
                        md_reap_sync_thread(mddev);
                        goto unlock;
                }
+
                /* Set RUNNING before clearing NEEDED to avoid
                 * any transients in the value of "sync_action".
                 */
@@ -9445,7 +9486,10 @@ void md_reap_sync_thread(struct mddev *mddev)
        sector_t old_dev_sectors = mddev->dev_sectors;
        bool is_reshaped = false;
 
-       /* sync_thread should be unregistered, collect result */
+       /* resync has finished, collect result */
+       md_unregister_thread(mddev, &mddev->sync_thread);
+       atomic_inc(&mddev->sync_seq);
+
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
            mddev->degraded != mddev->raid_disks) {
@@ -9490,7 +9534,6 @@ void md_reap_sync_thread(struct mddev *mddev)
        if (mddev_is_clustered(mddev) && is_reshaped
                                      && !test_bit(MD_CLOSING, &mddev->flags))
                md_cluster_ops->update_size(mddev, old_dev_sectors);
-       wake_up(&resync_wait);
        /* flag recovery needed just to double check */
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -9498,6 +9541,7 @@ void md_reap_sync_thread(struct mddev *mddev)
        md_new_event();
        if (mddev->event_work.func)
                queue_work(md_misc_wq, &mddev->event_work);
+       wake_up(&resync_wait);
 }
 EXPORT_SYMBOL(md_reap_sync_thread);