diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1b6302e..804b847 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -32,6 +32,7 @@
 #include <linux/relay.h>
 #include <linux/slab.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/cpuset.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -63,6 +64,7 @@ struct cpuhp_cpu_state {
        bool                    rollback;
        bool                    single;
        bool                    bringup;
+       int                     cpu;
        struct hlist_node       *node;
        struct hlist_node       *last;
        enum cpuhp_state        cb_state;
@@ -135,6 +137,11 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
        return cpuhp_hp_states + state;
 }
 
+static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
+{
+       return bringup ? !step->startup.single : !step->teardown.single;
+}
+
 /**
  * cpuhp_invoke_callback - Invoke the callbacks for a given state
  * @cpu:       The cpu for which the callback should be invoked
@@ -157,26 +164,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 
        if (st->fail == state) {
                st->fail = CPUHP_INVALID;
-
-               if (!(bringup ? step->startup.single : step->teardown.single))
-                       return 0;
-
                return -EAGAIN;
        }
 
+       if (cpuhp_step_empty(bringup, step)) {
+               WARN_ON_ONCE(1);
+               return 0;
+       }
+
        if (!step->multi_instance) {
                WARN_ON_ONCE(lastp && *lastp);
                cb = bringup ? step->startup.single : step->teardown.single;
-               if (!cb)
-                       return 0;
+
                trace_cpuhp_enter(cpu, st->target, state, cb);
                ret = cb(cpu);
                trace_cpuhp_exit(cpu, st->state, state, ret);
                return ret;
        }
        cbm = bringup ? step->startup.multi : step->teardown.multi;
-       if (!cbm)
-               return 0;
 
        /* Single invocation for instance add/remove */
        if (node) {
@@ -461,13 +466,16 @@ static inline enum cpuhp_state
 cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
 {
        enum cpuhp_state prev_state = st->state;
+       bool bringup = st->state < target;
 
        st->rollback = false;
        st->last = NULL;
 
        st->target = target;
        st->single = false;
-       st->bringup = st->state < target;
+       st->bringup = bringup;
+       if (cpu_dying(st->cpu) != !bringup)
+               set_cpu_dying(st->cpu, !bringup);
 
        return prev_state;
 }
@@ -475,6 +483,17 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
 static inline void
 cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
 {
+       bool bringup = !st->bringup;
+
+       st->target = prev_state;
+
+       /*
+        * Already rolling back. No need to invert the bringup value or to
+        * change the current state.
+        */
+       if (st->rollback)
+               return;
+
        st->rollback = true;
 
        /*
@@ -488,8 +507,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
                        st->state++;
        }
 
-       st->target = prev_state;
-       st->bringup = !st->bringup;
+       st->bringup = bringup;
+       if (cpu_dying(st->cpu) != !bringup)
+               set_cpu_dying(st->cpu, !bringup);
 }
 
 /* Regular hotplug invocation of the AP hotplug thread */
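
Both cpuhp_set_state() and cpuhp_reset_state() now also keep the new __cpu_dying_mask (exported at the bottom of this diff) in sync with the direction of the operation, touching the mask only when the bit would actually change; cpu_dying()/set_cpu_dying() are taken here to be the matching cpumask test/update helpers. A minimal user-space sketch of that "update only on transition" bookkeeping, with a plain bitmask standing in for struct cpumask and all names illustrative:

/* Toy model (user space): keep a "dying" bitmask in sync with the
 * direction of a per-CPU state transition, writing the mask only when
 * the bit actually changes -- mirroring the pattern used in
 * cpuhp_set_state()/cpuhp_reset_state() above.  Names are illustrative,
 * not the kernel's. */
#include <stdbool.h>
#include <stdio.h>

static unsigned long cpu_dying_mask;	/* bit n set => CPU n is going down */

static bool cpu_dying(unsigned int cpu)
{
	return cpu_dying_mask & (1UL << cpu);
}

static void set_cpu_dying(unsigned int cpu, bool dying)
{
	if (dying)
		cpu_dying_mask |= 1UL << cpu;
	else
		cpu_dying_mask &= ~(1UL << cpu);
}

/* Called whenever a hotplug operation (or its rollback) picks a direction. */
static void sync_dying_bit(unsigned int cpu, bool bringup)
{
	/* Avoid a redundant write when the bit already matches. */
	if (cpu_dying(cpu) != !bringup)
		set_cpu_dying(cpu, !bringup);
}

int main(void)
{
	sync_dying_bit(2, false);			/* CPU2 starts going down */
	printf("cpu2 dying: %d\n", cpu_dying(2));	/* 1 */
	sync_dying_bit(2, true);			/* rollback: CPU2 comes back up */
	printf("cpu2 dying: %d\n", cpu_dying(2));	/* 0 */
	return 0;
}
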
@@ -591,10 +611,53 @@ static int finish_cpu(unsigned int cpu)
  * Hotplug state machine related functions
  */
 
-static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
+/*
+ * Get the next state to run. Empty ones will be skipped. Returns true if a
+ * state must be run.
+ *
+ * st->state will be modified ahead of time, to match state_to_run, as if it
+ * has already ran.
+ */
+static bool cpuhp_next_state(bool bringup,
+                            enum cpuhp_state *state_to_run,
+                            struct cpuhp_cpu_state *st,
+                            enum cpuhp_state target)
+{
+       do {
+               if (bringup) {
+                       if (st->state >= target)
+                               return false;
+
+                       *state_to_run = ++st->state;
+               } else {
+                       if (st->state <= target)
+                               return false;
+
+                       *state_to_run = st->state--;
+               }
+
+               if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
+                       break;
+       } while (true);
+
+       return true;
+}
+
+static int cpuhp_invoke_callback_range(bool bringup,
+                                      unsigned int cpu,
+                                      struct cpuhp_cpu_state *st,
+                                      enum cpuhp_state target)
 {
-       for (st->state--; st->state > st->target; st->state--)
-               cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+       enum cpuhp_state state;
+       int err = 0;
+
+       while (cpuhp_next_state(bringup, &state, st, target)) {
+               err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
+               if (err)
+                       break;
+       }
+
+       return err;
 }
 
 static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
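
cpuhp_next_state() and cpuhp_invoke_callback_range() replace undo_cpu_up() above and the remaining open-coded state-walking loops in the hunks below: the first picks the next non-empty state in the requested direction and advances st->state past it, the second runs callbacks until the target is reached or one of them fails. A small user-space model of that walk follows, including the step back over the failed state that cpuhp_reset_state() performs before the rollback pass (when no multi-instance node was partially installed); the step table, the "empty step" test and the injected failure are all made up for illustration:

/* Toy model (user space) of cpuhp_next_state()/cpuhp_invoke_callback_range():
 * walk a table of steps in one direction, skip steps without a callback,
 * and stop at the first error. */
#include <stdbool.h>
#include <stdio.h>

enum state { S_OFFLINE, S_PREPARE, S_EMPTY, S_BRINGUP, S_ONLINE, NR_STATES };

typedef int (*step_fn)(enum state);

static int ok(enum state s)   { printf("  run state %d\n", s); return 0; }
static int fail(enum state s) { printf("  fail state %d\n", s); return -1; }

/* bringup callbacks per state; NULL means "empty step", skipped by the walk */
static step_fn startup[NR_STATES]  = { NULL, ok, NULL, fail, ok };
/* teardown callbacks, used here for the rollback pass */
static step_fn teardown[NR_STATES] = { NULL, ok, NULL, ok,   ok };

static bool step_empty(bool bringup, enum state s)
{
	return bringup ? !startup[s] : !teardown[s];
}

/* Pick the next non-empty state towards @target, advancing *state past it. */
static bool next_state(bool bringup, enum state *to_run, int *state, int target)
{
	do {
		if (bringup) {
			if (*state >= target)
				return false;
			*to_run = ++(*state);
		} else {
			if (*state <= target)
				return false;
			*to_run = (*state)--;
		}
	} while (step_empty(bringup, *to_run));

	return true;
}

static int invoke_range(bool bringup, int *state, int target)
{
	enum state s;
	int err = 0;

	while (next_state(bringup, &s, state, target)) {
		err = (bringup ? startup[s] : teardown[s])(s);
		if (err)
			break;
	}
	return err;
}

int main(void)
{
	int state = S_OFFLINE;

	printf("bring up:\n");
	if (invoke_range(true, &state, S_ONLINE)) {
		/* As in cpuhp_reset_state(): the failed step's teardown is not
		 * run, so step back over it before rolling back down. */
		state--;
		printf("rollback:\n");
		invoke_range(false, &state, S_OFFLINE);
	}
	return 0;
}
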
@@ -617,16 +680,12 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
 
-       while (st->state < target) {
-               st->state++;
-               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-               if (ret) {
-                       if (can_rollback_cpu(st)) {
-                               st->target = prev_state;
-                               undo_cpu_up(cpu, st);
-                       }
-                       break;
-               }
+       ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+       if (ret) {
+               cpuhp_reset_state(st, prev_state);
+               if (can_rollback_cpu(st))
+                       WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
+                                                           prev_state));
        }
        return ret;
 }
@@ -640,6 +699,7 @@ static void cpuhp_create(unsigned int cpu)
 
        init_completion(&st->done_up);
        init_completion(&st->done_down);
+       st->cpu = cpu;
 }
 
 static int cpuhp_should_run(unsigned int cpu)
@@ -690,17 +750,9 @@ static void cpuhp_thread_fun(unsigned int cpu)
                state = st->cb_state;
                st->should_run = false;
        } else {
-               if (bringup) {
-                       st->state++;
-                       state = st->state;
-                       st->should_run = (st->state < st->target);
-                       WARN_ON_ONCE(st->state > st->target);
-               } else {
-                       state = st->state;
-                       st->state--;
-                       st->should_run = (st->state > st->target);
-                       WARN_ON_ONCE(st->state < st->target);
-               }
+               st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
+               if (!st->should_run)
+                       goto end;
        }
 
        WARN_ON_ONCE(!cpuhp_is_ap_state(state));
@@ -728,6 +780,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
                st->should_run = false;
        }
 
+end:
        cpuhp_lock_release(bringup);
        lockdep_release_cpus_lock();
 
@@ -821,6 +874,52 @@ void __init cpuhp_threads_init(void)
        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
+/*
+ *
+ * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
+ * protected region.
+ *
+ * The operation is still serialized against concurrent CPU hotplug via
+ * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
+ * serialized against other hotplug related activity like adding or
+ * removing of state callbacks and state instances, which invoke either the
+ * startup or the teardown callback of the affected state.
+ *
+ * This is required for subsystems which are unfixable vs. CPU hotplug and
+ * evade lock inversion problems by scheduling work which has to be
+ * completed _before_ cpu_up()/_cpu_down() returns.
+ *
+ * Don't even think about adding anything to this for any new code or even
+ * drivers. Its only purpose is to keep existing lock order trainwrecks
+ * working.
+ *
+ * For cpu_down() there might be valid reasons to finish cleanups which are
+ * not required to be done under cpu_hotplug_lock, but that's a different
+ * story and would not be invoked via this.
+ */
+static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
+{
+       /*
+        * cpusets delegate hotplug operations to a worker to "solve" the
+        * lock order problems. Wait for the worker, but only if tasks are
+        * _not_ frozen (suspend, hibernate) as that would wait forever.
+        *
+        * The wait is required because otherwise the hotplug operation
+        * returns with inconsistent state, which could even be observed in
+        * user space when a new CPU is brought up. The CPU plug uevent
+        * would be delivered and user space reacting on it would fail to
+        * move tasks to the newly plugged CPU up to the point where the
+        * work has finished because up to that point the newly plugged CPU
+        * is not assignable in cpusets/cgroups. On unplug that's not
+        * necessarily a visible issue, but it is still inconsistent state,
+        * which is the real problem which needs to be "fixed". This can't
+        * prevent the transient state between scheduling the work and
+        * returning from waiting for it.
+        */
+       if (!tasks_frozen)
+               cpuset_wait_for_hotplug();
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 #ifndef arch_clear_mm_cpumask_cpu
 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
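
cpu_up_down_serialize_trainwrecks() is called from _cpu_down() and _cpu_up() in later hunks, after cpus_write_unlock(); cpuset_wait_for_hotplug() presumably flushes cpuset's deferred hotplug work, and the wait is skipped while tasks are frozen (suspend/hibernate), where it would never finish. The essential shape is "schedule the work while holding the lock, wait for it only after dropping the lock". A rough user-space sketch of that shape, with a pthread standing in for the workqueue (build with -pthread); everything here is illustrative, not the kernel's implementation:

/* Toy model (user space): work scheduled while holding the main lock is
 * only waited for after the lock is dropped, and the wait is skipped when
 * "tasks are frozen". */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t worker;
static bool work_scheduled;

static void *deferred_work(void *arg)
{
	(void)arg;
	puts("deferred work done");	/* pretend to rebuild cpuset state */
	return NULL;
}

static void schedule_deferred_work(void)
{
	pthread_create(&worker, NULL, deferred_work, NULL);
	work_scheduled = true;
}

static void wait_for_deferred_work(void)
{
	if (work_scheduled) {
		pthread_join(worker, NULL);
		work_scheduled = false;
	}
}

static void hotplug_op(bool tasks_frozen)
{
	pthread_mutex_lock(&hotplug_lock);
	puts("hotplug work under the lock");
	schedule_deferred_work();	/* cannot be waited for in here */
	pthread_mutex_unlock(&hotplug_lock);

	/* Waiting would never finish while tasks are frozen, so skip it then. */
	if (!tasks_frozen)
		wait_for_deferred_work();
}

int main(void)
{
	hotplug_op(false);
	return 0;
}
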
@@ -881,19 +980,18 @@ static int take_cpu_down(void *_param)
                return err;
 
        /*
-        * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
-        * do this step again.
+        * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
+        * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
         */
-       WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
-       st->state--;
+       WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
+
        /* Invoke the former CPU_DYING callbacks */
-       for (; st->state > target; st->state--) {
-               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
-               /*
-                * DYING must not fail!
-                */
-               WARN_ON_ONCE(ret);
-       }
+       ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+
+       /*
+        * DYING must not fail!
+        */
+       WARN_ON_ONCE(ret);
 
        /* Give up timekeeping duties */
        tick_handover_do_timer();
@@ -910,7 +1008,7 @@ static int takedown_cpu(unsigned int cpu)
        int err;
 
        /* Park the smpboot threads */
-       kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+       kthread_park(st->thread);
 
        /*
         * Prevent irq alloc/free while the dying cpu reorganizes the
@@ -926,7 +1024,7 @@ static int takedown_cpu(unsigned int cpu)
                /* CPU refused to die */
                irq_unlock_sparse();
                /* Unpark the hotplug thread so we can rollback there */
-               kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+               kthread_unpark(st->thread);
                return err;
        }
        BUG_ON(cpu_online(cpu));
@@ -975,27 +1073,22 @@ void cpuhp_report_idle_dead(void)
                                 cpuhp_complete_idle_dead, st, 0);
 }
 
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       for (st->state++; st->state < st->target; st->state++)
-               cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-}
-
 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                                enum cpuhp_state target)
 {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
 
-       for (; st->state > target; st->state--) {
-               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
-               if (ret) {
-                       st->target = prev_state;
-                       if (st->state < prev_state)
-                               undo_cpu_down(cpu, st);
-                       break;
-               }
+       ret = cpuhp_invoke_callback_range(false, cpu, st, target);
+       if (ret) {
+
+               cpuhp_reset_state(st, prev_state);
+
+               if (st->state < prev_state)
+                       WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
+                                                           prev_state));
        }
+
        return ret;
 }
 
@@ -1045,9 +1138,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
         * to do the further cleanups.
         */
        ret = cpuhp_down_callbacks(cpu, st, target);
-       if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-               cpuhp_reset_state(st, prev_state);
-               __cpuhp_kick_ap(st);
+       if (ret && st->state < prev_state) {
+               if (st->state == CPUHP_TEARDOWN_CPU) {
+                       cpuhp_reset_state(st, prev_state);
+                       __cpuhp_kick_ap(st);
+               } else {
+                       WARN(1, "DEAD callback error for CPU%d", cpu);
+               }
        }
 
 out:
@@ -1058,6 +1155,7 @@ out:
         */
        lockup_detector_cleanup();
        arch_smt_update();
+       cpu_up_down_serialize_trainwrecks(tasks_frozen);
        return ret;
 }
 
@@ -1164,14 +1262,12 @@ void notify_cpu_starting(unsigned int cpu)
 
        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
        cpumask_set_cpu(cpu, &cpus_booted_once_mask);
-       while (st->state < target) {
-               st->state++;
-               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
-               /*
-                * STARTING must not fail!
-                */
-               WARN_ON_ONCE(ret);
-       }
+       ret = cpuhp_invoke_callback_range(true, cpu, st, target);
+
+       /*
+        * STARTING must not fail!
+        */
+       WARN_ON_ONCE(ret);
 }
 
 /*
@@ -1254,6 +1350,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 out:
        cpus_write_unlock();
        arch_smt_update();
+       cpu_up_down_serialize_trainwrecks(tasks_frozen);
        return ret;
 }
 
@@ -1777,8 +1874,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
         * If there's nothing to do, we're done.
         * Relies on the union for multi_instance.
         */
-       if ((bringup && !sp->startup.single) ||
-           (!bringup && !sp->teardown.single))
+       if (cpuhp_step_empty(bringup, sp))
                return 0;
        /*
         * The non AP bound callbacks can fail on bringup. On teardown
@@ -2207,6 +2303,11 @@ static ssize_t write_cpuhp_fail(struct device *dev,
        if (ret)
                return ret;
 
+       if (fail == CPUHP_INVALID) {
+               st->fail = fail;
+               return count;
+       }
+
        if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
                return -EINVAL;
 
@@ -2216,6 +2317,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
        if (cpuhp_is_atomic_state(fail))
                return -EINVAL;
 
+       /*
+        * DEAD callbacks cannot fail...
+        * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
+        * triggers the STARTING callbacks; a failure in this state would
+        * hinder rollback.
+        */
+       if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
+               return -EINVAL;
+
        /*
         * Cannot fail anything that doesn't have callbacks.
         */
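
The two additions to write_cpuhp_fail() make the fail-injection knob safer: writing CPUHP_INVALID clears a previously set failure point, and a failure can no longer be injected at or below CPUHP_BRINGUP_CPU while the CPU sits above that state, where it would get in the way of rollback. A condensed user-space model of the resulting validation order; the numeric values, the -EINVAL constant and the atomic-state test are stand-ins, and the final "no callbacks registered" check is omitted:

/* Toy model (user space) of the validation order in write_cpuhp_fail(). */
#include <stdbool.h>
#include <stdio.h>

enum { CPUHP_INVALID = -1, CPUHP_OFFLINE = 0, CPUHP_BRINGUP_CPU = 40,
       CPUHP_AP_ONLINE = 60, CPUHP_ONLINE = 100 };

/* Pretend the states between BRINGUP_CPU and AP_ONLINE are atomic. */
static bool is_atomic_state(int state)
{
	return state > CPUHP_BRINGUP_CPU && state < CPUHP_AP_ONLINE;
}

/* Returns 0 if @fail may be stored, or a negative error like the sysfs write. */
static int validate_fail(int fail, int cur_state)
{
	if (fail == CPUHP_INVALID)
		return 0;		/* clearing is always allowed */
	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
		return -22;		/* -EINVAL */
	if (is_atomic_state(fail))
		return -22;		/* atomic states cannot fail */
	/* Cannot hinder rollback by failing BRINGUP_CPU or below on the way down. */
	if (fail <= CPUHP_BRINGUP_CPU && cur_state > CPUHP_BRINGUP_CPU)
		return -22;
	return 0;
}

int main(void)
{
	printf("%d\n", validate_fail(CPUHP_INVALID, CPUHP_ONLINE));	/* 0 */
	printf("%d\n", validate_fail(CPUHP_BRINGUP_CPU, CPUHP_ONLINE));	/* -22 */
	printf("%d\n", validate_fail(CPUHP_BRINGUP_CPU, CPUHP_OFFLINE));/* 0 */
	return 0;
}
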
@@ -2460,6 +2570,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+struct cpumask __cpu_dying_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_dying_mask);
+
 atomic_t __num_online_cpus __read_mostly;
 EXPORT_SYMBOL(__num_online_cpus);
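
__cpu_dying_mask joins the other exported global CPU masks; together with the cpu_dying()/set_cpu_dying() updates earlier in this diff it lets other code tell that a CPU is on its way down before it is actually offline. A speculative consumer-side sketch, assuming such a predicate exists: skip dying CPUs when picking a target from an online mask (plain bitmasks stand in for struct cpumask, and the selection policy is made up):

/* Toy model (user space): avoid CPUs marked as dying when picking a target. */
#include <stdio.h>

static unsigned long online_mask = 0x0f;	/* CPUs 0-3 online */
static unsigned long dying_mask  = 0x04;	/* CPU 2 is going down */

static int pick_target_cpu(void)
{
	for (int cpu = 0; cpu < 8 * (int)sizeof(online_mask); cpu++) {
		unsigned long bit = 1UL << cpu;

		if ((online_mask & bit) && !(dying_mask & bit))
			return cpu;	/* online and not on its way out */
	}
	return -1;
}

int main(void)
{
	printf("picked CPU %d\n", pick_target_cpu());	/* 0 */
	return 0;
}
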