int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
+
+ /*
+ * nr_active management and WORK_STRUCT_INACTIVE:
+ *
+ * When pwq->nr_active >= max_active, a new work item is queued to
+ * pwq->inactive_works instead of pool->worklist and marked with
+ * WORK_STRUCT_INACTIVE.
+ *
+ * All work items marked with WORK_STRUCT_INACTIVE do not participate
+ * in pwq->nr_active, and all work items in pwq->inactive_works are
+ * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
+ * work items are in pwq->inactive_works: some are ready to run in
+ * pool->worklist or worker->scheduled. Those work items can only be
+ * struct wq_barrier, which is used for flush_work() and should not
+ * participate in pwq->nr_active. A non-barrier work item is marked
+ * with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+ */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
- struct list_head delayed_works; /* L: delayed works */
+ struct list_head inactive_works; /* L: inactive works */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
#endif
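
Illustrative aside (not part of the patch): a minimal, self-contained userspace model of the nr_active / WORK_STRUCT_INACTIVE bookkeeping described in the comment above. All names here (mini_pwq, mini_work, MAX_ACTIVE) are hypothetical; only the invariant mirrors pool_workqueue.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ACTIVE 2
#define NR_WORKS   5

struct mini_work {
	bool inactive;			/* models WORK_STRUCT_INACTIVE */
};

struct mini_pwq {
	int nr_active;			/* models pwq->nr_active */
	struct mini_work *inactive_works[NR_WORKS]; /* models pwq->inactive_works */
	int nr_inactive;
};

/* queueing: count as active if below the limit, otherwise park it inactive */
static void mini_queue(struct mini_pwq *pwq, struct mini_work *w)
{
	if (pwq->nr_active < MAX_ACTIVE) {
		w->inactive = false;
		pwq->nr_active++;
	} else {
		w->inactive = true;
		pwq->inactive_works[pwq->nr_inactive++] = w;
	}
}

/* completion: drop nr_active and promote the first inactive item, if any */
static void mini_complete(struct mini_pwq *pwq, struct mini_work *w)
{
	if (w->inactive)
		return;			/* inactive items never run */

	pwq->nr_active--;
	if (pwq->nr_inactive && pwq->nr_active < MAX_ACTIVE) {
		struct mini_work *next = pwq->inactive_works[0];
		int i;

		for (i = 1; i < pwq->nr_inactive; i++)
			pwq->inactive_works[i - 1] = pwq->inactive_works[i];
		pwq->nr_inactive--;
		next->inactive = false;	/* clear WORK_STRUCT_INACTIVE */
		pwq->nr_active++;
	}
}

int main(void)
{
	struct mini_pwq pwq = { 0 };
	struct mini_work works[NR_WORKS] = {{ false }};
	int i;

	for (i = 0; i < NR_WORKS; i++)
		mini_queue(&pwq, &works[i]);
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive); /* 2 / 3 */

	mini_complete(&pwq, &works[0]);
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive); /* 2 / 2 */
	return 0;
}
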
/**
- * worker_pool_assign_id - allocate ID and assing it to @pool
+ * worker_pool_assign_id - allocate ID and assign it to @pool
* @pool: the pool pointer of interest
*
* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
return color << WORK_STRUCT_COLOR_SHIFT;
}
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
{
- return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+ return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
((1 << WORK_STRUCT_COLOR_BITS) - 1);
}
}
}
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
if (list_empty(&pwq->pool->worklist))
pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
- __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+ __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
pwq->nr_active++;
}
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
- struct work_struct *work = list_first_entry(&pwq->delayed_works,
+ struct work_struct *work = list_first_entry(&pwq->inactive_works,
struct work_struct, entry);
- pwq_activate_delayed_work(work);
+ pwq_activate_inactive_work(work);
}
/**
* pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
* @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
*
 * A work either has completed or has been removed from the pending queue;
 * decrement nr_in_flight of its pwq and handle workqueue flushing.
* CONTEXT:
* raw_spin_lock_irq(pool->lock).
*/
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
- /* uncolored work items don't participate in flushing or nr_active */
- if (color == WORK_NO_COLOR)
- goto out_put;
-
- pwq->nr_in_flight[color]--;
+ int color = get_work_color(work_data);
- pwq->nr_active--;
- if (!list_empty(&pwq->delayed_works)) {
- /* one down, submit a delayed one */
- if (pwq->nr_active < pwq->max_active)
- pwq_activate_first_delayed(pwq);
+ if (!(work_data & WORK_STRUCT_INACTIVE)) {
+ pwq->nr_active--;
+ if (!list_empty(&pwq->inactive_works)) {
+ /* one down, submit an inactive one */
+ if (pwq->nr_active < pwq->max_active)
+ pwq_activate_first_inactive(pwq);
+ }
}
+ pwq->nr_in_flight[color]--;
+
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
goto out_put;
debug_work_deactivate(work);
/*
- * A delayed work item cannot be grabbed directly because
- * it might have linked NO_COLOR work items which, if left
- * on the delayed_list, will confuse pwq->nr_active
+ * A cancelable inactive work item must be in the
+ * pwq->inactive_works since a queued barrier can't be
+ * canceled (see the comments in insert_wq_barrier()).
+ *
+ * An inactive work item cannot be grabbed directly because
+ * it might have linked barrier work items which, if left
+ * on the inactive_works list, will confuse pwq->nr_active
* management later on and cause stall. Make sure the work
* item is activated before grabbing.
*/
- if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
- pwq_activate_delayed_work(work);
+ if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+ pwq_activate_inactive_work(work);
list_del_init(&work->entry);
- pwq_dec_nr_in_flight(pwq, get_work_color(work));
+ pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
if (list_empty(worklist))
pwq->pool->watchdog_ts = jiffies;
} else {
- work_flags |= WORK_STRUCT_DELAYED;
- worklist = &pwq->delayed_works;
+ work_flags |= WORK_STRUCT_INACTIVE;
+ worklist = &pwq->inactive_works;
}
debug_work_activate(work);
*/
static struct worker *create_worker(struct worker_pool *pool)
{
- struct worker *worker = NULL;
- int id = -1;
+ struct worker *worker;
+ int id;
char id_buf[16];
/* ID is needed to determine kthread name */
- id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+ id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
if (id < 0)
- goto fail;
+ return NULL;
worker = alloc_worker(pool->node);
if (!worker)
		goto fail;
fail:
- if (id >= 0)
- ida_simple_remove(&pool->worker_ida, id);
+ ida_free(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
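
Illustrative aside (not part of the patch): a minimal kernel-context sketch of the ida_alloc()/ida_free() pairing that replaces ida_simple_get()/ida_simple_remove() above. The example_* names are hypothetical.

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_ida);

/* returns the smallest free ID, or a negative errno on failure */
static int example_get_id(void)
{
	return ida_alloc(&example_ida, GFP_KERNEL);
}

static void example_put_id(int id)
{
	ida_free(&example_ida, id);
}
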
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
- int work_color;
+ unsigned long work_data;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
/*
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
- work_color = get_work_color(work);
+ work_data = *work_data_bits(work);
+ worker->current_color = get_work_color(work_data);
/*
* Record wq name for cmdline and debug reporting, may get
worker->current_work = NULL;
worker->current_func = NULL;
worker->current_pwq = NULL;
- pwq_dec_nr_in_flight(pwq, work_color);
+ worker->current_color = INT_MAX;
+ pwq_dec_nr_in_flight(pwq, work_data);
}
/**
set_pf_worker(false);
set_task_comm(worker->task, "kworker/dying");
- ida_simple_remove(&pool->worker_ida, worker->id);
+ ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
kfree(worker);
return 0;
/*
* The above execution of rescued work items could
* have created more to rescue through
- * pwq_activate_first_delayed() or chained
+ * pwq_activate_first_inactive() or chained
* queueing. Let's put @pwq back on mayday list so
* that such back-to-back work items, which may be
* being used to relieve memory pressure, don't
struct wq_barrier *barr,
struct work_struct *target, struct worker *worker)
{
+ unsigned int work_flags = 0;
+ unsigned int work_color;
struct list_head *head;
- unsigned int linked = 0;
/*
* debugobject calls are safe here even with pool->lock locked
barr->task = current;
+ /* The barrier work item does not participate in pwq->nr_active. */
+ work_flags |= WORK_STRUCT_INACTIVE;
+
/*
* If @target is currently being executed, schedule the
* barrier to the worker; otherwise, put it after @target.
*/
- if (worker)
+ if (worker) {
head = worker->scheduled.next;
- else {
+ work_color = worker->current_color;
+ } else {
unsigned long *bits = work_data_bits(target);
head = target->entry.next;
/* there can already be other linked works, inherit and set */
- linked = *bits & WORK_STRUCT_LINKED;
+ work_flags |= *bits & WORK_STRUCT_LINKED;
+ work_color = get_work_color(*bits);
__set_bit(WORK_STRUCT_LINKED_BIT, bits);
}
+ pwq->nr_in_flight[work_color]++;
+ work_flags |= work_color_to_flags(work_color);
+
debug_work_activate(&barr->work);
- insert_work(pwq, &barr->work, head,
- work_color_to_flags(WORK_NO_COLOR) | linked);
+ insert_work(pwq, &barr->work, head, work_flags);
}
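
Illustrative aside (not part of the patch): a purely userspace sketch, with hypothetical names, of what this hunk changes. The barrier is now accounted in nr_in_flight[] under the same flush color as its @target rather than under the old WORK_NO_COLOR, so flushing that color also waits for the barrier to run; the barrier still stays out of nr_active via WORK_STRUCT_INACTIVE.

#include <stdbool.h>
#include <stdio.h>

#define NR_COLORS 16

static int nr_in_flight[NR_COLORS];

static void queue_item(int color)    { nr_in_flight[color]++; }
static void complete_item(int color) { nr_in_flight[color]--; }

/* a flush of @flush_color may complete only once its counter hits zero */
static bool flush_done(int flush_color)
{
	return nr_in_flight[flush_color] == 0;
}

int main(void)
{
	int color = 3;

	queue_item(color);		/* the work item being flushed    */
	queue_item(color);		/* its barrier inherits the color */

	complete_item(color);		/* target finishes                */
	printf("flush done? %d\n", flush_done(color));	/* 0: barrier pending */

	complete_item(color);		/* barrier finishes               */
	printf("flush done? %d\n", flush_done(color));	/* 1                  */
	return 0;
}
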
/**
bool drained;
raw_spin_lock_irq(&pwq->pool->lock);
- drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+ drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
if (!works)
return -ENOMEM;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct work_struct *work = per_cpu_ptr(works, cpu);
for_each_online_cpu(cpu)
flush_work(per_cpu_ptr(works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
free_percpu(works);
return 0;
}
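
Illustrative aside (not part of the patch): cpus_read_lock()/cpus_read_unlock() are the current names for the CPU-hotplug read-side exclusion previously spelled get_online_cpus()/put_online_cpus(), so the conversion throughout this patch is mechanical. A hypothetical kernel-context sketch of the pairing:

#include <linux/cpu.h>
#include <linux/cpumask.h>

static unsigned int example_count_online_cpus(void)
{
	unsigned int cpu, n = 0;

	cpus_read_lock();		/* no CPU can be (un)plugged while held */
	for_each_online_cpu(cpu)
		n++;
	cpus_read_unlock();

	return n;
}
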
* @pwq: target pool_workqueue
*
* If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
* accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
*/
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
pwq->max_active = wq->saved_max_active;
- while (!list_empty(&pwq->delayed_works) &&
+ while (!list_empty(&pwq->inactive_works) &&
pwq->nr_active < pwq->max_active) {
- pwq_activate_first_delayed(pwq);
+ pwq_activate_first_inactive(pwq);
kick = true;
}
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
+/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
struct worker_pool *pool)
{
pwq->wq = wq;
pwq->flush_color = -1;
pwq->refcnt = 1;
- INIT_LIST_HEAD(&pwq->delayed_works);
+ INIT_LIST_HEAD(&pwq->inactive_works);
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
static void apply_wqattrs_lock(void)
{
/* CPUs should stay stable across pwq creations and installations */
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&wq_pool_mutex);
}
static void apply_wqattrs_unlock(void)
{
mutex_unlock(&wq_pool_mutex);
- put_online_cpus();
+ cpus_read_unlock();
}
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
*
* Performs GFP_KERNEL allocations.
*
- * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
*
* Return: 0 on success and -errno on failure.
*/
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (wq->flags & __WQ_ORDERED) {
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
} else {
ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
return true;
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
return true;
return false;
else
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
- ret = !list_empty(&pwq->delayed_works);
+ ret = !list_empty(&pwq->inactive_works);
preempt_enable();
rcu_read_unlock();
pr_cont("\n");
}
- if (!list_empty(&pwq->delayed_works)) {
+ if (!list_empty(&pwq->inactive_works)) {
bool comma = false;
- pr_info(" delayed:");
- list_for_each_entry(work, &pwq->delayed_works, entry) {
+ pr_info(" inactive:");
+ list_for_each_entry(work, &pwq->inactive_works, entry) {
pr_cont_work(comma, work);
comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
}
bool idle = true;
for_each_pwq(pwq, wq) {
- if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
idle = false;
break;
}
for_each_pwq(pwq, wq) {
raw_spin_lock_irqsave(&pwq->pool->lock, flags);
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
show_pwq(pwq);
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
{
long ret = -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(cpu))
ret = work_on_cpu(cpu, fn, arg);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
* freeze_workqueues_begin - begin freezing workqueues
*
* Start freezing workqueues. After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
* pool->worklist.
*
* CONTEXT:
 * the affinity of all unbound workqueues. This function checks the @cpumask
 * and applies it to all unbound workqueues, updating all of their pwqs.
*
- * Retun: 0 - Success
+ * Return: 0 - Success
* -EINVAL - Invalid @cpumask
* -ENOMEM - Failed to allocate memory for attrs or pwqs.
*/
const char *delim = "";
int node, written = 0;
- get_online_cpus();
+ cpus_read_lock();
rcu_read_lock();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
rcu_read_unlock();
- put_online_cpus();
+ cpus_read_unlock();
return written;
}
return;
}
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
for_each_possible_cpu(cpu) {
node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
cpumask_set_cpu(cpu, tbl[node]);
}