Merge tag 'net-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f148eac..33a6b4a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -205,9 +205,26 @@ struct pool_workqueue {
        int                     refcnt;         /* L: reference count */
        int                     nr_in_flight[WORK_NR_COLORS];
                                                /* L: nr of in_flight works */
+
+       /*
+        * nr_active management and WORK_STRUCT_INACTIVE:
+        *
+        * When pwq->nr_active >= max_active, a new work item is queued to
+        * pwq->inactive_works instead of pool->worklist and marked with
+        * WORK_STRUCT_INACTIVE.
+        *
+        * All work items marked with WORK_STRUCT_INACTIVE do not participate
+        * in pwq->nr_active and all work items in pwq->inactive_works are
+        * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
+        * work items are in pwq->inactive_works.  Some of them are ready to
+        * run in pool->worklist or worker->scheduled.  Those work items are
+        * always struct wq_barrier items, used by flush_work(), and must
+        * not participate in pwq->nr_active.  A non-barrier work item is
+        * marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+        */
        int                     nr_active;      /* L: nr of active works */
        int                     max_active;     /* L: max active works */
-       struct list_head        delayed_works;  /* L: delayed works */
+       struct list_head        inactive_works; /* L: inactive works */
        struct list_head        pwqs_node;      /* WR: node on wq->pwqs */
        struct list_head        mayday_node;    /* MD: node on wq->maydays */
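
To make the invariant described in the comment block above concrete, here is a minimal sketch of the queue-time decision it refers to (same shape as the __queue_work() hunk further down; locking and per-pool bookkeeping omitted, function name ours):

static void queue_decision_sketch(struct pool_workqueue *pwq,
                                  struct work_struct *work,
                                  unsigned int work_flags)
{
        struct list_head *worklist;

        /* Sketch only: assumes pool->lock is already held. */
        if (likely(pwq->nr_active < pwq->max_active)) {
                pwq->nr_active++;                       /* counts toward max_active */
                worklist = &pwq->pool->worklist;
        } else {
                work_flags |= WORK_STRUCT_INACTIVE;     /* parked until a slot frees up */
                worklist = &pwq->inactive_works;
        }

        insert_work(pwq, work, worklist, work_flags);
}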
 
@@ -524,7 +541,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { }
 #endif
 
 /**
- * worker_pool_assign_id - allocate ID and assing it to @pool
+ * worker_pool_assign_id - allocate ID and assign it to @pool
  * @pool: the pool pointer of interest
  *
  * Returns 0 if an ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
@@ -579,9 +596,9 @@ static unsigned int work_color_to_flags(int color)
        return color << WORK_STRUCT_COLOR_SHIFT;
 }
 
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
 {
-       return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+       return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
                ((1 << WORK_STRUCT_COLOR_BITS) - 1);
 }
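
get_work_color() now takes a plain work_data word instead of dereferencing the work item, so a caller can take one snapshot of *work_data_bits() and feed both the color lookup and the WORK_STRUCT_INACTIVE test from it. A hedged usage fragment (variable names ours):

        unsigned long work_data = *work_data_bits(work);  /* one consistent snapshot */
        int color = get_work_color(work_data);
        bool counted_in_nr_active = !(work_data & WORK_STRUCT_INACTIVE);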
 
@@ -1136,7 +1153,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
        }
 }
 
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
 {
        struct pool_workqueue *pwq = get_work_pwq(work);
 
@@ -1144,22 +1161,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
        if (list_empty(&pwq->pool->worklist))
                pwq->pool->watchdog_ts = jiffies;
        move_linked_works(work, &pwq->pool->worklist, NULL);
-       __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+       __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
        pwq->nr_active++;
 }
 
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
 {
-       struct work_struct *work = list_first_entry(&pwq->delayed_works,
+       struct work_struct *work = list_first_entry(&pwq->inactive_works,
                                                    struct work_struct, entry);
 
-       pwq_activate_delayed_work(work);
+       pwq_activate_inactive_work(work);
 }
 
 /**
  * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
  * @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
  *
  * A work either has completed or is removed from the pending queue,
  * decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1167,21 +1184,21 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  * CONTEXT:
  * raw_spin_lock_irq(pool->lock).
  */
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
 {
-       /* uncolored work items don't participate in flushing or nr_active */
-       if (color == WORK_NO_COLOR)
-               goto out_put;
-
-       pwq->nr_in_flight[color]--;
+       int color = get_work_color(work_data);
 
-       pwq->nr_active--;
-       if (!list_empty(&pwq->delayed_works)) {
-               /* one down, submit a delayed one */
-               if (pwq->nr_active < pwq->max_active)
-                       pwq_activate_first_delayed(pwq);
+       if (!(work_data & WORK_STRUCT_INACTIVE)) {
+               pwq->nr_active--;
+               if (!list_empty(&pwq->inactive_works)) {
+                       /* one down, submit an inactive one */
+                       if (pwq->nr_active < pwq->max_active)
+                               pwq_activate_first_inactive(pwq);
+               }
        }
 
+       pwq->nr_in_flight[color]--;
+
        /* is flush in progress and are we at the flushing tip? */
        if (likely(pwq->flush_color != color))
                goto out_put;
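
A short worked trace of the rewritten accounting, assuming max_active == 1 and two ordinary work items A and B queued back to back (illustrative, not taken from the patch):

/*
 * queue A:    nr_active 0 -> 1, A goes on pool->worklist
 * queue B:    nr_active == max_active, so B goes on pwq->inactive_works
 *             with WORK_STRUCT_INACTIVE set
 * A finishes: pwq_dec_nr_in_flight(pwq, <A's work_data>)
 *               - A's INACTIVE bit is clear, so nr_active 1 -> 0
 *               - inactive_works is non-empty and nr_active < max_active,
 *                 so pwq_activate_first_inactive() moves B to the
 *                 worklist, clears its INACTIVE bit, nr_active 0 -> 1
 *               - nr_in_flight[A's color]--
 * A wq_barrier queued by flush_work() keeps WORK_STRUCT_INACTIVE even
 * though it sits on worker->scheduled, so its completion only drops
 * nr_in_flight[] and never touches nr_active.
 */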
@@ -1281,17 +1298,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
                debug_work_deactivate(work);
 
                /*
-                * A delayed work item cannot be grabbed directly because
-                * it might have linked NO_COLOR work items which, if left
-                * on the delayed_list, will confuse pwq->nr_active
+                * A cancelable inactive work item must be in the
+                * pwq->inactive_works since a queued barrier can't be
+                * canceled (see the comments in insert_wq_barrier()).
+                *
+                * An inactive work item cannot be grabbed directly because
+                * it might have linked barrier work items which, if left
+                * on the inactive_works list, will confuse pwq->nr_active
                 * management later on and cause stall.  Make sure the work
                 * item is activated before grabbing.
                 */
-               if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-                       pwq_activate_delayed_work(work);
+               if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+                       pwq_activate_inactive_work(work);
 
                list_del_init(&work->entry);
-               pwq_dec_nr_in_flight(pwq, get_work_color(work));
+               pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
 
                /* work->data points to pwq iff queued, point to pool */
                set_work_pool_and_keep_pending(work, pool->id);
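
The ordering in the cancel path above is what keeps the counters consistent: the work is activated first, so when pwq_dec_nr_in_flight() re-reads the now INACTIVE-clear work_data it decrements nr_active exactly once and may promote the next inactive item. Condensed, with our comments:

        if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
                pwq_activate_inactive_work(work);   /* clears INACTIVE, nr_active++ */

        list_del_init(&work->entry);
        /* re-read bits: INACTIVE is now clear, so nr_active gets decremented */
        pwq_dec_nr_in_flight(pwq, *work_data_bits(work));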
@@ -1490,8 +1511,8 @@ retry:
                if (list_empty(worklist))
                        pwq->pool->watchdog_ts = jiffies;
        } else {
-               work_flags |= WORK_STRUCT_DELAYED;
-               worklist = &pwq->delayed_works;
+               work_flags |= WORK_STRUCT_INACTIVE;
+               worklist = &pwq->inactive_works;
        }
 
        debug_work_activate(work);
@@ -1912,14 +1933,14 @@ static void worker_detach_from_pool(struct worker *worker)
  */
 static struct worker *create_worker(struct worker_pool *pool)
 {
-       struct worker *worker = NULL;
-       int id = -1;
+       struct worker *worker;
+       int id;
        char id_buf[16];
 
        /* ID is needed to determine kthread name */
-       id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+       id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
        if (id < 0)
-               goto fail;
+               return NULL;
 
        worker = alloc_worker(pool->node);
        if (!worker)
@@ -1954,8 +1975,7 @@ static struct worker *create_worker(struct worker_pool *pool)
        return worker;
 
 fail:
-       if (id >= 0)
-               ida_simple_remove(&pool->worker_ida, id);
+       ida_free(&pool->worker_ida, id);
        kfree(worker);
        return NULL;
 }
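
The IDA conversion is mechanical: ida_alloc() returns a non-negative ID or a negative errno, and ida_free() only ever sees IDs that were successfully allocated, so the old `id >= 0` guard can go. A small stand-alone sketch of the same pattern (the IDA and function names here are illustrative, not from the patch):

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(example_ida);         /* hypothetical IDA for illustration */

static int example_get_id(void)
{
        /* smallest free non-negative ID, or -ENOMEM/-ENOSPC on failure */
        return ida_alloc(&example_ida, GFP_KERNEL);
}

static void example_put_id(int id)
{
        /* only called with IDs that example_get_id() handed out */
        ida_free(&example_ida, id);
}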
@@ -2173,7 +2193,7 @@ __acquires(&pool->lock)
        struct pool_workqueue *pwq = get_work_pwq(work);
        struct worker_pool *pool = worker->pool;
        bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
-       int work_color;
+       unsigned long work_data;
        struct worker *collision;
 #ifdef CONFIG_LOCKDEP
        /*
@@ -2209,7 +2229,8 @@ __acquires(&pool->lock)
        worker->current_work = work;
        worker->current_func = work->func;
        worker->current_pwq = pwq;
-       work_color = get_work_color(work);
+       work_data = *work_data_bits(work);
+       worker->current_color = get_work_color(work_data);
 
        /*
         * Record wq name for cmdline and debug reporting, may get
@@ -2315,7 +2336,8 @@ __acquires(&pool->lock)
        worker->current_work = NULL;
        worker->current_func = NULL;
        worker->current_pwq = NULL;
-       pwq_dec_nr_in_flight(pwq, work_color);
+       worker->current_color = INT_MAX;
+       pwq_dec_nr_in_flight(pwq, work_data);
 }
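
Two things are worth noting about the hunk above, hedged as our reading of it: the whole work_data word is captured before worker->current_func(work) runs, because the callback may free or re-queue the item and work->data cannot be trusted afterwards; and worker->current_color is recorded so that insert_wq_barrier() (further down) can give a flush barrier the same flush color as the work it is waiting on. In outline:

        work_data = *work_data_bits(work);        /* snapshot before the callback */
        worker->current_color = get_work_color(work_data);
        /* ... */
        worker->current_func(work);               /* @work may be freed from here on */
        /* ... */
        worker->current_color = INT_MAX;          /* no current work, no color */
        pwq_dec_nr_in_flight(pwq, work_data);     /* uses the pre-execution snapshot */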
 
 /**
@@ -2378,7 +2400,7 @@ woke_up:
                set_pf_worker(false);
 
                set_task_comm(worker->task, "kworker/dying");
-               ida_simple_remove(&pool->worker_ida, worker->id);
+               ida_free(&pool->worker_ida, worker->id);
                worker_detach_from_pool(worker);
                kfree(worker);
                return 0;
@@ -2531,7 +2553,7 @@ repeat:
                        /*
                         * The above execution of rescued work items could
                         * have created more to rescue through
-                        * pwq_activate_first_delayed() or chained
+                        * pwq_activate_first_inactive() or chained
                         * queueing.  Let's put @pwq back on mayday list so
                         * that such back-to-back work items, which may be
                         * being used to relieve memory pressure, don't
@@ -2658,8 +2680,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
                              struct wq_barrier *barr,
                              struct work_struct *target, struct worker *worker)
 {
+       unsigned int work_flags = 0;
+       unsigned int work_color;
        struct list_head *head;
-       unsigned int linked = 0;
 
        /*
         * debugobject calls are safe here even with pool->lock locked
@@ -2674,24 +2697,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 
        barr->task = current;
 
+       /* The barrier work item does not participate in pwq->nr_active. */
+       work_flags |= WORK_STRUCT_INACTIVE;
+
        /*
         * If @target is currently being executed, schedule the
         * barrier to the worker; otherwise, put it after @target.
         */
-       if (worker)
+       if (worker) {
                head = worker->scheduled.next;
-       else {
+               work_color = worker->current_color;
+       } else {
                unsigned long *bits = work_data_bits(target);
 
                head = target->entry.next;
                /* there can already be other linked works, inherit and set */
-               linked = *bits & WORK_STRUCT_LINKED;
+               work_flags |= *bits & WORK_STRUCT_LINKED;
+               work_color = get_work_color(*bits);
                __set_bit(WORK_STRUCT_LINKED_BIT, bits);
        }
 
+       pwq->nr_in_flight[work_color]++;
+       work_flags |= work_color_to_flags(work_color);
+
        debug_work_activate(&barr->work);
-       insert_work(pwq, &barr->work, head,
-                   work_color_to_flags(WORK_NO_COLOR) | linked);
+       insert_work(pwq, &barr->work, head, work_flags);
 }
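
With WORK_NO_COLOR gone, the barrier inherits a real flush color, either from worker->current_color when the target is already running or from the target's work_data when it is still queued, and is counted in nr_in_flight[] for that color; WORK_STRUCT_INACTIVE is what keeps it out of nr_active. Condensed from the hunk above:

        unsigned int work_flags = WORK_STRUCT_INACTIVE;   /* never counts in nr_active */
        unsigned int work_color = worker ? worker->current_color
                                         : get_work_color(*work_data_bits(target));

        pwq->nr_in_flight[work_color]++;                  /* but it does get flushed */
        work_flags |= work_color_to_flags(work_color);
        /* plus WORK_STRUCT_LINKED inheritance when chaining after @target */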
 
 /**
@@ -2957,7 +2987,7 @@ reflush:
                bool drained;
 
                raw_spin_lock_irq(&pwq->pool->lock);
-               drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+               drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
                raw_spin_unlock_irq(&pwq->pool->lock);
 
                if (drained)
@@ -3293,7 +3323,7 @@ int schedule_on_each_cpu(work_func_t func)
        if (!works)
                return -ENOMEM;
 
-       get_online_cpus();
+       cpus_read_lock();
 
        for_each_online_cpu(cpu) {
                struct work_struct *work = per_cpu_ptr(works, cpu);
@@ -3305,7 +3335,7 @@ int schedule_on_each_cpu(work_func_t func)
        for_each_online_cpu(cpu)
                flush_work(per_cpu_ptr(works, cpu));
 
-       put_online_cpus();
+       cpus_read_unlock();
        free_percpu(works);
        return 0;
 }
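
get_online_cpus()/put_online_cpus() were already thin wrappers around the CPU-hotplug read lock, so cpus_read_lock()/cpus_read_unlock() are effectively drop-in replacements. A minimal usage sketch of the pattern every converted call site follows (the loop body here is illustrative):

#include <linux/cpu.h>
#include <linux/cpumask.h>

static void example_walk_online_cpus(void)
{
        int cpu;

        cpus_read_lock();               /* no CPU can be unplugged while held */
        for_each_online_cpu(cpu) {
                /* per-CPU work that must not race with CPU hotplug */
        }
        cpus_read_unlock();
}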
@@ -3713,7 +3743,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  * @pwq: target pool_workqueue
  *
  * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
  * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
  */
 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3742,9 +3772,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 
                pwq->max_active = wq->saved_max_active;
 
-               while (!list_empty(&pwq->delayed_works) &&
+               while (!list_empty(&pwq->inactive_works) &&
                       pwq->nr_active < pwq->max_active) {
-                       pwq_activate_first_delayed(pwq);
+                       pwq_activate_first_inactive(pwq);
                        kick = true;
                }
 
@@ -3763,7 +3793,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
        raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 }
 
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
+/* initialize newly allocated @pwq which is associated with @wq and @pool */
 static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
                     struct worker_pool *pool)
 {
@@ -3775,7 +3805,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
        pwq->wq = wq;
        pwq->flush_color = -1;
        pwq->refcnt = 1;
-       INIT_LIST_HEAD(&pwq->delayed_works);
+       INIT_LIST_HEAD(&pwq->inactive_works);
        INIT_LIST_HEAD(&pwq->pwqs_node);
        INIT_LIST_HEAD(&pwq->mayday_node);
        INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4016,14 +4046,14 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
 static void apply_wqattrs_lock(void)
 {
        /* CPUs should stay stable across pwq creations and installations */
-       get_online_cpus();
+       cpus_read_lock();
        mutex_lock(&wq_pool_mutex);
 }
 
 static void apply_wqattrs_unlock(void)
 {
        mutex_unlock(&wq_pool_mutex);
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
@@ -4068,7 +4098,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
  *
  * Performs GFP_KERNEL allocations.
  *
- * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
  *
  * Return: 0 on success and -errno on failure.
  */
@@ -4196,7 +4226,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
                return 0;
        }
 
-       get_online_cpus();
+       cpus_read_lock();
        if (wq->flags & __WQ_ORDERED) {
                ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
                /* there should only be single pwq for ordering guarantee */
@@ -4206,7 +4236,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
        } else {
                ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
        }
-       put_online_cpus();
+       cpus_read_unlock();
 
        return ret;
 }
@@ -4362,7 +4392,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
 
        if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
                return true;
-       if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+       if (pwq->nr_active || !list_empty(&pwq->inactive_works))
                return true;
 
        return false;
@@ -4558,7 +4588,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
        else
                pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
-       ret = !list_empty(&pwq->delayed_works);
+       ret = !list_empty(&pwq->inactive_works);
        preempt_enable();
        rcu_read_unlock();
 
@@ -4754,11 +4784,11 @@ static void show_pwq(struct pool_workqueue *pwq)
                pr_cont("\n");
        }
 
-       if (!list_empty(&pwq->delayed_works)) {
+       if (!list_empty(&pwq->inactive_works)) {
                bool comma = false;
 
-               pr_info("    delayed:");
-               list_for_each_entry(work, &pwq->delayed_works, entry) {
+               pr_info("    inactive:");
+               list_for_each_entry(work, &pwq->inactive_works, entry) {
                        pr_cont_work(comma, work);
                        comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
                }
@@ -4788,7 +4818,7 @@ void show_workqueue_state(void)
                bool idle = true;
 
                for_each_pwq(pwq, wq) {
-                       if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+                       if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
                                idle = false;
                                break;
                        }
@@ -4800,7 +4830,7 @@ void show_workqueue_state(void)
 
                for_each_pwq(pwq, wq) {
                        raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-                       if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+                       if (pwq->nr_active || !list_empty(&pwq->inactive_works))
                                show_pwq(pwq);
                        raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
                        /*
@@ -5168,10 +5198,10 @@ long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
 {
        long ret = -ENODEV;
 
-       get_online_cpus();
+       cpus_read_lock();
        if (cpu_online(cpu))
                ret = work_on_cpu(cpu, fn, arg);
-       put_online_cpus();
+       cpus_read_unlock();
        return ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu_safe);
@@ -5183,7 +5213,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
  * freeze_workqueues_begin - begin freezing workqueues
  *
  * Start freezing workqueues.  After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
  * pool->worklist.
  *
  * CONTEXT:
@@ -5331,7 +5361,7 @@ static int workqueue_apply_unbound_cpumask(void)
  *  the affinity of all unbound workqueues.  This function checks the @cpumask
  *  and applies it to all unbound workqueues and updates all of their pwqs.
  *
- *  Retun:     0       - Success
+ *  Return:    0       - Success
  *             -EINVAL - Invalid @cpumask
  *             -ENOMEM - Failed to allocate memory for attrs or pwqs.
  */
@@ -5443,7 +5473,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
        const char *delim = "";
        int node, written = 0;
 
-       get_online_cpus();
+       cpus_read_lock();
        rcu_read_lock();
        for_each_node(node) {
                written += scnprintf(buf + written, PAGE_SIZE - written,
@@ -5453,7 +5483,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
        }
        written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
        rcu_read_unlock();
-       put_online_cpus();
+       cpus_read_unlock();
 
        return written;
 }
@@ -5902,6 +5932,13 @@ static void __init wq_numa_init(void)
                return;
        }
 
+       for_each_possible_cpu(cpu) {
+               if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+                       pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+                       return;
+               }
+       }
+
        wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
        BUG_ON(!wq_update_unbound_numa_attrs_buf);
 
@@ -5919,11 +5956,6 @@ static void __init wq_numa_init(void)
 
        for_each_possible_cpu(cpu) {
                node = cpu_to_node(cpu);
-               if (WARN_ON(node == NUMA_NO_NODE)) {
-                       pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
-                       /* happens iff arch is bonkers, let's just proceed */
-                       return;
-               }
                cpumask_set_cpu(cpu, tbl[node]);
        }