int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
+
+ /*
+ * nr_active management and WORK_STRUCT_INACTIVE:
+ *
+ * When pwq->nr_active >= max_active, a new work item is queued to
+ * pwq->inactive_works instead of pool->worklist and marked with
+ * WORK_STRUCT_INACTIVE.
+ *
+ * All work items marked with WORK_STRUCT_INACTIVE do not participate
+ * in pwq->nr_active, and all work items in pwq->inactive_works are
+ * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
+ * work items are in pwq->inactive_works: some are ready to run in
+ * pool->worklist or worker->scheduled. Those work items can only be
+ * struct wq_barrier, which is used for flush_work() and should not
+ * participate in pwq->nr_active. A non-barrier work item is marked
+ * with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+ */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
- struct list_head delayed_works; /* L: delayed works */
+ struct list_head inactive_works; /* L: inactive works */
struct list_head pwqs_node; /* WR: node on wq->pwqs */
struct list_head mayday_node; /* MD: node on wq->maydays */
#endif
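
Illustrative aside (not part of the patch): a minimal, self-contained userspace model of the nr_active / WORK_STRUCT_INACTIVE bookkeeping described in the comment above. All names here (mini_pwq, mini_work, MAX_ACTIVE) are hypothetical; only the invariant mirrors pool_workqueue.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ACTIVE 2
#define NR_WORKS   5

struct mini_work {
	bool inactive;			/* models WORK_STRUCT_INACTIVE */
};

struct mini_pwq {
	int nr_active;			/* models pwq->nr_active */
	struct mini_work *inactive_works[NR_WORKS]; /* models pwq->inactive_works */
	int nr_inactive;
};

/* queueing: count as active if below the limit, otherwise park it inactive */
static void mini_queue(struct mini_pwq *pwq, struct mini_work *w)
{
	if (pwq->nr_active < MAX_ACTIVE) {
		w->inactive = false;
		pwq->nr_active++;
	} else {
		w->inactive = true;
		pwq->inactive_works[pwq->nr_inactive++] = w;
	}
}

/* completion: drop nr_active and promote the first inactive item, if any */
static void mini_complete(struct mini_pwq *pwq, struct mini_work *w)
{
	if (w->inactive)
		return;			/* inactive items never run */

	pwq->nr_active--;
	if (pwq->nr_inactive && pwq->nr_active < MAX_ACTIVE) {
		struct mini_work *next = pwq->inactive_works[0];
		int i;

		for (i = 1; i < pwq->nr_inactive; i++)
			pwq->inactive_works[i - 1] = pwq->inactive_works[i];
		pwq->nr_inactive--;
		next->inactive = false;	/* clear WORK_STRUCT_INACTIVE */
		pwq->nr_active++;
	}
}

int main(void)
{
	struct mini_pwq pwq = { 0 };
	struct mini_work works[NR_WORKS] = {{ false }};
	int i;

	for (i = 0; i < NR_WORKS; i++)
		mini_queue(&pwq, &works[i]);
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive); /* 2 / 3 */

	mini_complete(&pwq, &works[0]);
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive); /* 2 / 2 */
	return 0;
}
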
/**
- * worker_pool_assign_id - allocate ID and assing it to @pool
+ * worker_pool_assign_id - allocate ID and assign it to @pool
* @pool: the pool pointer of interest
*
* Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
return color << WORK_STRUCT_COLOR_SHIFT;
}
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
{
- return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+ return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
((1 << WORK_STRUCT_COLOR_BITS) - 1);
}
}
}
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
if (list_empty(&pwq->pool->worklist))
pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
- __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+ __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
pwq->nr_active++;
}
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
- struct work_struct *work = list_first_entry(&pwq->delayed_works,
+ struct work_struct *work = list_first_entry(&pwq->inactive_works,
struct work_struct, entry);
- pwq_activate_delayed_work(work);
+ pwq_activate_inactive_work(work);
}
/**
* pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
* @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
*
 * A work either has completed or has been removed from the pending queue;
 * decrement nr_in_flight of its pwq and handle workqueue flushing.
* CONTEXT:
* raw_spin_lock_irq(pool->lock).
*/
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
- /* uncolored work items don't participate in flushing or nr_active */
- if (color == WORK_NO_COLOR)
- goto out_put;
-
- pwq->nr_in_flight[color]--;
+ int color = get_work_color(work_data);
- pwq->nr_active--;
- if (!list_empty(&pwq->delayed_works)) {
- /* one down, submit a delayed one */
- if (pwq->nr_active < pwq->max_active)
- pwq_activate_first_delayed(pwq);
+ if (!(work_data & WORK_STRUCT_INACTIVE)) {
+ pwq->nr_active--;
+ if (!list_empty(&pwq->inactive_works)) {
+ /* one down, submit an inactive one */
+ if (pwq->nr_active < pwq->max_active)
+ pwq_activate_first_inactive(pwq);
+ }
}
+ pwq->nr_in_flight[color]--;
+
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
goto out_put;
debug_work_deactivate(work);
/*
- * A delayed work item cannot be grabbed directly because
- * it might have linked NO_COLOR work items which, if left
- * on the delayed_list, will confuse pwq->nr_active
+ * A cancelable inactive work item must be in the
+ * pwq->inactive_works since a queued barrier can't be
+ * canceled (see the comments in insert_wq_barrier()).
+ *
+ * An inactive work item cannot be grabbed directly because
+ * it might have linked barrier work items which, if left
+ * on the inactive_works list, will confuse pwq->nr_active
* management later on and cause stall. Make sure the work
* item is activated before grabbing.
*/
- if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
- pwq_activate_delayed_work(work);
+ if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+ pwq_activate_inactive_work(work);
list_del_init(&work->entry);
- pwq_dec_nr_in_flight(pwq, get_work_color(work));
+ pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
if (list_empty(worklist))
pwq->pool->watchdog_ts = jiffies;
} else {
- work_flags |= WORK_STRUCT_DELAYED;
- worklist = &pwq->delayed_works;
+ work_flags |= WORK_STRUCT_INACTIVE;
+ worklist = &pwq->inactive_works;
}
debug_work_activate(work);
*/
static struct worker *create_worker(struct worker_pool *pool)
{
- struct worker *worker = NULL;
- int id = -1;
+ struct worker *worker;
+ int id;
char id_buf[16];
/* ID is needed to determine kthread name */
- id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+ id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
if (id < 0)
- goto fail;
+ return NULL;
worker = alloc_worker(pool->node);
if (!worker)
		goto fail;
fail:
- if (id >= 0)
- ida_simple_remove(&pool->worker_ida, id);
+ ida_free(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
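
Illustrative aside (not part of the patch): a minimal kernel-context sketch of the ida_alloc()/ida_free() pairing that replaces ida_simple_get()/ida_simple_remove() above. The example_* names are hypothetical.

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(example_ida);

/* returns the smallest free ID, or a negative errno on failure */
static int example_get_id(void)
{
	return ida_alloc(&example_ida, GFP_KERNEL);
}

static void example_put_id(int id)
{
	ida_free(&example_ida, id);
}
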
struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool;
bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
- int work_color;
+ unsigned long work_data;
struct worker *collision;
#ifdef CONFIG_LOCKDEP
/*
worker->current_work = work;
worker->current_func = work->func;
worker->current_pwq = pwq;
- work_color = get_work_color(work);
+ work_data = *work_data_bits(work);
+ worker->current_color = get_work_color(work_data);
/*
* Record wq name for cmdline and debug reporting, may get
worker->current_work = NULL;
worker->current_func = NULL;
worker->current_pwq = NULL;
- pwq_dec_nr_in_flight(pwq, work_color);
+ worker->current_color = INT_MAX;
+ pwq_dec_nr_in_flight(pwq, work_data);
}
/**
set_pf_worker(false);
set_task_comm(worker->task, "kworker/dying");
- ida_simple_remove(&pool->worker_ida, worker->id);
+ ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
kfree(worker);
return 0;
/*
* The above execution of rescued work items could
* have created more to rescue through
- * pwq_activate_first_delayed() or chained
+ * pwq_activate_first_inactive() or chained
* queueing. Let's put @pwq back on mayday list so
* that such back-to-back work items, which may be
* being used to relieve memory pressure, don't
struct wq_barrier *barr,
struct work_struct *target, struct worker *worker)
{
+ unsigned int work_flags = 0;
+ unsigned int work_color;
struct list_head *head;
- unsigned int linked = 0;
/*
* debugobject calls are safe here even with pool->lock locked
barr->task = current;
+ /* The barrier work item does not participate in pwq->nr_active. */
+ work_flags |= WORK_STRUCT_INACTIVE;
+
/*
* If @target is currently being executed, schedule the
* barrier to the worker; otherwise, put it after @target.
*/
- if (worker)
+ if (worker) {
head = worker->scheduled.next;
- else {
+ work_color = worker->current_color;
+ } else {
unsigned long *bits = work_data_bits(target);
head = target->entry.next;
/* there can already be other linked works, inherit and set */
- linked = *bits & WORK_STRUCT_LINKED;
+ work_flags |= *bits & WORK_STRUCT_LINKED;
+ work_color = get_work_color(*bits);
__set_bit(WORK_STRUCT_LINKED_BIT, bits);
}
+ pwq->nr_in_flight[work_color]++;
+ work_flags |= work_color_to_flags(work_color);
+
debug_work_activate(&barr->work);
- insert_work(pwq, &barr->work, head,
- work_color_to_flags(WORK_NO_COLOR) | linked);
+ insert_work(pwq, &barr->work, head, work_flags);
}
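
Illustrative aside (not part of the patch): a purely userspace sketch, with hypothetical names, of what this hunk changes. The barrier is now accounted in nr_in_flight[] under the same flush color as its @target rather than under the old WORK_NO_COLOR, so flushing that color also waits for the barrier to run; the barrier still stays out of nr_active via WORK_STRUCT_INACTIVE.

#include <stdbool.h>
#include <stdio.h>

#define NR_COLORS 16

static int nr_in_flight[NR_COLORS];

static void queue_item(int color)    { nr_in_flight[color]++; }
static void complete_item(int color) { nr_in_flight[color]--; }

/* a flush of @flush_color may complete only once its counter hits zero */
static bool flush_done(int flush_color)
{
	return nr_in_flight[flush_color] == 0;
}

int main(void)
{
	int color = 3;

	queue_item(color);		/* the work item being flushed    */
	queue_item(color);		/* its barrier inherits the color */

	complete_item(color);		/* target finishes                */
	printf("flush done? %d\n", flush_done(color));	/* 0: barrier pending */

	complete_item(color);		/* barrier finishes               */
	printf("flush done? %d\n", flush_done(color));	/* 1                  */
	return 0;
}
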
/**
bool drained;
raw_spin_lock_irq(&pwq->pool->lock);
- drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+ drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
raw_spin_unlock_irq(&pwq->pool->lock);
if (drained)
if (!works)
return -ENOMEM;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
struct work_struct *work = per_cpu_ptr(works, cpu);
for_each_online_cpu(cpu)
flush_work(per_cpu_ptr(works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
free_percpu(works);
return 0;
}
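
Illustrative aside (not part of the patch): cpus_read_lock()/cpus_read_unlock() are the current names for the CPU-hotplug read-side exclusion previously spelled get_online_cpus()/put_online_cpus(), so the conversion throughout this patch is mechanical. A hypothetical kernel-context sketch of the pairing:

#include <linux/cpu.h>
#include <linux/cpumask.h>

static unsigned int example_count_online_cpus(void)
{
	unsigned int cpu, n = 0;

	cpus_read_lock();		/* no CPU can be (un)plugged while held */
	for_each_online_cpu(cpu)
		n++;
	cpus_read_unlock();

	return n;
}
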
* @pwq: target pool_workqueue
*
* If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
* accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
*/
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
pwq->max_active = wq->saved_max_active;
- while (!list_empty(&pwq->delayed_works) &&
+ while (!list_empty(&pwq->inactive_works) &&
pwq->nr_active < pwq->max_active) {
- pwq_activate_first_delayed(pwq);
+ pwq_activate_first_inactive(pwq);
kick = true;
}
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
+/* initialize newly allocated @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
struct worker_pool *pool)
{
pwq->wq = wq;
pwq->flush_color = -1;
pwq->refcnt = 1;
- INIT_LIST_HEAD(&pwq->delayed_works);
+ INIT_LIST_HEAD(&pwq->inactive_works);
INIT_LIST_HEAD(&pwq->pwqs_node);
INIT_LIST_HEAD(&pwq->mayday_node);
INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
static void apply_wqattrs_lock(void)
{
/* CPUs should stay stable across pwq creations and installations */
- get_online_cpus();
+ cpus_read_lock();
mutex_lock(&wq_pool_mutex);
}
static void apply_wqattrs_unlock(void)
{
mutex_unlock(&wq_pool_mutex);
- put_online_cpus();
+ cpus_read_unlock();
}
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
*
* Performs GFP_KERNEL allocations.
*
- * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
*
* Return: 0 on success and -errno on failure.
*/
return 0;
}
- get_online_cpus();
+ cpus_read_lock();
if (wq->flags & __WQ_ORDERED) {
ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
/* there should only be single pwq for ordering guarantee */
} else {
ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
return true;
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
return true;
return false;
else
pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
- ret = !list_empty(&pwq->delayed_works);
+ ret = !list_empty(&pwq->inactive_works);
preempt_enable();
rcu_read_unlock();
pr_cont("\n");
}
- if (!list_empty(&pwq->delayed_works)) {
+ if (!list_empty(&pwq->inactive_works)) {
bool comma = false;
- pr_info(" delayed:");
- list_for_each_entry(work, &pwq->delayed_works, entry) {
+ pr_info(" inactive:");
+ list_for_each_entry(work, &pwq->inactive_works, entry) {
pr_cont_work(comma, work);
comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
}
bool idle = true;
for_each_pwq(pwq, wq) {
- if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
idle = false;
break;
}
for_each_pwq(pwq, wq) {
raw_spin_lock_irqsave(&pwq->pool->lock, flags);
- if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
show_pwq(pwq);
raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
/*
{
long ret = -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
if (cpu_online(cpu))
ret = work_on_cpu(cpu, fn, arg);
- put_online_cpus();
+ cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
* freeze_workqueues_begin - begin freezing workqueues
*
* Start freezing workqueues. After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
* pool->worklist.
*
* CONTEXT:
 * the affinity of all unbound workqueues. This function checks the @cpumask
 * and applies it to all unbound workqueues, updating all of their pwqs.
*
- * Retun: 0 - Success
+ * Return: 0 - Success
* -EINVAL - Invalid @cpumask
* -ENOMEM - Failed to allocate memory for attrs or pwqs.
*/
const char *delim = "";
int node, written = 0;
- get_online_cpus();
+ cpus_read_lock();
rcu_read_lock();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
rcu_read_unlock();
- put_online_cpus();
+ cpus_read_unlock();
return written;
}
return;
}
+ for_each_possible_cpu(cpu) {
+ if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+ pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+ return;
+ }
+ }
+
wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
for_each_possible_cpu(cpu) {
node = cpu_to_node(cpu);
- if (WARN_ON(node == NUMA_NO_NODE)) {
- pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
- /* happens iff arch is bonkers, let's just proceed */
- return;
- }
cpumask_set_cpu(cpu, tbl[node]);
}