io-wq: fix race between worker exiting and activating free worker

author Jens Axboe <axboe@kernel.dk>
Tue, 3 Aug 2021 15:14:35 +0000 (09:14 -0600)

committer Jens Axboe <axboe@kernel.dk>
Wed, 4 Aug 2021 20:34:46 +0000 (14:34 -0600)
diff --git a/fs/io-wq.c b/fs/io-wq.c

index cf086b0..50dc93f 100644 (file)
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -130,6 +130,7 @@ struct io_cb_cancel_data {
  };
  
  static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void io_wqe_dec_running(struct io_worker *worker);
  
  static bool io_worker_get(struct io_worker *worker)
  {
@@ -168,26 +169,21 @@ static void io_worker_exit(struct io_worker *worker)
  {
         struct io_wqe *wqe = worker->wqe;
         struct io_wqe_acct *acct = io_wqe_get_acct(worker);
-       unsigned flags;
  
         if (refcount_dec_and_test(&worker->ref))
                 complete(&worker->ref_done);
         wait_for_completion(&worker->ref_done);
  
-       preempt_disable();
-       current->flags &= ~PF_IO_WORKER;
-       flags = worker->flags;
-       worker->flags = 0;
-       if (flags & IO_WORKER_F_RUNNING)
-               atomic_dec(&acct->nr_running);
-       worker->flags = 0;
-       preempt_enable();
-
         raw_spin_lock_irq(&wqe->lock);
-       if (flags & IO_WORKER_F_FREE)
+       if (worker->flags & IO_WORKER_F_FREE)
                 hlist_nulls_del_rcu(&worker->nulls_node);
         list_del_rcu(&worker->all_list);
         acct->nr_workers--;
+       preempt_disable();
+       io_wqe_dec_running(worker);
+       worker->flags = 0;
+       current->flags &= ~PF_IO_WORKER;
+       preempt_enable();
         raw_spin_unlock_irq(&wqe->lock);
  
         kfree_rcu(worker, rcu);
@@ -214,15 +210,19 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
         struct hlist_nulls_node *n;
         struct io_worker *worker;
  
-       n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
-       if (is_a_nulls(n))
-               return false;
-
-       worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
-       if (io_worker_get(worker)) {
-               wake_up_process(worker->task);
+       /*
+        * Iterate free_list and see if we can find an idle worker to
+        * activate. If a given worker is on the free_list but in the process
+        * of exiting, keep trying.
+        */
+       hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
+               if (!io_worker_get(worker))
+                       continue;
+               if (wake_up_process(worker->task)) {
+                       io_worker_release(worker);
+                       return true;
+               }
                 io_worker_release(worker);
-               return true;
         }
  
         return false;
author	Jens Axboe <axboe@kernel.dk>
	Tue, 3 Aug 2021 15:14:35 +0000 (09:14 -0600)
committer	Jens Axboe <axboe@kernel.dk>
	Wed, 4 Aug 2021 20:34:46 +0000 (14:34 -0600)