block, bfq: make waker-queue detection more robust
authorPaolo Valente <paolo.valente@linaro.org>
Mon, 25 Jan 2021 19:02:48 +0000 (20:02 +0100)
committerJens Axboe <axboe@kernel.dk>
Mon, 25 Jan 2021 21:18:37 +0000 (14:18 -0700)
In the presence of many parallel I/O flows, the detection of waker
bfq_queues suffers from false positives. This commits addresses this
issue by making the filtering of actual wakers more selective. In more
detail, a candidate waker must be found to meet waker requirements
three times before being promoted to actual waker.

Tested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/bfq-iosched.c
block/bfq-iosched.h

index 79d232d..eaeda18 100644 (file)
@@ -158,7 +158,6 @@ BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
 #undef BFQ_BFQQ_FNS                                            \
 
 /* Expiration time of sync (0) and async (1) requests, in ns. */
@@ -1905,6 +1904,107 @@ static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns)
        }
 }
 
+/*
+ * Detect whether bfqq's I/O seems synchronized with that of some
+ * other queue, i.e., whether bfqq, after remaining empty, happens to
+ * receive new I/O only right after some I/O request of the other
+ * queue has been completed. We call waker queue the other queue, and
+ * we assume, for simplicity, that bfqq may have at most one waker
+ * queue.
+ *
+ * A remarkable throughput boost can be reached by unconditionally
+ * injecting the I/O of the waker queue, every time a new
+ * bfq_dispatch_request happens to be invoked while I/O is being
+ * plugged for bfqq.  In addition to boosting throughput, this
+ * unblocks bfqq's I/O, thereby improving bandwidth and latency for
+ * bfqq. Note that these same results may be achieved with the general
+ * injection mechanism, but less effectively. For details on this
+ * aspect, see the comments on the choice of the queue for injection
+ * in bfq_select_queue().
+ *
+ * Turning back to the detection of a waker queue, a queue Q is deemed
+ * as a waker queue for bfqq if, for three consecutive times, bfqq
+ * happens to become non empty right after a request of Q has been
+ * completed. In particular, on the first time, Q is tentatively set
+ * as a candidate waker queue, while on the third consecutive time
+ * that Q is detected, the field waker_bfqq is set to Q, to confirm
+ * that Q is a waker queue for bfqq. These detection steps are
+ * performed only if bfqq has a long think time, so as to make it more
+ * likely that bfqq's I/O is actually being blocked by a
+ * synchronization. This last filter, plus the above three-times
+ * requirement, make false positives less likely.
+ *
+ * NOTE
+ *
+ * The sooner a waker queue is detected, the sooner throughput can be
+ * boosted by injecting I/O from the waker queue. Fortunately,
+ * detection is likely to be actually fast, for the following
+ * reasons. While blocked by synchronization, bfqq has a long think
+ * time. This implies that bfqq's inject limit is at least equal to 1
+ * (see the comments in bfq_update_inject_limit()). So, thanks to
+ * injection, the waker queue is likely to be served during the very
+ * first I/O-plugging time interval for bfqq. This triggers the first
+ * step of the detection mechanism. Thanks again to injection, the
+ * candidate waker queue is then likely to be confirmed no later than
+ * during the next I/O-plugging interval for bfqq.
+ *
+ * ISSUE
+ *
+ * On queue merging all waker information is lost.
+ */
+void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, u64 now_ns)
+{
+       if (!bfqd->last_completed_rq_bfqq ||
+           bfqd->last_completed_rq_bfqq == bfqq ||
+           bfq_bfqq_has_short_ttime(bfqq) ||
+           now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
+           bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+               return;
+
+       if (bfqd->last_completed_rq_bfqq !=
+           bfqq->tentative_waker_bfqq) {
+               /*
+                * First synchronization detected with a
+                * candidate waker queue, or with a different
+                * candidate waker queue from the current one.
+                */
+               bfqq->tentative_waker_bfqq =
+                       bfqd->last_completed_rq_bfqq;
+               bfqq->num_waker_detections = 1;
+       } else /* Same tentative waker queue detected again */
+               bfqq->num_waker_detections++;
+
+       if (bfqq->num_waker_detections == 3) {
+               bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
+               bfqq->tentative_waker_bfqq = NULL;
+
+               /*
+                * If the waker queue disappears, then
+                * bfqq->waker_bfqq must be reset. To
+                * this goal, we maintain in each
+                * waker queue a list, woken_list, of
+                * all the queues that reference the
+                * waker queue through their
+                * waker_bfqq pointer. When the waker
+                * queue exits, the waker_bfqq pointer
+                * of all the queues in the woken_list
+                * is reset.
+                *
+                * In addition, if bfqq is already in
+                * the woken_list of a waker queue,
+                * then, before being inserted into
+                * the woken_list of a new waker
+                * queue, bfqq must be removed from
+                * the woken_list of the old waker
+                * queue.
+                */
+               if (!hlist_unhashed(&bfqq->woken_list_node))
+                       hlist_del_init(&bfqq->woken_list_node);
+               hlist_add_head(&bfqq->woken_list_node,
+                              &bfqd->last_completed_rq_bfqq->woken_list);
+       }
+}
+
 static void bfq_add_request(struct request *rq)
 {
        struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1919,111 +2019,7 @@ static void bfq_add_request(struct request *rq)
        bfqd->queued++;
 
        if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
-               /*
-                * Detect whether bfqq's I/O seems synchronized with
-                * that of some other queue, i.e., whether bfqq, after
-                * remaining empty, happens to receive new I/O only
-                * right after some I/O request of the other queue has
-                * been completed. We call waker queue the other
-                * queue, and we assume, for simplicity, that bfqq may
-                * have at most one waker queue.
-                *
-                * A remarkable throughput boost can be reached by
-                * unconditionally injecting the I/O of the waker
-                * queue, every time a new bfq_dispatch_request
-                * happens to be invoked while I/O is being plugged
-                * for bfqq.  In addition to boosting throughput, this
-                * unblocks bfqq's I/O, thereby improving bandwidth
-                * and latency for bfqq. Note that these same results
-                * may be achieved with the general injection
-                * mechanism, but less effectively. For details on
-                * this aspect, see the comments on the choice of the
-                * queue for injection in bfq_select_queue().
-                *
-                * Turning back to the detection of a waker queue, a
-                * queue Q is deemed as a waker queue for bfqq if, for
-                * two consecutive times, bfqq happens to become non
-                * empty right after a request of Q has been
-                * completed. In particular, on the first time, Q is
-                * tentatively set as a candidate waker queue, while
-                * on the second time, the flag
-                * bfq_bfqq_has_waker(bfqq) is set to confirm that Q
-                * is a waker queue for bfqq. These detection steps
-                * are performed only if bfqq has a long think time,
-                * so as to make it more likely that bfqq's I/O is
-                * actually being blocked by a synchronization. This
-                * last filter, plus the above two-times requirement,
-                * make false positives less likely.
-                *
-                * NOTE
-                *
-                * The sooner a waker queue is detected, the sooner
-                * throughput can be boosted by injecting I/O from the
-                * waker queue. Fortunately, detection is likely to be
-                * actually fast, for the following reasons. While
-                * blocked by synchronization, bfqq has a long think
-                * time. This implies that bfqq's inject limit is at
-                * least equal to 1 (see the comments in
-                * bfq_update_inject_limit()). So, thanks to
-                * injection, the waker queue is likely to be served
-                * during the very first I/O-plugging time interval
-                * for bfqq. This triggers the first step of the
-                * detection mechanism. Thanks again to injection, the
-                * candidate waker queue is then likely to be
-                * confirmed no later than during the next
-                * I/O-plugging interval for bfqq.
-                */
-               if (bfqd->last_completed_rq_bfqq &&
-                   !bfq_bfqq_has_short_ttime(bfqq) &&
-                   now_ns - bfqd->last_completion <
-                   4 * NSEC_PER_MSEC) {
-                       if (bfqd->last_completed_rq_bfqq != bfqq &&
-                           bfqd->last_completed_rq_bfqq !=
-                           bfqq->waker_bfqq) {
-                               /*
-                                * First synchronization detected with
-                                * a candidate waker queue, or with a
-                                * different candidate waker queue
-                                * from the current one.
-                                */
-                               bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq;
-
-                               /*
-                                * If the waker queue disappears, then
-                                * bfqq->waker_bfqq must be reset. To
-                                * this goal, we maintain in each
-                                * waker queue a list, woken_list, of
-                                * all the queues that reference the
-                                * waker queue through their
-                                * waker_bfqq pointer. When the waker
-                                * queue exits, the waker_bfqq pointer
-                                * of all the queues in the woken_list
-                                * is reset.
-                                *
-                                * In addition, if bfqq is already in
-                                * the woken_list of a waker queue,
-                                * then, before being inserted into
-                                * the woken_list of a new waker
-                                * queue, bfqq must be removed from
-                                * the woken_list of the old waker
-                                * queue.
-                                */
-                               if (!hlist_unhashed(&bfqq->woken_list_node))
-                                       hlist_del_init(&bfqq->woken_list_node);
-                               hlist_add_head(&bfqq->woken_list_node,
-                                   &bfqd->last_completed_rq_bfqq->woken_list);
-
-                               bfq_clear_bfqq_has_waker(bfqq);
-                       } else if (bfqd->last_completed_rq_bfqq ==
-                                  bfqq->waker_bfqq &&
-                                  !bfq_bfqq_has_waker(bfqq)) {
-                               /*
-                                * synchronization with waker_bfqq
-                                * seen for the second time
-                                */
-                               bfq_mark_bfqq_has_waker(bfqq);
-                       }
-               }
+               bfq_check_waker(bfqd, bfqq, now_ns);
 
                /*
                 * Periodically reset inject limit, to make sure that
@@ -4569,7 +4565,7 @@ check_queue:
                    bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <=
                    bfq_bfqq_budget_left(async_bfqq))
                        bfqq = bfqq->bic->bfqq[0];
-               else if (bfq_bfqq_has_waker(bfqq) &&
+               else if (bfqq->waker_bfqq &&
                           bfq_bfqq_busy(bfqq->waker_bfqq) &&
                           bfqq->waker_bfqq->next_rq &&
                           bfq_serv_to_charge(bfqq->waker_bfqq->next_rq,
@@ -4973,7 +4969,6 @@ void bfq_put_queue(struct bfq_queue *bfqq)
        hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
                                  woken_list_node) {
                item->waker_bfqq = NULL;
-               bfq_clear_bfqq_has_waker(item);
                hlist_del_init(&item->woken_list_node);
        }
 
index 3f350fa..b8e793c 100644 (file)
@@ -376,6 +376,11 @@ struct bfq_queue {
         * bfq_select_queue().
         */
        struct bfq_queue *waker_bfqq;
+       /* pointer to the curr. tentative waker queue, see bfq_check_waker() */
+       struct bfq_queue *tentative_waker_bfqq;
+       /* number of times the same tentative waker has been detected */
+       unsigned int num_waker_detections;
+
        /* node for woken_list, see below */
        struct hlist_node woken_list_node;
        /*
@@ -776,7 +781,6 @@ enum bfqq_state_flags {
                                 */
        BFQQF_coop,             /* bfqq is shared */
        BFQQF_split_coop,       /* shared bfqq will be split */
-       BFQQF_has_waker         /* bfqq has a waker queue */
 };
 
 #define BFQ_BFQQ_FNS(name)                                             \
@@ -796,7 +800,6 @@ BFQ_BFQQ_FNS(in_large_burst);
 BFQ_BFQQ_FNS(coop);
 BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
-BFQ_BFQQ_FNS(has_waker);
 #undef BFQ_BFQQ_FNS
 
 /* Expiration reasons. */