+static struct bfq_queue *
+bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *last_bfqq_created)
+{
+ struct bfq_queue *new_bfqq =
+ bfq_setup_merge(bfqq, last_bfqq_created);
+
+ if (!new_bfqq)
+ return bfqq;
+
+ if (new_bfqq->bic)
+ new_bfqq->bic->stably_merged = true;
+ bic->stably_merged = true;
+
+ /*
+ * Reusing merge functions. This implies that
+ * bfqq->bic must be set too, for
+ * bfq_merge_bfqqs to correctly save bfqq's
+ * state before killing it.
+ */
+ bfqq->bic = bic;
+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
+
+ return new_bfqq;
+}
+
+/*
+ * Many throughput-sensitive workloads are made of several parallel
+ * I/O flows, with all flows generated by the same application, or
+ * more generically by the same task (e.g., system boot). The most
+ * counterproductive action with these workloads is plugging I/O
+ * dispatch when one of the bfq_queues associated with these flows
+ * remains temporarily empty.
+ *
+ * To avoid this plugging, BFQ has been using a burst-handling
+ * mechanism for years now. This mechanism has proven effective for
+ * throughput, and not detrimental for service guarantees. The
+ * following function pushes this mechanism a little bit further,
+ * basing on the following two facts.
+ *
+ * First, all the I/O flows of a the same application or task
+ * contribute to the execution/completion of that common application
+ * or task. So the performance figures that matter are total
+ * throughput of the flows and task-wide I/O latency. In particular,
+ * these flows do not need to be protected from each other, in terms
+ * of individual bandwidth or latency.
+ *
+ * Second, the above fact holds regardless of the number of flows.
+ *
+ * Putting these two facts together, this commits merges stably the
+ * bfq_queues associated with these I/O flows, i.e., with the
+ * processes that generate these IO/ flows, regardless of how many the
+ * involved processes are.
+ *
+ * To decide whether a set of bfq_queues is actually associated with
+ * the I/O flows of a common application or task, and to merge these
+ * queues stably, this function operates as follows: given a bfq_queue,
+ * say Q2, currently being created, and the last bfq_queue, say Q1,
+ * created before Q2, Q2 is merged stably with Q1 if
+ * - very little time has elapsed since when Q1 was created
+ * - Q2 has the same ioprio as Q1
+ * - Q2 belongs to the same group as Q1
+ *
+ * Merging bfq_queues also reduces scheduling overhead. A fio test
+ * with ten random readers on /dev/nullb shows a throughput boost of
+ * 40%, with a quadcore. Since BFQ's execution time amounts to ~50% of
+ * the total per-request processing time, the above throughput boost
+ * implies that BFQ's overhead is reduced by more than 50%.
+ *
+ * This new mechanism most certainly obsoletes the current
+ * burst-handling heuristics. We keep those heuristics for the moment.
+ */
+static struct bfq_queue *bfq_do_or_sched_stable_merge(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ struct bfq_io_cq *bic)
+{
+ struct bfq_queue **source_bfqq = bfqq->entity.parent ?
+ &bfqq->entity.parent->last_bfqq_created :
+ &bfqd->last_bfqq_created;
+
+ struct bfq_queue *last_bfqq_created = *source_bfqq;
+
+ /*
+ * If last_bfqq_created has not been set yet, then init it. If
+ * it has been set already, but too long ago, then move it
+ * forward to bfqq. Finally, move also if bfqq belongs to a
+ * different group than last_bfqq_created, or if bfqq has a
+ * different ioprio or ioprio_class. If none of these
+ * conditions holds true, then try an early stable merge or
+ * schedule a delayed stable merge.
+ *
+ * A delayed merge is scheduled (instead of performing an
+ * early merge), in case bfqq might soon prove to be more
+ * throughput-beneficial if not merged. Currently this is
+ * possible only if bfqd is rotational with no queueing. For
+ * such a drive, not merging bfqq is better for throughput if
+ * bfqq happens to contain sequential I/O. So, we wait a
+ * little bit for enough I/O to flow through bfqq. After that,
+ * if such an I/O is sequential, then the merge is
+ * canceled. Otherwise the merge is finally performed.
+ */
+ if (!last_bfqq_created ||
+ time_before(last_bfqq_created->creation_time +
+ bfqd->bfq_burst_interval,
+ bfqq->creation_time) ||
+ bfqq->entity.parent != last_bfqq_created->entity.parent ||
+ bfqq->ioprio != last_bfqq_created->ioprio ||
+ bfqq->ioprio_class != last_bfqq_created->ioprio_class)
+ *source_bfqq = bfqq;
+ else if (time_after_eq(last_bfqq_created->creation_time +
+ bfqd->bfq_burst_interval,
+ bfqq->creation_time)) {
+ if (likely(bfqd->nonrot_with_queueing))
+ /*
+ * With this type of drive, leaving
+ * bfqq alone may provide no
+ * throughput benefits compared with
+ * merging bfqq. So merge bfqq now.
+ */
+ bfqq = bfq_do_early_stable_merge(bfqd, bfqq,
+ bic,
+ last_bfqq_created);
+ else { /* schedule tentative stable merge */
+ /*
+ * get reference on last_bfqq_created,
+ * to prevent it from being freed,
+ * until we decide whether to merge
+ */
+ last_bfqq_created->ref++;
+ /*
+ * need to keep track of stable refs, to
+ * compute process refs correctly
+ */
+ last_bfqq_created->stable_ref++;
+ /*
+ * Record the bfqq to merge to.
+ */
+ bic->stable_merge_bfqq = last_bfqq_created;
+ }
+ }
+
+ return bfqq;
+}
+
+