block, bfq: split function bfq_better_to_idle

[linux-2.6-microblaze.git] / block / bfq-iosched.c
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c

index 9733721..2756f4b 100644 (file)
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -399,9 +399,9 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
                 unsigned long flags;
                 struct bfq_io_cq *icq;
  
-               spin_lock_irqsave(q->queue_lock, flags);
+               spin_lock_irqsave(&q->queue_lock, flags);
                 icq = icq_to_bic(ioc_lookup_icq(ioc, q));
-               spin_unlock_irqrestore(q->queue_lock, flags);
+               spin_unlock_irqrestore(&q->queue_lock, flags);
  
                 return icq;
         }
@@ -623,26 +623,6 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
                 bfqq->pos_root = NULL;
  }
  
-/*
- * Tell whether there are active queues with different weights or
- * active groups.
- */
-static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
-{
-       /*
-        * For queue weights to differ, queue_weights_tree must contain
-        * at least two nodes.
-        */
-       return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
-               (bfqd->queue_weights_tree.rb_node->rb_left ||
-                bfqd->queue_weights_tree.rb_node->rb_right)
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
-              ) ||
-               (bfqd->num_groups_with_pending_reqs > 0
-#endif
-              );
-}
-
  /*
   * The following function returns true if every queue must receive the
   * same share of the throughput (this condition is used when deciding
@@ -651,25 +631,48 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
   *
   * Such a scenario occurs when:
   * 1) all active queues have the same weight,
- * 2) all active groups at the same level in the groups tree have the same
- *    weight,
+ * 2) all active queues belong to the same I/O-priority class,
   * 3) all active groups at the same level in the groups tree have the same
+ *    weight,
+ * 4) all active groups at the same level in the groups tree have the same
   *    number of children.
   *
   * Unfortunately, keeping the necessary state for evaluating exactly
   * the last two symmetry sub-conditions above would be quite complex
- * and time consuming.  Therefore this function evaluates, instead,
- * only the following stronger two sub-conditions, for which it is
+ * and time consuming. Therefore this function evaluates, instead,
+ * only the following stronger three sub-conditions, for which it is
   * much easier to maintain the needed state:
   * 1) all active queues have the same weight,
- * 2) there are no active groups.
+ * 2) all active queues belong to the same I/O-priority class,
+ * 3) there are no active groups.
   * In particular, the last condition is always true if hierarchical
   * support or the cgroups interface are not enabled, thus no state
   * needs to be maintained in this case.
   */
  static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
  {
-       return !bfq_varied_queue_weights_or_active_groups(bfqd);
+       /*
+        * For queue weights to differ, queue_weights_tree must contain
+        * at least two nodes.
+        */
+       bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
+               (bfqd->queue_weights_tree.rb_node->rb_left ||
+                bfqd->queue_weights_tree.rb_node->rb_right);
+
+       bool multiple_classes_busy =
+               (bfqd->busy_queues[0] && bfqd->busy_queues[1]) ||
+               (bfqd->busy_queues[0] && bfqd->busy_queues[2]) ||
+               (bfqd->busy_queues[1] && bfqd->busy_queues[2]);
+
+       /*
+        * For queue weights to differ, queue_weights_tree must contain
+        * at least two nodes.
+        */
+       return !(varied_queue_weights || multiple_classes_busy
+#ifdef BFQ_GROUP_IOSCHED_ENABLED
+              || bfqd->num_groups_with_pending_reqs > 0
+#endif
+               );
  }
  
  /*
@@ -728,15 +731,14 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
         /*
          * In the unlucky event of an allocation failure, we just
          * exit. This will cause the weight of queue to not be
-        * considered in bfq_varied_queue_weights_or_active_groups,
-        * which, in its turn, causes the scenario to be deemed
-        * wrongly symmetric in case bfqq's weight would have been
-        * the only weight making the scenario asymmetric.  On the
-        * bright side, no unbalance will however occur when bfqq
-        * becomes inactive again (the invocation of this function
-        * is triggered by an activation of queue).  In fact,
-        * bfq_weights_tree_remove does nothing if
-        * !bfqq->weight_counter.
+        * considered in bfq_symmetric_scenario, which, in its turn,
+        * causes the scenario to be deemed wrongly symmetric in case
+        * bfqq's weight would have been the only weight making the
+        * scenario asymmetric.  On the bright side, no unbalance will
+        * however occur when bfqq becomes inactive again (the
+        * invocation of this function is triggered by an activation
+        * of queue).  In fact, bfq_weights_tree_remove does nothing
+        * if !bfqq->weight_counter.
          */
         if (unlikely(!bfqq->weight_counter))
                 return;
@@ -907,8 +909,10 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
                  */
                 return;
  
-       new_budget = max_t(unsigned long, bfqq->max_budget,
-                          bfq_serv_to_charge(next_rq, bfqq));
+       new_budget = max_t(unsigned long,
+                          max_t(unsigned long, bfqq->max_budget,
+                                bfq_serv_to_charge(next_rq, bfqq)),
+                          entity->service);
         if (entity->budget != new_budget) {
                 entity->budget = new_budget;
                 bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
@@ -1380,7 +1384,15 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
  {
         struct bfq_entity *entity = &bfqq->entity;
  
-       if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
+       /*
+        * In the next compound condition, we check also whether there
+        * is some budget left, because otherwise there is no point in
+        * trying to go on serving bfqq with this same budget: bfqq
+        * would be expired immediately after being selected for
+        * service. This would only cause useless overhead.
+        */
+       if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time &&
+           bfq_bfqq_budget_left(bfqq) > 0) {
                 /*
                  * We do not clear the flag non_blocking_wait_rq here, as
                  * the latter is used in bfq_activate_bfqq to signal
@@ -2217,7 +2229,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                 return NULL;
  
         /* If there is only one backlogged queue, don't search. */
-       if (bfqd->busy_queues == 1)
+       if (bfq_tot_busy_queues(bfqd) == 1)
                 return NULL;
  
         in_service_bfqq = bfqd->in_service_queue;
@@ -3274,16 +3286,32 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
                  * requests, then the request pattern is isochronous
                  * (see the comments on the function
                  * bfq_bfqq_softrt_next_start()). Thus we can compute
-                * soft_rt_next_start. If, instead, the queue still
-                * has outstanding requests, then we have to wait for
-                * the completion of all the outstanding requests to
-                * discover whether the request pattern is actually
-                * isochronous.
+                * soft_rt_next_start. And we do it, unless bfqq is in
+                * interactive weight raising. We do not do it in the
+                * latter subcase, for the following reason. bfqq may
+                * be conveying the I/O needed to load a soft
+                * real-time application. Such an application will
+                * actually exhibit a soft real-time I/O pattern after
+                * it finally starts doing its job. But, if
+                * soft_rt_next_start is computed here for an
+                * interactive bfqq, and bfqq had received a lot of
+                * service before remaining with no outstanding
+                * request (likely to happen on a fast device), then
+                * soft_rt_next_start would be assigned such a high
+                * value that, for a very long time, bfqq would be
+                * prevented from being possibly considered as soft
+                * real time.
+                *
+                * If, instead, the queue still has outstanding
+                * requests, then we have to wait for the completion
+                * of all the outstanding requests to discover whether
+                * the request pattern is actually isochronous.
                  */
-               if (bfqq->dispatched == 0)
+               if (bfqq->dispatched == 0 &&
+                   bfqq->wr_coeff != bfqd->bfq_wr_coeff)
                         bfqq->soft_rt_next_start =
                                 bfq_bfqq_softrt_next_start(bfqd, bfqq);
-               else {
+               else if (bfqq->dispatched > 0) {
                         /*
                          * Schedule an update of soft_rt_next_start to when
                          * the task may be discovered to be isochronous.
@@ -3376,53 +3404,13 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
                 bfq_bfqq_budget_timeout(bfqq);
  }
  
-/*
- * For a queue that becomes empty, device idling is allowed only if
- * this function returns true for the queue. As a consequence, since
- * device idling plays a critical role in both throughput boosting and
- * service guarantees, the return value of this function plays a
- * critical role in both these aspects as well.
- *
- * In a nutshell, this function returns true only if idling is
- * beneficial for throughput or, even if detrimental for throughput,
- * idling is however necessary to preserve service guarantees (low
- * latency, desired throughput distribution, ...). In particular, on
- * NCQ-capable devices, this function tries to return false, so as to
- * help keep the drives' internal queues full, whenever this helps the
- * device boost the throughput without causing any service-guarantee
- * issue.
- *
- * In more detail, the return value of this function is obtained by,
- * first, computing a number of boolean variables that take into
- * account throughput and service-guarantee issues, and, then,
- * combining these variables in a logical expression. Most of the
- * issues taken into account are not trivial. We discuss these issues
- * individually while introducing the variables.
- */
-static bool bfq_better_to_idle(struct bfq_queue *bfqq)
+static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
+                                            struct bfq_queue *bfqq)
  {
-       struct bfq_data *bfqd = bfqq->bfqd;
         bool rot_without_queueing =
                 !blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
                 bfqq_sequential_and_IO_bound,
-               idling_boosts_thr, idling_boosts_thr_without_issues,
-               idling_needed_for_service_guarantees,
-               asymmetric_scenario;
-
-       if (bfqd->strict_guarantees)
-               return true;
-
-       /*
-        * Idling is performed only if slice_idle > 0. In addition, we
-        * do not idle if
-        * (a) bfqq is async
-        * (b) bfqq is in the idle io prio class: in this case we do
-        * not idle because we want to minimize the bandwidth that
-        * queues in this class can steal to higher-priority queues
-        */
-       if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
-           bfq_class_idle(bfqq))
-               return false;
+               idling_boosts_thr;
  
         bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
                 bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
@@ -3454,8 +3442,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
                  bfqq_sequential_and_IO_bound);
  
         /*
-        * The value of the next variable,
-        * idling_boosts_thr_without_issues, is equal to that of
+        * The return value of this function is equal to that of
          * idling_boosts_thr, unless a special case holds. In this
          * special case, described below, idling may cause problems to
          * weight-raised queues.
@@ -3472,32 +3459,35 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
          * which enqueue several requests in advance, and further
          * reorder internally-queued requests.
          *
-        * For this reason, we force to false the value of
-        * idling_boosts_thr_without_issues if there are weight-raised
-        * busy queues. In this case, and if bfqq is not weight-raised,
-        * this guarantees that the device is not idled for bfqq (if,
-        * instead, bfqq is weight-raised, then idling will be
-        * guaranteed by another variable, see below). Combined with
-        * the timestamping rules of BFQ (see [1] for details), this
-        * behavior causes bfqq, and hence any sync non-weight-raised
-        * queue, to get a lower number of requests served, and thus
-        * to ask for a lower number of requests from the request
-        * pool, before the busy weight-raised queues get served
-        * again. This often mitigates starvation problems in the
-        * presence of heavy write workloads and NCQ, thereby
-        * guaranteeing a higher application and system responsiveness
-        * in these hostile scenarios.
+        * For this reason, we force to false the return value if
+        * there are weight-raised busy queues. In this case, and if
+        * bfqq is not weight-raised, this guarantees that the device
+        * is not idled for bfqq (if, instead, bfqq is weight-raised,
+        * idling is guaranteed by idling_needed_for_service_guarantees(),
+        * defined below). Combined with the timestamping rules of BFQ (see
+        * [1] for details), this behavior causes bfqq, and hence any
+        * sync non-weight-raised queue, to get a lower number of
+        * requests served, and thus to ask for a lower number of
+        * requests from the request pool, before the busy
+        * weight-raised queues get served again. This often mitigates
+        * starvation problems in the presence of heavy write
+        * workloads and NCQ, thereby guaranteeing a higher
+        * application and system responsiveness in these hostile
+        * scenarios.
          */
-       idling_boosts_thr_without_issues = idling_boosts_thr &&
+       return idling_boosts_thr &&
                 bfqd->wr_busy_queues == 0;
+}
  
+static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
+                                                struct bfq_queue *bfqq)
+{
         /*
-        * There is then a case where idling must be performed not
-        * for throughput concerns, but to preserve service
-        * guarantees.
+        * There is a case where idling must be performed not for
+        * throughput concerns, but to preserve service guarantees.
          *
          * To introduce this case, we can note that allowing the drive
-        * to enqueue more than one request at a time, and hence
+        * to enqueue more than one request at a time, and thereby
          * delegating de facto final scheduling decisions to the
          * drive's internal scheduler, entails loss of control on the
          * actual request service order. In particular, the critical
@@ -3654,8 +3644,9 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
          * to let requests be served in the desired order until all
          * the requests already queued in the device have been served.
          */
-       asymmetric_scenario = (bfqq->wr_coeff > 1 &&
-                              bfqd->wr_busy_queues < bfqd->busy_queues) ||
+       bool asymmetric_scenario = (bfqq->wr_coeff > 1 &&
+                                   bfqd->wr_busy_queues <
+                                   bfq_tot_busy_queues(bfqd)) ||
                 !bfq_symmetric_scenario(bfqd);
  
         /*
@@ -3672,17 +3663,64 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
          * now establish when idling is actually needed to preserve
          * service guarantees.
          */
-       idling_needed_for_service_guarantees =
-               asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+       return asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
+}
+
+/*
+ * For a queue that becomes empty, device idling is allowed only if
+ * this function returns true for that queue. As a consequence, since
+ * device idling plays a critical role for both throughput boosting
+ * and service guarantees, the return value of this function plays a
+ * critical role as well.
+ *
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
+ *
+ * Most of the issues taken into account to get the return value of
+ * this function are not trivial. We discuss these issues in the two
+ * functions providing the main pieces of information needed by this
+ * function.
+ */
+static bool bfq_better_to_idle(struct bfq_queue *bfqq)
+{
+       struct bfq_data *bfqd = bfqq->bfqd;
+       bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar;
+
+       if (unlikely(bfqd->strict_guarantees))
+               return true;
+
+       /*
+        * Idling is performed only if slice_idle > 0. In addition, we
+        * do not idle if
+        * (a) bfqq is async
+        * (b) bfqq is in the idle io prio class: in this case we do
+        * not idle because we want to minimize the bandwidth that
+        * queues in this class can steal from higher-priority queues
+        */
+       if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
+          bfq_class_idle(bfqq))
+               return false;
+
+       idling_boosts_thr_with_no_issue =
+               idling_boosts_thr_without_issues(bfqd, bfqq);
+
+       idling_needed_for_service_guar =
+               idling_needed_for_service_guarantees(bfqd, bfqq);
  
         /*
-        * We have now all the components we need to compute the
+        * We now have the two components we need to compute the
          * return value of the function, which is true only if idling
          * either boosts the throughput (without issues), or is
          * necessary to preserve service guarantees.
          */
-       return idling_boosts_thr_without_issues ||
-               idling_needed_for_service_guarantees;
+       return idling_boosts_thr_with_no_issue ||
+               idling_needed_for_service_guar;
  }
  
  /*
@@ -3934,7 +3972,7 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
          * belongs to CLASS_IDLE and other queues are waiting for
          * service.
          */
-       if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq)))
+       if (!(bfq_tot_busy_queues(bfqd) > 1 && bfq_class_idle(bfqq)))
                 goto return_rq;
  
         bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
@@ -3952,7 +3990,7 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
          * most a call to dispatch for nothing
          */
         return !list_empty_careful(&bfqd->dispatch) ||
-               bfqd->busy_queues > 0;
+               bfq_tot_busy_queues(bfqd) > 0;
  }
  
  static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -4006,9 +4044,10 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
                 goto start_rq;
         }
  
-       bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
+       bfq_log(bfqd, "dispatch requests: %d busy queues",
+               bfq_tot_busy_queues(bfqd));
  
-       if (bfqd->busy_queues == 0)
+       if (bfq_tot_busy_queues(bfqd) == 0)
                 goto exit;
  
         /*
@@ -4066,7 +4105,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q,
          * In addition, the following queue lock guarantees that
          * bfqq_group(bfqq) exists as well.
          */
-       spin_lock_irq(q->queue_lock);
+       spin_lock_irq(&q->queue_lock);
         if (idle_timer_disabled)
                 /*
                  * Since the idle timer has been disabled,
@@ -4085,7 +4124,7 @@ static void bfq_update_dispatch_stats(struct request_queue *q,
                 bfqg_stats_set_start_empty_time(bfqg);
                 bfqg_stats_update_io_remove(bfqg, rq->cmd_flags);
         }
-       spin_unlock_irq(q->queue_lock);
+       spin_unlock_irq(&q->queue_lock);
  }
  #else
  static inline void bfq_update_dispatch_stats(struct request_queue *q,
@@ -4416,7 +4455,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
  
         rcu_read_lock();
  
-       bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+       bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
         if (!bfqg) {
                 bfqq = &bfqd->oom_bfqq;
                 goto out;
@@ -4607,8 +4646,6 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
         bool waiting, idle_timer_disabled = false;
  
         if (new_bfqq) {
-               if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
-                       new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
                 /*
                  * Release the request's reference to the old bfqq
                  * and make sure one is taken to the shared queue.
@@ -4669,11 +4706,11 @@ static void bfq_update_insert_stats(struct request_queue *q,
          * In addition, the following queue lock guarantees that
          * bfqq_group(bfqq) exists as well.
          */
-       spin_lock_irq(q->queue_lock);
+       spin_lock_irq(&q->queue_lock);
         bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags);
         if (idle_timer_disabled)
                 bfqg_stats_update_idle_time(bfqq_group(bfqq));
-       spin_unlock_irq(q->queue_lock);
+       spin_unlock_irq(&q->queue_lock);
  }
  #else
  static inline void bfq_update_insert_stats(struct request_queue *q,
@@ -4834,11 +4871,14 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
          * isochronous, and both requisites for this condition to hold
          * are now satisfied, then compute soft_rt_next_start (see the
          * comments on the function bfq_bfqq_softrt_next_start()). We
-        * schedule this delayed check when bfqq expires, if it still
-        * has in-flight requests.
+        * do not compute soft_rt_next_start if bfqq is in interactive
+        * weight raising (see the comments in bfq_bfqq_expire() for
+        * an explanation). We schedule this delayed update when bfqq
+        * expires, if it still has in-flight requests.
          */
         if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
-           RB_EMPTY_ROOT(&bfqq->sort_list))
+           RB_EMPTY_ROOT(&bfqq->sort_list) &&
+           bfqq->wr_coeff != bfqd->bfq_wr_coeff)
                 bfqq->soft_rt_next_start =
                         bfq_bfqq_softrt_next_start(bfqd, bfqq);
  
@@ -5414,9 +5454,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
         }
         eq->elevator_data = bfqd;
  
-       spin_lock_irq(q->queue_lock);
+       spin_lock_irq(&q->queue_lock);
         q->elevator = eq;
-       spin_unlock_irq(q->queue_lock);
+       spin_unlock_irq(&q->queue_lock);
  
         /*
          * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues.
@@ -5756,7 +5796,7 @@ static struct elv_fs_entry bfq_attrs[] = {
  };
  
  static struct elevator_type iosched_bfq_mq = {
-       .ops.mq = {
+       .ops = {
                 .limit_depth            = bfq_limit_depth,
                 .prepare_request        = bfq_prepare_request,
                 .requeue_request        = bfq_finish_requeue_request,
@@ -5777,7 +5817,6 @@ static struct elevator_type iosched_bfq_mq = {
                 .exit_sched             = bfq_exit_queue,
         },
  
-       .uses_mq =              true,
         .icq_size =             sizeof(struct bfq_io_cq),
         .icq_align =            __alignof__(struct bfq_io_cq),
         .elevator_attrs =       bfq_attrs,