blk-throttle: simplify low limit reached check in throtl_tg_can_upgrade
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 847721d..1623507 100644
@@ -395,8 +395,9 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
         * If on the default hierarchy, we switch to properly hierarchical
         * behavior where limits on a given throtl_grp are applied to the
         * whole subtree rather than just the group itself.  e.g. If 16M
-        * read_bps limit is set on the root group, the whole system can't
-        * exceed 16M for the device.
+        * read_bps limit is set on a parent group, the total bps of the
+        * parent group and its subtree groups can't exceed 16M for the
+        * device.
         *
         * If not on the default hierarchy, the broken flat hierarchy
         * behavior is retained where all throtl_grps are treated as if
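
A minimal sketch of the hierarchical accounting the reworded comment describes, with simplified types (not the kernel implementation): on the default hierarchy a bio is charged at every level up to the root, so a parent's limit caps the sum of its whole subtree.

	struct tg_sketch {
		struct tg_sketch *parent;
		u64 bytes_disp;			/* bytes dispatched so far */
	};

	static void charge_up_the_tree(struct tg_sketch *tg, unsigned int bytes)
	{
		for (; tg; tg = tg->parent)
			tg->bytes_disp += bytes;	/* each ancestor sees the bio */
	}
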
@@ -821,17 +822,15 @@ static void tg_update_carryover(struct throtl_grp *tg)
                   tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
 }
 
-static bool tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
-                                u32 iops_limit, unsigned long *wait)
+static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
+                                u32 iops_limit)
 {
        bool rw = bio_data_dir(bio);
        unsigned int io_allowed;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 
        if (iops_limit == UINT_MAX) {
-               if (wait)
-                       *wait = 0;
-               return true;
+               return 0;
        }
 
        jiffy_elapsed = jiffies - tg->slice_start[rw];
@@ -841,21 +840,16 @@ static bool tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
        io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
                     tg->carryover_ios[rw];
        if (tg->io_disp[rw] + 1 <= io_allowed) {
-               if (wait)
-                       *wait = 0;
-               return true;
+               return 0;
        }
 
        /* Calc approx time to dispatch */
        jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
-
-       if (wait)
-               *wait = jiffy_wait;
-       return false;
+       return jiffy_wait;
 }
 
-static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
-                               u64 bps_limit, unsigned long *wait)
+static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
+                               u64 bps_limit)
 {
        bool rw = bio_data_dir(bio);
        u64 bytes_allowed, extra_bytes;
@@ -864,9 +858,7 @@ static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
 
        /* no need to throttle if this bio's bytes have been accounted */
        if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) {
-               if (wait)
-                       *wait = 0;
-               return true;
+               return 0;
        }
 
        jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
@@ -879,9 +871,7 @@ static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
        bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
                        tg->carryover_bytes[rw];
        if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
-               if (wait)
-                       *wait = 0;
-               return true;
+               return 0;
        }
 
        /* Calc approx time to dispatch */
@@ -896,9 +886,7 @@ static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
         * up we did. Add that time also.
         */
        jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
-       if (wait)
-               *wait = jiffy_wait;
-       return false;
+       return jiffy_wait;
 }
 
 /*
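
The hunks above change the helpers' calling convention: instead of returning a bool and writing the wait time through an optional pointer, they return the wait time directly, with 0 meaning "within limit". A minimal sketch of the two conventions, with hypothetical names and simplified arguments:

	/* old convention: result and wait time travel separately */
	static bool within_limit_old(u64 disp, u64 allowed,
				     unsigned long jiffy_wait, unsigned long *wait)
	{
		if (disp <= allowed) {
			if (wait)
				*wait = 0;
			return true;
		}
		if (wait)
			*wait = jiffy_wait;
		return false;
	}

	/* new convention: 0 encodes "within limit", anything else is the wait */
	static unsigned long within_limit_new(u64 disp, u64 allowed,
					      unsigned long jiffy_wait)
	{
		if (disp <= allowed)
			return 0;
		return jiffy_wait;
	}
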
@@ -946,8 +934,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                                jiffies + tg->td->throtl_slice);
        }
 
-       if (tg_within_bps_limit(tg, bio, bps_limit, &bps_wait) &&
-           tg_within_iops_limit(tg, bio, iops_limit, &iops_wait)) {
+       bps_wait = tg_within_bps_limit(tg, bio, bps_limit);
+       iops_wait = tg_within_iops_limit(tg, bio, iops_limit);
+       if (bps_wait + iops_wait == 0) {
                if (wait)
                        *wait = 0;
                return true;
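
With both helpers returning wait times, the "within both limits" check reduces to arithmetic on the two values. The throttled tail of tg_may_dispatch() is not shown in this hunk; presumably it still combines the two waits with max(), along these lines (a sketch, not the verbatim source):

	/* presumed tail of tg_may_dispatch(): the bio must wait for the
	 * slower of the two limits */
	max_wait = max(bps_wait, iops_wait);
	if (wait)
		*wait = max_wait;
	return false;
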
@@ -1066,7 +1055,6 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
        sq->nr_queued[rw]--;
 
        throtl_charge_bio(tg, bio);
-       bio_set_flag(bio, BIO_BPS_THROTTLED);
 
        /*
         * If our parent is another tg, we just need to transfer @bio to
@@ -1079,6 +1067,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
                throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
                start_parent_slice_with_credit(tg, parent_tg, rw);
        } else {
+               bio_set_flag(bio, BIO_BPS_THROTTLED);
                throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
                                     &parent_sq->queued[rw]);
                BUG_ON(tg->td->nr_queued[rw] <= 0);
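
Moving bio_set_flag() into the else branch matters because tg_within_bps_limit() (above) skips bps accounting for flagged bios; setting the flag before the bio reached the parent throtl_grp let a child's traffic escape the parent's bps limit. A simplified sketch of the intended flow, with hypothetical queueing helpers:

	if (parent_tg) {
		/* bio re-enters throttling at the parent: leave the flag
		 * clear so the parent's bps limit still charges it */
		queue_to_parent_tg(bio);		/* hypothetical */
	} else {
		/* bio exits the throttle tree: flag it so it is never
		 * charged against a bps limit again */
		bio_set_flag(bio, BIO_BPS_THROTTLED);
		queue_to_request_queue(bio);		/* hypothetical */
	}
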
@@ -1737,7 +1726,18 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
                 * Set the flag to make sure throtl_pending_timer_fn() won't
                 * stop until all throttled bios are dispatched.
                 */
-               blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+               tg->flags |= THROTL_TG_CANCELING;
+
+               /*
+                * Do not dispatch a cgroup without THROTL_TG_PENDING set, or
+                * it will be inserted into the service queue without
+                * THROTL_TG_PENDING in tg_update_disptime() below. An IO
+                * dispatched from a child in tg_dispatch_one_bio() would then
+                * trigger a double insertion and corrupt the tree.
+                */
+               if (!(tg->flags & THROTL_TG_PENDING))
+                       continue;
+
                /*
                 * Update disptime after setting the above flag to make sure
                 * throtl_select_dispatch() won't exit without dispatching.
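
The new check leans on the invariant that THROTL_TG_PENDING mirrors membership on the service queue. A sketch of that invariant, assuming an enqueue helper shaped roughly like the kernel's:

	static void enqueue_tg_sketch(struct throtl_grp *tg)
	{
		/* insert once; the flag tracks queue membership */
		if (!(tg->flags & THROTL_TG_PENDING)) {
			tg->flags |= THROTL_TG_PENDING;
			service_queue_insert(tg);	/* hypothetical */
		}
	}

Skipping a non-pending tg in blk_throtl_cancel_bios() therefore avoids tg_update_disptime() putting it on the queue while the flag is still clear.
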
@@ -1816,24 +1816,29 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
        return ret;
 }
 
-static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
+static bool throtl_low_limit_reached(struct throtl_grp *tg, int rw)
 {
        struct throtl_service_queue *sq = &tg->service_queue;
-       bool read_limit, write_limit;
+       bool limit = tg->bps[rw][LIMIT_LOW] || tg->iops[rw][LIMIT_LOW];
 
        /*
-        * if cgroup reaches low limit (if low limit is 0, the cgroup always
-        * reaches), it's ok to upgrade to next limit
+        * If the low limit is zero, it is always reached.
+        * If the low limit is non-zero, we can check whether any request
+        * is queued to determine whether the low limit is reached, as we
+        * throttle requests according to the limit.
         */
-       read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
-       write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
-       if (!read_limit && !write_limit)
-               return true;
-       if (read_limit && sq->nr_queued[READ] &&
-           (!write_limit || sq->nr_queued[WRITE]))
-               return true;
-       if (write_limit && sq->nr_queued[WRITE] &&
-           (!read_limit || sq->nr_queued[READ]))
+       return !limit || sq->nr_queued[rw];
+}
+
+static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
+{
+       /*
+        * A cgroup reaches its low limit when the low limits of both READ
+        * and WRITE are reached, so it's ok to upgrade to the next limit
+        * once the cgroup reaches its low limit.
+        */
+       if (throtl_low_limit_reached(tg, READ) &&
+           throtl_low_limit_reached(tg, WRITE))
                return true;
 
        if (time_after_eq(jiffies,
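
The excerpt is cut off above, but the new helper's per-direction semantics follow from the hunk alone. A standalone restatement for illustration (kernel types assumed; not the verbatim source):

	/* mirrors throtl_low_limit_reached() for one direction */
	static bool low_limit_reached_sketch(u64 bps_low, u32 iops_low,
					     unsigned int nr_queued)
	{
		bool limit = bps_low || iops_low;

		/* no low limit => vacuously reached; otherwise reached iff
		 * requests are queued, i.e. the limit is throttling us */
		return !limit || nr_queued;
	}
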