blk-throttle: make bandwidth change smooth

author Shaohua Li <shli@fb.com>

Mon, 27 Mar 2017 17:51:40 +0000 (10:51 -0700)

committer Jens Axboe <axboe@fb.com>

Tue, 28 Mar 2017 14:02:20 +0000 (08:02 -0600)
author Shaohua Li <shli@fb.com>
Mon, 27 Mar 2017 17:51:40 +0000 (10:51 -0700)
committer Jens Axboe <axboe@fb.com>
Tue, 28 Mar 2017 14:02:20 +0000 (08:02 -0600)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index 014b2e9..62984fc 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -175,6 +175,8 @@ struct throtl_data
  
         unsigned long low_upgrade_time;
         unsigned long low_downgrade_time;
+
+       unsigned int scale;
  };
  
  static void throtl_pending_timer_fn(unsigned long arg);
@@ -226,29 +228,70 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
                 return container_of(sq, struct throtl_data, service_queue);
  }
  
+/*
+ * cgroup's limit in LIMIT_MAX is scaled if low limit is set. This scale is to
+ * make the IO dispatch more smooth.
+ * Scale up: linearly scale up according to lapsed time since upgrade. For
+ *           every throtl_slice, the limit scales up 1/2 .low limit till the
+ *           limit hits .max limit
+ * Scale down: exponentially scale down if a cgroup doesn't hit its .low limit
+ */
+static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
+{
+       /* arbitrary value to avoid too big scale */
+       if (td->scale < 4096 && time_after_eq(jiffies,
+           td->low_upgrade_time + td->scale * td->throtl_slice))
+               td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;
+
+       return low + (low >> 1) * td->scale;
+}
+
  static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
  {
         struct blkcg_gq *blkg = tg_to_blkg(tg);
+       struct throtl_data *td;
         uint64_t ret;
  
         if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
                 return U64_MAX;
-       ret = tg->bps[rw][tg->td->limit_index];
-       if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
+
+       td = tg->td;
+       ret = tg->bps[rw][td->limit_index];
+       if (ret == 0 && td->limit_index == LIMIT_LOW)
                 return tg->bps[rw][LIMIT_MAX];
+
+       if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
+           tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
+               uint64_t adjusted;
+
+               adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
+               ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
+       }
         return ret;
  }
  
  static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
  {
         struct blkcg_gq *blkg = tg_to_blkg(tg);
+       struct throtl_data *td;
         unsigned int ret;
  
         if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
                 return UINT_MAX;
-       ret = tg->iops[rw][tg->td->limit_index];
+       td = tg->td;
+       ret = tg->iops[rw][td->limit_index];
         if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
                 return tg->iops[rw][LIMIT_MAX];
+
+       if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
+           tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
+               uint64_t adjusted;
+
+               adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
+               if (adjusted > UINT_MAX)
+                       adjusted = UINT_MAX;
+               ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
+       }
         return ret;
  }
  
@@ -1677,6 +1720,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
  
         td->limit_index = LIMIT_MAX;
         td->low_upgrade_time = jiffies;
+       td->scale = 0;
         rcu_read_lock();
         blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
                 struct throtl_grp *tg = blkg_to_tg(blkg);
@@ -1694,6 +1738,13 @@ static void throtl_upgrade_state(struct throtl_data *td)
  
  static void throtl_downgrade_state(struct throtl_data *td, int new)
  {
+       td->scale /= 2;
+
+       if (td->scale) {
+               td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
+               return;
+       }
+
         td->limit_index = new;
         td->low_downgrade_time = jiffies;
  }
author	Shaohua Li <shli@fb.com>
	Mon, 27 Mar 2017 17:51:40 +0000 (10:51 -0700)
committer	Jens Axboe <axboe@fb.com>
	Tue, 28 Mar 2017 14:02:20 +0000 (08:02 -0600)