Merge tag 'pci-v5.11-fixes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci

[linux-2.6-microblaze.git] / block / blk-throttle.c
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index fee3325..d52cac9 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -15,10 +15,10 @@
  #include "blk-cgroup-rwstat.h"
  
  /* Max dispatch from a group in 1 round */
-static int throtl_grp_quantum = 8;
+#define THROTL_GRP_QUANTUM 8
  
  /* Total max dispatch from all groups in one round */
-static int throtl_quantum = 32;
+#define THROTL_QUANTUM 32
  
  /* Throttling is performed over a slice and after that slice is renewed */
  #define DFL_THROTL_SLICE_HD (HZ / 10)
@@ -150,7 +150,7 @@ struct throtl_grp {
         /* user configured IOPS limits */
         unsigned int iops_conf[2][LIMIT_CNT];
  
-       /* Number of bytes disptached in current slice */
+       /* Number of bytes dispatched in current slice */
         uint64_t bytes_disp[2];
         /* Number of bio's dispatched in current slice */
         unsigned int io_disp[2];
@@ -423,12 +423,13 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
   */
  static struct bio *throtl_peek_queued(struct list_head *queued)
  {
-       struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+       struct throtl_qnode *qn;
         struct bio *bio;
  
         if (list_empty(queued))
                 return NULL;
  
+       qn = list_first_entry(queued, struct throtl_qnode, node);
         bio = bio_list_peek(&qn->bios);
         WARN_ON_ONCE(!bio);
         return bio;
@@ -451,12 +452,13 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
  static struct bio *throtl_pop_queued(struct list_head *queued,
                                      struct throtl_grp **tg_to_put)
  {
-       struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node);
+       struct throtl_qnode *qn;
         struct bio *bio;
  
         if (list_empty(queued))
                 return NULL;
  
+       qn = list_first_entry(queued, struct throtl_qnode, node);
         bio = bio_list_pop(&qn->bios);
         WARN_ON_ONCE(!bio);
  
@@ -585,6 +587,7 @@ static void throtl_pd_online(struct blkg_policy_data *pd)
         tg_update_has_rules(tg);
  }
  
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  static void blk_throtl_update_limit_valid(struct throtl_data *td)
  {
         struct cgroup_subsys_state *pos_css;
@@ -605,6 +608,11 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
  
         td->limit_valid[LIMIT_LOW] = low_valid;
  }
+#else
+static inline void blk_throtl_update_limit_valid(struct throtl_data *td)
+{
+}
+#endif
  
  static void throtl_upgrade_state(struct throtl_data *td);
  static void throtl_pd_offline(struct blkg_policy_data *pd)
@@ -636,9 +644,6 @@ static struct throtl_grp *
  throtl_rb_first(struct throtl_service_queue *parent_sq)
  {
         struct rb_node *n;
-       /* Service tree is empty */
-       if (!parent_sq->nr_pending)
-               return NULL;
  
         n = rb_first_cached(&parent_sq->pending_tree);
         WARN_ON_ONCE(!n);
@@ -692,29 +697,21 @@ static void tg_service_queue_add(struct throtl_grp *tg)
                                leftmost);
  }
  
-static void __throtl_enqueue_tg(struct throtl_grp *tg)
-{
-       tg_service_queue_add(tg);
-       tg->flags |= THROTL_TG_PENDING;
-       tg->service_queue.parent_sq->nr_pending++;
-}
-
  static void throtl_enqueue_tg(struct throtl_grp *tg)
  {
-       if (!(tg->flags & THROTL_TG_PENDING))
-               __throtl_enqueue_tg(tg);
-}
-
-static void __throtl_dequeue_tg(struct throtl_grp *tg)
-{
-       throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
-       tg->flags &= ~THROTL_TG_PENDING;
+       if (!(tg->flags & THROTL_TG_PENDING)) {
+               tg_service_queue_add(tg);
+               tg->flags |= THROTL_TG_PENDING;
+               tg->service_queue.parent_sq->nr_pending++;
+       }
  }
  
  static void throtl_dequeue_tg(struct throtl_grp *tg)
  {
-       if (tg->flags & THROTL_TG_PENDING)
-               __throtl_dequeue_tg(tg);
+       if (tg->flags & THROTL_TG_PENDING) {
+               throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
+               tg->flags &= ~THROTL_TG_PENDING;
+       }
  }
  
  /* Call with queue lock held */
@@ -817,7 +814,7 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
  static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
                                        unsigned long jiffy_end)
  {
-       tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
+       throtl_set_slice_end(tg, rw, jiffy_end);
         throtl_log(&tg->service_queue,
                    "[%c] extend slice start=%lu end=%lu jiffies=%lu",
                    rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -852,7 +849,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
         /*
          * A bio has been dispatched. Also adjust slice_end. It might happen
          * that initially cgroup limit was very low resulting in high
-        * slice_end, but later limit was bumped up and bio was dispached
+        * slice_end, but later limit was bumped up and bio was dispatched
          * sooner, then we need to reduce slice_end. A high bogus slice_end
          * is bad because it does not allow new slice to start.
          */
@@ -894,13 +891,19 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
  }
  
  static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
-                                 unsigned long *wait)
+                                 u32 iops_limit, unsigned long *wait)
  {
         bool rw = bio_data_dir(bio);
         unsigned int io_allowed;
         unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
         u64 tmp;
  
+       if (iops_limit == UINT_MAX) {
+               if (wait)
+                       *wait = 0;
+               return true;
+       }
+
         jiffy_elapsed = jiffies - tg->slice_start[rw];
  
         /* Round up to the next throttle slice, wait time must be nonzero */
@@ -913,7 +916,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
          * have been trimmed.
          */
  
-       tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd;
+       tmp = (u64)iops_limit * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
  
         if (tmp > UINT_MAX)
@@ -936,13 +939,19 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
  }
  
  static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
-                                unsigned long *wait)
+                                u64 bps_limit, unsigned long *wait)
  {
         bool rw = bio_data_dir(bio);
         u64 bytes_allowed, extra_bytes, tmp;
         unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
         unsigned int bio_size = throtl_bio_data_size(bio);
  
+       if (bps_limit == U64_MAX) {
+               if (wait)
+                       *wait = 0;
+               return true;
+       }
+
         jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
  
         /* Slice has just started. Consider one slice interval */
@@ -951,7 +960,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  
         jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
  
-       tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd;
+       tmp = bps_limit * jiffy_elapsed_rnd;
         do_div(tmp, HZ);
         bytes_allowed = tmp;
  
@@ -963,7 +972,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
  
         /* Calc approx time to dispatch */
         extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
-       jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
+       jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
  
         if (!jiffy_wait)
                 jiffy_wait = 1;
@@ -987,6 +996,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
  {
         bool rw = bio_data_dir(bio);
         unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
+       u64 bps_limit = tg_bps_limit(tg, rw);
+       u32 iops_limit = tg_iops_limit(tg, rw);
  
         /*
          * Currently whole state machine of group depends on first bio
@@ -998,8 +1009,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
  
         /* If tg->bps = -1, then BW is unlimited */
-       if (tg_bps_limit(tg, rw) == U64_MAX &&
-           tg_iops_limit(tg, rw) == UINT_MAX) {
+       if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
                 if (wait)
                         *wait = 0;
                 return true;
@@ -1021,8 +1031,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                                 jiffies + tg->td->throtl_slice);
         }
  
-       if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
-           tg_with_in_iops_limit(tg, bio, &iops_wait)) {
+       if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
+           tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
                 if (wait)
                         *wait = 0;
                 return true;
@@ -1082,7 +1092,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
          * If @tg doesn't currently have any bios queued in the same
          * direction, queueing @bio can change when @tg should be
          * dispatched.  Mark that @tg was empty.  This is automatically
-        * cleaered on the next tg_update_disptime().
+        * cleared on the next tg_update_disptime().
          */
         if (!sq->nr_queued[rw])
                 tg->flags |= THROTL_TG_WAS_EMPTY;
@@ -1175,8 +1185,8 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
  {
         struct throtl_service_queue *sq = &tg->service_queue;
         unsigned int nr_reads = 0, nr_writes = 0;
-       unsigned int max_nr_reads = throtl_grp_quantum*3/4;
-       unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
+       unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
+       unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
         struct bio *bio;
  
         /* Try to dispatch 75% READS and 25% WRITES */
@@ -1209,9 +1219,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
         unsigned int nr_disp = 0;
  
         while (1) {
-               struct throtl_grp *tg = throtl_rb_first(parent_sq);
+               struct throtl_grp *tg;
                 struct throtl_service_queue *sq;
  
+               if (!parent_sq->nr_pending)
+                       break;
+
+               tg = throtl_rb_first(parent_sq);
                 if (!tg)
                         break;
  
@@ -1226,7 +1240,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
                 if (sq->nr_queued[0] || sq->nr_queued[1])
                         tg_update_disptime(tg);
  
-               if (nr_disp >= throtl_quantum)
+               if (nr_disp >= THROTL_QUANTUM)
                         break;
         }
  
@@ -1303,7 +1317,7 @@ again:
                         }
                 }
         } else {
-               /* reached the top-level, queue issueing */
+               /* reached the top-level, queue issuing */
                 queue_work(kthrotld_workqueue, &td->dispatch_work);
         }
  out_unlock:
@@ -1314,8 +1328,8 @@ out_unlock:
   * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
   * @work: work item being executed
   *
- * This function is queued for execution when bio's reach the bio_lists[]
- * of throtl_data->service_queue.  Those bio's are ready and issued by this
+ * This function is queued for execution when bios reach the bio_lists[]
+ * of throtl_data->service_queue.  Those bios are ready and issued by this
   * function.
   */
  static void blk_throtl_dispatch_work_fn(struct work_struct *work)
@@ -1428,8 +1442,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
          * that a group's limit are dropped suddenly and we don't want to
          * account recently dispatched IO with new low rate.
          */
-       throtl_start_new_slice(tg, 0);
-       throtl_start_new_slice(tg, 1);
+       throtl_start_new_slice(tg, READ);
+       throtl_start_new_slice(tg, WRITE);
  
         if (tg->flags & THROTL_TG_PENDING) {
                 tg_update_disptime(tg);
@@ -1674,13 +1688,13 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
                         goto out_finish;
  
                 ret = -EINVAL;
-               if (!strcmp(tok, "rbps"))
+               if (!strcmp(tok, "rbps") && val > 1)
                         v[0] = val;
-               else if (!strcmp(tok, "wbps"))
+               else if (!strcmp(tok, "wbps") && val > 1)
                         v[1] = val;
-               else if (!strcmp(tok, "riops"))
+               else if (!strcmp(tok, "riops") && val > 1)
                         v[2] = min_t(u64, val, UINT_MAX);
-               else if (!strcmp(tok, "wiops"))
+               else if (!strcmp(tok, "wiops") && val > 1)
                         v[3] = min_t(u64, val, UINT_MAX);
                 else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
                         idle_time = val;
@@ -1957,7 +1971,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
         queue_work(kthrotld_workqueue, &td->dispatch_work);
  }
  
-static void throtl_downgrade_state(struct throtl_data *td, int new)
+static void throtl_downgrade_state(struct throtl_data *td)
  {
         td->scale /= 2;
  
@@ -1967,7 +1981,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
                 return;
         }
  
-       td->limit_index = new;
+       td->limit_index = LIMIT_LOW;
         td->low_downgrade_time = jiffies;
  }
  
@@ -2054,7 +2068,7 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
          * cgroups
          */
         if (throtl_hierarchy_can_downgrade(tg))
-               throtl_downgrade_state(tg->td, LIMIT_LOW);
+               throtl_downgrade_state(tg->td);
  
         tg->last_bytes_disp[READ] = 0;
         tg->last_bytes_disp[WRITE] = 0;
@@ -2064,10 +2078,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
  
  static void blk_throtl_update_idletime(struct throtl_grp *tg)
  {
-       unsigned long now = ktime_get_ns() >> 10;
+       unsigned long now;
         unsigned long last_finish_time = tg->last_finish_time;
  
-       if (now <= last_finish_time || last_finish_time == 0 ||
+       if (last_finish_time == 0)
+               return;
+
+       now = ktime_get_ns() >> 10;
+       if (now <= last_finish_time ||
             last_finish_time == tg->checked_last_finish_time)
                 return;
  
@@ -2083,7 +2101,7 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
         unsigned long last_latency[2] = { 0 };
         unsigned long latency[2];
  
-       if (!blk_queue_nonrot(td->queue))
+       if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
                 return;
         if (time_before(jiffies, td->last_calculate_time + HZ))
                 return;
@@ -2230,7 +2248,7 @@ again:
  
                 /*
                  * @bio passed through this layer without being throttled.
-                * Climb up the ladder.  If we''re already at the top, it
+                * Climb up the ladder.  If we're already at the top, it
                  * can be executed directly.
                  */
                 qn = &tg->qnode_on_parent[rw];
@@ -2321,6 +2339,8 @@ void blk_throtl_bio_endio(struct bio *bio)
         if (!blkg)
                 return;
         tg = blkg_to_tg(blkg);
+       if (!tg->td->limit_valid[LIMIT_LOW])
+               return;
  
         finish_time_ns = ktime_get_ns();
         tg->last_finish_time = finish_time_ns >> 10;