Merge tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)
diff --git a/block/bio.c b/block/bio.c

index a3893d8..f92d022 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -722,6 +722,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
                 bio_alloc_cache_prune(cache, -1U);
         }
         free_percpu(bs->cache);
+       bs->cache = NULL;
  }
  
  /**
@@ -1366,10 +1367,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
                 struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
                 struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
                 unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
-               void *src_buf;
+               void *src_buf = bvec_kmap_local(&src_bv);
+               void *dst_buf = bvec_kmap_local(&dst_bv);
  
-               src_buf = bvec_kmap_local(&src_bv);
-               memcpy_to_bvec(&dst_bv, src_buf);
+               memcpy(dst_buf, src_buf, bytes);
+
+               kunmap_local(dst_buf);
                 kunmap_local(src_buf);
  
                 bio_advance_iter_single(src, src_iter, bytes);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index 40161a3..764e740 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1974,12 +1974,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
   */
  void bio_clone_blkg_association(struct bio *dst, struct bio *src)
  {
-       if (src->bi_blkg) {
-               if (dst->bi_blkg)
-                       blkg_put(dst->bi_blkg);
-               blkg_get(src->bi_blkg);
-               dst->bi_blkg = src->bi_blkg;
-       }
+       if (src->bi_blkg)
+               bio_associate_blkg_from_css(dst, bio_blkcg_css(src));
  }
  EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
  
diff --git a/block/blk-core.c b/block/blk-core.c

index 80fa73c..06ff5bb 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -939,7 +939,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
  
         blk_flush_plug(current->plug, false);
  
-       if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
+       if (bio_queue_enter(bio))
                 return 0;
         if (queue_is_mq(q)) {
                 ret = blk_mq_poll(q, cookie, iob, flags);
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c

index 18c68d8..56ed48d 100644 (file)
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
                 container_of(attr, struct blk_ia_range_sysfs_entry, attr);
         struct blk_independent_access_range *iar =
                 container_of(kobj, struct blk_independent_access_range, kobj);
-       ssize_t ret;
  
-       mutex_lock(&iar->queue->sysfs_lock);
-       ret = entry->show(iar, buf);
-       mutex_unlock(&iar->queue->sysfs_lock);
-
-       return ret;
+       return entry->show(iar, buf);
  }
  
  static const struct sysfs_ops blk_ia_range_sysfs_ops = {
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c

index 5b676c7..9568bf8 100644 (file)
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -87,7 +87,17 @@ struct iolatency_grp;
  struct blk_iolatency {
         struct rq_qos rqos;
         struct timer_list timer;
-       atomic_t enabled;
+
+       /*
+        * ->enabled is the master enable switch gating the throttling logic and
+        * inflight tracking. The number of cgroups which have iolat enabled is
+        * tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly
+        * from ->enable_work with the request_queue frozen. For details, see
+        * blkiolatency_enable_work_fn().
+        */
+       bool enabled;
+       atomic_t enable_cnt;
+       struct work_struct enable_work;
  };
  
  static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
         return container_of(rqos, struct blk_iolatency, rqos);
  }
  
-static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
-{
-       return atomic_read(&blkiolat->enabled) > 0;
-}
-
  struct child_latency_info {
         spinlock_t lock;
  
@@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
         struct blkcg_gq *blkg = bio->bi_blkg;
         bool issue_as_root = bio_issue_as_root_blkg(bio);
  
-       if (!blk_iolatency_enabled(blkiolat))
+       if (!blkiolat->enabled)
                 return;
  
         while (blkg && blkg->parent) {
@@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
         u64 window_start;
         u64 now;
         bool issue_as_root = bio_issue_as_root_blkg(bio);
-       bool enabled = false;
         int inflight = 0;
  
         blkg = bio->bi_blkg;
@@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
         if (!iolat)
                 return;
  
-       enabled = blk_iolatency_enabled(iolat->blkiolat);
-       if (!enabled)
+       if (!iolat->blkiolat->enabled)
                 return;
  
         now = ktime_to_ns(ktime_get());
@@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
         struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
  
         del_timer_sync(&blkiolat->timer);
+       flush_work(&blkiolat->enable_work);
         blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
         kfree(blkiolat);
  }
@@ -716,6 +720,44 @@ next:
         rcu_read_unlock();
  }
  
+/**
+ * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
+ * @work: enable_work of the blk_iolatency of interest
+ *
+ * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
+ * is relatively expensive as it involves walking up the hierarchy twice for
+ * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
+ * want to disable the in-flight tracking.
+ *
+ * We have to make sure that the counting is balanced - we don't want to leak
+ * the in-flight counts by disabling accounting in the completion path while IOs
+ * are in flight. This is achieved by ensuring that no IO is in flight by
+ * freezing the queue while flipping ->enabled. As this requires a sleepable
+ * context, ->enabled flipping is punted to this work function.
+ */
+static void blkiolatency_enable_work_fn(struct work_struct *work)
+{
+       struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
+                                                     enable_work);
+       bool enabled;
+
+       /*
+        * There can only be one instance of this function running for @blkiolat
+        * and it's guaranteed to be executed at least once after the latest
+        * ->enable_cnt modification. Acting on the latest ->enable_cnt is
+        * sufficient.
+        *
+        * Also, we know @blkiolat is safe to access as ->enable_work is flushed
+        * in blkcg_iolatency_exit().
+        */
+       enabled = atomic_read(&blkiolat->enable_cnt);
+       if (enabled != blkiolat->enabled) {
+               blk_mq_freeze_queue(blkiolat->rqos.q);
+               blkiolat->enabled = enabled;
+               blk_mq_unfreeze_queue(blkiolat->rqos.q);
+       }
+}
+
  int blk_iolatency_init(struct request_queue *q)
  {
         struct blk_iolatency *blkiolat;
@@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q)
         }
  
         timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+       INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
  
         return 0;
  }
  
-/*
- * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
- * return 0.
- */
-static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
  {
         struct iolatency_grp *iolat = blkg_to_lat(blkg);
+       struct blk_iolatency *blkiolat = iolat->blkiolat;
         u64 oldval = iolat->min_lat_nsec;
  
         iolat->min_lat_nsec = val;
@@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
         iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
                                     BLKIOLATENCY_MAX_WIN_SIZE);
  
-       if (!oldval && val)
-               return 1;
+       if (!oldval && val) {
+               if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
+                       schedule_work(&blkiolat->enable_work);
+       }
         if (oldval && !val) {
                 blkcg_clear_delay(blkg);
-               return -1;
+               if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
+                       schedule_work(&blkiolat->enable_work);
         }
-       return 0;
  }
  
  static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
         u64 lat_val = 0;
         u64 oldval;
         int ret;
-       int enable = 0;
  
         ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
         if (ret)
@@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
         blkg = ctx.blkg;
         oldval = iolat->min_lat_nsec;
  
-       enable = iolatency_set_min_lat_nsec(blkg, lat_val);
-       if (enable) {
-               if (!blk_get_queue(blkg->q)) {
-                       ret = -ENODEV;
-                       goto out;
-               }
-
-               blkg_get(blkg);
-       }
-
-       if (oldval != iolat->min_lat_nsec) {
+       iolatency_set_min_lat_nsec(blkg, lat_val);
+       if (oldval != iolat->min_lat_nsec)
                 iolatency_clear_scaling(blkg);
-       }
-
         ret = 0;
  out:
         blkg_conf_finish(&ctx);
-       if (ret == 0 && enable) {
-               struct iolatency_grp *tmp = blkg_to_lat(blkg);
-               struct blk_iolatency *blkiolat = tmp->blkiolat;
-
-               blk_mq_freeze_queue(blkg->q);
-
-               if (enable == 1)
-                       atomic_inc(&blkiolat->enabled);
-               else if (enable == -1)
-                       atomic_dec(&blkiolat->enabled);
-               else
-                       WARN_ON_ONCE(1);
-
-               blk_mq_unfreeze_queue(blkg->q);
-
-               blkg_put(blkg);
-               blk_put_queue(blkg->q);
-       }
         return ret ?: nbytes;
  }
  
@@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
  {
         struct iolatency_grp *iolat = pd_to_lat(pd);
         struct blkcg_gq *blkg = lat_to_blkg(iolat);
-       struct blk_iolatency *blkiolat = iolat->blkiolat;
-       int ret;
  
-       ret = iolatency_set_min_lat_nsec(blkg, 0);
-       if (ret == 1)
-               atomic_inc(&blkiolat->enabled);
-       if (ret == -1)
-               atomic_dec(&blkiolat->enabled);
+       iolatency_set_min_lat_nsec(blkg, 0);
         iolatency_clear_scaling(blkg);
  }
  
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index 68ac23d..2dcd738 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -228,7 +228,6 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                 BUG_ON(real_tag >= tags->nr_tags);
                 sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
         } else {
-               BUG_ON(tag >= tags->nr_reserved_tags);
                 sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
         }
  }
diff --git a/block/blk-mq.c b/block/blk-mq.c

index ae116b7..30e4bdc 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -133,7 +133,8 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv,
  {
         struct mq_inflight *mi = priv;
  
-       if ((!mi->part->bd_partno || rq->part == mi->part) &&
+       if (rq->part && blk_do_io_stat(rq) &&
+           (!mi->part->bd_partno || rq->part == mi->part) &&
             blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
                 mi->inflight[rq_data_dir(rq)]++;
  
@@ -2174,8 +2175,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q)
   */
  static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
  {
-       struct blk_mq_hw_ctx *hctx;
-
+       struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
         /*
          * If the IO scheduler does not respect hardware queues when
          * dispatching, we just don't bother with multiple HW queues and
@@ -2183,8 +2183,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
          * just causes lock contention inside the scheduler and pointless cache
          * bouncing.
          */
-       hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
-                                    raw_smp_processor_id());
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
+
         if (!blk_mq_hctx_stopped(hctx))
                 return hctx;
         return NULL;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index c007d58..a24d407 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -105,6 +105,10 @@ typedef u16 blk_short_t;
  /* hack for device mapper, don't use elsewhere: */
  #define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
  
+/*
+ * BLK_STS_AGAIN should only be returned if RQF_NOWAIT is set
+ * and the bio would block (cf bio_wouldblock_error())
+ */
  #define BLK_STS_AGAIN          ((__force blk_status_t)12)
  
  /*
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Jun 2022 17:14:48 +0000 (10:14 -0700)
block/bio.c		patch \| blob \| history
block/blk-cgroup.c		patch \| blob \| history
block/blk-core.c		patch \| blob \| history
block/blk-ia-ranges.c		patch \| blob \| history
block/blk-iolatency.c		patch \| blob \| history
block/blk-mq-tag.c		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
include/linux/blk_types.h		patch \| blob \| history