diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 5639921..f10c2a0 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -130,6 +130,20 @@ static u8 dd_rq_ioclass(struct request *rq)
        return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
 }
 
+/*
+ * get the request before `rq' in sector-sorted order
+ */
+static inline struct request *
+deadline_earlier_request(struct request *rq)
+{
+       struct rb_node *node = rb_prev(&rq->rb_node);
+
+       if (node)
+               return rb_entry_rq(node);
+
+       return NULL;
+}
+
 /*
  * get the request after `rq' in sector-sorted order
  */
@@ -277,6 +291,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
        return 0;
 }
 
+/*
+ * Check if rq has a sequential request preceding it.
+ */
+static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
+{
+       struct request *prev = deadline_earlier_request(rq);
+
+       if (!prev)
+               return false;
+
+       return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
+}
+
+/*
+ * Skip all write requests that are sequential from @rq, even if we cross
+ * a zone boundary.
+ */
+static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
+                                               struct request *rq)
+{
+       sector_t pos = blk_rq_pos(rq);
+       sector_t skipped_sectors = 0;
+
+       while (rq) {
+               if (blk_rq_pos(rq) != pos + skipped_sectors)
+                       break;
+               skipped_sectors += blk_rq_sectors(rq);
+               rq = deadline_latter_request(rq);
+       }
+
+       return rq;
+}
+
 /*
  * For the specified data direction, return the next request to
  * dispatch using arrival ordered lists.
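The two helpers added above rely purely on sector arithmetic: a request is treated as sequential when it starts exactly where its predecessor ends, and the skip loop keeps walking the sector-sorted tree while that stays true. The standalone sketch below models the same arithmetic over a sorted array instead of the scheduler's rb-tree; struct fake_rq, is_seq() and skip_seq_run() are illustrative names only, not kernel APIs.

/*
 * Standalone sketch (not kernel code): the contiguity rule used by
 * deadline_is_seq_write() and the walk done by deadline_skip_seq_writes(),
 * modeled over a sector-sorted array instead of the rb-tree.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
	unsigned long long pos;		/* start sector */
	unsigned int sectors;		/* length in sectors */
};

/* Sequential means: the request starts where the previous one ends. */
static bool is_seq(const struct fake_rq *prev, const struct fake_rq *rq)
{
	return prev->pos + prev->sectors == rq->pos;
}

/*
 * Starting at index @start, skip every request that continues the
 * contiguous run and return the index of the first one that does not.
 */
static int skip_seq_run(const struct fake_rq *rqs, int n, int start)
{
	unsigned long long pos = rqs[start].pos;
	unsigned long long skipped = 0;
	int i = start;

	while (i < n && rqs[i].pos == pos + skipped) {
		skipped += rqs[i].sectors;
		i++;
	}
	return i;
}

int main(void)
{
	struct fake_rq rqs[] = {
		{ 0, 8 }, { 8, 8 }, { 16, 8 },	/* one contiguous run */
		{ 64, 8 },			/* gap: a new run starts */
	};

	printf("rqs[1] continues rqs[0]: %d\n", is_seq(&rqs[0], &rqs[1]));
	printf("first index after the run at 0: %d\n",
	       skip_seq_run(rqs, 4, 0));	/* 3 */
	return 0;
}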
@@ -297,11 +344,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 
        /*
         * Look for a write request that can be dispatched, that is one with
-        * an unlocked target zone.
+        * an unlocked target zone. For some HDDs, breaking a sequential
+        * write stream can lead to lower throughput, so make sure to preserve
+        * sequential write streams, even if that stream crosses into the next
+        * zones and these zones are unlocked.
         */
        spin_lock_irqsave(&dd->zone_lock, flags);
        list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
-               if (blk_req_can_dispatch_to_zone(rq))
+               if (blk_req_can_dispatch_to_zone(rq) &&
+                   (blk_queue_nonrot(rq->q) ||
+                    !deadline_is_seq_write(dd, rq)))
                        goto out;
        }
        rq = NULL;
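In the FIFO path above, a write is now dispatched only when its target zone is unlocked and, on a rotational queue, when it does not merely continue a sequential stream started by an earlier request. A minimal sketch of that predicate, with illustrative parameter names standing in for the blk_req_can_dispatch_to_zone(), blk_queue_nonrot() and deadline_is_seq_write() checks:

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch of the FIFO dispatch filter (illustrative names only): a write is
 * eligible when its zone is unlocked, and on rotational queues only when it
 * is the head of its sequential run rather than a continuation of one.
 */
static bool may_dispatch_write(bool zone_unlocked, bool queue_nonrot,
			       bool continues_seq_stream)
{
	return zone_unlocked && (queue_nonrot || !continues_seq_stream);
}

int main(void)
{
	/* HDD (rotational) queue, unlocked zone, mid-stream write: hold it. */
	printf("%d\n", may_dispatch_write(true, false, true));	/* 0 */
	/* Same write on a non-rotational queue: dispatch it. */
	printf("%d\n", may_dispatch_write(true, true, true));	/* 1 */
	return 0;
}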
@@ -331,13 +383,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 
        /*
         * Look for a write request that can be dispatched, that is one with
-        * an unlocked target zone.
+        * an unlocked target zone. For some HDDs, breaking a sequential
+        * write stream can lead to lower throughput, so make sure to preserve
+        * sequential write streams, even if that stream crosses into the next
+        * zones and these zones are unlocked.
         */
        spin_lock_irqsave(&dd->zone_lock, flags);
        while (rq) {
                if (blk_req_can_dispatch_to_zone(rq))
                        break;
-               rq = deadline_latter_request(rq);
+               if (blk_queue_nonrot(rq->q))
+                       rq = deadline_latter_request(rq);
+               else
+                       rq = deadline_skip_seq_writes(dd, rq);
        }
        spin_unlock_irqrestore(&dd->zone_lock, flags);
 
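The comment above notes that a sequential stream is preserved even when it crosses into the next zone. That follows from the helpers checking only LBA contiguity, never zone membership. The sketch below uses an arbitrary example zone size (ZONE_SECTORS is a made-up value, not a kernel constant) to show two contiguous writes that the skip loop would treat as one run despite straddling a zone boundary:

#include <stdio.h>

#define ZONE_SECTORS 256ULL	/* hypothetical zone size, example only */

int main(void)
{
	/* Two contiguous writes straddling the zone 0 / zone 1 boundary. */
	unsigned long long a_pos = 248, a_len = 8;
	unsigned long long b_pos = 256, b_len = 8;

	/* Same contiguity test as deadline_is_seq_write(): end == next start. */
	int contiguous = (a_pos + a_len == b_pos);

	printf("contiguous: %d, run spans zones %llu..%llu\n", contiguous,
	       a_pos / ZONE_SECTORS, (b_pos + b_len - 1) / ZONE_SECTORS);
	/* Prints "contiguous: 1, run spans zones 0..1". */
	return 0;
}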
@@ -789,6 +847,18 @@ static void dd_prepare_request(struct request *rq)
        rq->elv.priv[0] = NULL;
 }
 
+static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
+{
+       struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+       enum dd_prio p;
+
+       for (p = 0; p <= DD_PRIO_MAX; p++)
+               if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
+                       return true;
+
+       return false;
+}
+
 /*
  * Callback from inside blk_mq_free_request().
  *
@@ -828,9 +898,10 @@ static void dd_finish_request(struct request *rq)
 
                spin_lock_irqsave(&dd->zone_lock, flags);
                blk_req_zone_write_unlock(rq);
-               if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
-                       blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
                spin_unlock_irqrestore(&dd->zone_lock, flags);
+
+               if (dd_has_write_work(rq->mq_hctx))
+                       blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
        }
 }