diff --git a/block/blk-mq.c b/block/blk-mq.c
index f285a91..d4d7c1c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -41,7 +41,7 @@
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@@ -567,80 +567,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
-/*
- * Softirq action handler - move entries to local list and loop over them
- * while passing them to the queue registered handler.
- */
-static __latent_entropy void blk_done_softirq(struct softirq_action *h)
+static void blk_complete_reqs(struct llist_head *list)
 {
-       struct list_head *cpu_list, local_list;
-
-       local_irq_disable();
-       cpu_list = this_cpu_ptr(&blk_cpu_done);
-       list_replace_init(cpu_list, &local_list);
-       local_irq_enable();
-
-       while (!list_empty(&local_list)) {
-               struct request *rq;
+       struct llist_node *entry = llist_reverse_order(llist_del_all(list));
+       struct request *rq, *next;
 
-               rq = list_entry(local_list.next, struct request, ipi_list);
-               list_del_init(&rq->ipi_list);
+       llist_for_each_entry_safe(rq, next, entry, ipi_list)
                rq->q->mq_ops->complete(rq);
-       }
 }
 
-static void blk_mq_trigger_softirq(struct request *rq)
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
-       struct list_head *list;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       list = this_cpu_ptr(&blk_cpu_done);
-       list_add_tail(&rq->ipi_list, list);
-
-       /*
-        * If the list only contains our just added request, signal a raise of
-        * the softirq.  If there are already entries there, someone already
-        * raised the irq but it hasn't run yet.
-        */
-       if (list->next == &rq->ipi_list)
-               raise_softirq_irqoff(BLOCK_SOFTIRQ);
-       local_irq_restore(flags);
+       blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
 }
 
 static int blk_softirq_cpu_dead(unsigned int cpu)
 {
-       /*
-        * If a CPU goes away, splice its entries to the current CPU
-        * and trigger a run of the softirq
-        */
-       local_irq_disable();
-       list_splice_init(&per_cpu(blk_cpu_done, cpu),
-                        this_cpu_ptr(&blk_cpu_done));
-       raise_softirq_irqoff(BLOCK_SOFTIRQ);
-       local_irq_enable();
-
+       blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
        return 0;
 }
 
-
 static void __blk_mq_complete_request_remote(void *data)
 {
-       struct request *rq = data;
-
-       /*
-        * For most of single queue controllers, there is only one irq vector
-        * for handling I/O completion, and the only irq's affinity is set
-        * to all possible CPUs.  On most of ARCHs, this affinity means the irq
-        * is handled on one specific CPU.
-        *
-        * So complete I/O requests in softirq context in case of single queue
-        * devices to avoid degrading I/O performance due to irqsoff latency.
-        */
-       if (rq->q->nr_hw_queues == 1)
-               blk_mq_trigger_softirq(rq);
-       else
-               rq->q->mq_ops->complete(rq);
+       __raise_softirq_irqoff(BLOCK_SOFTIRQ);
 }
 
 static inline bool blk_mq_complete_need_ipi(struct request *rq)
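
The rewrite above replaces the per-CPU list_head, which needed interrupts disabled around every access, with a lock-free llist. As a minimal illustration of the pattern (a userspace C11 sketch, not the kernel's <linux/llist.h> implementation; push() and drain_fifo() are invented names), producers push nodes with a compare-and-swap loop while the consumer detaches the whole list in one atomic exchange and reverses it to restore completion order, which is what llist_add(), llist_del_all() and llist_reverse_order() do in the new code:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *next;
};

struct lhead {
	_Atomic(struct node *) first;
};

/*
 * Lock-free push. Returns true only if the list was empty beforehand,
 * mirroring llist_add(): the one caller that sees true is responsible
 * for signalling the consumer (the IPI or softirq in the patch above).
 */
static bool push(struct lhead *h, struct node *n)
{
	struct node *old = atomic_load(&h->first);

	do {
		n->next = old;	/* 'old' is refreshed on CAS failure */
	} while (!atomic_compare_exchange_weak(&h->first, &old, n));
	return old == NULL;
}

/*
 * Detach everything in one exchange (llist_del_all()) and reverse the
 * LIFO chain in place (llist_reverse_order()) so entries come back in
 * the order they were pushed.
 */
static struct node *drain_fifo(struct lhead *h)
{
	struct node *n = atomic_exchange(&h->first, NULL);
	struct node *rev = NULL;

	while (n) {
		struct node *next = n->next;

		n->next = rev;
		rev = n;
		n = next;
	}
	return rev;
}

The empty-to-non-empty transition reported by push() is what lets blk_mq_complete_send_ipi() and blk_mq_raise_softirq() below send at most one IPI or softirq per batch of completions.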
@@ -669,6 +618,30 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
        return cpu_online(rq->mq_ctx->cpu);
 }
 
+static void blk_mq_complete_send_ipi(struct request *rq)
+{
+       struct llist_head *list;
+       unsigned int cpu;
+
+       cpu = rq->mq_ctx->cpu;
+       list = &per_cpu(blk_cpu_done, cpu);
+       if (llist_add(&rq->ipi_list, list)) {
+               INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
+               smp_call_function_single_async(cpu, &rq->csd);
+       }
+}
+
+static void blk_mq_raise_softirq(struct request *rq)
+{
+       struct llist_head *list;
+
+       preempt_disable();
+       list = this_cpu_ptr(&blk_cpu_done);
+       if (llist_add(&rq->ipi_list, list))
+               raise_softirq(BLOCK_SOFTIRQ);
+       preempt_enable();
+}
+
 bool blk_mq_complete_request_remote(struct request *rq)
 {
        WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
@@ -681,15 +654,15 @@ bool blk_mq_complete_request_remote(struct request *rq)
                return false;
 
        if (blk_mq_complete_need_ipi(rq)) {
-               INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
-               smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
-       } else {
-               if (rq->q->nr_hw_queues > 1)
-                       return false;
-               blk_mq_trigger_softirq(rq);
+               blk_mq_complete_send_ipi(rq);
+               return true;
        }
 
-       return true;
+       if (rq->q->nr_hw_queues == 1) {
+               blk_mq_raise_softirq(rq);
+               return true;
+       }
+       return false;
 }
 EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
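
With these changes, blk_mq_complete_request_remote() has three outcomes: queue the request on the target CPU's llist and send an IPI, push it onto the local llist and raise BLOCK_SOFTIRQ (the single-hw-queue case), or return false so the caller completes the request in the current context. For reference, the non-remote wrapper lives elsewhere in this file, untouched by this patch, and looks roughly like:

void blk_mq_complete_request(struct request *rq)
{
	if (!blk_mq_complete_request_remote(rq))
		rq->q->mq_ops->complete(rq);
}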
 
@@ -1646,6 +1619,42 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queue);
 
+/*
+ * Is the request queue handled by an IO scheduler that does not respect
+ * hardware queues when dispatching?
+ */
+static bool blk_mq_has_sqsched(struct request_queue *q)
+{
+       struct elevator_queue *e = q->elevator;
+
+       if (e && e->type->ops.dispatch_request &&
+           !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
+               return true;
+       return false;
+}
+
+/*
+ * Return the preferred queue to dispatch from (if any) for a non-mq
+ * aware IO scheduler.
+ */
+static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       /*
+        * If the IO scheduler does not respect hardware queues when
+        * dispatching, we just don't bother with multiple HW queues and
+        * dispatch from the hctx for the current CPU since running multiple
+        * queues just causes lock contention inside the scheduler and
+        * pointless cache bouncing.
+        */
+       hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
+                                    raw_smp_processor_id());
+       if (!blk_mq_hctx_stopped(hctx))
+               return hctx;
+       return NULL;
+}
+
 /**
  * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
  * @q: Pointer to the request queue to run.
@@ -1653,14 +1662,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queue);
  */
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
 {
-       struct blk_mq_hw_ctx *hctx;
+       struct blk_mq_hw_ctx *hctx, *sq_hctx;
        int i;
 
+       sq_hctx = NULL;
+       if (blk_mq_has_sqsched(q))
+               sq_hctx = blk_mq_get_sq_hctx(q);
        queue_for_each_hw_ctx(q, hctx, i) {
                if (blk_mq_hctx_stopped(hctx))
                        continue;
-
-               blk_mq_run_hw_queue(hctx, async);
+               /*
+                * Dispatch from this hctx either if there's no hctx preferred
+                * by the IO scheduler or if it has requests that bypass the
+                * scheduler.
+                */
+               if (!sq_hctx || sq_hctx == hctx ||
+                   !list_empty_careful(&hctx->dispatch))
+                       blk_mq_run_hw_queue(hctx, async);
        }
 }
 EXPORT_SYMBOL(blk_mq_run_hw_queues);
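
A scheduler opts out of this single-hctx restriction by advertising ELEVATOR_F_MQ_AWARE in its elevator_type, which makes blk_mq_has_sqsched() above return false. A hypothetical declaration (my_sched_dispatch and the "my-sched" name are invented for illustration; the field names follow include/linux/elevator.h of this era) might look like:

static struct elevator_type my_sched_elevator = {
	.ops = {
		.dispatch_request = my_sched_dispatch,	/* hypothetical */
		/* ... remaining mq ops ... */
	},
	/* Tells blk-mq this scheduler dispatches per hardware queue, so
	 * blk_mq_run_hw_queues() keeps running every hctx. */
	.elevator_features = ELEVATOR_F_MQ_AWARE,
	.elevator_name = "my-sched",
	.elevator_owner = THIS_MODULE,
};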
@@ -1672,14 +1690,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
  */
 void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
 {
-       struct blk_mq_hw_ctx *hctx;
+       struct blk_mq_hw_ctx *hctx, *sq_hctx;
        int i;
 
+       sq_hctx = NULL;
+       if (blk_mq_has_sqsched(q))
+               sq_hctx = blk_mq_get_sq_hctx(q);
        queue_for_each_hw_ctx(q, hctx, i) {
                if (blk_mq_hctx_stopped(hctx))
                        continue;
-
-               blk_mq_delay_run_hw_queue(hctx, msecs);
+               /*
+                * Dispatch from this hctx either if there's no hctx preferred
+                * by the IO scheduler or if it has requests that bypass the
+                * scheduler.
+                */
+               if (!sq_hctx || sq_hctx == hctx ||
+                   !list_empty_careful(&hctx->dispatch))
+                       blk_mq_delay_run_hw_queue(hctx, msecs);
        }
 }
 EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
@@ -2128,7 +2155,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
  */
 blk_qc_t blk_mq_submit_bio(struct bio *bio)
 {
-       struct request_queue *q = bio->bi_disk->queue;
+       struct request_queue *q = bio->bi_bdev->bd_disk->queue;
        const int is_sync = op_is_sync(bio->bi_opf);
        const int is_flush_fua = op_is_flush(bio->bi_opf);
        struct blk_mq_alloc_data data = {
@@ -2653,7 +2680,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
                goto free_hctx;
 
        atomic_set(&hctx->nr_active, 0);
-       atomic_set(&hctx->elevator_queued, 0);
        if (node == NUMA_NO_NODE)
                node = set->numa_node;
        hctx->numa_node = node;
@@ -3904,7 +3930,7 @@ static int __init blk_mq_init(void)
        int i;
 
        for_each_possible_cpu(i)
-               INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+               init_llist_head(&per_cpu(blk_cpu_done, i));
        open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
 
        cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,