diff --git a/block/blk-core.c b/block/blk-core.c
index 048be4a..7c54c19 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -333,11 +333,13 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
        del_timer_sync(&q->timeout);
+       cancel_work_sync(&q->timeout_work);
 
        if (q->mq_ops) {
                struct blk_mq_hw_ctx *hctx;
                int i;
 
+               cancel_delayed_work_sync(&q->requeue_work);
                queue_for_each_hw_ctx(q, hctx, i)
                        cancel_delayed_work_sync(&hctx->run_work);
        } else {
@@ -346,6 +348,37 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+/**
+ * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
+ * @q: request queue pointer
+ *
+ * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
+ * set and 1 if the flag was already set.
+ */
+int blk_set_preempt_only(struct request_queue *q)
+{
+       unsigned long flags;
+       int res;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       return res;
+}
+EXPORT_SYMBOL_GPL(blk_set_preempt_only);
+
+void blk_clear_preempt_only(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+       wake_up_all(&q->mq_freeze_wq);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
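The two exported helpers above only toggle QUEUE_FLAG_PREEMPT_ONLY; for the flag to take effect, the caller must also make it visible to concurrent blk_queue_enter() callers. A minimal sketch of that pairing, assuming a freeze/unfreeze cycle (via the standard blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers) is acceptable in the caller's context; illustrative only, and the example_* names are hypothetical:

	/* Admit only RQF_PREEMPT requests from now on. */
	static void example_quiesce_queue(struct request_queue *q)
	{
		blk_set_preempt_only(q);

		/*
		 * Freeze and immediately unfreeze the queue so that callers that
		 * entered before the flag was set have drained and new callers of
		 * blk_queue_enter() are guaranteed to observe PREEMPT_ONLY.
		 */
		blk_mq_freeze_queue(q);
		blk_mq_unfreeze_queue(q);
	}

	/* Admit regular requests again and wake up waiters in blk_queue_enter(). */
	static void example_resume_queue(struct request_queue *q)
	{
		blk_clear_preempt_only(q);
	}
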
+
 /**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
  * @q: The queue to run
@@ -610,6 +643,9 @@ void blk_set_queue_dying(struct request_queue *q)
                }
                spin_unlock_irq(q->queue_lock);
        }
+
+       /* Make blk_queue_enter() reexamine the DYING flag. */
+       wake_up_all(&q->mq_freeze_wq);
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
@@ -718,7 +754,7 @@ static void free_request_size(void *element, void *data)
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
                gfp_t gfp_mask)
 {
-       if (unlikely(rl->rq_pool))
+       if (unlikely(rl->rq_pool) || q->mq_ops)
                return 0;
 
        rl->q = q;
@@ -760,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, bool nowait)
+/**
+ * blk_queue_enter() - try to increase q->q_usage_counter
+ * @q: request queue pointer
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ */
+int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
+       const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+
        while (true) {
+               bool success = false;
                int ret;
 
-               if (percpu_ref_tryget_live(&q->q_usage_counter))
+               rcu_read_lock_sched();
+               if (percpu_ref_tryget_live(&q->q_usage_counter)) {
+                       /*
+                        * The code that sets the PREEMPT_ONLY flag is
+                        * responsible for ensuring that the flag is globally
+                        * visible before the queue is unfrozen.
+                        */
+                       if (preempt || !blk_queue_preempt_only(q)) {
+                               success = true;
+                       } else {
+                               percpu_ref_put(&q->q_usage_counter);
+                       }
+               }
+               rcu_read_unlock_sched();
+
+               if (success)
                        return 0;
 
-               if (nowait)
+               if (flags & BLK_MQ_REQ_NOWAIT)
                        return -EBUSY;
 
                /*
@@ -781,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
                smp_rmb();
 
                ret = wait_event_interruptible(q->mq_freeze_wq,
-                               !atomic_read(&q->mq_freeze_depth) ||
+                               (atomic_read(&q->mq_freeze_depth) == 0 &&
+                                (preempt || !blk_queue_preempt_only(q))) ||
                                blk_queue_dying(q));
                if (blk_queue_dying(q))
                        return -ENODEV;
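
For reference, a hedged sketch of how a caller would use the flags-based interface; BLK_MQ_REQ_NOWAIT turns the potential sleep into an error return (the example_* name is hypothetical):

	static int example_issue_nonblocking(struct request_queue *q)
	{
		int ret;

		/*
		 * Returns -EBUSY instead of sleeping when the queue is frozen,
		 * dying or marked preempt-only.
		 */
		ret = blk_queue_enter(q, BLK_MQ_REQ_NOWAIT);
		if (ret)
			return ret;

		/* ... submit work against @q here ... */

		blk_queue_exit(q);
		return 0;
	}
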
@@ -844,6 +904,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        setup_timer(&q->backing_dev_info->laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);
        setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+       INIT_WORK(&q->timeout_work, NULL);
        INIT_LIST_HEAD(&q->queue_head);
        INIT_LIST_HEAD(&q->timeout_list);
        INIT_LIST_HEAD(&q->icq_list);
@@ -1154,7 +1215,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
  * @rl: request list to allocate from
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLK_MQ_REQ_* flags
  *
  * Get a free request from @q.  This function may fail under memory
  * pressure or if @q is dead.
@@ -1164,7 +1225,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *__get_request(struct request_list *rl, unsigned int op,
-               struct bio *bio, gfp_t gfp_mask)
+                                    struct bio *bio, blk_mq_req_flags_t flags)
 {
        struct request_queue *q = rl->q;
        struct request *rq;
@@ -1173,6 +1234,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
        struct io_cq *icq = NULL;
        const bool is_sync = op_is_sync(op);
        int may_queue;
+       gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+                        __GFP_DIRECT_RECLAIM;
        req_flags_t rq_flags = RQF_ALLOCED;
 
        lockdep_assert_held(q->queue_lock);
@@ -1255,6 +1318,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
        blk_rq_set_rl(rq, rl);
        rq->cmd_flags = op;
        rq->rq_flags = rq_flags;
+       if (flags & BLK_MQ_REQ_PREEMPT)
+               rq->rq_flags |= RQF_PREEMPT;
 
        /* init elvpriv */
        if (rq_flags & RQF_ELVPRIV) {
@@ -1333,7 +1398,7 @@ rq_starved:
  * @q: request_queue to allocate request from
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLK_MQ_REQ_* flags.
  *
 * Get a free request from @q.  Unless %BLK_MQ_REQ_NOWAIT is set in @flags,
 * this function keeps retrying under memory pressure and fails iff @q is dead.
@@ -1343,7 +1408,7 @@ rq_starved:
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *get_request(struct request_queue *q, unsigned int op,
-               struct bio *bio, gfp_t gfp_mask)
+                                  struct bio *bio, blk_mq_req_flags_t flags)
 {
        const bool is_sync = op_is_sync(op);
        DEFINE_WAIT(wait);
@@ -1355,7 +1420,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 
        rl = blk_get_rl(q, bio);        /* transferred to @rq on success */
 retry:
-       rq = __get_request(rl, op, bio, gfp_mask);
+       rq = __get_request(rl, op, bio, flags);
        if (!IS_ERR(rq))
                return rq;
 
@@ -1364,7 +1429,7 @@ retry:
                return ERR_PTR(-EAGAIN);
        }
 
-       if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
+       if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return rq;
        }
@@ -1391,20 +1456,28 @@ retry:
        goto retry;
 }
 
+/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */
 static struct request *blk_old_get_request(struct request_queue *q,
-                                          unsigned int op, gfp_t gfp_mask)
+                               unsigned int op, blk_mq_req_flags_t flags)
 {
        struct request *rq;
+       gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+                        __GFP_DIRECT_RECLAIM;
+       int ret = 0;
 
        WARN_ON_ONCE(q->mq_ops);
 
        /* create ioc upfront */
        create_io_context(gfp_mask, q->node);
 
+       ret = blk_queue_enter(q, flags);
+       if (ret)
+               return ERR_PTR(ret);
        spin_lock_irq(q->queue_lock);
-       rq = get_request(q, op, NULL, gfp_mask);
+       rq = get_request(q, op, NULL, flags);
        if (IS_ERR(rq)) {
                spin_unlock_irq(q->queue_lock);
+               blk_queue_exit(q);
                return rq;
        }
 
@@ -1415,25 +1488,40 @@ static struct request *blk_old_get_request(struct request_queue *q,
        return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-                               gfp_t gfp_mask)
+/**
+ * blk_get_request_flags - allocate a request
+ * @q: request queue to allocate a request for
+ * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
+ * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
+ */
+struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
+                                     blk_mq_req_flags_t flags)
 {
        struct request *req;
 
+       WARN_ON_ONCE(op & REQ_NOWAIT);
+       WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
+
        if (q->mq_ops) {
-               req = blk_mq_alloc_request(q, op,
-                       (gfp_mask & __GFP_DIRECT_RECLAIM) ?
-                               0 : BLK_MQ_REQ_NOWAIT);
+               req = blk_mq_alloc_request(q, op, flags);
                if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
                        q->mq_ops->initialize_rq_fn(req);
        } else {
-               req = blk_old_get_request(q, op, gfp_mask);
+               req = blk_old_get_request(q, op, flags);
                if (!IS_ERR(req) && q->initialize_rq_fn)
                        q->initialize_rq_fn(req);
        }
 
        return req;
 }
+EXPORT_SYMBOL(blk_get_request_flags);
+
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+                               gfp_t gfp_mask)
+{
+       return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
+                                    0 : BLK_MQ_REQ_NOWAIT);
+}
 EXPORT_SYMBOL(blk_get_request);
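
As a usage illustration (not part of this change), a caller that must not sleep could allocate and issue a passthrough request roughly as follows; the example_* helper is hypothetical and error handling is kept to a minimum (blk_execute_rq(), blk_put_request() and REQ_OP_DRV_OUT are the existing kernel interfaces):

	static int example_issue_passthrough(struct request_queue *q)
	{
		struct request *rq;

		/* Fail immediately instead of waiting for a free request. */
		rq = blk_get_request_flags(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_NOWAIT);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		/* ... set up the passthrough command/payload here ... */

		blk_execute_rq(q, NULL, rq, false);
		blk_put_request(rq);
		return 0;
	}
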
 
 /**
@@ -1576,6 +1664,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
                blk_free_request(rl, req);
                freed_request(rl, sync, rq_flags);
                blk_put_rl(rl);
+               blk_queue_exit(q);
        }
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1857,8 +1946,10 @@ get_rq:
         * Grab a free request. This might sleep but cannot fail.
         * Returns with the queue unlocked.
         */
-       req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
+       blk_queue_enter_live(q);
+       req = get_request(q, bio->bi_opf, bio, 0);
        if (IS_ERR(req)) {
+               blk_queue_exit(q);
                __wbt_done(q->rq_wb, wb_acct);
                if (PTR_ERR(req) == -ENOMEM)
                        bio->bi_status = BLK_STS_RESOURCE;
@@ -2200,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio)
        current->bio_list = bio_list_on_stack;
        do {
                struct request_queue *q = bio->bi_disk->queue;
+               blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
+                       BLK_MQ_REQ_NOWAIT : 0;
 
-               if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+               if (likely(blk_queue_enter(q, flags) == 0)) {
                        struct bio_list lower, same;
 
                        /* Create a fresh bio_list for all subordinate requests */
@@ -2241,6 +2334,40 @@ out:
 }
 EXPORT_SYMBOL(generic_make_request);
 
+/**
+ * direct_make_request - hand a buffer directly to its device driver for I/O
+ * @bio:  The bio describing the location in memory and on the device.
+ *
+ * This function behaves like generic_make_request(), but does not protect
+ * against recursion.  Must only be used if the called driver is known
+ * to not call generic_make_request (or direct_make_request) again from
+ * its make_request function.  (Calling direct_make_request again from
+ * a workqueue is perfectly fine as that doesn't recurse).
+ */
+blk_qc_t direct_make_request(struct bio *bio)
+{
+       struct request_queue *q = bio->bi_disk->queue;
+       bool nowait = bio->bi_opf & REQ_NOWAIT;
+       blk_qc_t ret;
+
+       if (!generic_make_request_checks(bio))
+               return BLK_QC_T_NONE;
+
+       if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
+               if (nowait && !blk_queue_dying(q))
+                       bio->bi_status = BLK_STS_AGAIN;
+               else
+                       bio->bi_status = BLK_STS_IOERR;
+               bio_endio(bio);
+               return BLK_QC_T_NONE;
+       }
+
+       ret = q->make_request_fn(q, bio);
+       blk_queue_exit(q);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(direct_make_request);
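
To illustrate the intended use (not part of this change): a stacking driver that remaps every bio onto a single backing device, and whose backing queue is known not to recurse into generic_make_request(), could forward bios as below. The example_dev structure and names are hypothetical:

	struct example_dev {				/* hypothetical driver state */
		struct block_device	*backing_bdev;
	};

	static blk_qc_t example_make_request(struct request_queue *q,
					     struct bio *bio)
	{
		struct example_dev *edev = q->queuedata;

		/* Redirect the bio to the backing device and submit it directly. */
		bio_set_dev(bio, edev->backing_bdev);
		return direct_make_request(bio);
	}
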
+
 /**
  * submit_bio - submit a bio to the block device layer for I/O
  * @bio: The &struct bio which describes the I/O
@@ -2285,6 +2412,17 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+       if (!q->poll_fn || !blk_qc_t_valid(cookie))
+               return false;
+
+       if (current->plug)
+               blk_flush_plug_list(current->plug, false);
+       return q->poll_fn(q, cookie);
+}
+EXPORT_SYMBOL_GPL(blk_poll);
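
For context, a hedged sketch of the polling loop this helper enables, similar in spirit to what a direct-I/O completion wait does when high-priority polling is requested; the done flag and the example_* name are illustrative:

	/*
	 * Busy-poll for a completion identified by @cookie; fall back to
	 * sleeping via io_schedule() whenever blk_poll() reports no progress
	 * (or the queue does not support polling at all).
	 */
	static void example_wait_polled(struct request_queue *q, blk_qc_t cookie,
					bool *done)
	{
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (READ_ONCE(*done))
				break;
			if (!blk_poll(q, cookie))
				io_schedule();
		}
		__set_current_state(TASK_RUNNING);
	}
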
+
 /**
  * blk_cloned_rq_check_limits - Helper function to check a cloned request
  *                              for new the queue limits
@@ -2350,7 +2488,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
                 * bypass a potential scheduler on the bottom device for
                 * insert.
                 */
-               blk_mq_request_bypass_insert(rq);
+               blk_mq_request_bypass_insert(rq, true);
                return BLK_STS_OK;
        }
 
@@ -2464,20 +2602,22 @@ void blk_account_io_done(struct request *req)
  * Don't process normal requests when queue is suspended
  * or in the process of suspending/resuming
  */
-static struct request *blk_pm_peek_request(struct request_queue *q,
-                                          struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
 {
-       if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-           (q->rpm_status != RPM_ACTIVE && !(rq->rq_flags & RQF_PM))))
-               return NULL;
-       else
-               return rq;
+       switch (rq->q->rpm_status) {
+       case RPM_RESUMING:
+       case RPM_SUSPENDING:
+               return rq->rq_flags & RQF_PM;
+       case RPM_SUSPENDED:
+               return false;
+       }
+
+       return true;
 }
 #else
-static inline struct request *blk_pm_peek_request(struct request_queue *q,
-                                                 struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
 {
-       return rq;
+       return true;
 }
 #endif
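
As a usage note (not part of this change), a power-management command needs both BLK_MQ_REQ_PREEMPT, so its allocation can pass a PREEMPT_ONLY queue, and RQF_PM, so blk_pm_allow_request() dispatches it while the device is suspending or resuming. A hypothetical sketch:

	static struct request *example_alloc_pm_request(struct request_queue *q,
							unsigned int op)
	{
		struct request *rq;

		/* BLK_MQ_REQ_PREEMPT lets the allocation pass a PREEMPT_ONLY queue. */
		rq = blk_get_request_flags(q, op, BLK_MQ_REQ_PREEMPT);
		if (IS_ERR(rq))
			return rq;

		/* RQF_PM marks the request as dispatchable during suspend/resume. */
		rq->rq_flags |= RQF_PM;
		return rq;
	}
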
 
@@ -2517,6 +2657,48 @@ void blk_account_io_start(struct request *rq, bool new_io)
        part_stat_unlock();
 }
 
+static struct request *elv_next_request(struct request_queue *q)
+{
+       struct request *rq;
+       struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+
+       WARN_ON_ONCE(q->mq_ops);
+
+       while (1) {
+               list_for_each_entry(rq, &q->queue_head, queuelist) {
+                       if (blk_pm_allow_request(rq))
+                               return rq;
+
+                       if (rq->rq_flags & RQF_SOFTBARRIER)
+                               break;
+               }
+
+               /*
+                * A flush request is running and the device cannot queue
+                * flush requests, so hold the queue until the flush request
+                * finishes. Even if we did not hold the queue, the driver
+                * could not dispatch the following requests and would have to
+                * requeue them, so holding can also improve throughput. For
+                * example, take requests flush1, write1, flush2: flush1 is
+                * dispatched, the queue is held and write1 is not inserted.
+                * After flush1 finishes, flush2 is dispatched. Since the disk
+                * cache is already clean, flush2 finishes very quickly, so it
+                * effectively gets folded into flush1.
+                * While the queue is held, a flag is set to indicate that the
+                * queue should be restarted later. See flush_end_io() for
+                * details.
+                */
+               if (fq->flush_pending_idx != fq->flush_running_idx &&
+                               !queue_flush_queueable(q)) {
+                       fq->flush_queue_delayed = 1;
+                       return NULL;
+               }
+               if (unlikely(blk_queue_bypass(q)) ||
+                   !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
+                       return NULL;
+       }
+}
+
 /**
  * blk_peek_request - peek at the top of a request queue
  * @q: request queue to peek at
@@ -2538,12 +2720,7 @@ struct request *blk_peek_request(struct request_queue *q)
        lockdep_assert_held(q->queue_lock);
        WARN_ON_ONCE(q->mq_ops);
 
-       while ((rq = __elv_next_request(q)) != NULL) {
-
-               rq = blk_pm_peek_request(q, rq);
-               if (!rq)
-                       break;
-
+       while ((rq = elv_next_request(q)) != NULL) {
                if (!(rq->rq_flags & RQF_STARTED)) {
                        /*
                         * This is the first time the device driver
@@ -2695,6 +2872,27 @@ struct request *blk_fetch_request(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_fetch_request);
 
+/*
+ * Steal bios from a request and add them to a bio list.
+ * The request must not have been partially completed before.
+ */
+void blk_steal_bios(struct bio_list *list, struct request *rq)
+{
+       if (rq->bio) {
+               if (list->tail)
+                       list->tail->bi_next = rq->bio;
+               else
+                       list->head = rq->bio;
+               list->tail = rq->biotail;
+
+               rq->bio = NULL;
+               rq->biotail = NULL;
+       }
+
+       rq->__data_len = 0;
+}
+EXPORT_SYMBOL_GPL(blk_steal_bios);
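
To show the intended use (illustrative, not from this change): a multipath-style failover path can steal the bios from a failed request, end that request, and resubmit the bios elsewhere. The example_* name is hypothetical; bio_list_init(), bio_list_pop(), blk_mq_end_request() and generic_make_request() are the existing kernel interfaces:

	static void example_failover_request(struct request *rq)
	{
		struct bio_list requeue_list;
		struct bio *bio;

		bio_list_init(&requeue_list);
		blk_steal_bios(&requeue_list, rq);

		/* End the request; the stolen bios are not completed by this. */
		blk_mq_end_request(rq, BLK_STS_IOERR);

		/* Resubmit each stolen bio, e.g. after retargeting it to another path. */
		while ((bio = bio_list_pop(&requeue_list)))
			generic_make_request(bio);
	}
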
+
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed