Merge tag 'fixes-2021-07-09' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt...
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e41edae..2c4ac51 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -909,6 +909,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
        return false;
 }
 
+void blk_mq_put_rq_ref(struct request *rq)
+{
+       if (is_flush_rq(rq, rq->mq_hctx))
+               rq->end_io(rq, 0);
+       else if (refcount_dec_and_test(&rq->ref))
+               __blk_mq_free_request(rq);
+}
+
 static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
                struct request *rq, void *priv, bool reserved)
 {
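blk_mq_put_rq_ref() is made non-static so callers outside this file (the tag iterators in blk-mq-tag.c) can drop the reference they take on a request. The flush request is special-cased because it is not allocated from the tag-set request pool and is not freed via its refcount; it is completed through its ->end_io handler instead. For context only (not part of this diff), is_flush_rq() in block/blk.h at this point is roughly:

    /* approximate helper from block/blk.h, shown for context */
    static inline bool is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
    {
            /* the per-hctx flush queue owns a single pre-allocated flush request */
            return hctx->fq->flush_rq == req;
    }
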
@@ -942,11 +950,7 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
        if (blk_mq_req_expired(rq, next))
                blk_mq_rq_timed_out(rq, reserved);
 
-       if (is_flush_rq(rq, hctx))
-               rq->end_io(rq, 0);
-       else if (refcount_dec_and_test(&rq->ref))
-               __blk_mq_free_request(rq);
-
+       blk_mq_put_rq_ref(rq);
        return true;
 }
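
The open-coded drop above is replaced by the new helper so the same logic can be shared with the busy-tag iterators. A rough sketch of that caller side in blk-mq-tag.c (names and details are an assumption about the companion change, not code from this diff):

    /* sketch: per-tag callback used by blk_mq_queue_tag_busy_iter() */
    static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
    {
            struct bt_iter_data *iter_data = data;
            struct blk_mq_hw_ctx *hctx = iter_data->hctx;
            struct request *rq;
            bool ret = true;

            /* take a reference under tags->lock so the request cannot be freed */
            rq = blk_mq_find_and_get_req(hctx->tags, bitnr);
            if (!rq)
                    return true;

            if (rq->q == hctx->queue && rq->mq_hctx == hctx)
                    ret = iter_data->fn(hctx, rq, iter_data->data, iter_data->reserved);

            /* drop the reference; frees the request if we were the last holder */
            blk_mq_put_rq_ref(rq);
            return ret;
    }
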
 
@@ -1100,7 +1104,7 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
        return true;
 }
 
-static bool blk_mq_get_driver_tag(struct request *rq)
+bool blk_mq_get_driver_tag(struct request *rq)
 {
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
@@ -1220,9 +1224,6 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
 {
        unsigned int ewma;
 
-       if (hctx->queue->elevator)
-               return;
-
        ewma = hctx->dispatch_busy;
 
        if (!ewma && !busy)
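
Dropping the early return means hctx->dispatch_busy is now tracked for queues with an elevator as well. For context, the EWMA update that follows in this function (unchanged code outside the hunk) is roughly:

    /* exponentially weighted moving average of dispatch "busy" results */
    ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
    if (busy)
            ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
    ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;

    hctx->dispatch_busy = ewma;
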
@@ -2303,6 +2304,45 @@ queue_exit:
        return BLK_QC_T_NONE;
 }
 
+static size_t order_to_size(unsigned int order)
+{
+       return (size_t)PAGE_SIZE << order;
+}
+
+/* called before freeing the request pool in @tags */
+static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
+               struct blk_mq_tags *tags, unsigned int hctx_idx)
+{
+       struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
+       struct page *page;
+       unsigned long flags;
+
+       list_for_each_entry(page, &tags->page_list, lru) {
+               unsigned long start = (unsigned long)page_address(page);
+               unsigned long end = start + order_to_size(page->private);
+               int i;
+
+               for (i = 0; i < set->queue_depth; i++) {
+                       struct request *rq = drv_tags->rqs[i];
+                       unsigned long rq_addr = (unsigned long)rq;
+
+                       if (rq_addr >= start && rq_addr < end) {
+                               WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
+                               cmpxchg(&drv_tags->rqs[i], rq, NULL);
+                       }
+               }
+       }
+
+       /*
+        * Wait until all pending iterations over tags->rqs[] are done.
+        *
+        * The request pointers are cleared above, and taking then releasing
+        * ->lock guarantees the clearing is observed by any later iteration.
+        */
+       spin_lock_irqsave(&drv_tags->lock, flags);
+       spin_unlock_irqrestore(&drv_tags->lock, flags);
+}
+
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                     unsigned int hctx_idx)
 {
@@ -2321,6 +2361,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                }
        }
 
+       blk_mq_clear_rq_mapping(set, tags, hctx_idx);
+
        while (!list_empty(&tags->page_list)) {
                page = list_first_entry(&tags->page_list, struct page, lru);
                list_del_init(&page->lru);
@@ -2380,11 +2422,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
        return tags;
 }
 
-static size_t order_to_size(unsigned int order)
-{
-       return (size_t)PAGE_SIZE << order;
-}
-
 static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
                               unsigned int hctx_idx, int node)
 {
@@ -2603,16 +2640,49 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
                                            &hctx->cpuhp_dead);
 }
 
+/*
+ * Before freeing the hw queue, clear the flush request reference in
+ * tags->rqs[] to avoid a potential use-after-free.
+ */
+static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
+               unsigned int queue_depth, struct request *flush_rq)
+{
+       int i;
+       unsigned long flags;
+
+       /* The hw queue may not be mapped yet */
+       if (!tags)
+               return;
+
+       WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+
+       for (i = 0; i < queue_depth; i++)
+               cmpxchg(&tags->rqs[i], flush_rq, NULL);
+
+       /*
+        * Wait until all pending iterations over tags->rqs[] are done.
+        *
+        * The flush request pointer is cleared above, and taking then
+        * releasing ->lock guarantees the clearing is observed afterwards.
+        */
+       spin_lock_irqsave(&tags->lock, flags);
+       spin_unlock_irqrestore(&tags->lock, flags);
+}
+
 /* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
                struct blk_mq_tag_set *set,
                struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+       struct request *flush_rq = hctx->fq->flush_rq;
+
        if (blk_mq_hw_queue_mapped(hctx))
                blk_mq_tag_idle(hctx);
 
+       blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+                       set->queue_depth, flush_rq);
        if (set->ops->exit_request)
-               set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
+               set->ops->exit_request(set, flush_rq, hctx_idx);
 
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
@@ -3039,27 +3109,23 @@ void blk_mq_release(struct request_queue *q)
        blk_mq_sysfs_deinit(q);
 }
 
-struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
+static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
                void *queuedata)
 {
-       struct request_queue *uninit_q, *q;
+       struct request_queue *q;
+       int ret;
 
-       uninit_q = blk_alloc_queue(set->numa_node);
-       if (!uninit_q)
+       q = blk_alloc_queue(set->numa_node);
+       if (!q)
                return ERR_PTR(-ENOMEM);
-       uninit_q->queuedata = queuedata;
-
-       /*
-        * Initialize the queue without an elevator. device_add_disk() will do
-        * the initialization.
-        */
-       q = blk_mq_init_allocated_queue(set, uninit_q, false);
-       if (IS_ERR(q))
-               blk_cleanup_queue(uninit_q);
-
+       q->queuedata = queuedata;
+       ret = blk_mq_init_allocated_queue(set, q);
+       if (ret) {
+               blk_cleanup_queue(q);
+               return ERR_PTR(ret);
+       }
        return q;
 }
-EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
@@ -3067,39 +3133,24 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
-/*
- * Helper for setting up a queue with mq ops, given queue depth, and
- * the passed in mq ops flags.
- */
-struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
-                                          const struct blk_mq_ops *ops,
-                                          unsigned int queue_depth,
-                                          unsigned int set_flags)
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
 {
        struct request_queue *q;
-       int ret;
+       struct gendisk *disk;
 
-       memset(set, 0, sizeof(*set));
-       set->ops = ops;
-       set->nr_hw_queues = 1;
-       set->nr_maps = 1;
-       set->queue_depth = queue_depth;
-       set->numa_node = NUMA_NO_NODE;
-       set->flags = set_flags;
-
-       ret = blk_mq_alloc_tag_set(set);
-       if (ret)
-               return ERR_PTR(ret);
+       q = blk_mq_init_queue_data(set, queuedata);
+       if (IS_ERR(q))
+               return ERR_CAST(q);
 
-       q = blk_mq_init_queue(set);
-       if (IS_ERR(q)) {
-               blk_mq_free_tag_set(set);
-               return q;
+       disk = __alloc_disk_node(0, set->numa_node);
+       if (!disk) {
+               blk_cleanup_queue(q);
+               return ERR_PTR(-ENOMEM);
        }
-
-       return q;
+       disk->queue = q;
+       return disk;
 }
-EXPORT_SYMBOL(blk_mq_init_sq_queue);
+EXPORT_SYMBOL(__blk_mq_alloc_disk);
 
 static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
                struct blk_mq_tag_set *set, struct request_queue *q,
@@ -3212,9 +3263,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
        mutex_unlock(&q->sysfs_lock);
 }
 
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-                                                 struct request_queue *q,
-                                                 bool elevator_init)
+int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+               struct request_queue *q)
 {
        /* mark the queue as mq asap */
        q->mq_ops = set->ops;
@@ -3264,11 +3314,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        blk_mq_init_cpu_queues(q, set->nr_hw_queues);
        blk_mq_add_queue_tag_set(set, q);
        blk_mq_map_swqueue(q);
-
-       if (elevator_init)
-               elevator_init_mq(q);
-
-       return q;
+       return 0;
 
 err_hctxs:
        kfree(q->queue_hw_ctx);
@@ -3279,7 +3325,7 @@ err_poll:
        q->poll_cb = NULL;
 err_exit:
        q->mq_ops = NULL;
-       return ERR_PTR(-ENOMEM);
+       return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
@@ -3491,7 +3537,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
        if (blk_mq_is_sbitmap_shared(set->flags)) {
                atomic_set(&set->active_queues_shared_sbitmap, 0);
 
-               if (blk_mq_init_shared_sbitmap(set, set->flags)) {
+               if (blk_mq_init_shared_sbitmap(set)) {
                        ret = -ENOMEM;
                        goto out_free_mq_rq_maps;
                }
@@ -3516,6 +3562,22 @@ out_free_mq_map:
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
 
+/* allocate and initialize a tagset for a simple single-queue device */
+int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
+               const struct blk_mq_ops *ops, unsigned int queue_depth,
+               unsigned int set_flags)
+{
+       memset(set, 0, sizeof(*set));
+       set->ops = ops;
+       set->nr_hw_queues = 1;
+       set->nr_maps = 1;
+       set->queue_depth = queue_depth;
+       set->numa_node = NUMA_NO_NODE;
+       set->flags = set_flags;
+       return blk_mq_alloc_tag_set(set);
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_sq_tag_set);
+
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
        int i, j;
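
Taken together with __blk_mq_alloc_disk() above, this gives single-queue drivers a two-call setup path in place of the removed blk_mq_init_sq_queue(). A minimal usage sketch; the driver names, queue depth, and the blk_mq_alloc_disk() wrapper around __blk_mq_alloc_disk() are assumptions for illustration:

    /* hypothetical single-queue driver setup */
    static int mydrv_probe(struct mydrv *dev)
    {
            struct gendisk *disk;
            int ret;

            ret = blk_mq_alloc_sq_tag_set(&dev->tag_set, &mydrv_mq_ops,
                                          16, BLK_MQ_F_SHOULD_MERGE);
            if (ret)
                    return ret;

            disk = blk_mq_alloc_disk(&dev->tag_set, dev);
            if (IS_ERR(disk)) {
                    ret = PTR_ERR(disk);
                    goto out_free_tag_set;
            }

            disk->fops = &mydrv_bdops;
            disk->private_data = dev;
            sprintf(disk->disk_name, "mydrv0");
            set_capacity(disk, dev->nr_sectors);
            add_disk(disk);
            return 0;

    out_free_tag_set:
            blk_mq_free_tag_set(&dev->tag_set);
            return ret;
    }
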
@@ -3567,15 +3629,24 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
                } else {
                        ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
                                                        nr, true);
+                       if (blk_mq_is_sbitmap_shared(set->flags)) {
+                               hctx->sched_tags->bitmap_tags =
+                                       &q->sched_bitmap_tags;
+                               hctx->sched_tags->breserved_tags =
+                                       &q->sched_breserved_tags;
+                       }
                }
                if (ret)
                        break;
                if (q->elevator && q->elevator->type->ops.depth_updated)
                        q->elevator->type->ops.depth_updated(hctx);
        }
-
-       if (!ret)
+       if (!ret) {
                q->nr_requests = nr;
+               if (q->elevator && blk_mq_is_sbitmap_shared(set->flags))
+                       sbitmap_queue_resize(&q->sched_bitmap_tags,
+                                            nr - set->reserved_tags);
+       }
 
        blk_mq_unquiesce_queue(q);
        blk_mq_unfreeze_queue(q);