Merge branch 'for-5.9/block' into for-5.9/block-merge
author     Jens Axboe <axboe@kernel.dk>
           Mon, 20 Jul 2020 21:38:23 +0000 (15:38 -0600)
committer  Jens Axboe <axboe@kernel.dk>
           Mon, 20 Jul 2020 21:38:23 +0000 (15:38 -0600)
* for-5.9/block: (124 commits)
  blk-cgroup: show global disk stats in root cgroup io.stat
  blk-cgroup: make iostat functions visible to stat printing
  block: improve discard bio alignment in __blkdev_issue_discard()
  block: change REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL to be odd numbers
  block: defer flush request no matter whether we have elevator
  block: make blk_timeout_init() static
  block: remove retry loop in ioc_release_fn()
  block: remove unnecessary ioc nested locking
  block: integrate bd_start_claiming into __blkdev_get
  block: use bd_prepare_to_claim directly in the loop driver
  block: refactor bd_start_claiming
  block: simplify the restart case in __blkdev_get
  Revert "blk-rq-qos: remove redundant finish_wait to rq_qos_wait."
  block: always remove partitions from blk_drop_partitions()
  block: relax jiffies rounding for timeouts
  blk-mq: remove redundant validation in __blk_mq_end_request()
  blk-mq: Remove unnecessary local variable
  writeback: remove bdi->congested_fn
  writeback: remove struct bdi_writeback_congested
  writeback: remove {set,clear}_wb_congested
  ...

20 files changed:
Documentation/admin-guide/cgroup-v2.rst
block/blk-mq-debugfs.c
block/blk-mq.c
drivers/block/nbd.c
drivers/block/virtio_blk.c
drivers/block/zram/zram_drv.c
drivers/md/dm-rq.c
drivers/md/dm-writecache.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/nvme/host/core.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/loop.c
fs/btrfs/disk-io.c
include/linux/blkdev.h
include/linux/fs.h

@@@ -1356,8 -1356,8 +1356,8 @@@ PAGE_SIZE multiple when read back
  
          thp_fault_alloc
                Number of transparent hugepages which were allocated to satisfy
 -              a page fault, including COW faults. This counter is not present
 -              when CONFIG_TRANSPARENT_HUGEPAGE is not set.
 +              a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
 +                is not set.
  
          thp_collapse_alloc
                Number of transparent hugepages which were allocated to allow
@@@ -1483,8 -1483,7 +1483,7 @@@ IO Interface File
  ~~~~~~~~~~~~~~~~~~
  
    io.stat
-       A read-only nested-keyed file which exists on non-root
-       cgroups.
+       A read-only nested-keyed file.
  
        Lines are keyed by $MAJ:$MIN device numbers and not ordered.
        The following nested keys are defined.
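
   For reference, each io.stat line pairs a $MAJ:$MIN key with the nested keys
   documented here (rbytes, wbytes, rios, wios, dbytes, dios). The two lines
   below are only an illustrative sample, not output taken from this series:

       8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
       8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021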
diff --combined block/blk-mq-debugfs.c
@@@ -125,9 -125,6 +125,9 @@@ static const char *const blk_queue_flag
        QUEUE_FLAG_NAME(REGISTERED),
        QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
        QUEUE_FLAG_NAME(QUIESCED),
 +      QUEUE_FLAG_NAME(PCI_P2PDMA),
 +      QUEUE_FLAG_NAME(ZONE_RESETALL),
 +      QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
  };
  #undef QUEUE_FLAG_NAME
  
@@@ -404,8 -401,7 +404,7 @@@ static bool hctx_show_busy_rq(struct re
        const struct show_busy_params *params = data;
  
        if (rq->mq_hctx == params->hctx)
-               __blk_mq_debugfs_rq_show(params->m,
-                                        list_entry_rq(&rq->queuelist));
+               __blk_mq_debugfs_rq_show(params->m, rq);
  
        return true;
  }
@@@ -827,9 -823,6 +826,6 @@@ void blk_mq_debugfs_register(struct req
        struct blk_mq_hw_ctx *hctx;
        int i;
  
-       q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
-                                           blk_debugfs_root);
        debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
  
        /*
  
  void blk_mq_debugfs_unregister(struct request_queue *q)
  {
-       debugfs_remove_recursive(q->debugfs_dir);
        q->sched_debugfs_dir = NULL;
-       q->debugfs_dir = NULL;
  }
  
  static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
diff --combined block/blk-mq.c
@@@ -41,6 -41,8 +41,8 @@@
  #include "blk-mq-sched.h"
  #include "blk-rq-qos.h"
  
+ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
  static void blk_mq_poll_stats_start(struct request_queue *q);
  static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
  
@@@ -275,26 -277,20 +277,20 @@@ static struct request *blk_mq_rq_ctx_in
  {
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct request *rq = tags->static_rqs[tag];
-       req_flags_t rq_flags = 0;
  
-       if (data->flags & BLK_MQ_REQ_INTERNAL) {
+       if (data->q->elevator) {
                rq->tag = BLK_MQ_NO_TAG;
                rq->internal_tag = tag;
        } else {
-               if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
-                       rq_flags = RQF_MQ_INFLIGHT;
-                       atomic_inc(&data->hctx->nr_active);
-               }
                rq->tag = tag;
                rq->internal_tag = BLK_MQ_NO_TAG;
-               data->hctx->tags->rqs[rq->tag] = rq;
        }
  
        /* csd/requeue_work/fifo_time is initialized before use */
        rq->q = data->q;
        rq->mq_ctx = data->ctx;
        rq->mq_hctx = data->hctx;
-       rq->rq_flags = rq_flags;
+       rq->rq_flags = 0;
        rq->cmd_flags = data->cmd_flags;
        if (data->flags & BLK_MQ_REQ_PREEMPT)
                rq->rq_flags |= RQF_PREEMPT;
@@@ -362,8 -358,6 +358,6 @@@ static struct request *__blk_mq_alloc_r
                data->flags |= BLK_MQ_REQ_NOWAIT;
  
        if (e) {
-               data->flags |= BLK_MQ_REQ_INTERNAL;
                /*
                 * Flush requests are special and go directly to the
                 * dispatch list. Don't include reserved tags in the
  retry:
        data->ctx = blk_mq_get_ctx(q);
        data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
-       if (!(data->flags & BLK_MQ_REQ_INTERNAL))
+       if (!e)
                blk_mq_tag_busy(data->hctx);
  
        /*
@@@ -474,9 -468,7 +468,7 @@@ struct request *blk_mq_alloc_request_hc
        cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
        data.ctx = __blk_mq_get_ctx(q, cpu);
  
-       if (q->elevator)
-               data.flags |= BLK_MQ_REQ_INTERNAL;
-       else
+       if (!q->elevator)
                blk_mq_tag_busy(data.hctx);
  
        ret = -EWOULDBLOCK;
@@@ -552,8 -544,7 +544,7 @@@ inline void __blk_mq_end_request(struc
                blk_stat_add(rq, now);
        }
  
-       if (rq->internal_tag != BLK_MQ_NO_TAG)
-               blk_mq_sched_completed_request(rq, now);
+       blk_mq_sched_completed_request(rq, now);
  
        blk_account_io_done(rq, now);
  
@@@ -574,71 -565,139 +565,139 @@@ void blk_mq_end_request(struct request 
  }
  EXPORT_SYMBOL(blk_mq_end_request);
  
- static void __blk_mq_complete_request_remote(void *data)
+ /*
+  * Softirq action handler - move entries to local list and loop over them
+  * while passing them to the queue registered handler.
+  */
+ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
  {
-       struct request *rq = data;
-       struct request_queue *q = rq->q;
+       struct list_head *cpu_list, local_list;
  
-       q->mq_ops->complete(rq);
+       local_irq_disable();
+       cpu_list = this_cpu_ptr(&blk_cpu_done);
+       list_replace_init(cpu_list, &local_list);
+       local_irq_enable();
+       while (!list_empty(&local_list)) {
+               struct request *rq;
+               rq = list_entry(local_list.next, struct request, ipi_list);
+               list_del_init(&rq->ipi_list);
+               rq->q->mq_ops->complete(rq);
+       }
  }
  
- /**
-  * blk_mq_force_complete_rq() - Force complete the request, bypassing any error
-  *                            injection that could drop the completion.
-  * @rq: Request to be force completed
-  *
-  * Drivers should use blk_mq_complete_request() to complete requests in their
-  * normal IO path. For timeout error recovery, drivers may call this forced
-  * completion routine after they've reclaimed timed out requests to bypass
-  * potentially subsequent fake timeouts.
-  */
- void blk_mq_force_complete_rq(struct request *rq)
+ static void blk_mq_trigger_softirq(struct request *rq)
  {
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
-       struct request_queue *q = rq->q;
-       bool shared = false;
-       int cpu;
+       struct list_head *list;
+       unsigned long flags;
+       local_irq_save(flags);
+       list = this_cpu_ptr(&blk_cpu_done);
+       list_add_tail(&rq->ipi_list, list);
+       /*
+        * If the list only contains our just added request, signal a raise of
+        * the softirq.  If there are already entries there, someone already
+        * raised the irq but it hasn't run yet.
+        */
+       if (list->next == &rq->ipi_list)
+               raise_softirq_irqoff(BLOCK_SOFTIRQ);
+       local_irq_restore(flags);
+ }
+ static int blk_softirq_cpu_dead(unsigned int cpu)
+ {
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       local_irq_disable();
+       list_splice_init(&per_cpu(blk_cpu_done, cpu),
+                        this_cpu_ptr(&blk_cpu_done));
+       raise_softirq_irqoff(BLOCK_SOFTIRQ);
+       local_irq_enable();
+       return 0;
+ }
+ static void __blk_mq_complete_request_remote(void *data)
+ {
+       struct request *rq = data;
  
-       WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
        /*
-        * Most of single queue controllers, there is only one irq vector
-        * for handling IO completion, and the only irq's affinity is set
-        * as all possible CPUs. On most of ARCHs, this affinity means the
-        * irq is handled on one specific CPU.
+        * For most of single queue controllers, there is only one irq vector
+        * for handling I/O completion, and the only irq's affinity is set
+        * to all possible CPUs.  On most of ARCHs, this affinity means the irq
+        * is handled on one specific CPU.
         *
-        * So complete IO reqeust in softirq context in case of single queue
-        * for not degrading IO performance by irqsoff latency.
+        * So complete I/O requests in softirq context in case of single queue
+        * devices to avoid degrading I/O performance due to irqsoff latency.
         */
-       if (q->nr_hw_queues == 1) {
-               __blk_complete_request(rq);
-               return;
-       }
+       if (rq->q->nr_hw_queues == 1)
+               blk_mq_trigger_softirq(rq);
+       else
+               rq->q->mq_ops->complete(rq);
+ }
+ static inline bool blk_mq_complete_need_ipi(struct request *rq)
+ {
+       int cpu = raw_smp_processor_id();
+       if (!IS_ENABLED(CONFIG_SMP) ||
+           !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
+               return false;
+       /* same CPU or cache domain?  Complete locally */
+       if (cpu == rq->mq_ctx->cpu ||
+           (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
+            cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+               return false;
+       /* don't try to IPI to an offline CPU */
+       return cpu_online(rq->mq_ctx->cpu);
+ }
+ bool blk_mq_complete_request_remote(struct request *rq)
+ {
+       WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
  
        /*
          * For a polled request, always complete locally, it's pointless
         * to redirect the completion.
         */
-       if ((rq->cmd_flags & REQ_HIPRI) ||
-           !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
-               q->mq_ops->complete(rq);
-               return;
-       }
-       cpu = get_cpu();
-       if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
-               shared = cpus_share_cache(cpu, ctx->cpu);
+       if (rq->cmd_flags & REQ_HIPRI)
+               return false;
  
-       if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+       if (blk_mq_complete_need_ipi(rq)) {
                rq->csd.func = __blk_mq_complete_request_remote;
                rq->csd.info = rq;
                rq->csd.flags = 0;
-               smp_call_function_single_async(ctx->cpu, &rq->csd);
+               smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
        } else {
-               q->mq_ops->complete(rq);
+               if (rq->q->nr_hw_queues > 1)
+                       return false;
+               blk_mq_trigger_softirq(rq);
        }
-       put_cpu();
+       return true;
+ }
+ EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
+ /**
+  * blk_mq_complete_request - end I/O on a request
+  * @rq:               the request being processed
+  *
+  * Description:
+  *    Complete a request by scheduling the ->complete_rq operation.
+  **/
+ void blk_mq_complete_request(struct request *rq)
+ {
+       if (!blk_mq_complete_request_remote(rq))
+               rq->q->mq_ops->complete(rq);
  }
- EXPORT_SYMBOL_GPL(blk_mq_force_complete_rq);
+ EXPORT_SYMBOL(blk_mq_complete_request);
  
  static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
        __releases(hctx->srcu)
@@@ -660,23 -719,6 +719,6 @@@ static void hctx_lock(struct blk_mq_hw_
                *srcu_idx = srcu_read_lock(hctx->srcu);
  }
  
- /**
-  * blk_mq_complete_request - end I/O on a request
-  * @rq:               the request being processed
-  *
-  * Description:
-  *    Ends all I/O on a request. It does not handle partial completions.
-  *    The actual completion happens out-of-order, through a IPI handler.
-  **/
- bool blk_mq_complete_request(struct request *rq)
- {
-       if (unlikely(blk_should_fake_timeout(rq->q)))
-               return false;
-       blk_mq_force_complete_rq(rq);
-       return true;
- }
- EXPORT_SYMBOL(blk_mq_complete_request);
  /**
   * blk_mq_start_request - Start processing a request
   * @rq: Pointer to request to be started
@@@ -828,10 -870,10 +870,10 @@@ static bool blk_mq_rq_inflight(struct b
                               void *priv, bool reserved)
  {
        /*
 -       * If we find a request that is inflight and the queue matches,
 +       * If we find a request that isn't idle and the queue matches,
         * we know the queue is busy. Return false to stop the iteration.
         */
 -      if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
 +      if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
                bool *busy = priv;
  
                *busy = true;
@@@ -1052,6 -1094,45 +1094,45 @@@ static inline unsigned int queued_to_in
        return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
  }
  
+ static bool __blk_mq_get_driver_tag(struct request *rq)
+ {
+       struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+       unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+       int tag;
+       blk_mq_tag_busy(rq->mq_hctx);
+       if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+               bt = &rq->mq_hctx->tags->breserved_tags;
+               tag_offset = 0;
+       }
+       if (!hctx_may_queue(rq->mq_hctx, bt))
+               return false;
+       tag = __sbitmap_queue_get(bt);
+       if (tag == BLK_MQ_NO_TAG)
+               return false;
+       rq->tag = tag + tag_offset;
+       return true;
+ }
+ static bool blk_mq_get_driver_tag(struct request *rq)
+ {
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+       if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
+               return false;
+       if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
+                       !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
+               rq->rq_flags |= RQF_MQ_INFLIGHT;
+               atomic_inc(&hctx->nr_active);
+       }
+       hctx->tags->rqs[rq->tag] = rq;
+       return true;
+ }
  static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
                                int flags, void *key)
  {
@@@ -1204,25 -1285,70 +1285,70 @@@ static void blk_mq_handle_zone_resource
        __blk_mq_requeue_request(rq);
  }
  
+ enum prep_dispatch {
+       PREP_DISPATCH_OK,
+       PREP_DISPATCH_NO_TAG,
+       PREP_DISPATCH_NO_BUDGET,
+ };
+ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
+                                                 bool need_budget)
+ {
+       struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+       if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
+               blk_mq_put_driver_tag(rq);
+               return PREP_DISPATCH_NO_BUDGET;
+       }
+       if (!blk_mq_get_driver_tag(rq)) {
+               /*
+                * The initial allocation attempt failed, so we need to
+                * rerun the hardware queue when a tag is freed. The
+                * waitqueue takes care of that. If the queue is run
+                * before we add this entry back on the dispatch list,
+                * we'll re-run it below.
+                */
+               if (!blk_mq_mark_tag_wait(hctx, rq)) {
+                       /*
+                        * All budgets not got from this function will be put
+                        * together during handling partial dispatch
+                        */
+                       if (need_budget)
+                               blk_mq_put_dispatch_budget(rq->q);
+                       return PREP_DISPATCH_NO_TAG;
+               }
+       }
+       return PREP_DISPATCH_OK;
+ }
+ /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
+ static void blk_mq_release_budgets(struct request_queue *q,
+               unsigned int nr_budgets)
+ {
+       int i;
+       for (i = 0; i < nr_budgets; i++)
+               blk_mq_put_dispatch_budget(q);
+ }
  /*
   * Returns true if we did some work AND can potentially do more.
   */
- bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
-                            bool got_budget)
+ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
+                            unsigned int nr_budgets)
  {
-       struct blk_mq_hw_ctx *hctx;
+       enum prep_dispatch prep;
+       struct request_queue *q = hctx->queue;
        struct request *rq, *nxt;
-       bool no_tag = false;
        int errors, queued;
        blk_status_t ret = BLK_STS_OK;
-       bool no_budget_avail = false;
        LIST_HEAD(zone_list);
  
        if (list_empty(list))
                return false;
  
-       WARN_ON(!list_is_singular(list) && got_budget);
        /*
         * Now process all the entries, sending them to the driver.
         */
  
                rq = list_first_entry(list, struct request, queuelist);
  
-               hctx = rq->mq_hctx;
-               if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-                       blk_mq_put_driver_tag(rq);
-                       no_budget_avail = true;
+               WARN_ON_ONCE(hctx != rq->mq_hctx);
+               prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
+               if (prep != PREP_DISPATCH_OK)
                        break;
-               }
-               if (!blk_mq_get_driver_tag(rq)) {
-                       /*
-                        * The initial allocation attempt failed, so we need to
-                        * rerun the hardware queue when a tag is freed. The
-                        * waitqueue takes care of that. If the queue is run
-                        * before we add this entry back on the dispatch list,
-                        * we'll re-run it below.
-                        */
-                       if (!blk_mq_mark_tag_wait(hctx, rq)) {
-                               blk_mq_put_dispatch_budget(hctx);
-                               /*
-                                * For non-shared tags, the RESTART check
-                                * will suffice.
-                                */
-                               if (hctx->flags & BLK_MQ_F_TAG_SHARED)
-                                       no_tag = true;
-                               break;
-                       }
-               }
  
                list_del_init(&rq->queuelist);
  
                        bd.last = !blk_mq_get_driver_tag(nxt);
                }
  
+               /*
+                * once the request is queued to lld, no need to cover the
+                * budget any more
+                */
+               if (nr_budgets)
+                       nr_budgets--;
                ret = q->mq_ops->queue_rq(hctx, &bd);
-               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
-                       blk_mq_handle_dev_resource(rq, list);
+               switch (ret) {
+               case BLK_STS_OK:
+                       queued++;
                        break;
-               } else if (ret == BLK_STS_ZONE_RESOURCE) {
+               case BLK_STS_RESOURCE:
+               case BLK_STS_DEV_RESOURCE:
+                       blk_mq_handle_dev_resource(rq, list);
+                       goto out;
+               case BLK_STS_ZONE_RESOURCE:
                        /*
                         * Move the request to zone_list and keep going through
                         * the dispatch list to find more requests the drive can
                         * accept.
                         */
                        blk_mq_handle_zone_resource(rq, &zone_list);
-                       if (list_empty(list))
-                               break;
-                       continue;
-               }
-               if (unlikely(ret != BLK_STS_OK)) {
+                       break;
+               default:
                        errors++;
                        blk_mq_end_request(rq, BLK_STS_IOERR);
-                       continue;
                }
-               queued++;
        } while (!list_empty(list));
+ out:
        if (!list_empty(&zone_list))
                list_splice_tail_init(&zone_list, list);
  
         */
        if (!list_empty(list)) {
                bool needs_restart;
+               /* For non-shared tags, the RESTART check will suffice */
+               bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
+                         (hctx->flags & BLK_MQ_F_TAG_SHARED);
+               bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
+               blk_mq_release_budgets(q, nr_budgets);
  
                /*
                 * If we didn't flush the entire list, we could have told
        } else
                blk_mq_update_dispatch_busy(hctx, false);
  
-       /*
-        * If the host/device is unable to accept more work, inform the
-        * caller of that.
-        */
-       if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-               return false;
        return (queued + errors) != 0;
  }
  
@@@ -1896,11 -2003,11 +2003,11 @@@ static blk_status_t __blk_mq_try_issue_
        if (q->elevator && !bypass_insert)
                goto insert;
  
-       if (!blk_mq_get_dispatch_budget(hctx))
+       if (!blk_mq_get_dispatch_budget(q))
                goto insert;
  
        if (!blk_mq_get_driver_tag(rq)) {
-               blk_mq_put_dispatch_budget(hctx);
+               blk_mq_put_dispatch_budget(q);
                goto insert;
        }
  
@@@ -2005,8 -2112,7 +2112,7 @@@ static void blk_add_rq_to_plug(struct b
  }
  
  /**
-  * blk_mq_make_request - Create and send a request to block device.
-  * @q: Request queue pointer.
+  * blk_mq_submit_bio - Create and send a request to block device.
   * @bio: Bio pointer.
   *
   * Builds up a request structure from @q and @bio and send to the device. The
   *
   * Returns: Request queue cookie.
   */
- blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
+ blk_qc_t blk_mq_submit_bio(struct bio *bio)
  {
+       struct request_queue *q = bio->bi_disk->queue;
        const int is_sync = op_is_sync(bio->bi_opf);
        const int is_flush_fua = op_is_flush(bio->bi_opf);
        struct blk_mq_alloc_data data = {
        blk_status_t ret;
  
        blk_queue_bounce(q, &bio);
-       __blk_queue_split(q, &bio, &nr_segs);
+       __blk_queue_split(&bio, &nr_segs);
  
        if (!bio_integrity_prep(bio))
                goto queue_exit;
@@@ -2146,7 -2253,7 +2253,7 @@@ queue_exit
        blk_queue_exit(q);
        return BLK_QC_T_NONE;
  }
- EXPORT_SYMBOL_GPL(blk_mq_make_request); /* only for request based dm */
+ EXPORT_SYMBOL_GPL(blk_mq_submit_bio); /* only for request based dm */
  
  void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                     unsigned int hctx_idx)
@@@ -2886,7 -2993,7 +2993,7 @@@ struct request_queue *blk_mq_init_queue
  {
        struct request_queue *uninit_q, *q;
  
-       uninit_q = __blk_alloc_queue(set->numa_node);
+       uninit_q = blk_alloc_queue(set->numa_node);
        if (!uninit_q)
                return ERR_PTR(-ENOMEM);
        uninit_q->queuedata = queuedata;
@@@ -3760,6 -3867,15 +3867,15 @@@ EXPORT_SYMBOL(blk_mq_rq_cpu)
  
  static int __init blk_mq_init(void)
  {
+       int i;
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+       open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+       cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+                                 "block/softirq:dead", NULL,
+                                 blk_softirq_cpu_dead);
        cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
                                blk_mq_hctx_notify_dead);
        cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
diff --combined drivers/block/nbd.c
@@@ -784,6 -784,7 +784,7 @@@ static void recv_work(struct work_struc
        struct nbd_device *nbd = args->nbd;
        struct nbd_config *config = nbd->config;
        struct nbd_cmd *cmd;
+       struct request *rq;
  
        while (1) {
                cmd = nbd_read_stat(nbd, args->index);
                        break;
                }
  
-               blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+               rq = blk_mq_rq_from_pdu(cmd);
+               if (likely(!blk_should_fake_timeout(rq->q)))
+                       blk_mq_complete_request(rq);
        }
        atomic_dec(&config->recv_threads);
        wake_up(&config->recv_wq);
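
   Taken together with the blk-mq.c hunks above, the recv_work() change shows the
   new completion contract: the driver filters fake timeouts itself and calls
   blk_mq_complete_request(), which may bounce the work to the submitting CPU via
   IPI or softirq before invoking the driver's ->complete callback. A minimal
   sketch of that driver-side pattern follows; the mydrv_* names are hypothetical
   and only mark the calls touched by this series:

       /* Hypothetical driver fragment; mydrv_* names are illustrative only. */
       static void mydrv_complete_rq(struct request *rq)       /* blk_mq_ops->complete */
       {
               blk_mq_end_request(rq, BLK_STS_OK);     /* a real driver maps its HW status here */
       }

       static irqreturn_t mydrv_irq(int irq, void *data)
       {
               struct request *rq = mydrv_next_completed(data); /* hypothetical helper */

               /* fake-timeout filtering now lives in the driver, as in recv_work() above */
               if (likely(!blk_should_fake_timeout(rq->q)))
                       blk_mq_complete_request(rq);
               return IRQ_HANDLED;
       }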
@@@ -1033,26 -1036,25 +1036,26 @@@ static int nbd_add_socket(struct nbd_de
             test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
                dev_err(disk_to_dev(nbd->disk),
                        "Device being setup by another task");
 -              sockfd_put(sock);
 -              return -EBUSY;
 +              err = -EBUSY;
 +              goto put_socket;
 +      }
 +
 +      nsock = kzalloc(sizeof(*nsock), GFP_KERNEL);
 +      if (!nsock) {
 +              err = -ENOMEM;
 +              goto put_socket;
        }
  
        socks = krealloc(config->socks, (config->num_connections + 1) *
                         sizeof(struct nbd_sock *), GFP_KERNEL);
        if (!socks) {
 -              sockfd_put(sock);
 -              return -ENOMEM;
 +              kfree(nsock);
 +              err = -ENOMEM;
 +              goto put_socket;
        }
  
        config->socks = socks;
  
 -      nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
 -      if (!nsock) {
 -              sockfd_put(sock);
 -              return -ENOMEM;
 -      }
 -
        nsock->fallback_index = -1;
        nsock->dead = false;
        mutex_init(&nsock->tx_lock);
        atomic_inc(&config->live_connections);
  
        return 0;
 +
 +put_socket:
 +      sockfd_put(sock);
 +      return err;
  }
  
  static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
@@@ -171,7 -171,8 +171,8 @@@ static void virtblk_done(struct virtque
                while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
                        struct request *req = blk_mq_rq_from_pdu(vbr);
  
-                       blk_mq_complete_request(req);
+                       if (likely(!blk_should_fake_timeout(req->q)))
+                               blk_mq_complete_request(req);
                        req_done = true;
                }
                if (unlikely(virtqueue_is_broken(vq)))
@@@ -878,7 -879,6 +879,7 @@@ out_put_disk
        put_disk(vblk->disk);
  out_free_vq:
        vdev->config->del_vqs(vdev);
 +      kfree(vblk->vqs);
  out_free_vblk:
        kfree(vblk);
  out_free_index:
@@@ -793,9 -793,9 +793,9 @@@ static void zram_sync_read(struct work_
  }
  
  /*
-  * Block layer want one ->make_request_fn to be active at a time
-  * so if we use chained IO with parent IO in same context,
-  * it's a deadlock. To avoid, it, it uses worker thread context.
+  * Block layer want one ->submit_bio to be active at a time, so if we use
+  * chained IO with parent IO in same context, it's a deadlock. To avoid that,
+  * use a worker thread context.
   */
  static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
                                unsigned long entry, struct bio *bio)
@@@ -1584,9 -1584,9 +1584,9 @@@ static void __zram_make_request(struct 
  /*
   * Handler function for all zram I/O requests.
   */
- static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
+ static blk_qc_t zram_submit_bio(struct bio *bio)
  {
-       struct zram *zram = queue->queuedata;
+       struct zram *zram = bio->bi_disk->private_data;
  
        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
                                        bio->bi_iter.bi_size)) {
@@@ -1813,6 -1813,7 +1813,7 @@@ static int zram_open(struct block_devic
  
  static const struct block_device_operations zram_devops = {
        .open = zram_open,
+       .submit_bio = zram_submit_bio,
        .swap_slot_free_notify = zram_slot_free_notify,
        .rw_page = zram_rw_page,
        .owner = THIS_MODULE
@@@ -1891,7 -1892,7 +1892,7 @@@ static int zram_add(void
  #ifdef CONFIG_ZRAM_WRITEBACK
        spin_lock_init(&zram->wb_limit_lock);
  #endif
-       queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE);
+       queue = blk_alloc_queue(NUMA_NO_NODE);
        if (!queue) {
                pr_err("Error allocating disk queue for device %d\n",
                        device_id);
        zram->disk->first_minor = device_id;
        zram->disk->fops = &zram_devops;
        zram->disk->queue = queue;
-       zram->disk->queue->queuedata = zram;
        zram->disk->private_data = zram;
        snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
  
@@@ -2021,8 -2021,7 +2021,8 @@@ static ssize_t hot_add_show(struct clas
                return ret;
        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
  }
 -static CLASS_ATTR_RO(hot_add);
 +static struct class_attribute class_attr_hot_add =
 +      __ATTR(hot_add, 0400, hot_add_show, NULL);
  
  static ssize_t hot_remove_store(struct class *class,
                        struct class_attribute *attr,
diff --combined drivers/md/dm-rq.c
@@@ -146,6 -146,10 +146,6 @@@ static void rq_end_stats(struct mapped_
   */
  static void rq_completed(struct mapped_device *md)
  {
 -      /* nudge anyone waiting on suspend queue */
 -      if (unlikely(wq_has_sleeper(&md->wait)))
 -              wake_up(&md->wait);
 -
        /*
         * dm_put() must be at the end of this function. See the comment above
         */
@@@ -284,7 -288,8 +284,8 @@@ static void dm_complete_request(struct 
        struct dm_rq_target_io *tio = tio_from_request(rq);
  
        tio->error = error;
-       blk_mq_complete_request(rq);
+       if (likely(!blk_should_fake_timeout(rq->q)))
+               blk_mq_complete_request(rq);
  }
  
  /*
@@@ -282,8 -282,6 +282,8 @@@ static int persistent_memory_claim(stru
                        while (daa-- && i < p) {
                                pages[i++] = pfn_t_to_page(pfn);
                                pfn.val++;
 +                              if (!(i & 15))
 +                                      cond_resched();
                        }
                } while (i < p);
                wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
@@@ -851,14 -849,10 +851,14 @@@ static void writecache_discard(struct d
  
                if (likely(!e->write_in_progress)) {
                        if (!discarded_something) {
 -                              writecache_wait_for_ios(wc, READ);
 -                              writecache_wait_for_ios(wc, WRITE);
 +                              if (!WC_MODE_PMEM(wc)) {
 +                                      writecache_wait_for_ios(wc, READ);
 +                                      writecache_wait_for_ios(wc, WRITE);
 +                              }
                                discarded_something = true;
                        }
 +                      if (!writecache_entry_is_committed(wc, e))
 +                              wc->uncommitted_blocks--;
                        writecache_free_entry(wc, e);
                }
  
@@@ -1244,7 -1238,7 +1244,7 @@@ static int writecache_flush_thread(voi
                                           bio_end_sector(bio));
                        wc_unlock(wc);
                        bio_set_dev(bio, wc->dev->bdev);
-                       generic_make_request(bio);
+                       submit_bio_noacct(bio);
                } else {
                        writecache_flush(wc);
                        wc_unlock(wc);
@@@ -2266,12 -2260,6 +2266,12 @@@ invalid_optional
        }
  
        if (WC_MODE_PMEM(wc)) {
 +              if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
 +                      r = -EOPNOTSUPP;
 +                      ti->error = "Asynchronous persistent memory not supported as pmem cache";
 +                      goto bad;
 +              }
 +
                r = persistent_memory_claim(wc);
                if (r) {
                        ti->error = "Unable to map persistent memory for cache";
@@@ -140,7 -140,7 +140,7 @@@ static int dmz_submit_bio(struct dmz_ta
        bio_advance(bio, clone->bi_iter.bi_size);
  
        refcount_inc(&bioctx->ref);
-       generic_make_request(clone);
+       submit_bio_noacct(clone);
  
        if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
                zone->wp_block += nr_blocks;
@@@ -400,7 -400,15 +400,7 @@@ static void dmz_handle_bio(struct dmz_t
                dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
        struct dmz_metadata *zmd = dmz->metadata;
        struct dm_zone *zone;
 -      int i, ret;
 -
 -      /*
 -       * Write may trigger a zone allocation. So make sure the
 -       * allocation can succeed.
 -       */
 -      if (bio_op(bio) == REQ_OP_WRITE)
 -              for (i = 0; i < dmz->nr_ddevs; i++)
 -                      dmz_schedule_reclaim(dmz->dev[i].reclaim);
 +      int ret;
  
        dmz_lock_metadata(zmd);
  
@@@ -882,7 -890,7 +882,7 @@@ static int dmz_ctr(struct dm_target *ti
        }
  
        /* Set target (no write same support) */
 -      ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
 +      ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
        ti->num_flush_bios = 1;
        ti->num_discard_bios = 1;
        ti->num_write_zeroes_bios = 1;
diff --combined drivers/md/dm.c
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/mutex.h>
 +#include <linux/sched/mm.h>
  #include <linux/sched/signal.h>
  #include <linux/blkpg.h>
  #include <linux/bio.h>
@@@ -655,6 -654,28 +655,6 @@@ static void free_tio(struct dm_target_i
        bio_put(&tio->clone);
  }
  
 -static bool md_in_flight_bios(struct mapped_device *md)
 -{
 -      int cpu;
 -      struct hd_struct *part = &dm_disk(md)->part0;
 -      long sum = 0;
 -
 -      for_each_possible_cpu(cpu) {
 -              sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
 -              sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
 -      }
 -
 -      return sum != 0;
 -}
 -
 -static bool md_in_flight(struct mapped_device *md)
 -{
 -      if (queue_is_mq(md->queue))
 -              return blk_mq_queue_inflight(md->queue);
 -      else
 -              return md_in_flight_bios(md);
 -}
 -
  u64 dm_start_time_ns_from_clone(struct bio *bio)
  {
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
@@@ -988,7 -1009,6 +988,7 @@@ static void clone_endio(struct bio *bio
        struct dm_io *io = tio->io;
        struct mapped_device *md = tio->io->md;
        dm_endio_fn endio = tio->ti->type->end_io;
 +      struct bio *orig_bio = io->orig_bio;
  
        if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
                if (bio_op(bio) == REQ_OP_DISCARD &&
                        disable_write_zeroes(md);
        }
  
 +      /*
 +       * For zone-append bios get offset in zone of the written
 +       * sector and add that to the original bio sector pos.
 +       */
 +      if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
 +              sector_t written_sector = bio->bi_iter.bi_sector;
 +              struct request_queue *q = orig_bio->bi_disk->queue;
 +              u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
 +
 +              orig_bio->bi_iter.bi_sector += written_sector & mask;
 +      }
 +
        if (endio) {
                int r = endio(tio->ti, bio, &error);
                switch (r) {
@@@ -1272,7 -1280,6 +1272,6 @@@ static blk_qc_t __map_bio(struct dm_tar
        sector_t sector;
        struct bio *clone = &tio->clone;
        struct dm_io *io = tio->io;
-       struct mapped_device *md = io->md;
        struct dm_target *ti = tio->ti;
        blk_qc_t ret = BLK_QC_T_NONE;
  
                /* the bio has been remapped so dispatch it */
                trace_block_bio_remap(clone->bi_disk->queue, clone,
                                      bio_dev(io->orig_bio), sector);
-               if (md->type == DM_TYPE_NVME_BIO_BASED)
-                       ret = direct_make_request(clone);
-               else
-                       ret = generic_make_request(clone);
+               ret = submit_bio_noacct(clone);
                break;
        case DM_MAPIO_KILL:
                free_tio(tio);
@@@ -1644,7 -1648,7 +1640,7 @@@ static blk_qc_t __split_and_process_bio
                        error = __split_and_process_non_flush(&ci);
                        if (current->bio_list && ci.sector_count && !error) {
                                /*
-                                * Remainder must be passed to generic_make_request()
+                                * Remainder must be passed to submit_bio_noacct()
                                 * so that it gets handled *after* bios already submitted
                                 * have been completely processed.
                                 * We take a clone of the original to store in
  
                                bio_chain(b, bio);
                                trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
-                               ret = generic_make_request(bio);
+                               ret = submit_bio_noacct(bio);
                                break;
                        }
                }
@@@ -1737,7 -1741,7 +1733,7 @@@ static void dm_queue_split(struct mappe
  
                bio_chain(split, *bio);
                trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
-               generic_make_request(*bio);
+               submit_bio_noacct(*bio);
                *bio = split;
        }
  }
@@@ -1762,13 -1766,13 +1758,13 @@@ static blk_qc_t dm_process_bio(struct m
        }
  
        /*
-        * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+        * If in ->queue_bio we need to use blk_queue_split(), otherwise
         * queue_limits for abnormal requests (e.g. discard, writesame, etc)
         * won't be imposed.
         */
        if (current->bio_list) {
                if (is_abnormal_io(bio))
-                       blk_queue_split(md->queue, &bio);
+                       blk_queue_split(&bio);
                else
                        dm_queue_split(md, ti, &bio);
        }
                return __split_and_process_bio(md, map, bio);
  }
  
- static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
+ static blk_qc_t dm_submit_bio(struct bio *bio)
  {
-       struct mapped_device *md = q->queuedata;
+       struct mapped_device *md = bio->bi_disk->private_data;
        blk_qc_t ret = BLK_QC_T_NONE;
        int srcu_idx;
        struct dm_table *map;
                /*
                 * We are called with a live reference on q_usage_counter, but
                 * that one will be released as soon as we return.  Grab an
-                * extra one as blk_mq_make_request expects to be able to
-                * consume a reference (which lives until the request is freed
-                * in case a request is allocated).
+                * extra one as blk_mq_submit_bio expects to be able to consume
+                * a reference (which lives until the request is freed in case a
+                * request is allocated).
                 */
-               percpu_ref_get(&q->q_usage_counter);
-               return blk_mq_make_request(q, bio);
+               percpu_ref_get(&bio->bi_disk->queue->q_usage_counter);
+               return blk_mq_submit_bio(bio);
        }
  
        map = dm_get_live_table(md, &srcu_idx);
        return ret;
  }
  
- static int dm_any_congested(void *congested_data, int bdi_bits)
- {
-       int r = bdi_bits;
-       struct mapped_device *md = congested_data;
-       struct dm_table *map;
-       if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-               if (dm_request_based(md)) {
-                       /*
-                        * With request-based DM we only need to check the
-                        * top-level queue for congestion.
-                        */
-                       struct backing_dev_info *bdi = md->queue->backing_dev_info;
-                       r = bdi->wb.congested->state & bdi_bits;
-               } else {
-                       map = dm_get_live_table_fast(md);
-                       if (map)
-                               r = dm_table_any_congested(map, bdi_bits);
-                       dm_put_live_table_fast(md);
-               }
-       }
-       return r;
- }
  /*-----------------------------------------------------------------
   * An IDR is used to keep track of allocated minor numbers.
   *---------------------------------------------------------------*/
@@@ -1980,14 -1959,13 +1951,13 @@@ static struct mapped_device *alloc_dev(
        spin_lock_init(&md->uevent_lock);
  
        /*
-        * default to bio-based required ->make_request_fn until DM
-        * table is loaded and md->type established. If request-based
-        * table is loaded: blk-mq will override accordingly.
+        * default to bio-based until DM table is loaded and md->type
+        * established. If request-based table is loaded: blk-mq will
+        * override accordingly.
         */
-       md->queue = blk_alloc_queue(dm_make_request, numa_node_id);
+       md->queue = blk_alloc_queue(numa_node_id);
        if (!md->queue)
                goto bad;
-       md->queue->queuedata = md;
  
        md->disk = alloc_disk_node(1, md->numa_node_id);
        if (!md->disk)
@@@ -2281,12 -2259,6 +2251,6 @@@ struct queue_limits *dm_get_queue_limit
  }
  EXPORT_SYMBOL_GPL(dm_get_queue_limits);
  
- static void dm_init_congested_fn(struct mapped_device *md)
- {
-       md->queue->backing_dev_info->congested_data = md;
-       md->queue->backing_dev_info->congested_fn = dm_any_congested;
- }
  /*
   * Setup the DM device's queue based on md's type
   */
@@@ -2303,12 -2275,10 +2267,10 @@@ int dm_setup_md_queue(struct mapped_dev
                        DMERR("Cannot initialize queue for request-based dm-mq mapped device");
                        return r;
                }
-               dm_init_congested_fn(md);
                break;
        case DM_TYPE_BIO_BASED:
        case DM_TYPE_DAX_BIO_BASED:
        case DM_TYPE_NVME_BIO_BASED:
-               dm_init_congested_fn(md);
                break;
        case DM_TYPE_NONE:
                WARN_ON_ONCE(true);
@@@ -2448,29 -2418,15 +2410,29 @@@ void dm_put(struct mapped_device *md
  }
  EXPORT_SYMBOL_GPL(dm_put);
  
 -static int dm_wait_for_completion(struct mapped_device *md, long task_state)
 +static bool md_in_flight_bios(struct mapped_device *md)
 +{
 +      int cpu;
 +      struct hd_struct *part = &dm_disk(md)->part0;
 +      long sum = 0;
 +
 +      for_each_possible_cpu(cpu) {
 +              sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
 +              sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
 +      }
 +
 +      return sum != 0;
 +}
 +
 +static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
  {
        int r = 0;
        DEFINE_WAIT(wait);
  
 -      while (1) {
 +      while (true) {
                prepare_to_wait(&md->wait, &wait, task_state);
  
 -              if (!md_in_flight(md))
 +              if (!md_in_flight_bios(md))
                        break;
  
                if (signal_pending_state(task_state, current)) {
        return r;
  }
  
 +static int dm_wait_for_completion(struct mapped_device *md, long task_state)
 +{
 +      int r = 0;
 +
 +      if (!queue_is_mq(md->queue))
 +              return dm_wait_for_bios_completion(md, task_state);
 +
 +      while (true) {
 +              if (!blk_mq_queue_inflight(md->queue))
 +                      break;
 +
 +              if (signal_pending_state(task_state, current)) {
 +                      r = -EINTR;
 +                      break;
 +              }
 +
 +              msleep(5);
 +      }
 +
 +      return r;
 +}
 +
  /*
   * Process the deferred bios
   */
@@@ -2529,7 -2463,7 +2491,7 @@@ static void dm_wq_work(struct work_stru
                        break;
  
                if (dm_request_based(md))
-                       (void) generic_make_request(c);
+                       (void) submit_bio_noacct(c);
                else
                        (void) dm_process_bio(md, map, c);
        }
@@@ -2940,25 -2874,17 +2902,25 @@@ EXPORT_SYMBOL_GPL(dm_internal_resume_fa
  int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
                       unsigned cookie)
  {
 +      int r;
 +      unsigned noio_flag;
        char udev_cookie[DM_COOKIE_LENGTH];
        char *envp[] = { udev_cookie, NULL };
  
 +      noio_flag = memalloc_noio_save();
 +
        if (!cookie)
 -              return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 +              r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
        else {
                snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
                         DM_COOKIE_ENV_VAR_NAME, cookie);
 -              return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
 -                                        action, envp);
 +              r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
 +                                     action, envp);
        }
 +
 +      memalloc_noio_restore(noio_flag);
 +
 +      return r;
  }
  
  uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@@ -3269,6 -3195,7 +3231,7 @@@ static const struct pr_ops dm_pr_ops = 
  };
  
  static const struct block_device_operations dm_blk_dops = {
+       .submit_bio = dm_submit_bio,
        .open = dm_blk_open,
        .release = dm_blk_close,
        .ioctl = dm_blk_ioctl,
diff --combined drivers/nvme/host/core.c
@@@ -304,7 -304,7 +304,7 @@@ bool nvme_cancel_request(struct reques
                return true;
  
        nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
-       blk_mq_force_complete_rq(req);
+       blk_mq_complete_request(req);
        return true;
  }
  EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@@ -1116,16 -1116,10 +1116,16 @@@ static int nvme_identify_ns_descs(struc
                dev_warn(ctrl->device,
                        "Identify Descriptors failed (%d)\n", status);
                 /*
 -                * Don't treat an error as fatal, as we potentially already
 -                * have a NGUID or EUI-64.
 +                * Don't treat non-retryable errors as fatal, as we potentially
 +                * already have a NGUID or EUI-64.  If we failed with DNR set,
 +                * we want to silently ignore the error as we can still
 +                * identify the device, but if the status has DNR set, we want
 +                * to propagate the error back specifically for the disk
 +                * revalidation flow to make sure we don't abandon the
 +                * device just because of a temporal retry-able error (such
 +                * as path of transport errors).
                  */
 -              if (status > 0 && !(status & NVME_SC_DNR))
 +              if (status > 0 && (status & NVME_SC_DNR))
                        status = 0;
                goto free_data;
        }
@@@ -1980,7 -1974,7 +1980,7 @@@ static int __nvme_revalidate_disk(struc
        if (ns->head->disk) {
                nvme_update_disk_info(ns->head->disk, ns, id);
                blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
 -              revalidate_disk(ns->head->disk);
 +              nvme_mpath_update_disk_size(ns->head->disk);
        }
  #endif
        return 0;
@@@ -2184,6 -2178,7 +2184,7 @@@ static void nvme_ns_head_release(struc
  
  const struct block_device_operations nvme_ns_head_ops = {
        .owner          = THIS_MODULE,
+       .submit_bio     = nvme_ns_head_submit_bio,
        .open           = nvme_ns_head_open,
        .release        = nvme_ns_head_release,
        .ioctl          = nvme_ioctl,
@@@ -4180,7 -4175,6 +4181,7 @@@ int nvme_init_ctrl(struct nvme_ctrl *ct
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;
 +      ctrl->numa_node = NUMA_NO_NODE;
        INIT_WORK(&ctrl->scan_work, nvme_scan_work);
        INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
@@@ -291,8 -291,7 +291,7 @@@ static bool nvme_available_path(struct 
        return false;
  }
  
- static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
-               struct bio *bio)
+ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
  {
        struct nvme_ns_head *head = bio->bi_disk->private_data;
        struct device *dev = disk_to_dev(head->disk);
        int srcu_idx;
  
        /*
-        * The namespace might be going away and the bio might
-        * be moved to a different queue via blk_steal_bios(),
-        * so we need to use the bio_split pool from the original
-        * queue to allocate the bvecs from.
+        * The namespace might be going away and the bio might be moved to a
+        * different queue via blk_steal_bios(), so we need to use the bio_split
+        * pool from the original queue to allocate the bvecs from.
         */
-       blk_queue_split(q, &bio);
+       blk_queue_split(&bio);
  
        srcu_idx = srcu_read_lock(&head->srcu);
        ns = nvme_find_path(head);
                trace_block_bio_remap(bio->bi_disk->queue, bio,
                                      disk_devt(ns->head->disk),
                                      bio->bi_iter.bi_sector);
-               ret = direct_make_request(bio);
+               ret = submit_bio_noacct(bio);
        } else if (nvme_available_path(head)) {
                dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
  
@@@ -353,7 -351,7 +351,7 @@@ static void nvme_requeue_work(struct wo
                 * path.
                 */
                bio->bi_disk = head->disk;
-               generic_make_request(bio);
+               submit_bio_noacct(bio);
        }
  }
  
@@@ -375,7 -373,7 +373,7 @@@ int nvme_mpath_alloc_disk(struct nvme_c
        if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
                return 0;
  
-       q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
+       q = blk_alloc_queue(ctrl->numa_node);
        if (!q)
                goto out;
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
@@@ -409,14 -407,15 +407,14 @@@ static void nvme_mpath_set_live(struct 
  {
        struct nvme_ns_head *head = ns->head;
  
 -      lockdep_assert_held(&ns->head->lock);
 -
        if (!head->disk)
                return;
  
 -      if (!(head->disk->flags & GENHD_FL_UP))
 +      if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
                device_add_disk(&head->subsys->dev, head->disk,
                                nvme_ns_id_attr_groups);
  
 +      mutex_lock(&head->lock);
        if (nvme_path_is_optimized(ns)) {
                int node, srcu_idx;
  
                        __nvme_find_path(head, node);
                srcu_read_unlock(&head->srcu, srcu_idx);
        }
 +      mutex_unlock(&head->lock);
  
 -      synchronize_srcu(&ns->head->srcu);
 -      kblockd_schedule_work(&ns->head->requeue_work);
 +      synchronize_srcu(&head->srcu);
 +      kblockd_schedule_work(&head->requeue_work);
  }
  
  static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@@ -483,12 -481,14 +481,12 @@@ static inline bool nvme_state_is_live(e
  static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
                struct nvme_ns *ns)
  {
 -      mutex_lock(&ns->head->lock);
        ns->ana_grpid = le32_to_cpu(desc->grpid);
        ns->ana_state = desc->state;
        clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
  
        if (nvme_state_is_live(ns->ana_state))
                nvme_mpath_set_live(ns);
 -      mutex_unlock(&ns->head->lock);
  }
  
  static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@@ -638,45 -638,38 +636,45 @@@ static ssize_t ana_state_show(struct de
  }
  DEVICE_ATTR_RO(ana_state);
  
 -static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
 +static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
                struct nvme_ana_group_desc *desc, void *data)
  {
 -      struct nvme_ns *ns = data;
 +      struct nvme_ana_group_desc *dst = data;
  
 -      if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
 -              nvme_update_ns_ana_state(desc, ns);
 -              return -ENXIO; /* just break out of the loop */
 -      }
 +      if (desc->grpid != dst->grpid)
 +              return 0;
  
 -      return 0;
 +      *dst = *desc;
 +      return -ENXIO; /* just break out of the loop */
  }
  
  void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
  {
        if (nvme_ctrl_use_ana(ns->ctrl)) {
 +              struct nvme_ana_group_desc desc = {
 +                      .grpid = id->anagrpid,
 +                      .state = 0,
 +              };
 +
                mutex_lock(&ns->ctrl->ana_lock);
                ns->ana_grpid = le32_to_cpu(id->anagrpid);
 -              nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
 +              nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
                mutex_unlock(&ns->ctrl->ana_lock);
 +              if (desc.state) {
 +                      /* found the group desc: update */
 +                      nvme_update_ns_ana_state(&desc, ns);
 +              }
        } else {
 -              mutex_lock(&ns->head->lock);
                ns->ana_state = NVME_ANA_OPTIMIZED; 
                nvme_mpath_set_live(ns);
 -              mutex_unlock(&ns->head->lock);
        }
  
        if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
 -              struct backing_dev_info *info =
 -                                      ns->head->disk->queue->backing_dev_info;
 +              struct gendisk *disk = ns->head->disk;
  
 -              info->capabilities |= BDI_CAP_STABLE_WRITES;
 +              if (disk)
 +                      disk->queue->backing_dev_info->capabilities |=
 +                                      BDI_CAP_STABLE_WRITES;
        }
  }
  
@@@ -691,14 -684,6 +689,14 @@@ void nvme_mpath_remove_disk(struct nvme
        kblockd_schedule_work(&head->requeue_work);
        flush_work(&head->requeue_work);
        blk_cleanup_queue(head->disk->queue);
 +      if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 +              /*
 +               * if device_add_disk wasn't called, prevent
 +               * disk release to put a bogus reference on the
 +               * request queue
 +               */
 +              head->disk->queue = NULL;
 +      }
        put_disk(head->disk);
  }
  
diff --combined drivers/nvme/host/nvme.h
@@@ -364,8 -364,6 +364,8 @@@ struct nvme_ns_head 
        spinlock_t              requeue_lock;
        struct work_struct      requeue_work;
        struct mutex            lock;
 +      unsigned long           flags;
 +#define NVME_NSHEAD_DISK_LIVE 0
        struct nvme_ns __rcu    *current_path[];
  #endif
  };
@@@ -474,7 -472,7 +474,7 @@@ static inline u32 nvme_bytes_to_numd(si
        return (len >> 2) - 1;
  }
  
- static inline void nvme_end_request(struct request *req, __le16 status,
+ static inline bool nvme_end_request(struct request *req, __le16 status,
                union nvme_result result)
  {
        struct nvme_request *rq = nvme_req(req);
        rq->result = result;
        /* inject error when permitted by fault injection framework */
        nvme_should_fail(req);
-       blk_mq_complete_request(req);
+       if (unlikely(blk_should_fake_timeout(req->q)))
+               return true;
+       return blk_mq_complete_request_remote(req);
  }
  
  static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
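
nvme_end_request() now returns a bool: false means the request was not completed in a remote context (the fake-timeout check is folded in here as well), so the transport must finish it itself. A minimal illustration of the new calling convention, mirroring the pci/rdma/tcp/loop hunks below (example_handle_cqe is a hypothetical name; nvme_complete_rq() is the core completion path declared in this header):

    static void example_handle_cqe(struct request *req, __le16 status,
                                   union nvme_result result)
    {
            /* false: not completed remotely, finish the request here */
            if (!nvme_end_request(req, status, result))
                    nvme_complete_rq(req);
    }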
@@@ -586,6 -586,7 +588,7 @@@ void nvme_mpath_stop(struct nvme_ctrl *
  bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
  void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
  struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio);
  
  static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
  {
@@@ -604,16 -605,6 +607,16 @@@ static inline void nvme_trace_bio_compl
                trace_block_bio_complete(ns->head->disk->queue, req->bio);
  }
  
 +static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
 +{
 +      struct block_device *bdev = bdget_disk(disk, 0);
 +
 +      if (bdev) {
 +              bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT);
 +              bdput(bdev);
 +      }
 +}
 +
  extern struct device_attribute dev_attr_ana_grpid;
  extern struct device_attribute dev_attr_ana_state;
  extern struct device_attribute subsys_attr_iopolicy;
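
nvme_mpath_update_disk_size() pushes a gendisk's current capacity down to its whole-device block_device via bdget_disk()/bd_set_size()/bdput(). Its call site is not part of this hunk; a hypothetical caller that resizes the shared multipath disk would look roughly like:

    static void example_resize(struct gendisk *mpath_disk, sector_t nr_sectors)
    {
            set_capacity(mpath_disk, nr_sectors);
            nvme_mpath_update_disk_size(mpath_disk);
    }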
@@@ -689,9 -680,6 +692,9 @@@ static inline void nvme_mpath_wait_free
  static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
  {
  }
 +static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
 +{
 +}
  #endif /* CONFIG_NVME_MULTIPATH */
  
  #ifdef CONFIG_NVM
diff --combined drivers/nvme/host/pci.c
@@@ -963,7 -963,8 +963,8 @@@ static inline void nvme_handle_cqe(stru
  
        req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
        trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-       nvme_end_request(req, cqe->status, cqe->result);
+       if (!nvme_end_request(req, cqe->status, cqe->result))
+               nvme_pci_complete_rq(req);
  }
  
  static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
@@@ -1593,7 -1594,7 +1594,7 @@@ static int nvme_alloc_admin_tags(struc
  
                dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
                dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 -              dev->admin_tagset.numa_node = dev_to_node(dev->dev);
 +              dev->admin_tagset.numa_node = dev->ctrl.numa_node;
                dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
                dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
                dev->admin_tagset.driver_data = dev;
@@@ -1669,8 -1670,6 +1670,8 @@@ static int nvme_pci_configure_admin_que
        if (result)
                return result;
  
 +      dev->ctrl.numa_node = dev_to_node(dev->dev);
 +
        nvmeq = &dev->queues[0];
        aqa = nvmeq->q_depth - 1;
        aqa |= aqa << 16;
@@@ -2259,7 -2258,7 +2260,7 @@@ static void nvme_dev_add(struct nvme_de
                if (dev->io_queues[HCTX_TYPE_POLL])
                        dev->tagset.nr_maps++;
                dev->tagset.timeout = NVME_IO_TIMEOUT;
 -              dev->tagset.numa_node = dev_to_node(dev->dev);
 +              dev->tagset.numa_node = dev->ctrl.numa_node;
                dev->tagset.queue_depth =
                                min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
                dev->tagset.cmd_size = sizeof(struct nvme_iod);
diff --combined drivers/nvme/host/rdma.c
@@@ -149,6 -149,7 +149,7 @@@ MODULE_PARM_DESC(register_always
  static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *event);
  static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+ static void nvme_rdma_complete_rq(struct request *rq);
  
  static const struct blk_mq_ops nvme_rdma_mq_ops;
  static const struct blk_mq_ops nvme_rdma_admin_mq_ops;
@@@ -470,7 -471,7 +471,7 @@@ static int nvme_rdma_create_queue_ib(st
         * Spread I/O queues completion vectors according their queue index.
         * Admin queues can always go on completion vector 0.
         */
 -      comp_vector = idx == 0 ? idx : idx - 1;
 +      comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
  
        /* Polling queues need direct cq polling context */
        if (nvme_rdma_poll_queue(queue))
@@@ -1149,6 -1150,16 +1150,16 @@@ static void nvme_rdma_error_recovery(st
        queue_work(nvme_reset_wq, &ctrl->err_work);
  }
  
+ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
+ {
+       struct request *rq = blk_mq_rq_from_pdu(req);
+
+       if (!refcount_dec_and_test(&req->ref))
+               return;
+       if (!nvme_end_request(rq, req->status, req->result))
+               nvme_rdma_complete_rq(rq);
+ }
+
  static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
                const char *op)
  {
@@@ -1173,16 -1184,11 +1184,11 @@@ static void nvme_rdma_inv_rkey_done(str
  {
        struct nvme_rdma_request *req =
                container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
-       struct request *rq = blk_mq_rq_from_pdu(req);
  
-       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
                nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
-               return;
-       }
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
+       else
+               nvme_rdma_end_request(req);
  }
  
  static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@@ -1547,15 -1553,11 +1553,11 @@@ static void nvme_rdma_send_done(struct 
                container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
        struct nvme_rdma_request *req =
                container_of(qe, struct nvme_rdma_request, sqe);
-       struct request *rq = blk_mq_rq_from_pdu(req);
  
-       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+       if (unlikely(wc->status != IB_WC_SUCCESS))
                nvme_rdma_wr_error(cq, wc, "SEND");
-               return;
-       }
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
+       else
+               nvme_rdma_end_request(req);
  }
  
  static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
@@@ -1697,8 -1699,7 +1699,7 @@@ static void nvme_rdma_process_nvme_rsp(
                return;
        }
  
-       if (refcount_dec_and_test(&req->ref))
-               nvme_end_request(rq, req->status, req->result);
+       nvme_rdma_end_request(req);
  }
  
  static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
diff --combined drivers/nvme/host/tcp.c
@@@ -464,7 -464,8 +464,8 @@@ static int nvme_tcp_process_nvme_cqe(st
                return -EINVAL;
        }
  
-       nvme_end_request(rq, cqe->status, cqe->result);
+       if (!nvme_end_request(rq, cqe->status, cqe->result))
+               nvme_complete_rq(rq);
        queue->nr_cqe++;
  
        return 0;
@@@ -654,7 -655,8 +655,8 @@@ static inline void nvme_tcp_end_request
  {
        union nvme_result res = {};
  
-       nvme_end_request(rq, cpu_to_le16(status << 1), res);
+       if (!nvme_end_request(rq, cpu_to_le16(status << 1), res))
+               nvme_complete_rq(rq);
  }
  
  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
@@@ -1532,7 -1534,7 +1534,7 @@@ static struct blk_mq_tag_set *nvme_tcp_
                set->ops = &nvme_tcp_admin_mq_ops;
                set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
                set->reserved_tags = 2; /* connect + keep-alive */
 -              set->numa_node = NUMA_NO_NODE;
 +              set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_BLOCKING;
                set->cmd_size = sizeof(struct nvme_tcp_request);
                set->driver_data = ctrl;
                set->ops = &nvme_tcp_mq_ops;
                set->queue_depth = nctrl->sqsize + 1;
                set->reserved_tags = 1; /* fabric connect */
 -              set->numa_node = NUMA_NO_NODE;
 +              set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
                set->cmd_size = sizeof(struct nvme_tcp_request);
                set->driver_data = ctrl;
diff --combined drivers/nvme/target/loop.c
@@@ -116,7 -116,8 +116,8 @@@ static void nvme_loop_queue_response(st
                        return;
                }
  
-               nvme_end_request(rq, cqe->status, cqe->result);
+               if (!nvme_end_request(rq, cqe->status, cqe->result))
+                       nvme_loop_complete_rq(rq);
        }
  }
  
@@@ -340,7 -341,7 +341,7 @@@ static int nvme_loop_configure_admin_qu
        ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
        ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
        ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
 -      ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
 +      ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
                NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
        ctrl->admin_tag_set.driver_data = ctrl;
@@@ -512,7 -513,7 +513,7 @@@ static int nvme_loop_create_io_queues(s
        ctrl->tag_set.ops = &nvme_loop_mq_ops;
        ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
        ctrl->tag_set.reserved_tags = 1; /* fabric connect */
 -      ctrl->tag_set.numa_node = NUMA_NO_NODE;
 +      ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
                NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
diff --combined fs/btrfs/disk-io.c
@@@ -1616,27 -1616,6 +1616,6 @@@ fail
        return ERR_PTR(ret);
  }
  
- static int btrfs_congested_fn(void *congested_data, int bdi_bits)
- {
-       struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
-       int ret = 0;
-       struct btrfs_device *device;
-       struct backing_dev_info *bdi;
-       rcu_read_lock();
-       list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
-               if (!device->bdev)
-                       continue;
-               bdi = device->bdev->bd_bdi;
-               if (bdi_congested(bdi, bdi_bits)) {
-                       ret = 1;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-       return ret;
- }
  /*
   * called by the kthread helper functions to finally call the bio end_io
   * functions.  This is where read checksum verification actually happens
@@@ -2593,12 -2572,10 +2572,12 @@@ static int __cold init_tree_roots(struc
                    !extent_buffer_uptodate(tree_root->node)) {
                        handle_error = true;
  
 -                      if (IS_ERR(tree_root->node))
 +                      if (IS_ERR(tree_root->node)) {
                                ret = PTR_ERR(tree_root->node);
 -                      else if (!extent_buffer_uptodate(tree_root->node))
 +                              tree_root->node = NULL;
 +                      } else if (!extent_buffer_uptodate(tree_root->node)) {
                                ret = -EUCLEAN;
 +                      }
  
                        btrfs_warn(fs_info, "failed to read tree root");
                        continue;
@@@ -3053,8 -3030,6 +3032,6 @@@ int __cold open_ctree(struct super_bloc
                goto fail_sb_buffer;
        }
  
-       sb->s_bdi->congested_fn = btrfs_congested_fn;
-       sb->s_bdi->congested_data = fs_info;
        sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
        sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
        sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
diff --combined include/linux/blkdev.h
@@@ -4,9 -4,6 +4,6 @@@
  
  #include <linux/sched.h>
  #include <linux/sched/clock.h>
- #ifdef CONFIG_BLOCK
  #include <linux/major.h>
  #include <linux/genhd.h>
  #include <linux/list.h>
@@@ -289,8 -286,6 +286,6 @@@ static inline unsigned short req_get_io
  
  struct blk_queue_ctx;
  
- typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio);
  struct bio_vec;
  
  enum blk_eh_timer_return {
@@@ -401,8 -396,6 +396,6 @@@ struct request_queue 
        struct blk_queue_stats  *stats;
        struct rq_qos           *rq_qos;
  
-       make_request_fn         *make_request_fn;
        const struct blk_mq_ops *mq_ops;
  
        /* sw queues */
        unsigned int            sg_timeout;
        unsigned int            sg_reserved_size;
        int                     node;
+       struct mutex            debugfs_mutex;
  #ifdef CONFIG_BLK_DEV_IO_TRACE
        struct blk_trace __rcu  *blk_trace;
-       struct mutex            blk_trace_mutex;
  #endif
        /*
         * for flush operations
        struct list_head        tag_set_list;
        struct bio_set          bio_split;
  
- #ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *debugfs_dir;
+ #ifdef CONFIG_BLK_DEBUG_FS
        struct dentry           *sched_debugfs_dir;
        struct dentry           *rqos_debugfs_dir;
  #endif
  
        size_t                  cmd_size;
  
-       struct work_struct      release_work;
  #define BLK_MAX_WRITE_HINTS   5
        u64                     write_hints[BLK_MAX_WRITE_HINTS];
  };
  
 +/* Keep blk_queue_flag_name[] in sync with the definitions below */
  #define QUEUE_FLAG_STOPPED    0       /* queue is stopped */
  #define QUEUE_FLAG_DYING      1       /* queue being torn down */
  #define QUEUE_FLAG_NOMERGES     3     /* disable merge attempts */
@@@ -861,8 -852,7 +853,7 @@@ static inline void rq_flush_dcache_page
  
  extern int blk_register_queue(struct gendisk *disk);
  extern void blk_unregister_queue(struct gendisk *disk);
- extern blk_qc_t generic_make_request(struct bio *bio);
- extern blk_qc_t direct_make_request(struct bio *bio);
+ blk_qc_t submit_bio_noacct(struct bio *bio);
  extern void blk_rq_init(struct request_queue *q, struct request *rq);
  extern void blk_put_request(struct request *);
  extern struct request *blk_get_request(struct request_queue *, unsigned int op,
@@@ -876,7 -866,7 +867,7 @@@ extern void blk_rq_unprep_clone(struct 
  extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
  extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
- extern void blk_queue_split(struct request_queue *, struct bio **);
+ extern void blk_queue_split(struct bio **);
  extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
  extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
                              unsigned int, void __user *);
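
generic_make_request() and direct_make_request() are replaced by submit_bio_noacct(), and blk_queue_split() loses its request_queue argument. A stacking driver that remaps a bio to a lower device would now look roughly like this (sketch only; example_remap is a hypothetical helper):

    static blk_qc_t example_remap(struct bio *bio, struct block_device *lower)
    {
            bio_set_dev(bio, lower);
            return submit_bio_noacct(bio);
    }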
@@@ -1079,7 -1069,6 +1070,6 @@@ void blk_steal_bios(struct bio_list *li
  extern bool blk_update_request(struct request *rq, blk_status_t error,
                               unsigned int nr_bytes);
  
- extern void __blk_complete_request(struct request *);
  extern void blk_abort_request(struct request *);
  
  /*
@@@ -1166,13 -1155,13 +1156,13 @@@ static inline int blk_rq_map_sg(struct 
        return __blk_rq_map_sg(q, rq, sglist, &last_sg);
  }
  extern void blk_dump_rq_flags(struct request *, char *);
- extern long nr_blockdev_pages(void);
  
  bool __must_check blk_get_queue(struct request_queue *);
- struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id);
+ struct request_queue *blk_alloc_queue(int node_id);
  extern void blk_put_queue(struct request_queue *);
  extern void blk_set_queue_dying(struct request_queue *);
  
+ #ifdef CONFIG_BLOCK
  /*
   * blk_plug permits building a queue of related requests by holding the I/O
   * fragments for a short period. This allows merging of sequential requests
@@@ -1232,9 -1221,47 +1222,47 @@@ static inline bool blk_needs_flush_plug
                 !list_empty(&plug->cb_list));
  }
  
+ int blkdev_issue_flush(struct block_device *, gfp_t);
+ long nr_blockdev_pages(void);
+ #else /* CONFIG_BLOCK */
+ struct blk_plug {
+ };
+ static inline void blk_start_plug(struct blk_plug *plug)
+ {
+ }
+ static inline void blk_finish_plug(struct blk_plug *plug)
+ {
+ }
+ static inline void blk_flush_plug(struct task_struct *task)
+ {
+ }
+ static inline void blk_schedule_flush_plug(struct task_struct *task)
+ {
+ }
+ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
+ {
+       return false;
+ }
+ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
+ {
+       return 0;
+ }
+ static inline long nr_blockdev_pages(void)
+ {
+       return 0;
+ }
+ #endif /* CONFIG_BLOCK */
  extern void blk_io_schedule(void);
  
- int blkdev_issue_flush(struct block_device *, gfp_t);
  extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, struct page *page);
  
@@@ -1516,7 -1543,7 +1544,7 @@@ static inline unsigned int blksize_bits
  
  static inline unsigned int block_size(struct block_device *bdev)
  {
-       return bdev->bd_block_size;
+       return 1 << bdev->bd_inode->i_blkbits;
  }
  
  int kblockd_schedule_work(struct work_struct *work);
@@@ -1746,6 -1773,7 +1774,7 @@@ static inline void blk_ksm_unregister(s
  
  
  struct block_device_operations {
+       blk_qc_t (*submit_bio) (struct bio *bio);
        int (*open) (struct block_device *, fmode_t);
        void (*release) (struct gendisk *, fmode_t);
        int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        unsigned int (*check_events) (struct gendisk *disk,
                                      unsigned int clearing);
-       /* ->media_changed() is DEPRECATED, use ->check_events() instead */
-       int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
        int (*getgeo)(struct block_device *, struct hd_geometry *);
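
struct block_device_operations gains ->submit_bio: bio-based drivers implement it there instead of registering a make_request_fn, and allocate their queue with the reduced blk_alloc_queue(node). A minimal sketch of the new pattern (all example_* names are hypothetical; a real driver would actually service the bio):

    static blk_qc_t example_submit_bio(struct bio *bio)
    {
            /* a real driver would map/transfer the data here */
            bio_endio(bio);
            return BLK_QC_T_NONE;
    }

    static const struct block_device_operations example_fops = {
            .owner          = THIS_MODULE,
            .submit_bio     = example_submit_bio,
    };

    static int example_init_queue(struct gendisk *disk)
    {
            struct request_queue *q = blk_alloc_queue(NUMA_NO_NODE);

            if (!q)
                    return -ENOMEM;
            disk->queue = q;
            disk->fops = &example_fops;
            return 0;
    }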
@@@ -1834,52 -1860,6 +1861,6 @@@ static inline bool blk_req_can_dispatch
  }
  #endif /* CONFIG_BLK_DEV_ZONED */
  
- #else /* CONFIG_BLOCK */
- struct block_device;
- /*
-  * stubs for when the block layer is configured out
-  */
- #define buffer_heads_over_limit 0
- static inline long nr_blockdev_pages(void)
- {
-       return 0;
- }
- struct blk_plug {
- };
- static inline void blk_start_plug(struct blk_plug *plug)
- {
- }
- static inline void blk_finish_plug(struct blk_plug *plug)
- {
- }
- static inline void blk_flush_plug(struct task_struct *task)
- {
- }
- static inline void blk_schedule_flush_plug(struct task_struct *task)
- {
- }
- static inline bool blk_needs_flush_plug(struct task_struct *tsk)
- {
-       return false;
- }
- static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask)
- {
-       return 0;
- }
- #endif /* CONFIG_BLOCK */
  static inline void blk_wake_io_task(struct task_struct *waiter)
  {
        /*
                wake_up_process(waiter);
  }
  
- #ifdef CONFIG_BLOCK
  unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
                unsigned int op);
  void disk_end_io_acct(struct gendisk *disk, unsigned int op,
@@@ -1919,6 -1898,53 +1899,53 @@@ static inline void bio_end_io_acct(stru
  {
        return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time);
  }
- #endif /* CONFIG_BLOCK */
  
+ int bdev_read_only(struct block_device *bdev);
+ int set_blocksize(struct block_device *bdev, int size);
+ const char *bdevname(struct block_device *bdev, char *buffer);
+ struct block_device *lookup_bdev(const char *);
+ void blkdev_show(struct seq_file *seqf, off_t offset);
+ #define BDEVNAME_SIZE 32      /* Largest string for a blockdev identifier */
+ #define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
+ #ifdef CONFIG_BLOCK
+ #define BLKDEV_MAJOR_MAX      512
+ #else
+ #define BLKDEV_MAJOR_MAX      0
+ #endif
+ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+               void *holder);
+ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder);
+ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
+               void *holder);
+ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+               void *holder);
+ void blkdev_put(struct block_device *bdev, fmode_t mode);
+ struct block_device *I_BDEV(struct inode *inode);
+ struct block_device *bdget(dev_t);
+ struct block_device *bdgrab(struct block_device *bdev);
+ void bdput(struct block_device *);
+ #ifdef CONFIG_BLOCK
+ void invalidate_bdev(struct block_device *bdev);
+ int sync_blockdev(struct block_device *bdev);
+ #else
+ static inline void invalidate_bdev(struct block_device *bdev)
+ {
+ }
+ static inline int sync_blockdev(struct block_device *bdev)
+ {
+       return 0;
+ }
  #endif
+ int fsync_bdev(struct block_device *bdev);
+ struct super_block *freeze_bdev(struct block_device *bdev);
+ int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+ #endif /* _LINUX_BLKDEV_H */
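
The block-device open/claim API (blkdev_get_by_path(), blkdev_get_by_dev(), bd_prepare_to_claim(), blkdev_put(), bdget()/bdput(), ...) now lives here instead of fs.h. A small sketch of an exclusive open and matching release using these declarations (example_* names are hypothetical; the holder is any unique cookie, typically the claiming driver's private structure):

    static struct block_device *example_open_exclusive(const char *path,
                                                       void *holder)
    {
            /* returns an ERR_PTR() on failure; FMODE_EXCL requires a holder */
            return blkdev_get_by_path(path,
                            FMODE_READ | FMODE_WRITE | FMODE_EXCL, holder);
    }

    static void example_close_exclusive(struct block_device *bdev)
    {
            blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
    }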
diff --combined include/linux/fs.h
@@@ -315,7 -315,6 +315,7 @@@ enum rw_hint 
  #define IOCB_SYNC             (1 << 5)
  #define IOCB_WRITE            (1 << 6)
  #define IOCB_NOWAIT           (1 << 7)
 +#define IOCB_NOIO             (1 << 9)
  
  struct kiocb {
        struct file             *ki_filp;
@@@ -471,45 -470,6 +471,6 @@@ struct address_space 
         * must be enforced here for CRIS, to let the least significant bit
         * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
         */
- struct request_queue;
- struct block_device {
-       dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
-       int                     bd_openers;
-       struct inode *          bd_inode;       /* will die */
-       struct super_block *    bd_super;
-       struct mutex            bd_mutex;       /* open/close mutex */
-       void *                  bd_claiming;
-       void *                  bd_holder;
-       int                     bd_holders;
-       bool                    bd_write_holder;
- #ifdef CONFIG_SYSFS
-       struct list_head        bd_holder_disks;
- #endif
-       struct block_device *   bd_contains;
-       unsigned                bd_block_size;
-       u8                      bd_partno;
-       struct hd_struct *      bd_part;
-       /* number of times partitions within this device have been opened. */
-       unsigned                bd_part_count;
-       int                     bd_invalidated;
-       struct gendisk *        bd_disk;
-       struct request_queue *  bd_queue;
-       struct backing_dev_info *bd_bdi;
-       struct list_head        bd_list;
-       /*
-        * Private data.  You must have bd_claim'ed the block_device
-        * to use this.  NOTE:  bd_claim allows an owner to claim
-        * the same device multiple times, the owner must take special
-        * care to not mess up bd_private for that case.
-        */
-       unsigned long           bd_private;
-       /* The counter of freeze processes */
-       int                     bd_fsfreeze_count;
-       /* Mutex for freeze */
-       struct mutex            bd_fsfreeze_mutex;
- } __randomize_layout;
  
  /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
  #define PAGECACHE_TAG_DIRTY   XA_MARK_0
@@@ -908,8 -868,6 +869,6 @@@ static inline unsigned imajor(const str
        return MAJOR(inode->i_rdev);
  }
  
- extern struct block_device *I_BDEV(struct inode *inode);
  struct fown_struct {
        rwlock_t lock;          /* protects pid, uid, euid fields */
        struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
@@@ -1775,14 -1733,6 +1734,6 @@@ struct dir_context 
        loff_t pos;
  };
  
- struct block_device_operations;
- /* These macros are for out of kernel modules to test that
-  * the kernel supports the unlocked_ioctl and compat_ioctl
-  * fields in struct file_operations. */
- #define HAVE_COMPAT_IOCTL 1
- #define HAVE_UNLOCKED_IOCTL 1
  /*
   * These flags let !MMU mmap() govern direct device mapping vs immediate
   * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@@ -1918,6 -1868,7 +1869,6 @@@ ssize_t rw_copy_check_uvector(int type
                              struct iovec *fast_pointer,
                              struct iovec **ret_pointer);
  
 -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
  extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -2264,18 -2215,9 +2215,9 @@@ struct file_system_type 
  
  #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
  
- #ifdef CONFIG_BLOCK
  extern struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int));
- #else
- static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data,
-       int (*fill_super)(struct super_block *, void *, int))
- {
-       return ERR_PTR(-ENODEV);
- }
- #endif
  extern struct dentry *mount_single(struct file_system_type *fs_type,
        int flags, void *data,
        int (*fill_super)(struct super_block *, void *, int));
@@@ -2284,14 -2226,7 +2226,7 @@@ extern struct dentry *mount_nodev(struc
        int (*fill_super)(struct super_block *, void *, int));
  extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
  void generic_shutdown_super(struct super_block *sb);
- #ifdef CONFIG_BLOCK
  void kill_block_super(struct super_block *sb);
- #else
- static inline void kill_block_super(struct super_block *sb)
- {
-       BUG();
- }
- #endif
  void kill_anon_super(struct super_block *sb);
  void kill_litter_super(struct super_block *sb);
  void deactivate_super(struct super_block *sb);
@@@ -2581,93 -2516,16 +2516,16 @@@ extern struct kmem_cache *names_cachep
  #define __getname()           kmem_cache_alloc(names_cachep, GFP_KERNEL)
  #define __putname(name)               kmem_cache_free(names_cachep, (void *)(name))
  
- #ifdef CONFIG_BLOCK
- extern int register_blkdev(unsigned int, const char *);
- extern void unregister_blkdev(unsigned int, const char *);
- extern struct block_device *bdget(dev_t);
- extern struct block_device *bdgrab(struct block_device *bdev);
- extern void bd_set_size(struct block_device *, loff_t size);
- extern void bd_forget(struct inode *inode);
- extern void bdput(struct block_device *);
- extern void invalidate_bdev(struct block_device *);
- extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
- extern int sync_blockdev(struct block_device *bdev);
- extern struct super_block *freeze_bdev(struct block_device *);
- extern void emergency_thaw_all(void);
- extern void emergency_thaw_bdev(struct super_block *sb);
- extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
- extern int fsync_bdev(struct block_device *);
  extern struct super_block *blockdev_superblock;
  static inline bool sb_is_blkdev_sb(struct super_block *sb)
  {
-       return sb == blockdev_superblock;
- }
- #else
- static inline void bd_forget(struct inode *inode) {}
- static inline int sync_blockdev(struct block_device *bdev) { return 0; }
- static inline void invalidate_bdev(struct block_device *bdev) {}
- static inline struct super_block *freeze_bdev(struct block_device *sb)
- {
-       return NULL;
- }
- static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
- {
-       return 0;
+       return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
  }
  
- static inline int emergency_thaw_bdev(struct super_block *sb)
- {
-       return 0;
- }
- static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
- {
- }
- static inline bool sb_is_blkdev_sb(struct super_block *sb)
- {
-       return false;
- }
- #endif
+ void emergency_thaw_all(void);
  extern int sync_filesystem(struct super_block *);
  extern const struct file_operations def_blk_fops;
  extern const struct file_operations def_chr_fops;
- #ifdef CONFIG_BLOCK
- extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
- extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
- extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
-                                              void *holder);
- extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
-                                             void *holder);
- extern struct block_device *bd_start_claiming(struct block_device *bdev,
-                                             void *holder);
- extern void bd_finish_claiming(struct block_device *bdev,
-                              struct block_device *whole, void *holder);
- extern void bd_abort_claiming(struct block_device *bdev,
-                             struct block_device *whole, void *holder);
- extern void blkdev_put(struct block_device *bdev, fmode_t mode);
- #ifdef CONFIG_SYSFS
- extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
- extern void bd_unlink_disk_holder(struct block_device *bdev,
-                                 struct gendisk *disk);
- #else
- static inline int bd_link_disk_holder(struct block_device *bdev,
-                                     struct gendisk *disk)
- {
-       return 0;
- }
- static inline void bd_unlink_disk_holder(struct block_device *bdev,
-                                        struct gendisk *disk)
- {
- }
- #endif
- #endif
  
  /* fs/char_dev.c */
  #define CHRDEV_MAJOR_MAX 512
@@@ -2698,31 -2556,12 +2556,12 @@@ static inline void unregister_chrdev(un
        __unregister_chrdev(major, 0, 256, name);
  }
  
- /* fs/block_dev.c */
- #define BDEVNAME_SIZE 32      /* Largest string for a blockdev identifier */
- #define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
- #ifdef CONFIG_BLOCK
- #define BLKDEV_MAJOR_MAX      512
- extern const char *bdevname(struct block_device *bdev, char *buffer);
- extern struct block_device *lookup_bdev(const char *);
- extern void blkdev_show(struct seq_file *,off_t);
- #else
- #define BLKDEV_MAJOR_MAX      0
- #endif
  extern void init_special_inode(struct inode *, umode_t, dev_t);
  
  /* Invalid inode operations -- fs/bad_inode.c */
  extern void make_bad_inode(struct inode *);
  extern bool is_bad_inode(struct inode *);
  
- #ifdef CONFIG_BLOCK
- extern int revalidate_disk(struct gendisk *);
- extern int check_disk_change(struct block_device *);
- extern int __invalidate_device(struct block_device *, bool);
- #endif
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                        pgoff_t start, pgoff_t end);
  
@@@ -3033,7 -2872,6 +2872,7 @@@ extern int kernel_read_file_from_path_i
  extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
                                    enum kernel_read_file_id);
  extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
 +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
  extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
  extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
  extern struct file * open_exec(const char *);
@@@ -3123,10 -2961,6 +2962,6 @@@ static inline void remove_inode_hash(st
  
  extern void inode_sb_list_add(struct inode *inode);
  
- #ifdef CONFIG_BLOCK
- extern int bdev_read_only(struct block_device *);
- #endif
- extern int set_blocksize(struct block_device *, int);
  extern int sb_set_blocksize(struct super_block *, int);
  extern int sb_min_blocksize(struct super_block *, int);