Merge tag 'for-5.15/block-2021-08-30' of git://git.kernel.dk/linux-block
author  Linus Torvalds <torvalds@linux-foundation.org>
        Tue, 31 Aug 2021 01:52:11 +0000 (18:52 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
        Tue, 31 Aug 2021 01:52:11 +0000 (18:52 -0700)
Pull block updates from Jens Axboe:
 "Nothing major in here - lots of good cleanups and tech debt handling,
  which is also evident in the diffstats. In particular:

   - Add disk sequence numbers (Matteo)

   - Discard merge fix (Ming)

   - Relax disk zoned reporting restrictions (Niklas)

   - Bio error handling zoned leak fix (Pavel)

   - Start of proper add_disk() error handling (Luis, Christoph)

   - blk crypto fix (Eric)

   - Non-standard GPT location support (Dmitry)

   - IO priority improvements and cleanups (Damien)

   - blk-throtl improvements (Chunguang)

   - diskstats_show() stack reduction (Abd-Alrhman)

   - Loop scheduler selection (Bart)

   - Switch block layer to use kmap_local_page() (Christoph)

   - Remove obsolete disk_name helper (Christoph)

   - block_device refcounting improvements (Christoph)

   - Ensure gendisk always has a request queue reference (Christoph)

   - Misc fixes/cleanups (Shaokun, Oliver, Guoqing)"

* tag 'for-5.15/block-2021-08-30' of git://git.kernel.dk/linux-block: (129 commits)
  sg: pass the device name to blk_trace_setup
  block, bfq: cleanup the repeated declaration
  blk-crypto: fix check for too-large dun_bytes
  blk-zoned: allow BLKREPORTZONE without CAP_SYS_ADMIN
  blk-zoned: allow zone management send operations without CAP_SYS_ADMIN
  block: mark blkdev_fsync static
  block: refine the disk_live check in del_gendisk
  mmc: sdhci-tegra: Enable MMC_CAP2_ALT_GPT_TEGRA
  mmc: block: Support alternative_gpt_sector() operation
  partitions/efi: Support non-standard GPT location
  block: Add alternative_gpt_sector() operation
  bio: fix page leak bio_add_hw_page failure
  block: remove CONFIG_DEBUG_BLOCK_EXT_DEVT
  block: remove a pointless call to MINOR() in device_add_disk
  null_blk: add error handling support for add_disk()
  virtio_blk: add error handling support for add_disk()
  block: add error handling for device_add_disk / add_disk
  block: return errors from disk_alloc_events
  block: return errors from blk_integrity_add
  block: call blk_register_queue earlier in device_add_disk
  ...

14 files changed:
block/Makefile
block/blk-cgroup.c
block/blk-core.c
block/blk-iocost.c
block/blk-iolatency.c
block/blk-mq.c
block/blk.h
block/mq-deadline.c
block/partitions/ldm.c
drivers/block/virtio_blk.c
drivers/s390/block/dasd_eckd.c
drivers/scsi/sr.c
include/linux/fs.h
lib/Kconfig.debug

diff --combined block/Makefile
@@@ -22,11 -22,12 +22,10 @@@ obj-$(CONFIG_BLK_CGROUP_IOPRIO)    += blk-
  obj-$(CONFIG_BLK_CGROUP_IOLATENCY)    += blk-iolatency.o
  obj-$(CONFIG_BLK_CGROUP_IOCOST)       += blk-iocost.o
  obj-$(CONFIG_MQ_IOSCHED_DEADLINE)     += mq-deadline.o
 -mq-deadline-y += mq-deadline-main.o
 -mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o
  obj-$(CONFIG_MQ_IOSCHED_KYBER)        += kyber-iosched.o
  bfq-y                         := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
  obj-$(CONFIG_IOSCHED_BFQ)     += bfq.o
  
- obj-$(CONFIG_BLK_CMDLINE_PARSER)      += cmdline-parser.o
  obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
  obj-$(CONFIG_BLK_DEV_INTEGRITY_T10)   += t10-pi.o
  obj-$(CONFIG_BLK_MQ_PCI)      += blk-mq-pci.o
@@@ -40,3 -41,4 +39,4 @@@ obj-$(CONFIG_BLK_SED_OPAL)    += sed-opal.
  obj-$(CONFIG_BLK_PM)          += blk-pm.o
  obj-$(CONFIG_BLK_INLINE_ENCRYPTION)   += keyslot-manager.o blk-crypto.o
  obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK)  += blk-crypto-fallback.o
+ obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
diff --combined block/blk-cgroup.c
@@@ -489,10 -489,9 +489,9 @@@ static int blkcg_reset_stats(struct cgr
  
  const char *blkg_dev_name(struct blkcg_gq *blkg)
  {
-       /* some drivers (floppy) instantiate a queue w/o disk registered */
-       if (blkg->q->backing_dev_info->dev)
-               return bdi_dev_name(blkg->q->backing_dev_info);
-       return NULL;
+       if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
+               return NULL;
+       return bdi_dev_name(blkg->q->disk->bdi);
  }
  
  /**
@@@ -790,7 -789,6 +789,7 @@@ static void blkcg_rstat_flush(struct cg
                struct blkcg_gq *parent = blkg->parent;
                struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
                struct blkg_iostat cur, delta;
 +              unsigned long flags;
                unsigned int seq;
  
                /* fetch the current per-cpu values */
                } while (u64_stats_fetch_retry(&bisc->sync, seq));
  
                /* propagate percpu delta to global */
 -              u64_stats_update_begin(&blkg->iostat.sync);
 +              flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
                blkg_iostat_set(&delta, &cur);
                blkg_iostat_sub(&delta, &bisc->last);
                blkg_iostat_add(&blkg->iostat.cur, &delta);
                blkg_iostat_add(&bisc->last, &delta);
 -              u64_stats_update_end(&blkg->iostat.sync);
 +              u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
  
                /* propagate global delta to parent (unless that's root) */
                if (parent && parent->parent) {
 -                      u64_stats_update_begin(&parent->iostat.sync);
 +                      flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
                        blkg_iostat_set(&delta, &blkg->iostat.cur);
                        blkg_iostat_sub(&delta, &blkg->iostat.last);
                        blkg_iostat_add(&parent->iostat.cur, &delta);
                        blkg_iostat_add(&blkg->iostat.last, &delta);
 -                      u64_stats_update_end(&parent->iostat.sync);
 +                      u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
                }
        }
  
@@@ -849,7 -847,6 +848,7 @@@ static void blkcg_fill_root_iostats(voi
                memset(&tmp, 0, sizeof(tmp));
                for_each_possible_cpu(cpu) {
                        struct disk_stats *cpu_dkstats;
 +                      unsigned long flags;
  
                        cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
                        tmp.ios[BLKG_IOSTAT_READ] +=
                        tmp.bytes[BLKG_IOSTAT_DISCARD] +=
                                cpu_dkstats->sectors[STAT_DISCARD] << 9;
  
 -                      u64_stats_update_begin(&blkg->iostat.sync);
 +                      flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
                        blkg_iostat_set(&blkg->iostat.cur, &tmp);
 -                      u64_stats_update_end(&blkg->iostat.sync);
 +                      u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
                }
        }
  }
  
- static int blkcg_print_stat(struct seq_file *sf, void *v)
+ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
  {
-       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
-       struct blkcg_gq *blkg;
-       if (!seq_css(sf)->parent)
-               blkcg_fill_root_iostats();
-       else
-               cgroup_rstat_flush(blkcg->css.cgroup);
+       struct blkg_iostat_set *bis = &blkg->iostat;
+       u64 rbytes, wbytes, rios, wios, dbytes, dios;
+       bool has_stats = false;
+       const char *dname;
+       unsigned seq;
+       int i;
  
-       rcu_read_lock();
+       if (!blkg->online)
+               return;
  
-       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
-               struct blkg_iostat_set *bis = &blkg->iostat;
-               const char *dname;
-               char *buf;
-               u64 rbytes, wbytes, rios, wios, dbytes, dios;
-               size_t size = seq_get_buf(sf, &buf), off = 0;
-               int i;
-               bool has_stats = false;
-               unsigned seq;
+       dname = blkg_dev_name(blkg);
+       if (!dname)
+               return;
  
-               spin_lock_irq(&blkg->q->queue_lock);
+       seq_printf(s, "%s ", dname);
  
-               if (!blkg->online)
-                       goto skip;
+       do {
+               seq = u64_stats_fetch_begin(&bis->sync);
  
-               dname = blkg_dev_name(blkg);
-               if (!dname)
-                       goto skip;
+               rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+               wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+               dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+               rios = bis->cur.ios[BLKG_IOSTAT_READ];
+               wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+               dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+       } while (u64_stats_fetch_retry(&bis->sync, seq));
  
-               /*
-                * Hooray string manipulation, count is the size written NOT
-                * INCLUDING THE \0, so size is now count+1 less than what we
-                * had before, but we want to start writing the next bit from
-                * the \0 so we only add count to buf.
-                */
-               off += scnprintf(buf+off, size-off, "%s ", dname);
+       if (rbytes || wbytes || rios || wios) {
+               has_stats = true;
+               seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
+                       rbytes, wbytes, rios, wios,
+                       dbytes, dios);
+       }
  
-               do {
-                       seq = u64_stats_fetch_begin(&bis->sync);
+       if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+               has_stats = true;
+               seq_printf(s, " use_delay=%d delay_nsec=%llu",
+                       atomic_read(&blkg->use_delay),
+                       atomic64_read(&blkg->delay_nsec));
+       }
  
-                       rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
-                       wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
-                       dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
-                       rios = bis->cur.ios[BLKG_IOSTAT_READ];
-                       wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
-                       dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
-               } while (u64_stats_fetch_retry(&bis->sync, seq));
+       for (i = 0; i < BLKCG_MAX_POLS; i++) {
+               struct blkcg_policy *pol = blkcg_policy[i];
  
-               if (rbytes || wbytes || rios || wios) {
-                       has_stats = true;
-                       off += scnprintf(buf+off, size-off,
-                                        "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
-                                        rbytes, wbytes, rios, wios,
-                                        dbytes, dios);
-               }
+               if (!blkg->pd[i] || !pol->pd_stat_fn)
+                       continue;
  
-               if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
+               if (pol->pd_stat_fn(blkg->pd[i], s))
                        has_stats = true;
-                       off += scnprintf(buf+off, size-off,
-                                        " use_delay=%d delay_nsec=%llu",
-                                        atomic_read(&blkg->use_delay),
-                                       (unsigned long long)atomic64_read(&blkg->delay_nsec));
-               }
+       }
  
-               for (i = 0; i < BLKCG_MAX_POLS; i++) {
-                       struct blkcg_policy *pol = blkcg_policy[i];
-                       size_t written;
+       if (has_stats)
+               seq_printf(s, "\n");
+ }
  
-                       if (!blkg->pd[i] || !pol->pd_stat_fn)
-                               continue;
+ static int blkcg_print_stat(struct seq_file *sf, void *v)
+ {
+       struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+       struct blkcg_gq *blkg;
  
-                       written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
-                       if (written)
-                               has_stats = true;
-                       off += written;
-               }
+       if (!seq_css(sf)->parent)
+               blkcg_fill_root_iostats();
+       else
+               cgroup_rstat_flush(blkcg->css.cgroup);
  
-               if (has_stats) {
-                       if (off < size - 1) {
-                               off += scnprintf(buf+off, size-off, "\n");
-                               seq_commit(sf, off);
-                       } else {
-                               seq_commit(sf, -1);
-                       }
-               }
-       skip:
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+               spin_lock_irq(&blkg->q->queue_lock);
+               blkcg_print_one_stat(blkg, sf);
                spin_unlock_irq(&blkg->q->queue_lock);
        }
        rcu_read_unlock();
        return 0;
  }
diff --combined block/blk-core.c
@@@ -14,7 -14,6 +14,6 @@@
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
- #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/blk-mq.h>
@@@ -122,6 -121,7 +121,6 @@@ void blk_rq_init(struct request_queue *
        rq->internal_tag = BLK_MQ_NO_TAG;
        rq->start_time_ns = ktime_get_ns();
        rq->part = NULL;
 -      refcount_set(&rq->ref, 1);
        blk_crypto_rq_set_defaults(rq);
  }
  EXPORT_SYMBOL(blk_rq_init);
@@@ -393,10 -393,7 +392,7 @@@ void blk_cleanup_queue(struct request_q
        /* for synchronous bio-based driver finish in-flight integrity i/o */
        blk_flush_integrity();
  
-       /* @q won't process any more request, flush async actions */
-       del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
        blk_sync_queue(q);
        if (queue_is_mq(q))
                blk_mq_exit_queue(q);
  
@@@ -533,20 -530,14 +529,14 @@@ struct request_queue *blk_alloc_queue(i
        if (ret)
                goto fail_id;
  
-       q->backing_dev_info = bdi_alloc(node_id);
-       if (!q->backing_dev_info)
-               goto fail_split;
        q->stats = blk_alloc_queue_stats();
        if (!q->stats)
-               goto fail_stats;
+               goto fail_split;
  
        q->node = node_id;
  
        atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
  
-       timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
-                   laptop_mode_timer_fn, 0);
        timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
        INIT_WORK(&q->timeout_work, blk_timeout_work);
        INIT_LIST_HEAD(&q->icq_list);
        if (percpu_ref_init(&q->q_usage_counter,
                                blk_queue_usage_counter_release,
                                PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-               goto fail_bdi;
+               goto fail_stats;
  
        if (blkcg_init_queue(q))
                goto fail_ref;
  
  fail_ref:
        percpu_ref_exit(&q->q_usage_counter);
- fail_bdi:
-       blk_free_queue_stats(q->stats);
  fail_stats:
-       bdi_put(q->backing_dev_info);
+       blk_free_queue_stats(q->stats);
  fail_split:
        bioset_exit(&q->bio_split);
  fail_id:
diff --combined block/blk-iocost.c
@@@ -2988,34 -2988,29 +2988,29 @@@ static void ioc_pd_free(struct blkg_pol
        kfree(iocg);
  }
  
- static size_t ioc_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size)
+ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
  {
        struct ioc_gq *iocg = pd_to_iocg(pd);
        struct ioc *ioc = iocg->ioc;
-       size_t pos = 0;
  
        if (!ioc->enabled)
-               return 0;
+               return false;
  
        if (iocg->level == 0) {
                unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
                        ioc->vtime_base_rate * 10000,
                        VTIME_PER_USEC);
-               pos += scnprintf(buf + pos, size - pos, " cost.vrate=%u.%02u",
-                                 vp10k / 100, vp10k % 100);
+               seq_printf(s, " cost.vrate=%u.%02u", vp10k / 100, vp10k % 100);
        }
  
-       pos += scnprintf(buf + pos, size - pos, " cost.usage=%llu",
-                        iocg->last_stat.usage_us);
+       seq_printf(s, " cost.usage=%llu", iocg->last_stat.usage_us);
  
        if (blkcg_debug_stats)
-               pos += scnprintf(buf + pos, size - pos,
-                                " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
-                                iocg->last_stat.wait_us,
-                                iocg->last_stat.indebt_us,
-                                iocg->last_stat.indelay_us);
-       return pos;
+               seq_printf(s, " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
+                       iocg->last_stat.wait_us,
+                       iocg->last_stat.indebt_us,
+                       iocg->last_stat.indelay_us);
+       return true;
  }
  
  static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@@ -3061,19 -3056,19 +3056,19 @@@ static ssize_t ioc_weight_write(struct 
                if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
                        return -EINVAL;
  
 -              spin_lock(&blkcg->lock);
 +              spin_lock_irq(&blkcg->lock);
                iocc->dfl_weight = v * WEIGHT_ONE;
                hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
                        struct ioc_gq *iocg = blkg_to_iocg(blkg);
  
                        if (iocg) {
 -                              spin_lock_irq(&iocg->ioc->lock);
 +                              spin_lock(&iocg->ioc->lock);
                                ioc_now(iocg->ioc, &now);
                                weight_updated(iocg, &now);
 -                              spin_unlock_irq(&iocg->ioc->lock);
 +                              spin_unlock(&iocg->ioc->lock);
                        }
                }
 -              spin_unlock(&blkcg->lock);
 +              spin_unlock_irq(&blkcg->lock);
  
                return nbytes;
        }
diff --combined block/blk-iolatency.c
@@@ -833,11 -833,7 +833,11 @@@ static ssize_t iolatency_set_limit(stru
  
        enable = iolatency_set_min_lat_nsec(blkg, lat_val);
        if (enable) {
 -              WARN_ON_ONCE(!blk_get_queue(blkg->q));
 +              if (!blk_get_queue(blkg->q)) {
 +                      ret = -ENODEV;
 +                      goto out;
 +              }
 +
                blkg_get(blkg);
        }
  
@@@ -890,8 -886,7 +890,7 @@@ static int iolatency_print_limit(struc
        return 0;
  }
  
- static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
-                                size_t size)
+ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
  {
        struct latency_stat stat;
        int cpu;
        preempt_enable();
  
        if (iolat->rq_depth.max_depth == UINT_MAX)
-               return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
-                                (unsigned long long)stat.ps.missed,
-                                (unsigned long long)stat.ps.total);
-       return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
-                        (unsigned long long)stat.ps.missed,
-                        (unsigned long long)stat.ps.total,
-                        iolat->rq_depth.max_depth);
+               seq_printf(s, " missed=%llu total=%llu depth=max",
+                       (unsigned long long)stat.ps.missed,
+                       (unsigned long long)stat.ps.total);
+       else
+               seq_printf(s, " missed=%llu total=%llu depth=%u",
+                       (unsigned long long)stat.ps.missed,
+                       (unsigned long long)stat.ps.total,
+                       iolat->rq_depth.max_depth);
+       return true;
  }
  
- static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
-                               size_t size)
+ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
  {
        struct iolatency_grp *iolat = pd_to_lat(pd);
        unsigned long long avg_lat;
        unsigned long long cur_win;
  
        if (!blkcg_debug_stats)
-               return 0;
+               return false;
  
        if (iolat->ssd)
-               return iolatency_ssd_stat(iolat, buf, size);
+               return iolatency_ssd_stat(iolat, s);
  
        avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
        cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
        if (iolat->rq_depth.max_depth == UINT_MAX)
-               return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
-                                avg_lat, cur_win);
-       return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
-                        iolat->rq_depth.max_depth, avg_lat, cur_win);
+               seq_printf(s, " depth=max avg_lat=%llu win=%llu",
+                       avg_lat, cur_win);
+       else
+               seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
+                       iolat->rq_depth.max_depth, avg_lat, cur_win);
+       return true;
  }
  
  static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
                                                   struct request_queue *q,
                                                   struct blkcg *blkcg)
diff --combined block/blk-mq.c
@@@ -525,7 -525,7 +525,7 @@@ void blk_mq_free_request(struct reques
                __blk_mq_dec_active_requests(hctx);
  
        if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
-               laptop_io_completion(q->backing_dev_info);
+               laptop_io_completion(q->disk->bdi);
  
        rq_qos_done(q, rq);
  
@@@ -606,7 -606,7 +606,7 @@@ static inline bool blk_mq_complete_need
         * This is probably worse than completing the request on a different
         * cache domain.
         */
 -      if (force_irqthreads)
 +      if (force_irqthreads())
                return false;
  
        /* same CPU or cache domain?  Complete locally */
@@@ -911,7 -911,7 +911,7 @@@ static bool blk_mq_req_expired(struct r
  
  void blk_mq_put_rq_ref(struct request *rq)
  {
 -      if (is_flush_rq(rq, rq->mq_hctx))
 +      if (is_flush_rq(rq))
                rq->end_io(rq, 0);
        else if (refcount_dec_and_test(&rq->ref))
                __blk_mq_free_request(rq);
@@@ -923,14 -923,34 +923,14 @@@ static bool blk_mq_check_expired(struc
        unsigned long *next = priv;
  
        /*
 -       * Just do a quick check if it is expired before locking the request in
 -       * so we're not unnecessarilly synchronizing across CPUs.
 -       */
 -      if (!blk_mq_req_expired(rq, next))
 -              return true;
 -
 -      /*
 -       * We have reason to believe the request may be expired. Take a
 -       * reference on the request to lock this request lifetime into its
 -       * currently allocated context to prevent it from being reallocated in
 -       * the event the completion by-passes this timeout handler.
 -       *
 -       * If the reference was already released, then the driver beat the
 -       * timeout handler to posting a natural completion.
 -       */
 -      if (!refcount_inc_not_zero(&rq->ref))
 -              return true;
 -
 -      /*
 -       * The request is now locked and cannot be reallocated underneath the
 -       * timeout handler's processing. Re-verify this exact request is truly
 -       * expired; if it is not expired, then the request was completed and
 -       * reallocated as a new request.
 +       * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
 +       * be reallocated underneath the timeout handler's processing, then
 +       * the expire check is reliable. If the request is not expired, then
 +       * it was completed and reallocated as a new request after returning
 +       * from blk_mq_check_expired().
         */
        if (blk_mq_req_expired(rq, next))
                blk_mq_rq_timed_out(rq, reserved);
 -
 -      blk_mq_put_rq_ref(rq);
        return true;
  }
  
@@@ -2974,12 -2994,10 +2974,12 @@@ static void queue_set_hctx_shared(struc
        int i;
  
        queue_for_each_hw_ctx(q, hctx, i) {
 -              if (shared)
 +              if (shared) {
                        hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
 -              else
 +              } else {
 +                      blk_mq_tag_idle(hctx);
                        hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
 +              }
        }
  }
  
@@@ -3115,7 -3133,8 +3115,8 @@@ struct request_queue *blk_mq_init_queue
  }
  EXPORT_SYMBOL(blk_mq_init_queue);
  
- struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata)
+ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+               struct lock_class_key *lkclass)
  {
        struct request_queue *q;
        struct gendisk *disk;
        if (IS_ERR(q))
                return ERR_CAST(q);
  
-       disk = __alloc_disk_node(0, set->numa_node);
+       disk = __alloc_disk_node(q, set->numa_node, lkclass);
        if (!disk) {
                blk_cleanup_queue(q);
                return ERR_PTR(-ENOMEM);
        }
-       disk->queue = q;
        return disk;
  }
  EXPORT_SYMBOL(__blk_mq_alloc_disk);
diff --combined block/blk.h
@@@ -44,7 -44,11 +44,7 @@@ static inline void __blk_get_queue(stru
        kobject_get(&q->kobj);
  }
  
 -static inline bool
 -is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
 -{
 -      return hctx->fq->flush_rq == req;
 -}
 +bool is_flush_rq(struct request *req);
  
  struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
                                              gfp_t flags);
@@@ -128,7 -132,7 +128,7 @@@ static inline bool integrity_req_gap_fr
                                bip_next->bip_vec[0].bv_offset);
  }
  
- void blk_integrity_add(struct gendisk *);
+ int blk_integrity_add(struct gendisk *disk);
  void blk_integrity_del(struct gendisk *);
  #else /* CONFIG_BLK_DEV_INTEGRITY */
  static inline bool blk_integrity_merge_rq(struct request_queue *rq,
@@@ -162,8 -166,9 +162,9 @@@ static inline bool bio_integrity_endio(
  static inline void bio_integrity_free(struct bio *bio)
  {
  }
- static inline void blk_integrity_add(struct gendisk *disk)
+ static inline int blk_integrity_add(struct gendisk *disk)
  {
+       return 0;
  }
  static inline void blk_integrity_del(struct gendisk *disk)
  {
@@@ -289,11 -294,13 +290,13 @@@ int create_task_io_context(struct task_
  extern int blk_throtl_init(struct request_queue *q);
  extern void blk_throtl_exit(struct request_queue *q);
  extern void blk_throtl_register_queue(struct request_queue *q);
+ extern void blk_throtl_charge_bio_split(struct bio *bio);
  bool blk_throtl_bio(struct bio *bio);
  #else /* CONFIG_BLK_DEV_THROTTLING */
  static inline int blk_throtl_init(struct request_queue *q) { return 0; }
  static inline void blk_throtl_exit(struct request_queue *q) { }
  static inline void blk_throtl_register_queue(struct request_queue *q) { }
+ static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
  static inline bool blk_throtl_bio(struct bio *bio) { return false; }
  #endif /* CONFIG_BLK_DEV_THROTTLING */
  #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
@@@ -340,15 -347,14 +343,14 @@@ static inline void blk_queue_clear_zone
  
  int blk_alloc_ext_minor(void);
  void blk_free_ext_minor(unsigned int minor);
- char *disk_name(struct gendisk *hd, int partno, char *buf);
  #define ADDPART_FLAG_NONE     0
  #define ADDPART_FLAG_RAID     1
  #define ADDPART_FLAG_WHOLEDISK        2
- int bdev_add_partition(struct block_device *bdev, int partno,
-               sector_t start, sector_t length);
- int bdev_del_partition(struct block_device *bdev, int partno);
- int bdev_resize_partition(struct block_device *bdev, int partno,
-               sector_t start, sector_t length);
+ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+               sector_t length);
+ int bdev_del_partition(struct gendisk *disk, int partno);
+ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
+               sector_t length);
  
  int bio_add_hw_page(struct request_queue *q, struct bio *bio,
                struct page *page, unsigned int len, unsigned int offset,
  
  struct request_queue *blk_alloc_queue(int node_id);
  
- void disk_alloc_events(struct gendisk *disk);
+ int disk_alloc_events(struct gendisk *disk);
  void disk_add_events(struct gendisk *disk);
  void disk_del_events(struct gendisk *disk);
  void disk_release_events(struct gendisk *disk);
diff --combined block/mq-deadline.c
index 3692067,0000000..3c3693c
mode 100644,000000..100644
--- /dev/null
@@@ -1,1104 -1,0 +1,1106 @@@
 +// SPDX-License-Identifier: GPL-2.0
 +/*
 + *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 + *  for the blk-mq scheduling framework
 + *
 + *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 + */
 +#include <linux/kernel.h>
 +#include <linux/fs.h>
 +#include <linux/blkdev.h>
 +#include <linux/blk-mq.h>
 +#include <linux/elevator.h>
 +#include <linux/bio.h>
 +#include <linux/module.h>
 +#include <linux/slab.h>
 +#include <linux/init.h>
 +#include <linux/compiler.h>
 +#include <linux/rbtree.h>
 +#include <linux/sbitmap.h>
 +
 +#include <trace/events/block.h>
 +
 +#include "blk.h"
 +#include "blk-mq.h"
 +#include "blk-mq-debugfs.h"
 +#include "blk-mq-tag.h"
 +#include "blk-mq-sched.h"
 +
 +/*
 + * See Documentation/block/deadline-iosched.rst
 + */
 +static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
 +static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
 +static const int writes_starved = 2;    /* max times reads can starve a write */
 +static const int fifo_batch = 16;       /* # of sequential requests treated as one
 +                                   by the above parameters. For throughput. */
 +
 +enum dd_data_dir {
 +      DD_READ         = READ,
 +      DD_WRITE        = WRITE,
 +};
 +
 +enum { DD_DIR_COUNT = 2 };
 +
 +enum dd_prio {
 +      DD_RT_PRIO      = 0,
 +      DD_BE_PRIO      = 1,
 +      DD_IDLE_PRIO    = 2,
 +      DD_PRIO_MAX     = 2,
 +};
 +
 +enum { DD_PRIO_COUNT = 3 };
 +
 +/* I/O statistics per I/O priority. */
 +struct io_stats_per_prio {
 +      local_t inserted;
 +      local_t merged;
 +      local_t dispatched;
 +      local_t completed;
 +};
 +
 +/* I/O statistics for all I/O priorities (enum dd_prio). */
 +struct io_stats {
 +      struct io_stats_per_prio stats[DD_PRIO_COUNT];
 +};
 +
 +/*
 + * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
 + * present on both sort_list[] and fifo_list[].
 + */
 +struct dd_per_prio {
 +      struct list_head dispatch;
 +      struct rb_root sort_list[DD_DIR_COUNT];
 +      struct list_head fifo_list[DD_DIR_COUNT];
 +      /* Next request in FIFO order. Read, write or both are NULL. */
 +      struct request *next_rq[DD_DIR_COUNT];
 +};
 +
 +struct deadline_data {
 +      /*
 +       * run time data
 +       */
 +
 +      struct dd_per_prio per_prio[DD_PRIO_COUNT];
 +
 +      /* Data direction of latest dispatched request. */
 +      enum dd_data_dir last_dir;
 +      unsigned int batching;          /* number of sequential requests made */
 +      unsigned int starved;           /* times reads have starved writes */
 +
 +      struct io_stats __percpu *stats;
 +
 +      /*
 +       * settings that change how the i/o scheduler behaves
 +       */
 +      int fifo_expire[DD_DIR_COUNT];
 +      int fifo_batch;
 +      int writes_starved;
 +      int front_merges;
 +      u32 async_depth;
 +
 +      spinlock_t lock;
 +      spinlock_t zone_lock;
 +};
 +
 +/* Count one event of type 'event_type' and with I/O priority 'prio' */
 +#define dd_count(dd, event_type, prio) do {                           \
 +      struct io_stats *io_stats = get_cpu_ptr((dd)->stats);           \
 +                                                                      \
 +      BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
 +      BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
 +      local_inc(&io_stats->stats[(prio)].event_type);                 \
 +      put_cpu_ptr(io_stats);                                          \
 +} while (0)
 +
 +/*
 + * Returns the total number of dd_count(dd, event_type, prio) calls across all
 + * CPUs. No locking or barriers since it is fine if the returned sum is slightly
 + * outdated.
 + */
 +#define dd_sum(dd, event_type, prio) ({                                       \
 +      unsigned int cpu;                                               \
 +      u32 sum = 0;                                                    \
 +                                                                      \
 +      BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
 +      BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
 +      for_each_present_cpu(cpu)                                       \
 +              sum += local_read(&per_cpu_ptr((dd)->stats, cpu)->      \
 +                                stats[(prio)].event_type);            \
 +      sum;                                                            \
 +})
 +
 +/* Maps an I/O priority class to a deadline scheduler priority. */
 +static const enum dd_prio ioprio_class_to_prio[] = {
 +      [IOPRIO_CLASS_NONE]     = DD_BE_PRIO,
 +      [IOPRIO_CLASS_RT]       = DD_RT_PRIO,
 +      [IOPRIO_CLASS_BE]       = DD_BE_PRIO,
 +      [IOPRIO_CLASS_IDLE]     = DD_IDLE_PRIO,
 +};
 +
 +static inline struct rb_root *
 +deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
 +{
 +      return &per_prio->sort_list[rq_data_dir(rq)];
 +}
 +
 +/*
 + * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
 + * request.
 + */
 +static u8 dd_rq_ioclass(struct request *rq)
 +{
 +      return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
 +}
 +
 +/*
 + * get the request after `rq' in sector-sorted order
 + */
 +static inline struct request *
 +deadline_latter_request(struct request *rq)
 +{
 +      struct rb_node *node = rb_next(&rq->rb_node);
 +
 +      if (node)
 +              return rb_entry_rq(node);
 +
 +      return NULL;
 +}
 +
 +static void
 +deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
 +{
 +      struct rb_root *root = deadline_rb_root(per_prio, rq);
 +
 +      elv_rb_add(root, rq);
 +}
 +
 +static inline void
 +deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
 +{
 +      const enum dd_data_dir data_dir = rq_data_dir(rq);
 +
 +      if (per_prio->next_rq[data_dir] == rq)
 +              per_prio->next_rq[data_dir] = deadline_latter_request(rq);
 +
 +      elv_rb_del(deadline_rb_root(per_prio, rq), rq);
 +}
 +
 +/*
 + * remove rq from rbtree and fifo.
 + */
 +static void deadline_remove_request(struct request_queue *q,
 +                                  struct dd_per_prio *per_prio,
 +                                  struct request *rq)
 +{
 +      list_del_init(&rq->queuelist);
 +
 +      /*
 +       * We might not be on the rbtree, if we are doing an insert merge
 +       */
 +      if (!RB_EMPTY_NODE(&rq->rb_node))
 +              deadline_del_rq_rb(per_prio, rq);
 +
 +      elv_rqhash_del(q, rq);
 +      if (q->last_merge == rq)
 +              q->last_merge = NULL;
 +}
 +
 +static void dd_request_merged(struct request_queue *q, struct request *req,
 +                            enum elv_merge type)
 +{
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      const u8 ioprio_class = dd_rq_ioclass(req);
 +      const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];
 +
 +      /*
 +       * if the merge was a front merge, we need to reposition request
 +       */
 +      if (type == ELEVATOR_FRONT_MERGE) {
 +              elv_rb_del(deadline_rb_root(per_prio, req), req);
 +              deadline_add_rq_rb(per_prio, req);
 +      }
 +}
 +
 +/*
 + * Callback function that is invoked after @next has been merged into @req.
 + */
 +static void dd_merged_requests(struct request_queue *q, struct request *req,
 +                             struct request *next)
 +{
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      const u8 ioprio_class = dd_rq_ioclass(next);
 +      const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 +
 +      dd_count(dd, merged, prio);
 +
 +      /*
 +       * if next expires before rq, assign its expire time to rq
 +       * and move into next position (next will be deleted) in fifo
 +       */
 +      if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
 +              if (time_before((unsigned long)next->fifo_time,
 +                              (unsigned long)req->fifo_time)) {
 +                      list_move(&req->queuelist, &next->queuelist);
 +                      req->fifo_time = next->fifo_time;
 +              }
 +      }
 +
 +      /*
 +       * kill knowledge of next, this one is a goner
 +       */
 +      deadline_remove_request(q, &dd->per_prio[prio], next);
 +}
 +
 +/*
 + * move an entry to dispatch queue
 + */
 +static void
 +deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 +                    struct request *rq)
 +{
 +      const enum dd_data_dir data_dir = rq_data_dir(rq);
 +
 +      per_prio->next_rq[data_dir] = deadline_latter_request(rq);
 +
 +      /*
 +       * take it off the sort and fifo list
 +       */
 +      deadline_remove_request(rq->q, per_prio, rq);
 +}
 +
 +/* Number of requests queued for a given priority level. */
 +static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
 +{
 +      return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
 +}
 +
 +/*
 + * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 + * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 + */
 +static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
 +                                    enum dd_data_dir data_dir)
 +{
 +      struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 +
 +      /*
 +       * rq is expired!
 +       */
 +      if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
 +              return 1;
 +
 +      return 0;
 +}
 +
 +/*
 + * For the specified data direction, return the next request to
 + * dispatch using arrival ordered lists.
 + */
 +static struct request *
 +deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 +                    enum dd_data_dir data_dir)
 +{
 +      struct request *rq;
 +      unsigned long flags;
 +
 +      if (list_empty(&per_prio->fifo_list[data_dir]))
 +              return NULL;
 +
 +      rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 +      if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
 +              return rq;
 +
 +      /*
 +       * Look for a write request that can be dispatched, that is one with
 +       * an unlocked target zone.
 +       */
 +      spin_lock_irqsave(&dd->zone_lock, flags);
 +      list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
 +              if (blk_req_can_dispatch_to_zone(rq))
 +                      goto out;
 +      }
 +      rq = NULL;
 +out:
 +      spin_unlock_irqrestore(&dd->zone_lock, flags);
 +
 +      return rq;
 +}
 +
 +/*
 + * For the specified data direction, return the next request to
 + * dispatch using sector position sorted lists.
 + */
 +static struct request *
 +deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 +                    enum dd_data_dir data_dir)
 +{
 +      struct request *rq;
 +      unsigned long flags;
 +
 +      rq = per_prio->next_rq[data_dir];
 +      if (!rq)
 +              return NULL;
 +
 +      if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
 +              return rq;
 +
 +      /*
 +       * Look for a write request that can be dispatched, that is one with
 +       * an unlocked target zone.
 +       */
 +      spin_lock_irqsave(&dd->zone_lock, flags);
 +      while (rq) {
 +              if (blk_req_can_dispatch_to_zone(rq))
 +                      break;
 +              rq = deadline_latter_request(rq);
 +      }
 +      spin_unlock_irqrestore(&dd->zone_lock, flags);
 +
 +      return rq;
 +}
 +
 +/*
 + * deadline_dispatch_requests selects the best request according to
 + * read/write expire, fifo_batch, etc
 + */
 +static struct request *__dd_dispatch_request(struct deadline_data *dd,
 +                                           struct dd_per_prio *per_prio)
 +{
 +      struct request *rq, *next_rq;
 +      enum dd_data_dir data_dir;
 +      enum dd_prio prio;
 +      u8 ioprio_class;
 +
 +      lockdep_assert_held(&dd->lock);
 +
 +      if (!list_empty(&per_prio->dispatch)) {
 +              rq = list_first_entry(&per_prio->dispatch, struct request,
 +                                    queuelist);
 +              list_del_init(&rq->queuelist);
 +              goto done;
 +      }
 +
 +      /*
 +       * batches are currently reads XOR writes
 +       */
 +      rq = deadline_next_request(dd, per_prio, dd->last_dir);
 +      if (rq && dd->batching < dd->fifo_batch)
 +              /* we have a next request and are still entitled to batch */
 +              goto dispatch_request;
 +
 +      /*
 +       * at this point we are not running a batch. select the appropriate
 +       * data direction (read / write)
 +       */
 +
 +      if (!list_empty(&per_prio->fifo_list[DD_READ])) {
 +              BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
 +
 +              if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
 +                  (dd->starved++ >= dd->writes_starved))
 +                      goto dispatch_writes;
 +
 +              data_dir = DD_READ;
 +
 +              goto dispatch_find_request;
 +      }
 +
 +      /*
 +       * there are either no reads or writes have been starved
 +       */
 +
 +      if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
 +dispatch_writes:
 +              BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
 +
 +              dd->starved = 0;
 +
 +              data_dir = DD_WRITE;
 +
 +              goto dispatch_find_request;
 +      }
 +
 +      return NULL;
 +
 +dispatch_find_request:
 +      /*
 +       * we are not running a batch, find best request for selected data_dir
 +       */
 +      next_rq = deadline_next_request(dd, per_prio, data_dir);
 +      if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
 +              /*
 +               * A deadline has expired, the last request was in the other
 +               * direction, or we have run out of higher-sectored requests.
 +               * Start again from the request with the earliest expiry time.
 +               */
 +              rq = deadline_fifo_request(dd, per_prio, data_dir);
 +      } else {
 +              /*
 +               * The last req was the same dir and we have a next request in
 +               * sort order. No expired requests so continue on from here.
 +               */
 +              rq = next_rq;
 +      }
 +
 +      /*
 +       * For a zoned block device, if we only have writes queued and none of
 +       * them can be dispatched, rq will be NULL.
 +       */
 +      if (!rq)
 +              return NULL;
 +
 +      dd->last_dir = data_dir;
 +      dd->batching = 0;
 +
 +dispatch_request:
 +      /*
 +       * rq is the selected appropriate request.
 +       */
 +      dd->batching++;
 +      deadline_move_request(dd, per_prio, rq);
 +done:
 +      ioprio_class = dd_rq_ioclass(rq);
 +      prio = ioprio_class_to_prio[ioprio_class];
 +      dd_count(dd, dispatched, prio);
 +      /*
 +       * If the request needs its target zone locked, do it.
 +       */
 +      blk_req_zone_write_lock(rq);
 +      rq->rq_flags |= RQF_STARTED;
 +      return rq;
 +}
 +
 +/*
 + * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
 + *
 + * One confusing aspect here is that we get called for a specific
 + * hardware queue, but we may return a request that is for a
 + * different hardware queue. This is because mq-deadline has shared
 + * state for all hardware queues, in terms of sorting, FIFOs, etc.
 + */
 +static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
 +{
 +      struct deadline_data *dd = hctx->queue->elevator->elevator_data;
 +      struct request *rq;
 +      enum dd_prio prio;
 +
 +      spin_lock(&dd->lock);
 +      for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 +              rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
 +              if (rq)
 +                      break;
 +      }
 +      spin_unlock(&dd->lock);
 +
 +      return rq;
 +}
 +
 +/*
 + * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
 + * function is used by __blk_mq_get_tag().
 + */
 +static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 +{
 +      struct deadline_data *dd = data->q->elevator->elevator_data;
 +
 +      /* Do not throttle synchronous reads. */
 +      if (op_is_sync(op) && !op_is_write(op))
 +              return;
 +
 +      /*
 +       * Throttle asynchronous requests and writes such that these requests
 +       * do not block the allocation of synchronous requests.
 +       */
 +      data->shallow_depth = dd->async_depth;
 +}
 +
 +/* Called by blk_mq_update_nr_requests(). */
 +static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
 +{
 +      struct request_queue *q = hctx->queue;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      struct blk_mq_tags *tags = hctx->sched_tags;
 +
 +      dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
 +
 +      sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth);
 +}
 +
 +/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
 +static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 +{
 +      dd_depth_updated(hctx);
 +      return 0;
 +}
 +
 +static void dd_exit_sched(struct elevator_queue *e)
 +{
 +      struct deadline_data *dd = e->elevator_data;
 +      enum dd_prio prio;
 +
 +      for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 +              struct dd_per_prio *per_prio = &dd->per_prio[prio];
 +
 +              WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
 +              WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
 +      }
 +
 +      free_percpu(dd->stats);
 +
 +      kfree(dd);
 +}
 +
 +/*
 + * initialize elevator private data (deadline_data).
 + */
 +static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 +{
 +      struct deadline_data *dd;
 +      struct elevator_queue *eq;
 +      enum dd_prio prio;
 +      int ret = -ENOMEM;
 +
 +      eq = elevator_alloc(q, e);
 +      if (!eq)
 +              return ret;
 +
 +      dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
 +      if (!dd)
 +              goto put_eq;
 +
 +      eq->elevator_data = dd;
 +
 +      dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
 +                                   GFP_KERNEL | __GFP_ZERO);
 +      if (!dd->stats)
 +              goto free_dd;
 +
 +      for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 +              struct dd_per_prio *per_prio = &dd->per_prio[prio];
 +
 +              INIT_LIST_HEAD(&per_prio->dispatch);
 +              INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
 +              INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
 +              per_prio->sort_list[DD_READ] = RB_ROOT;
 +              per_prio->sort_list[DD_WRITE] = RB_ROOT;
 +      }
 +      dd->fifo_expire[DD_READ] = read_expire;
 +      dd->fifo_expire[DD_WRITE] = write_expire;
 +      dd->writes_starved = writes_starved;
 +      dd->front_merges = 1;
 +      dd->last_dir = DD_WRITE;
 +      dd->fifo_batch = fifo_batch;
 +      spin_lock_init(&dd->lock);
 +      spin_lock_init(&dd->zone_lock);
 +
 +      q->elevator = eq;
 +      return 0;
 +
 +free_dd:
 +      kfree(dd);
 +
 +put_eq:
 +      kobject_put(&eq->kobj);
 +      return ret;
 +}
 +
 +/*
 + * Try to merge @bio into an existing request. If @bio has been merged into
 + * an existing request, store the pointer to that request into *@rq.
 + */
 +static int dd_request_merge(struct request_queue *q, struct request **rq,
 +                          struct bio *bio)
 +{
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
 +      const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];
 +      sector_t sector = bio_end_sector(bio);
 +      struct request *__rq;
 +
 +      if (!dd->front_merges)
 +              return ELEVATOR_NO_MERGE;
 +
 +      __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
 +      if (__rq) {
 +              BUG_ON(sector != blk_rq_pos(__rq));
 +
 +              if (elv_bio_merge_ok(__rq, bio)) {
 +                      *rq = __rq;
++                      if (blk_discard_mergable(__rq))
++                              return ELEVATOR_DISCARD_MERGE;
 +                      return ELEVATOR_FRONT_MERGE;
 +              }
 +      }
 +
 +      return ELEVATOR_NO_MERGE;
 +}
 +
 +/*
 + * Attempt to merge a bio into an existing request. This function is called
 + * before @bio is associated with a request.
 + */
 +static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 +              unsigned int nr_segs)
 +{
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      struct request *free = NULL;
 +      bool ret;
 +
 +      spin_lock(&dd->lock);
 +      ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
 +      spin_unlock(&dd->lock);
 +
 +      if (free)
 +              blk_mq_free_request(free);
 +
 +      return ret;
 +}
 +
 +/*
 + * add rq to rbtree and fifo
 + */
 +static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 +                            bool at_head)
 +{
 +      struct request_queue *q = hctx->queue;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      const enum dd_data_dir data_dir = rq_data_dir(rq);
 +      u16 ioprio = req_get_ioprio(rq);
 +      u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 +      struct dd_per_prio *per_prio;
 +      enum dd_prio prio;
 +      LIST_HEAD(free);
 +
 +      lockdep_assert_held(&dd->lock);
 +
 +      /*
 +       * This may be a requeue of a write request that has locked its
 +       * target zone. If it is the case, this releases the zone lock.
 +       */
 +      blk_req_zone_write_unlock(rq);
 +
 +      prio = ioprio_class_to_prio[ioprio_class];
 +      dd_count(dd, inserted, prio);
 +      rq->elv.priv[0] = (void *)(uintptr_t)1;
 +
 +      if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
 +              blk_mq_free_requests(&free);
 +              return;
 +      }
 +
 +      trace_block_rq_insert(rq);
 +
 +      per_prio = &dd->per_prio[prio];
 +      if (at_head) {
 +              list_add(&rq->queuelist, &per_prio->dispatch);
 +      } else {
 +              deadline_add_rq_rb(per_prio, rq);
 +
 +              if (rq_mergeable(rq)) {
 +                      elv_rqhash_add(q, rq);
 +                      if (!q->last_merge)
 +                              q->last_merge = rq;
 +              }
 +
 +              /*
 +               * set expire time and add to fifo list
 +               */
 +              rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
 +              list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
 +      }
 +}
 +
 +/*
 + * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests().
 + */
 +static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 +                             struct list_head *list, bool at_head)
 +{
 +      struct request_queue *q = hctx->queue;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      spin_lock(&dd->lock);
 +      while (!list_empty(list)) {
 +              struct request *rq;
 +
 +              rq = list_first_entry(list, struct request, queuelist);
 +              list_del_init(&rq->queuelist);
 +              dd_insert_request(hctx, rq, at_head);
 +      }
 +      spin_unlock(&dd->lock);
 +}
 +
 +/* Callback from inside blk_mq_rq_ctx_init(). */
 +static void dd_prepare_request(struct request *rq)
 +{
 +      rq->elv.priv[0] = NULL;
 +}
 +
 +/*
 + * Callback from inside blk_mq_free_request().
 + *
 + * For zoned block devices, write unlock the target zone of
 + * completed write requests. Do this while holding the zone lock
 + * spinlock so that the zone is never unlocked while deadline_fifo_request()
 + * or deadline_next_request() are executing. This function is called for
 + * all requests, whether or not these requests complete successfully.
 + *
 + * For a zoned block device, __dd_dispatch_request() may have stopped
 + * dispatching requests if all the queued requests are write requests directed
 + * at zones that are already locked due to on-going write requests. To ensure
 + * write request dispatch progress in this case, mark the queue as needing a
 + * restart to ensure that the queue is run again after completion of the
 + * request and zones being unlocked.
 + */
 +static void dd_finish_request(struct request *rq)
 +{
 +      struct request_queue *q = rq->q;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +      const u8 ioprio_class = dd_rq_ioclass(rq);
 +      const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];
 +
 +      /*
 +       * The block layer core may call dd_finish_request() without having
 +       * called dd_insert_requests(). Hence only update statistics for
 +       * requests for which dd_insert_requests() has been called. See also
 +       * blk_mq_request_bypass_insert().
 +       */
 +      if (rq->elv.priv[0])
 +              dd_count(dd, completed, prio);
 +
 +      if (blk_queue_is_zoned(q)) {
 +              unsigned long flags;
 +
 +              spin_lock_irqsave(&dd->zone_lock, flags);
 +              blk_req_zone_write_unlock(rq);
 +              if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
 +                      blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
 +              spin_unlock_irqrestore(&dd->zone_lock, flags);
 +      }
 +}
 +
 +static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
 +{
 +      return !list_empty_careful(&per_prio->dispatch) ||
 +              !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
 +              !list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
 +}
 +
 +static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
 +{
 +      struct deadline_data *dd = hctx->queue->elevator->elevator_data;
 +      enum dd_prio prio;
 +
 +      for (prio = 0; prio <= DD_PRIO_MAX; prio++)
 +              if (dd_has_work_for_prio(&dd->per_prio[prio]))
 +                      return true;
 +
 +      return false;
 +}
 +
 +/*
 + * sysfs parts below
 + */
 +#define SHOW_INT(__FUNC, __VAR)                                               \
 +static ssize_t __FUNC(struct elevator_queue *e, char *page)           \
 +{                                                                     \
 +      struct deadline_data *dd = e->elevator_data;                    \
 +                                                                      \
 +      return sysfs_emit(page, "%d\n", __VAR);                         \
 +}
 +#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
 +SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
 +SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
 +SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
 +SHOW_INT(deadline_front_merges_show, dd->front_merges);
 +SHOW_INT(deadline_async_depth_show, dd->async_depth);
 +SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
 +#undef SHOW_INT
 +#undef SHOW_JIFFIES
 +
 +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                       \
 +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)       \
 +{                                                                     \
 +      struct deadline_data *dd = e->elevator_data;                    \
 +      int __data, __ret;                                              \
 +                                                                      \
 +      __ret = kstrtoint(page, 0, &__data);                            \
 +      if (__ret < 0)                                                  \
 +              return __ret;                                           \
 +      if (__data < (MIN))                                             \
 +              __data = (MIN);                                         \
 +      else if (__data > (MAX))                                        \
 +              __data = (MAX);                                         \
 +      *(__PTR) = __CONV(__data);                                      \
 +      return count;                                                   \
 +}
 +#define STORE_INT(__FUNC, __PTR, MIN, MAX)                            \
 +      STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
 +#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)                                \
 +      STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
 +STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
 +STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
 +STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
 +STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
 +STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
 +STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
 +#undef STORE_FUNCTION
 +#undef STORE_INT
 +#undef STORE_JIFFIES
 +
 +#define DD_ATTR(name) \
 +      __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
 +
 +static struct elv_fs_entry deadline_attrs[] = {
 +      DD_ATTR(read_expire),
 +      DD_ATTR(write_expire),
 +      DD_ATTR(writes_starved),
 +      DD_ATTR(front_merges),
 +      DD_ATTR(async_depth),
 +      DD_ATTR(fifo_batch),
 +      __ATTR_NULL
 +};
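For reference, the SHOW_*/STORE_* macros above expand to one small sysfs handler per attribute. An illustrative, slightly simplified expansion for fifo_batch (not additional code in this patch):

	static ssize_t deadline_fifo_batch_show(struct elevator_queue *e, char *page)
	{
		struct deadline_data *dd = e->elevator_data;

		return sysfs_emit(page, "%d\n", dd->fifo_batch);
	}

	static ssize_t deadline_fifo_batch_store(struct elevator_queue *e,
						 const char *page, size_t count)
	{
		struct deadline_data *dd = e->elevator_data;
		int val, ret;

		ret = kstrtoint(page, 0, &val);
		if (ret < 0)
			return ret;
		/* Clamp to the [0, INT_MAX] range given in STORE_INT() above. */
		val = clamp(val, 0, INT_MAX);
		dd->fifo_batch = val;
		return count;
	}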
 +
 +#ifdef CONFIG_BLK_DEBUG_FS
 +#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)             \
 +static void *deadline_##name##_fifo_start(struct seq_file *m,         \
 +                                        loff_t *pos)                  \
 +      __acquires(&dd->lock)                                           \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
 +                                                                      \
 +      spin_lock(&dd->lock);                                           \
 +      return seq_list_start(&per_prio->fifo_list[data_dir], *pos);    \
 +}                                                                     \
 +                                                                      \
 +static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \
 +                                       loff_t *pos)                   \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
 +                                                                      \
 +      return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);   \
 +}                                                                     \
 +                                                                      \
 +static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)  \
 +      __releases(&dd->lock)                                           \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +                                                                      \
 +      spin_unlock(&dd->lock);                                         \
 +}                                                                     \
 +                                                                      \
 +static const struct seq_operations deadline_##name##_fifo_seq_ops = { \
 +      .start  = deadline_##name##_fifo_start,                         \
 +      .next   = deadline_##name##_fifo_next,                          \
 +      .stop   = deadline_##name##_fifo_stop,                          \
 +      .show   = blk_mq_debugfs_rq_show,                               \
 +};                                                                    \
 +                                                                      \
 +static int deadline_##name##_next_rq_show(void *data,                 \
 +                                        struct seq_file *m)           \
 +{                                                                     \
 +      struct request_queue *q = data;                                 \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
 +      struct request *rq = per_prio->next_rq[data_dir];               \
 +                                                                      \
 +      if (rq)                                                         \
 +              __blk_mq_debugfs_rq_show(m, rq);                        \
 +      return 0;                                                       \
 +}
 +
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
 +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
 +#undef DEADLINE_DEBUGFS_DDIR_ATTRS
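The generated names encode the priority class and data direction: read0/write0 are the DD_RT_PRIO FIFOs, read1/write1 DD_BE_PRIO, and read2/write2 DD_IDLE_PRIO. As an illustration, the next_rq show helper generated for (DD_RT_PRIO, DD_READ) expands to roughly:

	static int deadline_read0_next_rq_show(void *data, struct seq_file *m)
	{
		struct request_queue *q = data;
		struct deadline_data *dd = q->elevator->elevator_data;
		struct dd_per_prio *per_prio = &dd->per_prio[DD_RT_PRIO];
		struct request *rq = per_prio->next_rq[DD_READ];

		if (rq)
			__blk_mq_debugfs_rq_show(m, rq);
		return 0;
	}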
 +
 +static int deadline_batching_show(void *data, struct seq_file *m)
 +{
 +      struct request_queue *q = data;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      seq_printf(m, "%u\n", dd->batching);
 +      return 0;
 +}
 +
 +static int deadline_starved_show(void *data, struct seq_file *m)
 +{
 +      struct request_queue *q = data;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      seq_printf(m, "%u\n", dd->starved);
 +      return 0;
 +}
 +
 +static int dd_async_depth_show(void *data, struct seq_file *m)
 +{
 +      struct request_queue *q = data;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      seq_printf(m, "%u\n", dd->async_depth);
 +      return 0;
 +}
 +
 +static int dd_queued_show(void *data, struct seq_file *m)
 +{
 +      struct request_queue *q = data;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
 +                 dd_queued(dd, DD_BE_PRIO),
 +                 dd_queued(dd, DD_IDLE_PRIO));
 +      return 0;
 +}
 +
 +/* Number of requests owned by the block driver for a given priority. */
 +static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
 +{
 +      return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
 +              - dd_sum(dd, completed, prio);
 +}
 +
 +static int dd_owned_by_driver_show(void *data, struct seq_file *m)
 +{
 +      struct request_queue *q = data;
 +      struct deadline_data *dd = q->elevator->elevator_data;
 +
 +      seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
 +                 dd_owned_by_driver(dd, DD_BE_PRIO),
 +                 dd_owned_by_driver(dd, DD_IDLE_PRIO));
 +      return 0;
 +}
 +
 +#define DEADLINE_DISPATCH_ATTR(prio)                                  \
 +static void *deadline_dispatch##prio##_start(struct seq_file *m,      \
 +                                           loff_t *pos)               \
 +      __acquires(&dd->lock)                                           \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
 +                                                                      \
 +      spin_lock(&dd->lock);                                           \
 +      return seq_list_start(&per_prio->dispatch, *pos);               \
 +}                                                                     \
 +                                                                      \
 +static void *deadline_dispatch##prio##_next(struct seq_file *m,               \
 +                                          void *v, loff_t *pos)       \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +      struct dd_per_prio *per_prio = &dd->per_prio[prio];             \
 +                                                                      \
 +      return seq_list_next(v, &per_prio->dispatch, pos);              \
 +}                                                                     \
 +                                                                      \
 +static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v)       \
 +      __releases(&dd->lock)                                           \
 +{                                                                     \
 +      struct request_queue *q = m->private;                           \
 +      struct deadline_data *dd = q->elevator->elevator_data;          \
 +                                                                      \
 +      spin_unlock(&dd->lock);                                         \
 +}                                                                     \
 +                                                                      \
 +static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \
 +      .start  = deadline_dispatch##prio##_start,                      \
 +      .next   = deadline_dispatch##prio##_next,                       \
 +      .stop   = deadline_dispatch##prio##_stop,                       \
 +      .show   = blk_mq_debugfs_rq_show,                               \
 +}
 +
 +DEADLINE_DISPATCH_ATTR(0);
 +DEADLINE_DISPATCH_ATTR(1);
 +DEADLINE_DISPATCH_ATTR(2);
 +#undef DEADLINE_DISPATCH_ATTR
 +
 +#define DEADLINE_QUEUE_DDIR_ATTRS(name)                                       \
 +      {#name "_fifo_list", 0400,                                      \
 +                      .seq_ops = &deadline_##name##_fifo_seq_ops}
 +#define DEADLINE_NEXT_RQ_ATTR(name)                                   \
 +      {#name "_next_rq", 0400, deadline_##name##_next_rq_show}
 +static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
 +      DEADLINE_QUEUE_DDIR_ATTRS(read0),
 +      DEADLINE_QUEUE_DDIR_ATTRS(write0),
 +      DEADLINE_QUEUE_DDIR_ATTRS(read1),
 +      DEADLINE_QUEUE_DDIR_ATTRS(write1),
 +      DEADLINE_QUEUE_DDIR_ATTRS(read2),
 +      DEADLINE_QUEUE_DDIR_ATTRS(write2),
 +      DEADLINE_NEXT_RQ_ATTR(read0),
 +      DEADLINE_NEXT_RQ_ATTR(write0),
 +      DEADLINE_NEXT_RQ_ATTR(read1),
 +      DEADLINE_NEXT_RQ_ATTR(write1),
 +      DEADLINE_NEXT_RQ_ATTR(read2),
 +      DEADLINE_NEXT_RQ_ATTR(write2),
 +      {"batching", 0400, deadline_batching_show},
 +      {"starved", 0400, deadline_starved_show},
 +      {"async_depth", 0400, dd_async_depth_show},
 +      {"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
 +      {"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
 +      {"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
 +      {"owned_by_driver", 0400, dd_owned_by_driver_show},
 +      {"queued", 0400, dd_queued_show},
 +      {},
 +};
 +#undef DEADLINE_QUEUE_DDIR_ATTRS
 +#endif
 +
 +static struct elevator_type mq_deadline = {
 +      .ops = {
 +              .depth_updated          = dd_depth_updated,
 +              .limit_depth            = dd_limit_depth,
 +              .insert_requests        = dd_insert_requests,
 +              .dispatch_request       = dd_dispatch_request,
 +              .prepare_request        = dd_prepare_request,
 +              .finish_request         = dd_finish_request,
 +              .next_request           = elv_rb_latter_request,
 +              .former_request         = elv_rb_former_request,
 +              .bio_merge              = dd_bio_merge,
 +              .request_merge          = dd_request_merge,
 +              .requests_merged        = dd_merged_requests,
 +              .request_merged         = dd_request_merged,
 +              .has_work               = dd_has_work,
 +              .init_sched             = dd_init_sched,
 +              .exit_sched             = dd_exit_sched,
 +              .init_hctx              = dd_init_hctx,
 +      },
 +
 +#ifdef CONFIG_BLK_DEBUG_FS
 +      .queue_debugfs_attrs = deadline_queue_debugfs_attrs,
 +#endif
 +      .elevator_attrs = deadline_attrs,
 +      .elevator_name = "mq-deadline",
 +      .elevator_alias = "deadline",
 +      .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
 +      .elevator_owner = THIS_MODULE,
 +};
 +MODULE_ALIAS("mq-deadline-iosched");
 +
 +static int __init deadline_init(void)
 +{
 +      return elv_register(&mq_deadline);
 +}
 +
 +static void __exit deadline_exit(void)
 +{
 +      elv_unregister(&mq_deadline);
 +}
 +
 +module_init(deadline_init);
 +module_exit(deadline_exit);
 +
 +MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
 +MODULE_LICENSE("GPL");
 +MODULE_DESCRIPTION("MQ deadline IO scheduler");
diff --combined block/partitions/ldm.c
@@@ -1,5 -1,5 +1,5 @@@
  // SPDX-License-Identifier: GPL-2.0-or-later
 -/**
 +/*
   * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
   *
   * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
@@@ -304,7 -304,7 +304,7 @@@ static bool ldm_validate_privheads(stru
                }
        }
  
-       num_sects = state->bdev->bd_inode->i_size >> 9;
+       num_sects = get_capacity(state->disk);
  
        if ((ph[0]->config_start > num_sects) ||
           ((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
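The substitution above is mechanical: both expressions give the whole-disk size in 512-byte sectors (SECTOR_SHIFT is 9), assuming the usual invariant that the block device inode's i_size is the capacity in bytes:

	/*
	 * num_sects before:  state->bdev->bd_inode->i_size >> 9
	 * num_sects after:   get_capacity(state->disk)
	 * Both count 512-byte sectors; the new form avoids touching bd_inode.
	 */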
@@@ -339,11 -339,11 +339,11 @@@ out
  /**
   * ldm_validate_tocblocks - Validate the table of contents and its backups
   * @state: Partition check state including device holding the LDM Database
-  * @base:  Offset, into @state->bdev, of the database
+  * @base:  Offset, into @state->disk, of the database
   * @ldb:   Cache of the database structures
   *
   * Find and compare the four tables of contents of the LDM Database stored on
-  * @state->bdev and return the parsed information into @toc1.
+  * @state->disk and return the parsed information into @toc1.
   *
   * The offsets and sizes of the configs are range-checked against a privhead.
   *
@@@ -486,8 -486,8 +486,8 @@@ out
   *       only likely to happen if the underlying device is strange.  If that IS
   *       the case we should return zero to let someone else try.
   *
-  * Return:  'true'   @state->bdev is a dynamic disk
-  *          'false'  @state->bdev is not a dynamic disk, or an error occurred
+  * Return:  'true'   @state->disk is a dynamic disk
+  *          'false'  @state->disk is not a dynamic disk, or an error occurred
   */
  static bool ldm_validate_partition_table(struct parsed_partitions *state)
  {
@@@ -1340,7 -1340,7 +1340,7 @@@ static bool ldm_frag_commit (struct lis
  /**
   * ldm_get_vblks - Read the on-disk database of VBLKs into memory
   * @state: Partition check state including device holding the LDM Database
-  * @base:  Offset, into @state->bdev, of the database
+  * @base:  Offset, into @state->disk, of the database
   * @ldb:   Cache of the database structures
   *
   * To use the information from the VBLKs, they need to be read from the disk,
@@@ -1432,10 -1432,10 +1432,10 @@@ static void ldm_free_vblks (struct list
   * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
   * and so on: the actual data containing partitions.
   *
-  * Return:  1 Success, @state->bdev is a dynamic disk and we handled it
-  *          0 Success, @state->bdev is not a dynamic disk
+  * Return:  1 Success, @state->disk is a dynamic disk and we handled it
+  *          0 Success, @state->disk is not a dynamic disk
   *         -1 An error occurred before enough information had been read
-  *            Or @state->bdev is a dynamic disk, but it may be corrupted
+  *            Or @state->disk is a dynamic disk, but it may be corrupted
   */
  int ldm_partition(struct parsed_partitions *state)
  {
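Given the 1/0/-1 return convention documented above, a caller such as the generic partition scanner would interpret the result roughly as follows (a hypothetical sketch, not code from this patch):

	int res = ldm_partition(state);

	if (res > 0) {
		/* Dynamic disk: partitions were added to *state. */
	} else if (res == 0) {
		/* Not an LDM dynamic disk: let the next parser try. */
	} else {
		/* res < 0: read error, or the LDM database looks corrupted. */
	}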
diff --combined drivers/block/virtio_blk.c
@@@ -166,11 -166,8 +166,8 @@@ static inline void virtblk_request_done
  {
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
  
-       if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
-               kfree(page_address(req->special_vec.bv_page) +
-                     req->special_vec.bv_offset);
-       }
+       if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
+               kfree(bvec_virt(&req->special_vec));
        blk_mq_end_request(req, virtblk_result(vbr));
  }
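bvec_virt(), from include/linux/bvec.h, is essentially a named helper for the open-coded expression it replaces here and in the dasd_eckd.c hunks further below (valid for lowmem pages):

	static inline void *bvec_virt(struct bio_vec *bvec)
	{
		return page_address(bvec->bv_page) + bvec->bv_offset;
	}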
  
@@@ -692,28 -689,6 +689,28 @@@ static const struct blk_mq_ops virtio_m
  static unsigned int virtblk_queue_depth;
  module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
  
 +static int virtblk_validate(struct virtio_device *vdev)
 +{
 +      u32 blk_size;
 +
 +      if (!vdev->config->get) {
 +              dev_err(&vdev->dev, "%s failure: config access disabled\n",
 +                      __func__);
 +              return -EINVAL;
 +      }
 +
 +      if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE))
 +              return 0;
 +
 +      blk_size = virtio_cread32(vdev,
 +                      offsetof(struct virtio_blk_config, blk_size));
 +
 +      if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)
 +              __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE);
 +
 +      return 0;
 +}
 +
  static int virtblk_probe(struct virtio_device *vdev)
  {
        struct virtio_blk *vblk;
        u8 physical_block_exp, alignment_offset;
        unsigned int queue_depth;
  
 -      if (!vdev->config->get) {
 -              dev_err(&vdev->dev, "%s failure: config access disabled\n",
 -                      __func__);
 -              return -EINVAL;
 -      }
 -
        err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
                             GFP_KERNEL);
        if (err < 0)
        else
                blk_size = queue_logical_block_size(q);
  
 +      if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
 +              dev_err(&vdev->dev,
 +                      "block size is changed unexpectedly, now is %u\n",
 +                      blk_size);
 +              err = -EINVAL;
-               goto err_cleanup_disk;
++              goto out_cleanup_disk;
 +      }
 +
        /* Use topology information if available */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, physical_block_exp,
        virtblk_update_capacity(vblk, false);
        virtio_device_ready(vdev);
  
-       device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+       err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
+       if (err)
+               goto out_cleanup_disk;
        return 0;
  
- err_cleanup_disk:
+ out_cleanup_disk:
        blk_cleanup_disk(vblk->disk);
  out_free_tags:
        blk_mq_free_tag_set(&vblk->tag_set);
@@@ -1009,7 -985,6 +1009,7 @@@ static struct virtio_driver virtio_blk 
        .driver.name                    = KBUILD_MODNAME,
        .driver.owner                   = THIS_MODULE,
        .id_table                       = id_table,
 +      .validate                       = virtblk_validate,
        .probe                          = virtblk_probe,
        .remove                         = virtblk_remove,
        .config_changed                 = virtblk_config_changed,
diff --combined drivers/s390/block/dasd_eckd.c
@@@ -1004,23 -1004,15 +1004,23 @@@ static unsigned char dasd_eckd_path_acc
  static void dasd_eckd_store_conf_data(struct dasd_device *device,
                                      struct dasd_conf_data *conf_data, int chp)
  {
 +      struct dasd_eckd_private *private = device->private;
        struct channel_path_desc_fmt0 *chp_desc;
        struct subchannel_id sch_id;
 +      void *cdp;
  
 -      ccw_device_get_schid(device->cdev, &sch_id);
        /*
         * path handling and read_conf allocate data
         * free it before replacing the pointer
 +       * also replace the old private->conf_data pointer
 +       * with the new one if this points to the same data
         */
 -      kfree(device->path[chp].conf_data);
 +      cdp = device->path[chp].conf_data;
 +      if (private->conf_data == cdp) {
 +              private->conf_data = (void *)conf_data;
 +              dasd_eckd_identify_conf_parts(private);
 +      }
 +      ccw_device_get_schid(device->cdev, &sch_id);
        device->path[chp].conf_data = conf_data;
        device->path[chp].cssid = sch_id.cssid;
        device->path[chp].ssid = sch_id.ssid;
        if (chp_desc)
                device->path[chp].chpid = chp_desc->chpid;
        kfree(chp_desc);
 +      kfree(cdp);
  }
  
  static void dasd_eckd_clear_conf_data(struct dasd_device *device)
@@@ -3276,7 -3267,7 +3276,7 @@@ static int dasd_eckd_ese_read(struct da
        end_blk = (curr_trk + 1) * recs_per_trk;
  
        rq_for_each_segment(bv, req, iter) {
-               dst = page_address(bv.bv_page) + bv.bv_offset;
+               dst = bvec_virt(&bv);
                for (off = 0; off < bv.bv_len; off += blksize) {
                        if (first_blk + blk_count >= end_blk) {
                                cqr->proc_bytes = blk_count * blksize;
@@@ -4008,7 -3999,7 +4008,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
                              last_rec - recid + 1, cmd, basedev, blksize);
        }
        rq_for_each_segment(bv, req, iter) {
-               dst = page_address(bv.bv_page) + bv.bv_offset;
+               dst = bvec_virt(&bv);
                if (dasd_page_cache) {
                        char *copy = kmem_cache_alloc(dasd_page_cache,
                                                      GFP_DMA | __GFP_NOWARN);
@@@ -4175,7 -4166,7 +4175,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
        idaw_dst = NULL;
        idaw_len = 0;
        rq_for_each_segment(bv, req, iter) {
-               dst = page_address(bv.bv_page) + bv.bv_offset;
+               dst = bvec_virt(&bv);
                seg_len = bv.bv_len;
                while (seg_len) {
                        if (new_track) {
@@@ -4518,7 -4509,7 +4518,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
                new_track = 1;
                recid = first_rec;
                rq_for_each_segment(bv, req, iter) {
-                       dst = page_address(bv.bv_page) + bv.bv_offset;
+                       dst = bvec_virt(&bv);
                        seg_len = bv.bv_len;
                        while (seg_len) {
                                if (new_track) {
                }
        } else {
                rq_for_each_segment(bv, req, iter) {
-                       dst = page_address(bv.bv_page) + bv.bv_offset;
+                       dst = bvec_virt(&bv);
                        last_tidaw = itcw_add_tidaw(itcw, 0x00,
                                                    dst, bv.bv_len);
                        if (IS_ERR(last_tidaw)) {
@@@ -4787,7 -4778,7 +4787,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
                        idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
        }
        rq_for_each_segment(bv, req, iter) {
-               dst = page_address(bv.bv_page) + bv.bv_offset;
+               dst = bvec_virt(&bv);
                seg_len = bv.bv_len;
                if (cmd == DASD_ECKD_CCW_READ_TRACK)
                        memset(dst, 0, seg_len);
@@@ -4848,7 -4839,7 +4848,7 @@@ dasd_eckd_free_cp(struct dasd_ccw_req *
        if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
                ccw++;
        rq_for_each_segment(bv, req, iter) {
-               dst = page_address(bv.bv_page) + bv.bv_offset;
+               dst = bvec_virt(&bv);
                for (off = 0; off < bv.bv_len; off += blksize) {
                        /* Skip locate record. */
                        if (private->uses_cdl && recid <= 2*blk_per_trk)
diff --combined drivers/scsi/sr.c
@@@ -106,6 -106,8 +106,8 @@@ static struct scsi_driver sr_template 
  static unsigned long sr_index_bits[SR_DISKS / BITS_PER_LONG];
  static DEFINE_SPINLOCK(sr_index_lock);
  
+ static struct lock_class_key sr_bio_compl_lkclass;
  /* This semaphore is used to mediate the 0->1 reference get in the
   * face of object destruction (i.e. we can't allow a get on an
   * object after last put) */
@@@ -221,7 -223,7 +223,7 @@@ static unsigned int sr_get_events(struc
        else if (med->media_event_code == 2)
                return DISK_EVENT_MEDIA_CHANGE;
        else if (med->media_event_code == 3)
 -              return DISK_EVENT_EJECT_REQUEST;
 +              return DISK_EVENT_MEDIA_CHANGE;
        return 0;
  }
  
@@@ -712,7 -714,8 +714,8 @@@ static int sr_probe(struct device *dev
  
        kref_init(&cd->kref);
  
-       disk = alloc_disk(1);
+       disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE,
+                                &sr_bio_compl_lkclass);
        if (!disk)
                goto fail_free;
        mutex_init(&cd->lock);
  
        disk->major = SCSI_CDROM_MAJOR;
        disk->first_minor = minor;
+       disk->minors = 1;
        sprintf(disk->disk_name, "sr%d", minor);
        disk->fops = &sr_bdops;
        disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
  
        set_capacity(disk, cd->capacity);
        disk->private_data = &cd->driver;
-       disk->queue = sdev->request_queue;
  
        if (register_cdrom(disk, &cd->cdi))
                goto fail_minor;
diff --combined include/linux/fs.h
@@@ -436,10 -436,6 +436,10 @@@ int pagecache_write_end(struct file *, 
   * struct address_space - Contents of a cacheable, mappable object.
   * @host: Owner, either the inode or the block_device.
   * @i_pages: Cached pages.
 + * @invalidate_lock: Guards coherency between page cache contents and
 + *   file offset->disk block mappings in the filesystem during invalidates.
 + *   It is also used to block modification of page cache contents through
 + *   memory mappings.
   * @gfp_mask: Memory allocation flags to use for allocating pages.
   * @i_mmap_writable: Number of VM_SHARED mappings.
   * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  struct address_space {
        struct inode            *host;
        struct xarray           i_pages;
 +      struct rw_semaphore     invalidate_lock;
        gfp_t                   gfp_mask;
        atomic_t                i_mmap_writable;
  #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@@ -819,42 -814,9 +819,42 @@@ static inline void inode_lock_shared_ne
        down_read_nested(&inode->i_rwsem, subclass);
  }
  
 +static inline void filemap_invalidate_lock(struct address_space *mapping)
 +{
 +      down_write(&mapping->invalidate_lock);
 +}
 +
 +static inline void filemap_invalidate_unlock(struct address_space *mapping)
 +{
 +      up_write(&mapping->invalidate_lock);
 +}
 +
 +static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
 +{
 +      down_read(&mapping->invalidate_lock);
 +}
 +
 +static inline int filemap_invalidate_trylock_shared(
 +                                      struct address_space *mapping)
 +{
 +      return down_read_trylock(&mapping->invalidate_lock);
 +}
 +
 +static inline void filemap_invalidate_unlock_shared(
 +                                      struct address_space *mapping)
 +{
 +      up_read(&mapping->invalidate_lock);
 +}
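These helpers pair with the new @invalidate_lock documented above: a filesystem holds the lock exclusively while it removes page cache pages whose backing blocks it is about to free, and page-cache-filling paths take it shared. A hypothetical usage sketch (illustrative only; example_fs_free_blocks() is a placeholder, not a real API):

	static int example_fs_punch_hole(struct inode *inode, loff_t start, loff_t len)
	{
		struct address_space *mapping = inode->i_mapping;
		int err;

		filemap_invalidate_lock(mapping);
		/* No new pages can be faulted in or read ahead past this point. */
		truncate_pagecache_range(inode, start, start + len - 1);
		err = example_fs_free_blocks(inode, start, len);	/* placeholder */
		filemap_invalidate_unlock(mapping);
		return err;
	}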
 +
  void lock_two_nondirectories(struct inode *, struct inode*);
  void unlock_two_nondirectories(struct inode *, struct inode*);
  
 +void filemap_invalidate_lock_two(struct address_space *mapping1,
 +                               struct address_space *mapping2);
 +void filemap_invalidate_unlock_two(struct address_space *mapping1,
 +                                 struct address_space *mapping2);
 +
 +
  /*
   * NOTE: in a 32bit arch with a preemptable kernel and
   * an UP compile the i_size_read/write must be atomic
@@@ -1545,11 -1507,8 +1545,11 @@@ struct super_block 
        /* Number of inodes with nlink == 0 but still referenced */
        atomic_long_t s_remove_count;
  
 -      /* Pending fsnotify inode refs */
 -      atomic_long_t s_fsnotify_inode_refs;
 +      /*
 +       * Number of inode/mount/sb objects that are being watched, note that
 +       * inodes objects are currently double-accounted.
 +       */
 +      atomic_long_t s_fsnotify_connectors;
  
        /* Being remounted read-only */
        int s_readonly_remount;
@@@ -2528,7 -2487,6 +2528,7 @@@ struct file_system_type 
  
        struct lock_class_key i_lock_key;
        struct lock_class_key i_mutex_key;
 +      struct lock_class_key invalidate_lock_key;
        struct lock_class_key i_mutex_dir_key;
  };
  
@@@ -2612,6 -2570,90 +2612,6 @@@ extern struct kobject *fs_kobj
  
  #define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
  
 -#ifdef CONFIG_MANDATORY_FILE_LOCKING
 -extern int locks_mandatory_locked(struct file *);
 -extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
 -
 -/*
 - * Candidates for mandatory locking have the setgid bit set
 - * but no group execute bit -  an otherwise meaningless combination.
 - */
 -
 -static inline int __mandatory_lock(struct inode *ino)
 -{
 -      return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
 -}
 -
 -/*
 - * ... and these candidates should be on SB_MANDLOCK mounted fs,
 - * otherwise these will be advisory locks
 - */
 -
 -static inline int mandatory_lock(struct inode *ino)
 -{
 -      return IS_MANDLOCK(ino) && __mandatory_lock(ino);
 -}
 -
 -static inline int locks_verify_locked(struct file *file)
 -{
 -      if (mandatory_lock(locks_inode(file)))
 -              return locks_mandatory_locked(file);
 -      return 0;
 -}
 -
 -static inline int locks_verify_truncate(struct inode *inode,
 -                                  struct file *f,
 -                                  loff_t size)
 -{
 -      if (!inode->i_flctx || !mandatory_lock(inode))
 -              return 0;
 -
 -      if (size < inode->i_size) {
 -              return locks_mandatory_area(inode, f, size, inode->i_size - 1,
 -                              F_WRLCK);
 -      } else {
 -              return locks_mandatory_area(inode, f, inode->i_size, size - 1,
 -                              F_WRLCK);
 -      }
 -}
 -
 -#else /* !CONFIG_MANDATORY_FILE_LOCKING */
 -
 -static inline int locks_mandatory_locked(struct file *file)
 -{
 -      return 0;
 -}
 -
 -static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
 -                                       loff_t start, loff_t end, unsigned char type)
 -{
 -      return 0;
 -}
 -
 -static inline int __mandatory_lock(struct inode *inode)
 -{
 -      return 0;
 -}
 -
 -static inline int mandatory_lock(struct inode *inode)
 -{
 -      return 0;
 -}
 -
 -static inline int locks_verify_locked(struct file *file)
 -{
 -      return 0;
 -}
 -
 -static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
 -                                      size_t size)
 -{
 -      return 0;
 -}
 -
 -#endif /* CONFIG_MANDATORY_FILE_LOCKING */
 -
 -
  #ifdef CONFIG_FILE_LOCKING
  static inline int break_lease(struct inode *inode, unsigned int mode)
  {
@@@ -3204,10 -3246,6 +3204,6 @@@ ssize_t vfs_iocb_iter_read(struct file 
  ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
                            struct iov_iter *iter);
  
- /* fs/block_dev.c */
- extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
-                       int datasync);
  /* fs/splice.c */
  extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
diff --combined lib/Kconfig.debug
@@@ -1235,7 -1235,7 +1235,7 @@@ config PROVE_LOCKIN
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select LOCKDEP
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select DEBUG_RWSEMS
        select DEBUG_WW_MUTEX_SLOWPATH
@@@ -1299,7 -1299,7 +1299,7 @@@ config LOCK_STA
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select LOCKDEP
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select DEBUG_LOCK_ALLOC
        default n
@@@ -1335,7 -1335,7 +1335,7 @@@ config DEBUG_SPINLOC
  
  config DEBUG_MUTEXES
        bool "Mutex debugging: basic checks"
 -      depends on DEBUG_KERNEL
 +      depends on DEBUG_KERNEL && !PREEMPT_RT
        help
         This feature allows mutex semantics violations to be detected and
         reported.
@@@ -1345,8 -1345,7 +1345,8 @@@ config DEBUG_WW_MUTEX_SLOWPAT
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select DEBUG_LOCK_ALLOC
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
 +      select DEBUG_RT_MUTEXES if PREEMPT_RT
        help
         This feature enables slowpath testing for w/w mutex users by
         injecting additional -EDEADLK wound/backoff cases. Together with
@@@ -1369,7 -1368,7 +1369,7 @@@ config DEBUG_LOCK_ALLO
        bool "Lock debugging: detect incorrect freeing of live locks"
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select LOCKDEP
        help
@@@ -1680,33 -1679,6 +1680,6 @@@ config DEBUG_WQ_FORCE_RR_CP
          feature by default.  When enabled, memory and cache locality will
          be impacted.
  
- config DEBUG_BLOCK_EXT_DEVT
-       bool "Force extended block device numbers and spread them"
-       depends on DEBUG_KERNEL
-       depends on BLOCK
-       default n
-       help
-         BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
-         SOME DISTRIBUTIONS.  DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
-         YOU ARE DOING.  Distros, please enable this and fix whatever
-         is broken.
-         Conventionally, block device numbers are allocated from
-         predetermined contiguous area.  However, extended block area
-         may introduce non-contiguous block device numbers.  This
-         option forces most block device numbers to be allocated from
-         the extended space and spreads them to discover kernel or
-         userland code paths which assume predetermined contiguous
-         device number allocation.
-         Note that turning on this debug option shuffles all the
-         device numbers for all IDE and SCSI devices including libata
-         ones, so root partition specified using device number
-         directly (via rdev or root=MAJ:MIN) won't work anymore.
-         Textual device names (root=/dev/sdXn) will continue to work.
-         Say N if you are unsure.
  config CPU_HOTPLUG_STATE_CONTROL
        bool "Enable CPU hotplug state control"
        depends on DEBUG_KERNEL