Merge tag 'for-linus-20180204' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 4 Feb 2018 19:16:35 +0000 (11:16 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 4 Feb 2018 19:16:35 +0000 (11:16 -0800)
Pull more block updates from Jens Axboe:
 "Most of this is fixes and not new code/features:

   - skd fix from Arnd, fixing a build error dependent on the slab
     allocator type.

   - blk-mq scheduler discard merging fixes, one from me and one from
     Keith. This fixes a segment miscalculation for blk-mq-sched, where
     we mistakenly think two segments are physically contiguous even
     though the request isn't carrying real data. Also fixes a bio-to-rq
     merge case.

   - Don't re-set a buffer_head flag bit that is already set, since
     doing so can cause scalability problems on bigger machines and
     workloads. From Kemi Wang. (An expansion sketch follows the final
     diff below.)

   - Add BLK_STS_DEV_RESOURCE return value to blk-mq, allowing us to
     distinguish between a local (device-related) resource starvation
     and a global one. The latter might happen without IO being in
     flight, so it has to be handled a bit differently (a sketch of the
     distinction follows this message). From Ming"

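As a rough illustration of the last item, here is how a blk-mq driver's
->queue_rq() handler might pick between the two return codes. This is a
hedged sketch only: my_dev, my_device_has_room(), my_alloc_global_resource()
and my_submit() are hypothetical names, not APIs from the patches in this
pull.

#include <linux/blk-mq.h>

/* All my_* identifiers below are hypothetical. */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct my_dev *dev = hctx->queue->queuedata;

	if (!my_device_has_room(dev))
		/*
		 * Device-local starvation: completion of this device's
		 * in-flight I/O is guaranteed to re-run the queue, so
		 * blk-mq need not schedule a delayed run.
		 */
		return BLK_STS_DEV_RESOURCE;

	if (!my_alloc_global_resource(dev))
		/*
		 * Global starvation (e.g. a pool shared with other
		 * devices): there may be no in-flight I/O to trigger a
		 * re-run, so blk-mq re-runs the queue after a delay.
		 */
		return BLK_STS_RESOURCE;

	my_submit(dev, bd->rq);
	return BLK_STS_OK;
}
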
* tag 'for-linus-20180204' of git://git.kernel.dk/linux-block:
  block: skd: fix incorrect linux/slab_def.h inclusion
  buffer: Avoid setting buffer bits that are already set
  blk-mq-sched: Enable merging discard bio into request
  blk-mq: fix discard merge with scheduler attached
  blk-mq: introduce BLK_STS_DEV_RESOURCE

drivers/md/dm-rq.c
drivers/scsi/scsi_lib.c
include/linux/buffer_head.h

diff --combined drivers/md/dm-rq.c
@@@ -315,10 -315,6 +315,10 @@@ static void dm_done(struct request *clo
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(tio, false);
                break;
 +      case DM_ENDIO_DELAY_REQUEUE:
 +              /* The target wants to requeue the I/O after a delay */
 +              dm_requeue_original_request(tio, true);
 +              break;
        default:
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
@@@ -408,7 -404,7 +408,7 @@@ static blk_status_t dm_dispatch_clone_r
  
        clone->start_time = jiffies;
        r = blk_insert_cloned_request(clone->q, clone);
-       if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
+       if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
        return r;
@@@ -500,7 -496,7 +500,7 @@@ check_again
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                ret = dm_dispatch_clone_request(clone, rq);
-               if (ret == BLK_STS_RESOURCE) {
+               if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                        blk_rq_unprep_clone(clone);
                        tio->ti->type->release_clone_rq(clone);
                        tio->clone = NULL;
@@@ -717,6 -713,7 +717,6 @@@ int dm_old_init_request_queue(struct ma
        /* disable dm_old_request_fn's merge heuristic by default */
        md->seq_rq_merge_deadline_usecs = 0;
  
 -      dm_init_normal_md_queue(md);
        blk_queue_softirq_done(md->queue, dm_softirq_done);
  
        /* Initialize the request-based DM worker thread */
@@@ -772,7 -769,6 +772,6 @@@ static blk_status_t dm_mq_queue_rq(stru
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md, rq_data_dir(rq), false);
-               blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
                return BLK_STS_RESOURCE;
        }
  
@@@ -824,6 -820,7 +823,6 @@@ int dm_mq_init_request_queue(struct map
                err = PTR_ERR(q);
                goto out_tag_set;
        }
 -      dm_init_md_queue(md);
  
        return 0;
  
diff --combined drivers/scsi/scsi_lib.c
@@@ -79,15 -79,14 +79,15 @@@ int scsi_init_sense_cache(struct Scsi_H
        if (shost->unchecked_isa_dma) {
                scsi_sense_isadma_cache =
                        kmem_cache_create("scsi_sense_cache(DMA)",
 -                      SCSI_SENSE_BUFFERSIZE, 0,
 -                      SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
 +                              SCSI_SENSE_BUFFERSIZE, 0,
 +                              SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
                if (!scsi_sense_isadma_cache)
                        ret = -ENOMEM;
        } else {
                scsi_sense_cache =
 -                      kmem_cache_create("scsi_sense_cache",
 -                      SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
 +                      kmem_cache_create_usercopy("scsi_sense_cache",
 +                              SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
 +                              0, SCSI_SENSE_BUFFERSIZE, NULL);
                if (!scsi_sense_cache)
                        ret = -ENOMEM;
        }
@@@ -165,7 -164,7 +165,7 @@@ static void scsi_mq_requeue_cmd(struct 
   * for a requeue after completion, which should only occur in this
   * file.
   */
 -static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 +static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
  {
        struct scsi_device *device = cmd->device;
        struct request_queue *q = device->request_queue;
   */
  void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  {
 -      __scsi_queue_insert(cmd, reason, 1);
 +      __scsi_queue_insert(cmd, reason, true);
  }
  
  
@@@ -319,39 -318,22 +319,39 @@@ static void scsi_init_cmd_errh(struct s
                cmd->cmd_len = scsi_command_size(cmd->cmnd);
  }
  
 -void scsi_device_unbusy(struct scsi_device *sdev)
 +/*
 + * Decrement the host_busy counter and wake up the error handler if necessary.
 + * Avoid as follows that the error handler is not woken up if shost->host_busy
 + * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination
 + * with an RCU read lock in this function to ensure that this function in its
 + * entirety either finishes before scsi_eh_scmd_add() increases the
 + * host_failed counter or that it notices the shost state change made by
 + * scsi_eh_scmd_add().
 + */
 +static void scsi_dec_host_busy(struct Scsi_Host *shost)
  {
 -      struct Scsi_Host *shost = sdev->host;
 -      struct scsi_target *starget = scsi_target(sdev);
        unsigned long flags;
  
 +      rcu_read_lock();
        atomic_dec(&shost->host_busy);
 -      if (starget->can_queue > 0)
 -              atomic_dec(&starget->target_busy);
 -
 -      if (unlikely(scsi_host_in_recovery(shost) &&
 -                   (shost->host_failed || shost->host_eh_scheduled))) {
 +      if (unlikely(scsi_host_in_recovery(shost))) {
                spin_lock_irqsave(shost->host_lock, flags);
 -              scsi_eh_wakeup(shost);
 +              if (shost->host_failed || shost->host_eh_scheduled)
 +                      scsi_eh_wakeup(shost);
                spin_unlock_irqrestore(shost->host_lock, flags);
        }
 +      rcu_read_unlock();
 +}
 +
 +void scsi_device_unbusy(struct scsi_device *sdev)
 +{
 +      struct Scsi_Host *shost = sdev->host;
 +      struct scsi_target *starget = scsi_target(sdev);
 +
 +      scsi_dec_host_busy(shost);
 +
 +      if (starget->can_queue > 0)
 +              atomic_dec(&starget->target_busy);
  
        atomic_dec(&sdev->device_busy);
  }
@@@ -1016,11 -998,11 +1016,11 @@@ void scsi_io_completion(struct scsi_cmn
                break;
        case ACTION_RETRY:
                /* Retry the same command immediately */
 -              __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, 0);
 +              __scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false);
                break;
        case ACTION_DELAYED_RETRY:
                /* Retry the same command after a delay */
 -              __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0);
 +              __scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false);
                break;
        }
  }
@@@ -1146,7 -1128,7 +1146,7 @@@ EXPORT_SYMBOL(scsi_init_io)
   * Called from inside blk_get_request() for pass-through requests and from
   * inside scsi_init_command() for filesystem requests.
   */
 -void scsi_initialize_rq(struct request *rq)
 +static void scsi_initialize_rq(struct request *rq)
  {
        struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
  
        cmd->jiffies_at_alloc = jiffies;
        cmd->retries = 0;
  }
 -EXPORT_SYMBOL(scsi_initialize_rq);
  
  /* Add a command to the list used by the aacraid and dpt_i2o drivers */
  void scsi_add_cmd_to_list(struct scsi_cmnd *cmd)
@@@ -1549,7 -1532,7 +1549,7 @@@ starved
                list_add_tail(&sdev->starved_entry, &shost->starved_list);
        spin_unlock_irq(shost->host_lock);
  out_dec:
 -      atomic_dec(&shost->host_busy);
 +      scsi_dec_host_busy(shost);
        return 0;
  }
  
@@@ -2037,7 -2020,7 +2037,7 @@@ static blk_status_t scsi_queue_rq(struc
        return BLK_STS_OK;
  
  out_dec_host_busy:
 -       atomic_dec(&shost->host_busy);
 +      scsi_dec_host_busy(shost);
  out_dec_target_busy:
        if (scsi_target(sdev)->can_queue > 0)
                atomic_dec(&scsi_target(sdev)->target_busy);
@@@ -2047,9 -2030,9 +2047,9 @@@ out_put_budget
        case BLK_STS_OK:
                break;
        case BLK_STS_RESOURCE:
-               if (atomic_read(&sdev->device_busy) == 0 &&
-                   !scsi_device_blocked(sdev))
-                       blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
+               if (atomic_read(&sdev->device_busy) ||
+                   scsi_device_blocked(sdev))
+                       ret = BLK_STS_DEV_RESOURCE;
                break;
        default:
                /*
diff --combined include/linux/buffer_head.h
@@@ -81,11 -81,14 +81,14 @@@ struct buffer_head 
  /*
   * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
   * and buffer_foo() functions.
+  * To avoid reset buffer flags that are already set, because that causes
+  * a costly cache line transition, check the flag first.
   */
  #define BUFFER_FNS(bit, name)                                         \
  static __always_inline void set_buffer_##name(struct buffer_head *bh) \
  {                                                                     \
-       set_bit(BH_##bit, &(bh)->b_state);                              \
+       if (!test_bit(BH_##bit, &(bh)->b_state))                        \
+               set_bit(BH_##bit, &(bh)->b_state);                      \
  }                                                                     \
  static __always_inline void clear_buffer_##name(struct buffer_head *bh)       \
  {                                                                     \
@@@ -151,6 -154,7 +154,6 @@@ void buffer_check_dirty_writeback(struc
  
  void mark_buffer_dirty(struct buffer_head *bh);
  void mark_buffer_write_io_error(struct buffer_head *bh);
 -void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
  void touch_buffer(struct buffer_head *bh);
  void set_bh_page(struct buffer_head *bh,
                struct page *page, unsigned long offset);
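
For reference, after the BUFFER_FNS change above, a call such as
set_buffer_uptodate(bh) expands roughly as follows. This is a sketch of
the macro expansion for a single bit, not code taken verbatim from the
patch:

/*
 * Approximate expansion of BUFFER_FNS(Uptodate, uptodate) after the
 * change: the plain test_bit() read skips the atomic set_bit() when
 * the bit is already set, avoiding the cache line transition that
 * hurt scalability on larger machines.
 */
static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
{
	if (!test_bit(BH_Uptodate, &bh->b_state))
		set_bit(BH_Uptodate, &bh->b_state);
}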