Merge tag 'for-5.9/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 7 Aug 2020 20:08:09 +0000 (13:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 7 Aug 2020 20:08:09 +0000 (13:08 -0700)
Pull device mapper updates from Mike Snitzer:

 - DM multipath locking fixes around m->flags tests and improvements to
   bio-based code so that it follows patterns established by
   request-based code.

 - Request-based DM core improvement to eliminate an unnecessary call to
   blk_mq_queue_stopped().

 - Add "panic_on_corruption" error handling mode to DM verity target.

 - DM bufio fix to perform buffer cleanup from a workqueue rather than
   waiting for IO in reclaim context from the shrinker.

 - DM crypt improvement to optionally avoid async processing via
   workqueues for reads and/or writes -- via "no_read_workqueue" and
   "no_write_workqueue" features (an illustrative table line follows
   this list). This more direct IO processing improves latency and
   throughput with faster storage. Avoiding workqueue IO submission for
   writes (DM_CRYPT_NO_WRITE_WORKQUEUE) is a requirement for adding
   zoned block device support to DM crypt.

 - Add zoned block device support to DM crypt. Makes use of
   DM_CRYPT_NO_WRITE_WORKQUEUE and a new optional feature
   (DM_CRYPT_WRITE_INLINE) that allows write completion to wait for
   encryption to complete. This allows write ordering to be preserved,
   which is needed for zoned block devices.

 - Fix DM ebs target's check for REQ_OP_FLUSH.

 - Fix DM core's report zones support to not report more zones than were
   requested.

 - A few small compiler warning fixes.

 - DM dust improvements to return output directly to the user rather
   than requiring them to scrape the system log for it (an example
   follows this list).
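
A minimal illustrative sketch (device paths, sizes, root hash and salt
are placeholders, not taken from this series): the new verity mode is
selected with the optional "panic_on_corruption" table argument:

        $ sudo dmsetup create vroot --table \
              '0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 <root_hash> <salt> 1 panic_on_corruption'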
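
Likewise, a minimal sketch (device path and key are placeholders) of
requesting the new dm-crypt behavior via the optional table arguments:

        $ sudo dmsetup create secure --table \
              '0 2097152 crypt aes-xts-plain64 <key_in_hex> 0 /dev/sdb 0 2 no_read_workqueue no_write_workqueue'

On a zoned block device the crypt constructor sets
DM_CRYPT_NO_WRITE_WORKQUEUE and DM_CRYPT_WRITE_INLINE implicitly, so no
extra table arguments are needed for the zoned case.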
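
And, per the updated dm-dust documentation, message results are now
printed by dmsetup itself rather than to the kernel log, e.g.:

        $ sudo dmsetup message dust1 0 countbadblocks
        countbadblocks: 895 badblock(s) found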

* tag 'for-5.9/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: don't call report zones for more than the user requested
  dm ebs: Fix incorrect checking for REQ_OP_FLUSH
  dm init: Set file local variable static
  dm ioctl: Fix compilation warning
  dm raid: Remove empty if statement
  dm verity: Fix compilation warning
  dm crypt: Enable zoned block device support
  dm crypt: add flags to optionally bypass kcryptd workqueues
  dm bufio: do buffer cleanup from a workqueue
  dm rq: don't call blk_mq_queue_stopped() in dm_stop_queue()
  dm dust: add interface to list all badblocks
  dm dust: report some message results directly back to user
  dm verity: add "panic_on_corruption" error handling mode
  dm mpath: use double checked locking in fast path
  dm mpath: rename current_pgpath to pgpath in multipath_prepare_ioctl
  dm mpath: rework __map_bio()
  dm mpath: factor out multipath_queue_bio
  dm mpath: push locking down to must_push_back_rq()
  dm mpath: take m->lock spinlock when testing QUEUE_IF_NO_PATH
  dm mpath: changes from initial m->flags locking audit

15 files changed:
Documentation/admin-guide/device-mapper/dm-dust.rst
Documentation/admin-guide/device-mapper/verity.rst
drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-dust.c
drivers/md/dm-ebs-target.c
drivers/md/dm-init.c
drivers/md/dm-ioctl.c
drivers/md/dm-mpath.c
drivers/md/dm-raid.c
drivers/md/dm-rq.c
drivers/md/dm-verity-target.c
drivers/md/dm-verity-verify-sig.h
drivers/md/dm-verity.h
drivers/md/dm.c

index b6e7e7e..e35ec8c 100644 (file)
--- a/Documentation/admin-guide/device-mapper/dm-dust.rst
+++ b/Documentation/admin-guide/device-mapper/dm-dust.rst
@@ -69,10 +69,11 @@ Create the dm-dust device:
         $ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
 
 Check the status of the read behavior ("bypass" indicates that all I/O
-will be passed through to the underlying device)::
+will be passed through to the underlying device; "verbose" indicates that
+bad block additions, removals, and remaps will be verbosely logged)::
 
         $ sudo dmsetup status dust1
-        0 33552384 dust 252:17 bypass
+        0 33552384 dust 252:17 bypass verbose
 
         $ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
         128+0 records in
@@ -164,7 +165,7 @@ following message command::
 A message will print with the number of bad blocks currently
 configured on the device::
 
-        kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
+        countbadblocks: 895 badblock(s) found
 
 Querying for specific bad blocks
 --------------------------------
@@ -176,11 +177,11 @@ following message command::
 
 The following message will print if the block is in the list::
 
-        device-mapper: dust: queryblock: block 72 found in badblocklist
+        dust_query_block: block 72 found in badblocklist
 
 The following message will print if the block is not in the list::
 
-        device-mapper: dust: queryblock: block 72 not found in badblocklist
+        dust_query_block: block 72 not found in badblocklist
 
 The "queryblock" message command will work in both the "enabled"
 and "disabled" modes, allowing the verification of whether a block
@@ -198,12 +199,28 @@ following message command::
 
 After clearing the bad block list, the following message will appear::
 
-        kernel: device-mapper: dust: clearbadblocks: badblocks cleared
+        dust_clear_badblocks: badblocks cleared
 
 If there were no bad blocks to clear, the following message will
 appear::
 
-        kernel: device-mapper: dust: clearbadblocks: no badblocks found
+        dust_clear_badblocks: no badblocks found
+
+Listing the bad block list
+--------------------------
+
+To list all bad blocks in the bad block list (using an example device
+with blocks 1 and 2 in the bad block list), run the following message
+command::
+
+        $ sudo dmsetup message dust1 0 listbadblocks
+        1
+        2
+
+If there are no bad blocks in the bad block list, the command will
+execute with no output::
+
+        $ sudo dmsetup message dust1 0 listbadblocks
 
 Message commands list
 ---------------------
@@ -223,6 +240,7 @@ Single argument message commands::
 
         countbadblocks
         clearbadblocks
+        listbadblocks
         disable
         enable
         quiet
index bb02caa..66f71f0 100644 (file)
--- a/Documentation/admin-guide/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -83,6 +83,10 @@ restart_on_corruption
     not compatible with ignore_corruption and requires user space support to
     avoid restart loops.
 
+panic_on_corruption
+    Panic the device when a corrupted block is discovered. This option is
+    not compatible with ignore_corruption and restart_on_corruption.
+
 ignore_zero_blocks
     Do not verify blocks that are expected to contain zeroes and always return
     zeroes instead. This may be useful if the partition contains unused blocks
index 6d15650..9c1a86b 100644 (file)
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -108,7 +108,10 @@ struct dm_bufio_client {
        int async_write_error;
 
        struct list_head client_list;
+
        struct shrinker shrinker;
+       struct work_struct shrink_work;
+       atomic_long_t need_shrink;
 };
 
 /*
@@ -1634,8 +1637,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c)
        return retain_bytes;
 }
 
-static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
-                           gfp_t gfp_mask)
+static void __scan(struct dm_bufio_client *c)
 {
        int l;
        struct dm_buffer *b, *tmp;
@@ -1646,42 +1648,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 
        for (l = 0; l < LIST_SIZE; l++) {
                list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
-                       if (__try_evict_buffer(b, gfp_mask))
+                       if (count - freed <= retain_target)
+                               atomic_long_set(&c->need_shrink, 0);
+                       if (!atomic_long_read(&c->need_shrink))
+                               return;
+                       if (__try_evict_buffer(b, GFP_KERNEL)) {
+                               atomic_long_dec(&c->need_shrink);
                                freed++;
-                       if (!--nr_to_scan || ((count - freed) <= retain_target))
-                               return freed;
+                       }
                        cond_resched();
                }
        }
-       return freed;
 }
 
-static unsigned long
-dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+static void shrink_work(struct work_struct *w)
+{
+       struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
+
+       dm_bufio_lock(c);
+       __scan(c);
+       dm_bufio_unlock(c);
+}
+
+static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct dm_bufio_client *c;
-       unsigned long freed;
 
        c = container_of(shrink, struct dm_bufio_client, shrinker);
-       if (sc->gfp_mask & __GFP_FS)
-               dm_bufio_lock(c);
-       else if (!dm_bufio_trylock(c))
-               return SHRINK_STOP;
+       atomic_long_add(sc->nr_to_scan, &c->need_shrink);
+       queue_work(dm_bufio_wq, &c->shrink_work);
 
-       freed  = __scan(c, sc->nr_to_scan, sc->gfp_mask);
-       dm_bufio_unlock(c);
-       return freed;
+       return sc->nr_to_scan;
 }
 
-static unsigned long
-dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
        unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
                              READ_ONCE(c->n_buffers[LIST_DIRTY]);
        unsigned long retain_target = get_retain_buffers(c);
+       unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
+
+       if (unlikely(count < retain_target))
+               count = 0;
+       else
+               count -= retain_target;
 
-       return (count < retain_target) ? 0 : (count - retain_target);
+       if (unlikely(count < queued_for_cleanup))
+               count = 0;
+       else
+               count -= queued_for_cleanup;
+
+       return count;
 }
 
 /*
@@ -1772,6 +1790,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
                __free_buffer_wake(b);
        }
 
+       INIT_WORK(&c->shrink_work, shrink_work);
+       atomic_long_set(&c->need_shrink, 0);
+
        c->shrinker.count_objects = dm_bufio_shrink_count;
        c->shrinker.scan_objects = dm_bufio_shrink_scan;
        c->shrinker.seeks = 1;
@@ -1817,6 +1838,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
        drop_buffers(c);
 
        unregister_shrinker(&c->shrinker);
+       flush_work(&c->shrink_work);
 
        mutex_lock(&dm_bufio_clients_lock);
 
index 37dcc52..1489607 100644 (file)
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -69,6 +69,7 @@ struct dm_crypt_io {
        u8 *integrity_metadata;
        bool integrity_metadata_from_pool;
        struct work_struct work;
+       struct tasklet_struct tasklet;
 
        struct convert_context ctx;
 
@@ -127,7 +128,9 @@ struct iv_elephant_private {
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+            DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
+            DM_CRYPT_WRITE_INLINE };
 
 enum cipher_flags {
        CRYPT_MODE_INTEGRITY_AEAD,      /* Use authenticated mode for cihper */
@@ -1523,7 +1526,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static blk_status_t crypt_convert(struct crypt_config *cc,
-                        struct convert_context *ctx)
+                        struct convert_context *ctx, bool atomic)
 {
        unsigned int tag_offset = 0;
        unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
@@ -1566,7 +1569,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
                        atomic_dec(&ctx->cc_pending);
                        ctx->cc_sector += sector_step;
                        tag_offset++;
-                       cond_resched();
+                       if (!atomic)
+                               cond_resched();
                        continue;
                /*
                 * There was a data integrity error.
@@ -1892,7 +1896,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 
        clone->bi_iter.bi_sector = cc->start + io->sector;
 
-       if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
+       if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
+           test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
                submit_bio_noacct(clone);
                return;
        }
@@ -1915,9 +1920,32 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
        spin_unlock_irqrestore(&cc->write_thread_lock, flags);
 }
 
+static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
+                                      struct convert_context *ctx)
+
+{
+       if (!test_bit(DM_CRYPT_WRITE_INLINE, &cc->flags))
+               return false;
+
+       /*
+        * Note: zone append writes (REQ_OP_ZONE_APPEND) do not have ordering
+        * constraints so they do not need to be issued inline by
+        * kcryptd_crypt_write_convert().
+        */
+       switch (bio_op(ctx->bio_in)) {
+       case REQ_OP_WRITE:
+       case REQ_OP_WRITE_SAME:
+       case REQ_OP_WRITE_ZEROES:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;
+       struct convert_context *ctx = &io->ctx;
        struct bio *clone;
        int crypt_finished;
        sector_t sector = io->sector;
@@ -1927,7 +1955,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
         * Prevent io from disappearing until this function completes.
         */
        crypt_inc_pending(io);
-       crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
+       crypt_convert_init(cc, ctx, NULL, io->base_bio, sector);
 
        clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
        if (unlikely(!clone)) {
@@ -1941,10 +1969,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
        sector += bio_sectors(clone);
 
        crypt_inc_pending(io);
-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, ctx,
+                         test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;
-       crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+       crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
+       if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
+               /* Wait for completion signaled by kcryptd_async_done() */
+               wait_for_completion(&ctx->restart);
+               crypt_finished = 1;
+       }
 
        /* Encryption was already finished, submit io now */
        if (crypt_finished) {
@@ -1971,7 +2005,8 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
        crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
                           io->sector);
 
-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, &io->ctx,
+                         test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;
 
@@ -2015,10 +2050,21 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
        if (!atomic_dec_and_test(&ctx->cc_pending))
                return;
 
-       if (bio_data_dir(io->base_bio) == READ)
+       /*
+        * The request is fully completed: for inline writes, let
+        * kcryptd_crypt_write_convert() do the IO submission.
+        */
+       if (bio_data_dir(io->base_bio) == READ) {
                kcryptd_crypt_read_done(io);
-       else
-               kcryptd_crypt_write_io_submit(io, 1);
+               return;
+       }
+
+       if (kcryptd_crypt_write_inline(cc, ctx)) {
+               complete(&ctx->restart);
+               return;
+       }
+
+       kcryptd_crypt_write_io_submit(io, 1);
 }
 
 static void kcryptd_crypt(struct work_struct *work)
@@ -2031,10 +2077,28 @@ static void kcryptd_crypt(struct work_struct *work)
                kcryptd_crypt_write_convert(io);
 }
 
+static void kcryptd_crypt_tasklet(unsigned long work)
+{
+       kcryptd_crypt((struct work_struct *)work);
+}
+
 static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;
 
+       if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
+           (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
+               if (in_irq()) {
+                       /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+                       tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
+                       tasklet_schedule(&io->tasklet);
+                       return;
+               }
+
+               kcryptd_crypt(&io->work);
+               return;
+       }
+
        INIT_WORK(&io->work, kcryptd_crypt);
        queue_work(cc->crypt_queue, &io->work);
 }
@@ -2838,7 +2902,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
        struct crypt_config *cc = ti->private;
        struct dm_arg_set as;
        static const struct dm_arg _args[] = {
-               {0, 6, "Invalid number of feature args"},
+               {0, 8, "Invalid number of feature args"},
        };
        unsigned int opt_params, val;
        const char *opt_string, *sval;
@@ -2868,6 +2932,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
 
                else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
                        set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_read_workqueue"))
+                       set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_write_workqueue"))
+                       set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                else if (sscanf(opt_string, "integrity:%u:", &val) == 1) {
                        if (val == 0 || val > MAX_TAG_SIZE) {
                                ti->error = "Invalid integrity arguments";
@@ -2908,6 +2976,21 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
        return 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+
+static int crypt_report_zones(struct dm_target *ti,
+               struct dm_report_zones_args *args, unsigned int nr_zones)
+{
+       struct crypt_config *cc = ti->private;
+       sector_t sector = cc->start + dm_target_offset(ti, args->next_sector);
+
+       args->start = cc->start;
+       return blkdev_report_zones(cc->dev->bdev, sector, nr_zones,
+                                  dm_report_zones_cb, args);
+}
+
+#endif
+
 /*
  * Construct an encryption mapping:
  * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start>
@@ -3041,6 +3124,16 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
        cc->start = tmpll;
 
+       /*
+        * For zoned block devices, we need to preserve the issuer write
+        * ordering. To do so, disable write workqueues and force inline
+        * encryption completion.
+        */
+       if (bdev_is_zoned(cc->dev->bdev)) {
+               set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
+               set_bit(DM_CRYPT_WRITE_INLINE, &cc->flags);
+       }
+
        if (crypt_integrity_aead(cc) || cc->integrity_iv_size) {
                ret = crypt_integrity_ctr(cc, ti);
                if (ret)
@@ -3196,6 +3289,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                num_feature_args += !!ti->num_discard_bios;
                num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
                num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
                num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
                if (cc->on_disk_tag_size)
@@ -3208,6 +3303,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                                DMEMIT(" same_cpu_crypt");
                        if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
                                DMEMIT(" submit_from_crypt_cpus");
+                       if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_read_workqueue");
+                       if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_write_workqueue");
                        if (cc->on_disk_tag_size)
                                DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth);
                        if (cc->sector_size != (1 << SECTOR_SHIFT))
@@ -3320,10 +3419,14 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version = {1, 21, 0},
+       .version = {1, 22, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,
+#ifdef CONFIG_BLK_DEV_ZONED
+       .features = DM_TARGET_ZONED_HM,
+       .report_zones = crypt_report_zones,
+#endif
        .map    = crypt_map,
        .status = crypt_status,
        .postsuspend = crypt_postsuspend,
index ff03b90..072ea91 100644 (file)
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -138,20 +138,22 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block,
        return 0;
 }
 
-static int dust_query_block(struct dust_device *dd, unsigned long long block)
+static int dust_query_block(struct dust_device *dd, unsigned long long block, char *result,
+                           unsigned int maxlen, unsigned int *sz_ptr)
 {
        struct badblock *bblock;
        unsigned long flags;
+       unsigned int sz = *sz_ptr;
 
        spin_lock_irqsave(&dd->dust_lock, flags);
        bblock = dust_rb_search(&dd->badblocklist, block);
        if (bblock != NULL)
-               DMINFO("%s: block %llu found in badblocklist", __func__, block);
+               DMEMIT("%s: block %llu found in badblocklist", __func__, block);
        else
-               DMINFO("%s: block %llu not found in badblocklist", __func__, block);
+               DMEMIT("%s: block %llu not found in badblocklist", __func__, block);
        spin_unlock_irqrestore(&dd->dust_lock, flags);
 
-       return 0;
+       return 1;
 }
 
 static int __dust_map_read(struct dust_device *dd, sector_t thisblock)
@@ -259,11 +261,13 @@ static bool __dust_clear_badblocks(struct rb_root *tree,
        return true;
 }
 
-static int dust_clear_badblocks(struct dust_device *dd)
+static int dust_clear_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
+                               unsigned int *sz_ptr)
 {
        unsigned long flags;
        struct rb_root badblocklist;
        unsigned long long badblock_count;
+       unsigned int sz = *sz_ptr;
 
        spin_lock_irqsave(&dd->dust_lock, flags);
        badblocklist = dd->badblocklist;
@@ -273,11 +277,36 @@ static int dust_clear_badblocks(struct dust_device *dd)
        spin_unlock_irqrestore(&dd->dust_lock, flags);
 
        if (!__dust_clear_badblocks(&badblocklist, badblock_count))
-               DMINFO("%s: no badblocks found", __func__);
+               DMEMIT("%s: no badblocks found", __func__);
        else
-               DMINFO("%s: badblocks cleared", __func__);
+               DMEMIT("%s: badblocks cleared", __func__);
 
-       return 0;
+       return 1;
+}
+
+static int dust_list_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
+                               unsigned int *sz_ptr)
+{
+       unsigned long flags;
+       struct rb_root badblocklist;
+       struct rb_node *node;
+       struct badblock *bblk;
+       unsigned int sz = *sz_ptr;
+       unsigned long long num = 0;
+
+       spin_lock_irqsave(&dd->dust_lock, flags);
+       badblocklist = dd->badblocklist;
+       for (node = rb_first(&badblocklist); node; node = rb_next(node)) {
+               bblk = rb_entry(node, struct badblock, node);
+               DMEMIT("%llu\n", bblk->bb);
+               num++;
+       }
+
+       spin_unlock_irqrestore(&dd->dust_lock, flags);
+       if (!num)
+               DMEMIT("No blocks in badblocklist");
+
+       return 1;
 }
 
 /*
@@ -383,7 +412,7 @@ static void dust_dtr(struct dm_target *ti)
 }
 
 static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
-                       char *result_buf, unsigned int maxlen)
+                       char *result, unsigned int maxlen)
 {
        struct dust_device *dd = ti->private;
        sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT;
@@ -393,6 +422,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
        unsigned char wr_fail_cnt;
        unsigned int tmp_ui;
        unsigned long flags;
+       unsigned int sz = 0;
        char dummy;
 
        if (argc == 1) {
@@ -410,18 +440,20 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
                        r = 0;
                } else if (!strcasecmp(argv[0], "countbadblocks")) {
                        spin_lock_irqsave(&dd->dust_lock, flags);
-                       DMINFO("countbadblocks: %llu badblock(s) found",
+                       DMEMIT("countbadblocks: %llu badblock(s) found",
                               dd->badblock_count);
                        spin_unlock_irqrestore(&dd->dust_lock, flags);
-                       r = 0;
+                       r = 1;
                } else if (!strcasecmp(argv[0], "clearbadblocks")) {
-                       r = dust_clear_badblocks(dd);
+                       r = dust_clear_badblocks(dd, result, maxlen, &sz);
                } else if (!strcasecmp(argv[0], "quiet")) {
                        if (!dd->quiet_mode)
                                dd->quiet_mode = true;
                        else
                                dd->quiet_mode = false;
                        r = 0;
+               } else if (!strcasecmp(argv[0], "listbadblocks")) {
+                       r = dust_list_badblocks(dd, result, maxlen, &sz);
                } else {
                        invalid_msg = true;
                }
@@ -441,7 +473,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
                else if (!strcasecmp(argv[0], "removebadblock"))
                        r = dust_remove_block(dd, block);
                else if (!strcasecmp(argv[0], "queryblock"))
-                       r = dust_query_block(dd, block);
+                       r = dust_query_block(dd, block, result, maxlen, &sz);
                else
                        invalid_msg = true;
 
index 4445127..cb85610 100644 (file)
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -363,7 +363,7 @@ static int ebs_map(struct dm_target *ti, struct bio *bio)
        bio_set_dev(bio, ec->dev->bdev);
        bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
 
-       if (unlikely(bio->bi_opf & REQ_OP_FLUSH))
+       if (unlikely(bio_op(bio) == REQ_OP_FLUSH))
                return DM_MAPIO_REMAPPED;
        /*
         * Only queue for bufio processing in case of partial or overlapping buffers
index b869316..b0c45c6 100644 (file)
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -36,7 +36,7 @@ struct dm_device {
        struct list_head list;
 };
 
-const char * const dm_allowed_targets[] __initconst = {
+static const char * const dm_allowed_targets[] __initconst = {
        "crypt",
        "delay",
        "linear",
index 056d891..28122e8 100644 (file)
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1168,7 +1168,7 @@ static void retrieve_status(struct dm_table *table,
                spec->sector_start = ti->begin;
                spec->length = ti->len;
                strncpy(spec->target_type, ti->type->name,
-                       sizeof(spec->target_type));
+                       sizeof(spec->target_type) - 1);
 
                outptr += sizeof(struct dm_target_spec);
                remaining = len - (outptr - outbuf);
index 73bb23d..53645a6 100644 (file)
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -128,6 +128,20 @@ static void queue_if_no_path_timeout_work(struct timer_list *t);
 #define MPATHF_PG_INIT_REQUIRED 5              /* pg_init needs calling? */
 #define MPATHF_PG_INIT_DELAY_RETRY 6           /* Delay pg_init retry? */
 
+static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
+{
+       bool r = test_bit(MPATHF_bit, &m->flags);
+
+       if (r) {
+               unsigned long flags;
+               spin_lock_irqsave(&m->lock, flags);
+               r = test_bit(MPATHF_bit, &m->flags);
+               spin_unlock_irqrestore(&m->lock, flags);
+       }
+
+       return r;
+}
+
 /*-----------------------------------------------
  * Allocation routines
  *-----------------------------------------------*/
@@ -335,6 +349,8 @@ static int pg_init_all_paths(struct multipath *m)
 
 static void __switch_pg(struct multipath *m, struct priority_group *pg)
 {
+       lockdep_assert_held(&m->lock);
+
        m->current_pg = pg;
 
        /* Must we initialise the PG first, and queue I/O till it's ready? */
@@ -382,7 +398,9 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
        unsigned bypassed = 1;
 
        if (!atomic_read(&m->nr_valid_paths)) {
+               spin_lock_irqsave(&m->lock, flags);
                clear_bit(MPATHF_QUEUE_IO, &m->flags);
+               spin_unlock_irqrestore(&m->lock, flags);
                goto failed;
        }
 
@@ -422,8 +440,11 @@ check_current_pg:
                                continue;
                        pgpath = choose_path_in_pg(m, pg, nr_bytes);
                        if (!IS_ERR_OR_NULL(pgpath)) {
-                               if (!bypassed)
+                               if (!bypassed) {
+                                       spin_lock_irqsave(&m->lock, flags);
                                        set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
+                                       spin_unlock_irqrestore(&m->lock, flags);
+                               }
                                return pgpath;
                        }
                }
@@ -465,7 +486,14 @@ static bool __must_push_back(struct multipath *m)
 
 static bool must_push_back_rq(struct multipath *m)
 {
-       return test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m);
+       unsigned long flags;
+       bool ret;
+
+       spin_lock_irqsave(&m->lock, flags);
+       ret = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m));
+       spin_unlock_irqrestore(&m->lock, flags);
+
+       return ret;
 }
 
 /*
@@ -485,7 +513,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 
        /* Do we need to select a new pgpath? */
        pgpath = READ_ONCE(m->current_pgpath);
-       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
                pgpath = choose_pgpath(m, nr_bytes);
 
        if (!pgpath) {
@@ -493,8 +521,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
                        return DM_MAPIO_DELAY_REQUEUE;
                dm_report_EIO(m);       /* Failed */
                return DM_MAPIO_KILL;
-       } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
-                  test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
+       } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
+                  mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
                pg_init_all_paths(m);
                return DM_MAPIO_DELAY_REQUEUE;
        }
@@ -560,33 +588,45 @@ static void multipath_release_clone(struct request *clone,
  * Map cloned bios (bio-based multipath)
  */
 
+static void __multipath_queue_bio(struct multipath *m, struct bio *bio)
+{
+       /* Queue for the daemon to resubmit */
+       bio_list_add(&m->queued_bios, bio);
+       if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
+               queue_work(kmultipathd, &m->process_queued_bios);
+}
+
+static void multipath_queue_bio(struct multipath *m, struct bio *bio)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&m->lock, flags);
+       __multipath_queue_bio(m, bio);
+       spin_unlock_irqrestore(&m->lock, flags);
+}
+
 static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 {
        struct pgpath *pgpath;
        unsigned long flags;
-       bool queue_io;
 
        /* Do we need to select a new pgpath? */
        pgpath = READ_ONCE(m->current_pgpath);
-       if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
                pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 
-       /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */
-       queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-
-       if ((pgpath && queue_io) ||
-           (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
-               /* Queue for the daemon to resubmit */
+       if (!pgpath) {
                spin_lock_irqsave(&m->lock, flags);
-               bio_list_add(&m->queued_bios, bio);
+               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       __multipath_queue_bio(m, bio);
+                       pgpath = ERR_PTR(-EAGAIN);
+               }
                spin_unlock_irqrestore(&m->lock, flags);
 
-               /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
-               if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
-                       pg_init_all_paths(m);
-               else if (!queue_io)
-                       queue_work(kmultipathd, &m->process_queued_bios);
-
+       } else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
+                  mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
+               multipath_queue_bio(m, bio);
+               pg_init_all_paths(m);
                return ERR_PTR(-EAGAIN);
        }
 
@@ -835,7 +875,7 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
        struct request_queue *q = bdev_get_queue(bdev);
        int r;
 
-       if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
+       if (mpath_double_check_test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, m)) {
 retain:
                if (*attached_handler_name) {
                        /*
@@ -1614,7 +1654,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
                if (pgpath)
                        fail_path(pgpath);
 
-               if (atomic_read(&m->nr_valid_paths) == 0 &&
+               if (!atomic_read(&m->nr_valid_paths) &&
                    !must_push_back_rq(m)) {
                        if (error == BLK_STS_IOERR)
                                dm_report_EIO(m);
@@ -1649,23 +1689,22 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
        if (pgpath)
                fail_path(pgpath);
 
-       if (atomic_read(&m->nr_valid_paths) == 0 &&
-           !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-               if (__must_push_back(m)) {
-                       r = DM_ENDIO_REQUEUE;
-               } else {
-                       dm_report_EIO(m);
-                       *error = BLK_STS_IOERR;
+       if (!atomic_read(&m->nr_valid_paths)) {
+               spin_lock_irqsave(&m->lock, flags);
+               if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       if (__must_push_back(m)) {
+                               r = DM_ENDIO_REQUEUE;
+                       } else {
+                               dm_report_EIO(m);
+                               *error = BLK_STS_IOERR;
+                       }
+                       spin_unlock_irqrestore(&m->lock, flags);
+                       goto done;
                }
-               goto done;
+               spin_unlock_irqrestore(&m->lock, flags);
        }
 
-       spin_lock_irqsave(&m->lock, flags);
-       bio_list_add(&m->queued_bios, clone);
-       spin_unlock_irqrestore(&m->lock, flags);
-       if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
-               queue_work(kmultipathd, &m->process_queued_bios);
-
+       multipath_queue_bio(m, clone);
        r = DM_ENDIO_INCOMPLETE;
 done:
        if (pgpath) {
@@ -1937,16 +1976,17 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
                                   struct block_device **bdev)
 {
        struct multipath *m = ti->private;
-       struct pgpath *current_pgpath;
+       struct pgpath *pgpath;
+       unsigned long flags;
        int r;
 
-       current_pgpath = READ_ONCE(m->current_pgpath);
-       if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
-               current_pgpath = choose_pgpath(m, 0);
+       pgpath = READ_ONCE(m->current_pgpath);
+       if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
+               pgpath = choose_pgpath(m, 0);
 
-       if (current_pgpath) {
-               if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
-                       *bdev = current_pgpath->path.dev->bdev;
+       if (pgpath) {
+               if (!mpath_double_check_test_bit(MPATHF_QUEUE_IO, m)) {
+                       *bdev = pgpath->path.dev->bdev;
                        r = 0;
                } else {
                        /* pg_init has not started or completed */
@@ -1954,10 +1994,11 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
                }
        } else {
                /* No path is available */
+               r = -EIO;
+               spin_lock_irqsave(&m->lock, flags);
                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
                        r = -ENOTCONN;
-               else
-                       r = -EIO;
+               spin_unlock_irqrestore(&m->lock, flags);
        }
 
        if (r == -ENOTCONN) {
@@ -1965,8 +2006,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
                        /* Path status changed, redo selection */
                        (void) choose_pgpath(m, 0);
                }
+               spin_lock_irqsave(&m->lock, flags);
                if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
-                       pg_init_all_paths(m);
+                       (void) __pg_init_all_paths(m);
+               spin_unlock_irqrestore(&m->lock, flags);
                dm_table_run_md_queue_async(m->ti->table);
                process_queued_io_list(m);
        }
@@ -2026,8 +2069,15 @@ static int multipath_busy(struct dm_target *ti)
                return true;
 
        /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
-       if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-               return (m->queue_mode != DM_TYPE_REQUEST_BASED);
+       if (!atomic_read(&m->nr_valid_paths)) {
+               unsigned long flags;
+               spin_lock_irqsave(&m->lock, flags);
+               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                       spin_unlock_irqrestore(&m->lock, flags);
+                       return (m->queue_mode != DM_TYPE_REQUEST_BASED);
+               }
+               spin_unlock_irqrestore(&m->lock, flags);
+       }
 
        /* Guess which priority_group will be used at next mapping time */
        pg = READ_ONCE(m->current_pg);
index d9e2709..8d2b835 100644 (file)
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2337,8 +2337,6 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
 
        if (new_devs == rs->raid_disks || !rebuilds) {
                /* Replace a broken device */
-               if (new_devs == 1 && !rs->delta_disks)
-                       ;
                if (new_devs == rs->raid_disks) {
                        DMINFO("Superblocks created for new raid set");
                        set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
index 7ce387a..6d743ff 100644 (file)
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -70,9 +70,6 @@ void dm_start_queue(struct request_queue *q)
 
 void dm_stop_queue(struct request_queue *q)
 {
-       if (blk_mq_queue_stopped(q))
-               return;
-
        blk_mq_quiesce_queue(q);
 }
 
index 75fa4d9..f74982d 100644 (file)
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -30,6 +30,7 @@
 
 #define DM_VERITY_OPT_LOGGING          "ignore_corruption"
 #define DM_VERITY_OPT_RESTART          "restart_on_corruption"
+#define DM_VERITY_OPT_PANIC            "panic_on_corruption"
 #define DM_VERITY_OPT_IGN_ZEROES       "ignore_zero_blocks"
 #define DM_VERITY_OPT_AT_MOST_ONCE     "check_at_most_once"
 
@@ -254,6 +255,9 @@ out:
        if (v->mode == DM_VERITY_MODE_RESTART)
                kernel_restart("dm-verity device corrupted");
 
+       if (v->mode == DM_VERITY_MODE_PANIC)
+               panic("dm-verity device corrupted");
+
        return 1;
 }
 
@@ -742,6 +746,9 @@ static void verity_status(struct dm_target *ti, status_type_t type,
                        case DM_VERITY_MODE_RESTART:
                                DMEMIT(DM_VERITY_OPT_RESTART);
                                break;
+                       case DM_VERITY_MODE_PANIC:
+                               DMEMIT(DM_VERITY_OPT_PANIC);
+                               break;
                        default:
                                BUG();
                        }
@@ -907,6 +914,10 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
                        v->mode = DM_VERITY_MODE_RESTART;
                        continue;
 
+               } else if (!strcasecmp(arg_name, DM_VERITY_OPT_PANIC)) {
+                       v->mode = DM_VERITY_MODE_PANIC;
+                       continue;
+
                } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROES)) {
                        r = verity_alloc_zero_digest(v);
                        if (r) {
@@ -1221,7 +1232,7 @@ bad:
 
 static struct target_type verity_target = {
        .name           = "verity",
-       .version        = {1, 6, 0},
+       .version        = {1, 7, 0},
        .module         = THIS_MODULE,
        .ctr            = verity_ctr,
        .dtr            = verity_dtr,
index 19b1547..3987c71 100644 (file)
--- a/drivers/md/dm-verity-verify-sig.h
+++ b/drivers/md/dm-verity-verify-sig.h
@@ -34,25 +34,25 @@ void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts);
 
 #define DM_VERITY_ROOT_HASH_VERIFICATION_OPTS 0
 
-int verity_verify_root_hash(const void *data, size_t data_len,
-                           const void *sig_data, size_t sig_len)
+static inline int verity_verify_root_hash(const void *data, size_t data_len,
+                                         const void *sig_data, size_t sig_len)
 {
        return 0;
 }
 
-bool verity_verify_is_sig_opt_arg(const char *arg_name)
+static inline bool verity_verify_is_sig_opt_arg(const char *arg_name)
 {
        return false;
 }
 
-int verity_verify_sig_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
-                                   struct dm_verity_sig_opts *sig_opts,
-                                   unsigned int *argc, const char *arg_name)
+static inline int verity_verify_sig_parse_opt_args(struct dm_arg_set *as,
+                       struct dm_verity *v, struct dm_verity_sig_opts *sig_opts,
+                       unsigned int *argc, const char *arg_name)
 {
        return -EINVAL;
 }
 
-void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts)
+static inline void verity_verify_sig_opts_cleanup(struct dm_verity_sig_opts *sig_opts)
 {
 }
 
index 641b9e3..4e769d1 100644 (file)
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -20,7 +20,8 @@
 enum verity_mode {
        DM_VERITY_MODE_EIO,
        DM_VERITY_MODE_LOGGING,
-       DM_VERITY_MODE_RESTART
+       DM_VERITY_MODE_RESTART,
+       DM_VERITY_MODE_PANIC
 };
 
 enum verity_block_type {
index 87cf45f..32fa649 100644 (file)
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -504,7 +504,8 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
                }
 
                args.tgt = tgt;
-               ret = tgt->type->report_zones(tgt, &args, nr_zones);
+               ret = tgt->type->report_zones(tgt, &args,
+                                             nr_zones - args.zone_idx);
                if (ret < 0)
                        goto out;
        } while (args.zone_idx < nr_zones &&