dm crypt: add flags to optionally bypass kcryptd workqueues
authorIgnat Korchagin <ignat@cloudflare.com>
Mon, 6 Jul 2020 17:37:31 +0000 (18:37 +0100)
committerMike Snitzer <snitzer@redhat.com>
Mon, 20 Jul 2020 15:17:44 +0000 (11:17 -0400)
This is a follow up to [1] that detailed latency problems associated
with dm-crypt's use of workqueues when processing IO.

Current dm-crypt implementation creates a significant IO performance
overhead (at least on small IO block sizes) for both latency and
throughput. We suspect offloading IO request processing into
workqueues and async threads is more harmful these days with the
modern fast storage. I also did some digging into the dm-crypt git
history and much of this async processing is not needed anymore,
because the reasons it was added are mostly gone from the kernel. More
details can be found in [2] (see "Git archeology" section).

This change adds DM_CRYPT_NO_READ_WORKQUEUE and
DM_CRYPT_NO_WRITE_WORKQUEUE flags for read and write BIOs, which
direct dm-crypt to not offload crypto operations into kcryptd
workqueues.  In addition, writes are not buffered to be sorted in the
dm-crypt red-black tree, but dispatched immediately. For cases, where
crypto operations cannot happen (hard interrupt context, for example
the read path of some NVME drivers), we offload the work to a tasklet
rather than a workqueue.

These flags only ensure no async BIO processing in the dm-crypt
module. It is worth noting that some Crypto API implementations may
offload encryption into their own workqueues, which are independent of
the dm-crypt and its configuration. However upon enabling these new
flags dm-crypt will instruct Crypto API not to backlog crypto
requests.

To give an idea of the performance gains for certain workloads,
consider the script, and results when tested against various
devices, detailed here:
https://www.redhat.com/archives/dm-devel/2020-July/msg00138.html

[1]: https://www.spinics.net/lists/dm-crypt/msg07516.html
[2]: https://blog.cloudflare.com/speeding-up-linux-disk-encryption/

Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/dm-crypt.c

index 000ddfa..7536ecb 100644 (file)
@@ -69,6 +69,7 @@ struct dm_crypt_io {
        u8 *integrity_metadata;
        bool integrity_metadata_from_pool;
        struct work_struct work;
+       struct tasklet_struct tasklet;
 
        struct convert_context ctx;
 
@@ -127,7 +128,8 @@ struct iv_elephant_private {
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+            DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE };
 
 enum cipher_flags {
        CRYPT_MODE_INTEGRITY_AEAD,      /* Use authenticated mode for cihper */
@@ -1523,7 +1525,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static blk_status_t crypt_convert(struct crypt_config *cc,
-                        struct convert_context *ctx)
+                        struct convert_context *ctx, bool atomic)
 {
        unsigned int tag_offset = 0;
        unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
@@ -1566,7 +1568,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
                        atomic_dec(&ctx->cc_pending);
                        ctx->cc_sector += sector_step;
                        tag_offset++;
-                       cond_resched();
+                       if (!atomic)
+                               cond_resched();
                        continue;
                /*
                 * There was a data integrity error.
@@ -1892,7 +1895,8 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 
        clone->bi_iter.bi_sector = cc->start + io->sector;
 
-       if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
+       if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
+           test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
                generic_make_request(clone);
                return;
        }
@@ -1941,7 +1945,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
        sector += bio_sectors(clone);
 
        crypt_inc_pending(io);
-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, &io->ctx,
+                         test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;
        crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
@@ -1971,7 +1976,8 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
        crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
                           io->sector);
 
-       r = crypt_convert(cc, &io->ctx);
+       r = crypt_convert(cc, &io->ctx,
+                         test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
        if (r)
                io->error = r;
 
@@ -2031,10 +2037,28 @@ static void kcryptd_crypt(struct work_struct *work)
                kcryptd_crypt_write_convert(io);
 }
 
+static void kcryptd_crypt_tasklet(unsigned long work)
+{
+       kcryptd_crypt((struct work_struct *)work);
+}
+
 static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;
 
+       if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
+           (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
+               if (in_irq()) {
+                       /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+                       tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
+                       tasklet_schedule(&io->tasklet);
+                       return;
+               }
+
+               kcryptd_crypt(&io->work);
+               return;
+       }
+
        INIT_WORK(&io->work, kcryptd_crypt);
        queue_work(cc->crypt_queue, &io->work);
 }
@@ -2838,7 +2862,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
        struct crypt_config *cc = ti->private;
        struct dm_arg_set as;
        static const struct dm_arg _args[] = {
-               {0, 6, "Invalid number of feature args"},
+               {0, 8, "Invalid number of feature args"},
        };
        unsigned int opt_params, val;
        const char *opt_string, *sval;
@@ -2868,6 +2892,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
 
                else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
                        set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_read_workqueue"))
+                       set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               else if (!strcasecmp(opt_string, "no_write_workqueue"))
+                       set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                else if (sscanf(opt_string, "integrity:%u:", &val) == 1) {
                        if (val == 0 || val > MAX_TAG_SIZE) {
                                ti->error = "Invalid integrity arguments";
@@ -3196,6 +3224,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                num_feature_args += !!ti->num_discard_bios;
                num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
                num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
+               num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
                num_feature_args += cc->sector_size != (1 << SECTOR_SHIFT);
                num_feature_args += test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags);
                if (cc->on_disk_tag_size)
@@ -3208,6 +3238,10 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                                DMEMIT(" same_cpu_crypt");
                        if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
                                DMEMIT(" submit_from_crypt_cpus");
+                       if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_read_workqueue");
+                       if (test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))
+                               DMEMIT(" no_write_workqueue");
                        if (cc->on_disk_tag_size)
                                DMEMIT(" integrity:%u:%s", cc->on_disk_tag_size, cc->cipher_auth);
                        if (cc->sector_size != (1 << SECTOR_SHIFT))
@@ -3320,7 +3354,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version = {1, 21, 0},
+       .version = {1, 22, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,