dm integrity: allow resize of the integrity device
[linux-2.6-microblaze.git] / drivers / md / dm-integrity.c
index 166727a..fafd9ec 100644 (file)
@@ -6,6 +6,8 @@
  * This file is released under the GPL.
  */
 
+#include "dm-bio-record.h"
+
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/device-mapper.h>
@@ -201,17 +203,19 @@ struct dm_integrity_c {
        __u8 log2_blocks_per_bitmap_bit;
 
        unsigned char mode;
-       int suspending;
 
        int failed;
 
        struct crypto_shash *internal_hash;
 
+       struct dm_target *ti;
+
        /* these variables are locked with endio_wait.lock */
        struct rb_root in_progress;
        struct list_head wait_list;
        wait_queue_head_t endio_wait;
        struct workqueue_struct *wait_wq;
+       struct workqueue_struct *offload_wq;
 
        unsigned char commit_seq;
        commit_id_t commit_ids[N_COMMIT_IDS];
@@ -293,11 +297,7 @@ struct dm_integrity_io {
 
        struct completion *completion;
 
-       struct gendisk *orig_bi_disk;
-       u8 orig_bi_partno;
-       bio_end_io_t *orig_bi_end_io;
-       struct bio_integrity_payload *orig_bi_integrity;
-       struct bvec_iter orig_bi_iter;
+       struct dm_bio_details bio_details;
 };
 
 struct journal_completion {
@@ -510,8 +510,8 @@ static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
 
        if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
                DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
-                       (unsigned long long)sector,
-                       (unsigned long long)n_sectors,
+                       sector,
+                       n_sectors,
                        ic->sb->log2_sectors_per_block,
                        ic->log2_blocks_per_bitmap_bit,
                        mode);
@@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio)
                        dio->range.logical_sector += dio->range.n_sectors;
                        bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
                        INIT_WORK(&dio->work, integrity_bio_wait);
-                       queue_work(ic->wait_wq, &dio->work);
+                       queue_work(ic->offload_wq, &dio->work);
                        return;
                }
                do_endio_flush(ic, dio);
@@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio)
 {
        struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
 
-       bio->bi_iter = dio->orig_bi_iter;
-       bio->bi_disk = dio->orig_bi_disk;
-       bio->bi_partno = dio->orig_bi_partno;
-       if (dio->orig_bi_integrity) {
-               bio->bi_integrity = dio->orig_bi_integrity;
+       dm_bio_restore(&dio->bio_details, bio);
+       if (bio->bi_integrity)
                bio->bi_opf |= REQ_INTEGRITY;
-       }
-       bio->bi_end_io = dio->orig_bi_end_io;
 
        if (dio->completion)
                complete(dio->completion);
@@ -1524,7 +1519,7 @@ static void integrity_metadata(struct work_struct *w)
                struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
                char *checksums;
                unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
-               char checksums_onstack[HASH_MAX_DIGESTSIZE];
+               char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
                unsigned sectors_to_process = dio->range.n_sectors;
                sector_t sector = dio->range.logical_sector;
 
@@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w)
                        }
                }
 
-               __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
+               __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
                        unsigned pos;
                        char *mem, *checksums_ptr;
 
@@ -1563,8 +1558,9 @@ again:
                                                checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
                        if (unlikely(r)) {
                                if (r > 0) {
-                                       DMERR_LIMIT("Checksum failed at sector 0x%llx",
-                                                   (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
+                                       char b[BDEVNAME_SIZE];
+                                       DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", bio_devname(bio, b),
+                                                   (sector - ((r + ic->tag_size - 1) / ic->tag_size)));
                                        r = -EILSEQ;
                                        atomic64_inc(&ic->number_of_mismatches);
                                }
@@ -1586,7 +1582,7 @@ again:
                if (likely(checksums != checksums_onstack))
                        kfree(checksums);
        } else {
-               struct bio_integrity_payload *bip = dio->orig_bi_integrity;
+               struct bio_integrity_payload *bip = dio->bio_details.bi_integrity;
 
                if (bip) {
                        struct bio_vec biv;
@@ -1648,14 +1644,14 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
        }
        if (unlikely(dio->range.logical_sector + bio_sectors(bio) > ic->provided_data_sectors)) {
                DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
-                     (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
-                     (unsigned long long)ic->provided_data_sectors);
+                     dio->range.logical_sector, bio_sectors(bio),
+                     ic->provided_data_sectors);
                return DM_MAPIO_KILL;
        }
        if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
                DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
                      ic->sectors_per_block,
-                     (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
+                     dio->range.logical_sector, bio_sectors(bio));
                return DM_MAPIO_KILL;
        }
 
@@ -1753,12 +1749,12 @@ retry_kmap:
                                } while (++s < ic->sectors_per_block);
 #ifdef INTERNAL_VERIFY
                                if (ic->internal_hash) {
-                                       char checksums_onstack[max(HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
+                                       char checksums_onstack[max((size_t)HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
 
                                        integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
                                        if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
                                                DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
-                                                           (unsigned long long)logical_sector);
+                                                           logical_sector);
                                        }
                                }
 #endif
@@ -1865,7 +1861,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
 
        if (need_sync_io && from_map) {
                INIT_WORK(&dio->work, integrity_bio_wait);
-               queue_work(ic->metadata_wq, &dio->work);
+               queue_work(ic->offload_wq, &dio->work);
                return;
        }
 
@@ -2005,20 +2001,13 @@ offload_to_thread:
        } else
                dio->completion = NULL;
 
-       dio->orig_bi_iter = bio->bi_iter;
-
-       dio->orig_bi_disk = bio->bi_disk;
-       dio->orig_bi_partno = bio->bi_partno;
+       dm_bio_record(&dio->bio_details, bio);
        bio_set_dev(bio, ic->dev->bdev);
-
-       dio->orig_bi_integrity = bio_integrity(bio);
        bio->bi_integrity = NULL;
        bio->bi_opf &= ~REQ_INTEGRITY;
-
-       dio->orig_bi_end_io = bio->bi_end_io;
        bio->bi_end_io = integrity_end_io;
-
        bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
+
        generic_make_request(bio);
 
        if (need_sync_io) {
@@ -2205,6 +2194,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
                                        sec &= ~(sector_t)(ic->sectors_per_block - 1);
                                }
                        }
+                       if (unlikely(sec >= ic->provided_data_sectors))
+                               continue;
                        get_area_and_offset(ic, sec, &area, &offset);
                        restore_last_bytes(ic, access_journal_data(ic, i, j), je);
                        for (k = j + 1; k < ic->journal_section_entries; k++) {
@@ -2214,6 +2205,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
                                        break;
                                BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
                                sec2 = journal_entry_get_sector(je2);
+                               if (unlikely(sec2 >= ic->provided_data_sectors))
+                                       break;
                                get_area_and_offset(ic, sec2, &area2, &offset2);
                                if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
                                        break;
@@ -2315,7 +2308,7 @@ static void integrity_writer(struct work_struct *w)
        unsigned prev_free_sectors;
 
        /* the following test is not needed, but it tests the replay code */
-       if (READ_ONCE(ic->suspending) && !ic->meta_dev)
+       if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev)
                return;
 
        spin_lock_irq(&ic->endio_wait.lock);
@@ -2376,7 +2369,7 @@ static void integrity_recalc(struct work_struct *w)
 
 next_chunk:
 
-       if (unlikely(READ_ONCE(ic->suspending)))
+       if (unlikely(dm_suspended(ic->ti)))
                goto unlock_ret;
 
        range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
@@ -2416,7 +2409,7 @@ next_chunk:
                get_area_and_offset(ic, logical_sector, &area, &offset);
        }
 
-       DEBUG_print("recalculating: %lx, %lx\n", logical_sector, n_sectors);
+       DEBUG_print("recalculating: %llx, %llx\n", logical_sector, n_sectors);
 
        if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
                recalc_write_super(ic);
@@ -2501,7 +2494,7 @@ static void bitmap_block_work(struct work_struct *w)
                                    dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
                        remove_range(ic, &dio->range);
                        INIT_WORK(&dio->work, integrity_bio_wait);
-                       queue_work(ic->wait_wq, &dio->work);
+                       queue_work(ic->offload_wq, &dio->work);
                } else {
                        block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
                                        dio->range.n_sectors, BITMAP_OP_SET);
@@ -2524,7 +2517,7 @@ static void bitmap_block_work(struct work_struct *w)
 
                remove_range(ic, &dio->range);
                INIT_WORK(&dio->work, integrity_bio_wait);
-               queue_work(ic->wait_wq, &dio->work);
+               queue_work(ic->offload_wq, &dio->work);
        }
 
        queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
@@ -2804,8 +2797,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 
        del_timer_sync(&ic->autocommit_timer);
 
-       WRITE_ONCE(ic->suspending, 1);
-
        if (ic->recalc_wq)
                drain_workqueue(ic->recalc_wq);
 
@@ -2834,8 +2825,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 #endif
        }
 
-       WRITE_ONCE(ic->suspending, 0);
-
        BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
 
        ic->journal_uptodate = true;
@@ -2844,9 +2833,29 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 static void dm_integrity_resume(struct dm_target *ti)
 {
        struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
+       __u64 old_provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
        int r;
+
        DEBUG_print("resume\n");
 
+       if (ic->provided_data_sectors != old_provided_data_sectors) {
+               if (ic->provided_data_sectors > old_provided_data_sectors &&
+                   ic->mode == 'B' &&
+                   ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
+                       rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
+                                          ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+                       block_bitmap_op(ic, ic->journal, old_provided_data_sectors,
+                                       ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET);
+                       rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+                                          ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+               }
+
+               ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
+               r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+               if (unlikely(r))
+                       dm_integrity_io_error(ic, "writing superblock", r);
+       }
+
        if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
                DEBUG_print("resume dirty_bitmap\n");
                rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
@@ -2914,7 +2923,7 @@ static void dm_integrity_resume(struct dm_target *ti)
        DEBUG_print("testing recalc: %x\n", ic->sb->flags);
        if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
                __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
-               DEBUG_print("recalc pos: %lx / %lx\n", (long)recalc_pos, ic->provided_data_sectors);
+               DEBUG_print("recalc pos: %llx / %llx\n", recalc_pos, ic->provided_data_sectors);
                if (recalc_pos < ic->provided_data_sectors) {
                        queue_work(ic->recalc_wq, &ic->recalc_work);
                } else if (recalc_pos > ic->provided_data_sectors) {
@@ -2944,10 +2953,10 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
        switch (type) {
        case STATUSTYPE_INFO:
                DMEMIT("%llu %llu",
-                       (unsigned long long)atomic64_read(&ic->number_of_mismatches),
-                       (unsigned long long)ic->provided_data_sectors);
+                       atomic64_read(&ic->number_of_mismatches),
+                       ic->provided_data_sectors);
                if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
-                       DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector));
+                       DMEMIT(" %llu", le64_to_cpu(ic->sb->recalc_sector));
                else
                        DMEMIT(" -");
                break;
@@ -2968,13 +2977,13 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
                arg_count += !!ic->journal_crypt_alg.alg_string;
                arg_count += !!ic->journal_mac_alg.alg_string;
                arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
-               DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
+               DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
                       ic->tag_size, ic->mode, arg_count);
                if (ic->meta_dev)
                        DMEMIT(" meta_device:%s", ic->meta_dev->name);
                if (ic->sectors_per_block != 1)
                        DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
-               if (ic->recalculate_flag)
+               if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
                        DMEMIT(" recalculate");
                DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
                DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -2984,7 +2993,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
                        DMEMIT(" commit_time:%u", ic->autocommit_msec);
                }
                if (ic->mode == 'B') {
-                       DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
+                       DMEMIT(" sectors_per_bit:%llu", (sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
                        DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
                }
                if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
@@ -3089,6 +3098,24 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
        return 0;
 }
 
+static void get_provided_data_sectors(struct dm_integrity_c *ic) /* Sets ic->provided_data_sectors; returns nothing, callers test the field for 0. */
+{
+       if (!ic->meta_dev) { /* ic->meta_dev is not set: size is searched for using meta_device_sectors */
+               int test_bit;
+               ic->provided_data_sectors = 0;
+               for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) { /* from the highest set bit of meta_device_sectors down to bit 3 */
+                       __u64 prev_data_sectors = ic->provided_data_sectors; /* saved so the trial bit can be undone */
+
+                       ic->provided_data_sectors |= (sector_t)1 << test_bit; /* tentatively set this bit */
+                       if (calculate_device_limits(ic)) /* nonzero return rejects the trial value (presumably the layout does not fit - confirm) */
+                               ic->provided_data_sectors = prev_data_sectors;
+               }
+       } else { /* ic->meta_dev is set: size is taken from data_device_sectors */
+               ic->provided_data_sectors = ic->data_device_sectors;
+               ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1); /* clear the low bits; rounds down to a block multiple assuming sectors_per_block is a power of two */
+       }
+}
+
 static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sectors, unsigned interleave_sectors)
 {
        unsigned journal_sections;
@@ -3116,20 +3143,15 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
                ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
                ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
 
-               ic->provided_data_sectors = 0;
-               for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
-                       __u64 prev_data_sectors = ic->provided_data_sectors;
-
-                       ic->provided_data_sectors |= (sector_t)1 << test_bit;
-                       if (calculate_device_limits(ic))
-                               ic->provided_data_sectors = prev_data_sectors;
-               }
+               get_provided_data_sectors(ic);
                if (!ic->provided_data_sectors)
                        return -EINVAL;
        } else {
                ic->sb->log2_interleave_sectors = 0;
-               ic->provided_data_sectors = ic->data_device_sectors;
-               ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
+
+               get_provided_data_sectors(ic);
+               if (!ic->provided_data_sectors)
+                       return -EINVAL;
 
 try_smaller_buffer:
                ic->sb->journal_sections = cpu_to_le32(0);
@@ -3630,6 +3652,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
        }
        ti->private = ic;
        ti->per_io_data_size = sizeof(struct dm_integrity_io);
+       ic->ti = ti;
 
        ic->in_progress = RB_ROOT;
        INIT_LIST_HEAD(&ic->wait_list);
@@ -3843,6 +3866,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                goto bad;
        }
 
+       ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM,
+                                         METADATA_WORKQUEUE_MAX_ACTIVE);
+       if (!ic->offload_wq) {
+               ti->error = "Cannot allocate workqueue";
+               r = -ENOMEM;
+               goto bad;
+       }
+
        ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1);
        if (!ic->commit_wq) {
                ti->error = "Cannot allocate workqueue";
@@ -3927,16 +3958,16 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                        goto bad;
                }
        }
-       ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
-       if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
-               /* test for overflow */
+       if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
                r = -EINVAL;
-               ti->error = "The superblock has 64-bit device size, but the kernel was compiled with 32-bit sectors";
+               ti->error = "Journal mac mismatch";
                goto bad;
        }
-       if (!!(ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC)) != !!ic->journal_mac_alg.alg_string) {
+
+       get_provided_data_sectors(ic);
+       if (!ic->provided_data_sectors) {
                r = -EINVAL;
-               ti->error = "Journal mac mismatch";
+               ti->error = "The device is too small";
                goto bad;
        }
 
@@ -4001,10 +4032,9 @@ try_smaller_buffer:
        DEBUG_print("   initial_sectors 0x%x\n", ic->initial_sectors);
        DEBUG_print("   metadata_run 0x%x\n", ic->metadata_run);
        DEBUG_print("   log2_metadata_run %d\n", ic->log2_metadata_run);
-       DEBUG_print("   provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
-                   (unsigned long long)ic->provided_data_sectors);
+       DEBUG_print("   provided_data_sectors 0x%llx (%llu)\n", ic->provided_data_sectors, ic->provided_data_sectors);
        DEBUG_print("   log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
-       DEBUG_print("   bits_in_journal %llu\n", (unsigned long long)bits_in_journal);
+       DEBUG_print("   bits_in_journal %llu\n", bits_in_journal);
 
        if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
                ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
@@ -4147,6 +4177,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
                destroy_workqueue(ic->metadata_wq);
        if (ic->wait_wq)
                destroy_workqueue(ic->wait_wq);
+       if (ic->offload_wq)
+               destroy_workqueue(ic->offload_wq);
        if (ic->commit_wq)
                destroy_workqueue(ic->commit_wq);
        if (ic->writer_wq)
@@ -4207,7 +4239,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
 
 static struct target_type integrity_target = {
        .name                   = "integrity",
-       .version                = {1, 4, 0},
+       .version                = {1, 6, 0},
        .module                 = THIS_MODULE,
        .features               = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
        .ctr                    = dm_integrity_ctr,