dm: unexport dm_{get,put}_table_device

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 479ec5b..ca2aedd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -28,6 +28,7 @@
 #include <linux/refcount.h>
 #include <linux/part_stat.h>
 #include <linux/blk-crypto.h>
+#include <linux/keyslot-manager.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -105,12 +106,16 @@ struct dm_io {
        struct dm_target_io tio;
 };
 
+#define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
+#define DM_IO_BIO_OFFSET \
+       (offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
+
 void *dm_per_bio_data(struct bio *bio, size_t data_size)
 {
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
        if (!tio->inside_dm_io)
-               return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
-       return (char *)bio - offsetof(struct dm_target_io, clone) - offsetof(struct dm_io, tio) - data_size;
+               return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
+       return (char *)bio - DM_IO_BIO_OFFSET - data_size;
 }
 EXPORT_SYMBOL_GPL(dm_per_bio_data);
 
@@ -118,9 +123,9 @@ struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
 {
        struct dm_io *io = (struct dm_io *)((char *)data + data_size);
        if (io->magic == DM_IO_MAGIC)
-               return (struct bio *)((char *)io + offsetof(struct dm_io, tio) + offsetof(struct dm_target_io, clone));
+               return (struct bio *)((char *)io + DM_IO_BIO_OFFSET);
        BUG_ON(io->magic != DM_TIO_MAGIC);
-       return (struct bio *)((char *)io + offsetof(struct dm_target_io, clone));
+       return (struct bio *)((char *)io + DM_TARGET_IO_BIO_OFFSET);
 }
 EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
 
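The two new macros just give names to the pointer arithmetic that dm_per_bio_data() and dm_bio_from_per_bio_data() were already doing: the clone bio is embedded in struct dm_target_io, and for the first clone that dm_target_io is itself embedded in struct dm_io (hence the tio->inside_dm_io check), so stepping back from the bio by the composed offsets recovers the enclosing structure. Below is a minimal standalone sketch of that round trip using simplified stand-in structs, not the real kernel definitions:

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the kernel structs, for illustration only. */
struct mock_bio { int dummy; };

struct mock_target_io {
	unsigned int magic;
	struct mock_bio clone;		/* embedded clone bio */
};

struct mock_io {
	unsigned int magic;
	struct mock_target_io tio;	/* embedded target_io for the first clone */
};

#define MOCK_TARGET_IO_BIO_OFFSET	(offsetof(struct mock_target_io, clone))
#define MOCK_IO_BIO_OFFSET \
	(offsetof(struct mock_target_io, clone) + offsetof(struct mock_io, tio))

int main(void)
{
	struct mock_io io;
	struct mock_bio *bio = &io.tio.clone;

	/* Walking back from the embedded bio recovers the enclosing mock_io. */
	struct mock_io *back = (struct mock_io *)((char *)bio - MOCK_IO_BIO_OFFSET);

	printf("tio offset %zu, io offset %zu, round trip ok: %d\n",
	       MOCK_TARGET_IO_BIO_OFFSET, MOCK_IO_BIO_OFFSET, back == &io);
	return 0;
}
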
@@ -148,6 +153,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
 #define DM_NUMA_NODE NUMA_NO_NODE
 static int dm_numa_node = DM_NUMA_NODE;
 
+#define DEFAULT_SWAP_BIOS      (8 * 1048576 / PAGE_SIZE)
+static int swap_bios = DEFAULT_SWAP_BIOS;
+static int get_swap_bios(void)
+{
+       int latch = READ_ONCE(swap_bios);
+       if (unlikely(latch <= 0))
+               latch = DEFAULT_SWAP_BIOS;
+       return latch;
+}
+
 /*
  * For mempools pre-allocation at the table loading time.
  */
@@ -825,7 +840,6 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
        *result = &td->dm_dev;
        return 0;
 }
-EXPORT_SYMBOL_GPL(dm_get_table_device);
 
 void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
 {
@@ -839,7 +853,6 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
        }
        mutex_unlock(&md->table_devices_lock);
 }
-EXPORT_SYMBOL(dm_put_table_device);
 
 static void free_table_devices(struct list_head *devices)
 {
@@ -969,6 +982,11 @@ void disable_write_zeroes(struct mapped_device *md)
        limits->max_write_zeroes_sectors = 0;
 }
 
+static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
+{
+       return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
+}
+
 static void clone_endio(struct bio *bio)
 {
        blk_status_t error = bio->bi_status;
@@ -1020,6 +1038,11 @@ static void clone_endio(struct bio *bio)
                }
        }
 
+       if (unlikely(swap_bios_limit(tio->ti, bio))) {
+               struct mapped_device *md = io->md;
+               up(&md->swap_bios_semaphore);
+       }
+
        free_tio(tio);
        dec_pending(io, error);
 }
@@ -1129,7 +1152,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd
        if (!map)
                goto out;
 
-       ret = dm_table_supports_dax(map, device_supports_dax, &blocksize);
+       ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
 
 out:
        dm_put_live_table(md, srcu_idx);
@@ -1253,6 +1276,22 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 }
 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
 
+static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
+{
+       mutex_lock(&md->swap_bios_lock);
+       while (latch < md->swap_bios) {
+               cond_resched();
+               down(&md->swap_bios_semaphore);
+               md->swap_bios--;
+       }
+       while (latch > md->swap_bios) {
+               cond_resched();
+               up(&md->swap_bios_semaphore);
+               md->swap_bios++;
+       }
+       mutex_unlock(&md->swap_bios_lock);
+}
+
 static blk_qc_t __map_bio(struct dm_target_io *tio)
 {
        int r;
@@ -1272,6 +1311,14 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
        atomic_inc(&io->io_count);
        sector = clone->bi_iter.bi_sector;
 
+       if (unlikely(swap_bios_limit(ti, clone))) {
+               struct mapped_device *md = io->md;
+               int latch = get_swap_bios();
+               if (unlikely(latch != md->swap_bios))
+                       __set_swap_bios_limit(md, latch);
+               down(&md->swap_bios_semaphore);
+       }
+
        r = ti->type->map(ti, clone);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
@@ -1282,10 +1329,18 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
                ret = submit_bio_noacct(clone);
                break;
        case DM_MAPIO_KILL:
+               if (unlikely(swap_bios_limit(ti, clone))) {
+                       struct mapped_device *md = io->md;
+                       up(&md->swap_bios_semaphore);
+               }
                free_tio(tio);
                dec_pending(io, BLK_STS_IOERR);
                break;
        case DM_MAPIO_REQUEUE:
+               if (unlikely(swap_bios_limit(ti, clone))) {
+                       struct mapped_device *md = io->md;
+                       up(&md->swap_bios_semaphore);
+               }
                free_tio(tio);
                dec_pending(io, BLK_STS_DM_REQUEUE);
                break;
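
Taken together, these __map_bio() hunks and the clone_endio() hunk above throttle REQ_SWAP bios with a per-device counting semaphore: down(&md->swap_bios_semaphore) is taken before calling the target's ->map(), and the matching up() happens in clone_endio() or, for DM_MAPIO_KILL/DM_MAPIO_REQUEUE, right here before the clone is freed; __set_swap_bios_limit() resizes the semaphore when the swap_bios module parameter changes. With 4 KiB pages the DEFAULT_SWAP_BIOS value works out to 8 MiB / 4 KiB = 2048 in-flight swap bios per device. The sketch below is only a rough userspace analogue of the down-before-map / up-on-completion pattern, with made-up limits and POSIX semaphores standing in for the kernel primitives (build with -pthread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

#define SWAP_BIO_LIMIT	4	/* arbitrary illustrative limit, not DEFAULT_SWAP_BIOS */
#define NR_BIOS		16

static sem_t swap_bios_semaphore;

static void *submit_swap_bio(void *arg)
{
	long id = (long)arg;

	sem_wait(&swap_bios_semaphore);		/* down() before ->map() */
	printf("swap bio %ld in flight\n", id);	/* stand-in for the mapped I/O */
	sem_post(&swap_bios_semaphore);		/* up() in endio or the error paths */
	return NULL;
}

int main(void)
{
	pthread_t workers[NR_BIOS];
	long i;

	/* At most SWAP_BIO_LIMIT "bios" pass the semaphore at any one time. */
	sem_init(&swap_bios_semaphore, 0, SWAP_BIO_LIMIT);
	for (i = 0; i < NR_BIOS; i++)
		pthread_create(&workers[i], NULL, submit_swap_bio, (void *)i);
	for (i = 0; i < NR_BIOS; i++)
		pthread_join(workers[i], NULL);
	sem_destroy(&swap_bios_semaphore);
	return 0;
}
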
@@ -1584,38 +1639,35 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
        } else {
                ci.bio = bio;
                ci.sector_count = bio_sectors(bio);
-               while (ci.sector_count && !error) {
-                       error = __split_and_process_non_flush(&ci);
-                       if (ci.sector_count && !error) {
-                               /*
-                                * Remainder must be passed to submit_bio_noacct()
-                                * so that it gets handled *after* bios already submitted
-                                * have been completely processed.
-                                * We take a clone of the original to store in
-                                * ci.io->orig_bio to be used by end_io_acct() and
-                                * for dec_pending to use for completion handling.
-                                */
-                               struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
-                                                         GFP_NOIO, &md->queue->bio_split);
-                               ci.io->orig_bio = b;
-
-                               /*
-                                * Adjust IO stats for each split, otherwise upon queue
-                                * reentry there will be redundant IO accounting.
-                                * NOTE: this is a stop-gap fix, a proper fix involves
-                                * significant refactoring of DM core's bio splitting
-                                * (by eliminating DM's splitting and just using bio_split)
-                                */
-                               part_stat_lock();
-                               __dm_part_stat_sub(dm_disk(md)->part0,
-                                                  sectors[op_stat_group(bio_op(bio))], ci.sector_count);
-                               part_stat_unlock();
-
-                               bio_chain(b, bio);
-                               trace_block_split(b, bio->bi_iter.bi_sector);
-                               ret = submit_bio_noacct(bio);
-                               break;
-                       }
+               error = __split_and_process_non_flush(&ci);
+               if (ci.sector_count && !error) {
+                       /*
+                        * Remainder must be passed to submit_bio_noacct()
+                        * so that it gets handled *after* bios already submitted
+                        * have been completely processed.
+                        * We take a clone of the original to store in
+                        * ci.io->orig_bio to be used by end_io_acct() and
+                        * for dec_pending to use for completion handling.
+                        */
+                       struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
+                                                 GFP_NOIO, &md->queue->bio_split);
+                       ci.io->orig_bio = b;
+
+                       /*
+                        * Adjust IO stats for each split, otherwise upon queue
+                        * reentry there will be redundant IO accounting.
+                        * NOTE: this is a stop-gap fix, a proper fix involves
+                        * significant refactoring of DM core's bio splitting
+                        * (by eliminating DM's splitting and just using bio_split)
+                        */
+                       part_stat_lock();
+                       __dm_part_stat_sub(dm_disk(md)->part0,
+                                          sectors[op_stat_group(bio_op(bio))], ci.sector_count);
+                       part_stat_unlock();
+
+                       bio_chain(b, bio);
+                       trace_block_split(b, bio->bi_iter.bi_sector);
+                       ret = submit_bio_noacct(bio);
                }
        }
 
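To put numbers on the remainder handling above: suppose the original bio spans 1024 sectors and the target accepts only 256 of them, so ci.sector_count is left at 768. bio_split() then carves off the 256 mapped sectors, the 768-sector remainder is chained and resubmitted, and those 768 not-yet-processed sectors are subtracted from the I/O stats so they are only accounted once, when the remainder re-enters the queue. A tiny sketch of just that arithmetic, with hypothetical numbers:

#include <stdio.h>

int main(void)
{
	unsigned int orig_sectors = 1024;	/* bio_sectors(bio), hypothetical */
	unsigned int sector_count = 768;	/* ci.sector_count after 256 were mapped */

	/* bio_split() gets only the sectors that were actually mapped... */
	unsigned int split = orig_sectors - sector_count;

	/* ...and the unmapped remainder is subtracted from the stats so it is
	 * counted exactly once, when it is resubmitted via submit_bio_noacct(). */
	printf("split %u sectors now, resubmit %u, subtract %u from stats\n",
	       split, sector_count, sector_count);
	return 0;
}
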
@@ -1718,6 +1770,19 @@ static const struct dax_operations dm_dax_ops;
 
 static void dm_wq_work(struct work_struct *work);
 
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+static void dm_queue_destroy_keyslot_manager(struct request_queue *q)
+{
+       dm_destroy_keyslot_manager(q->ksm);
+}
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+
+static inline void dm_queue_destroy_keyslot_manager(struct request_queue *q)
+{
+}
+#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
+
 static void cleanup_mapped_device(struct mapped_device *md)
 {
        if (md->wq)
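
The #ifdef/#else pair above is the usual pattern of providing an empty inline stub when CONFIG_BLK_INLINE_ENCRYPTION is disabled, so that cleanup_mapped_device() below can call dm_queue_destroy_keyslot_manager() unconditionally. A compile-anywhere sketch of the same idiom, with a made-up EXAMPLE_FEATURE switch standing in for the real kconfig symbol:

#include <stdio.h>

/* Build with -DEXAMPLE_FEATURE to get the real body; without it the helper
 * is an empty inline stub and callers need no #ifdef of their own. */
#ifdef EXAMPLE_FEATURE
static void example_teardown(int *resource)
{
	printf("tearing down resource %d\n", *resource);
}
#else /* EXAMPLE_FEATURE */
static inline void example_teardown(int *resource)
{
	(void)resource;		/* feature compiled out: nothing to do */
}
#endif /* !EXAMPLE_FEATURE */

int main(void)
{
	int resource = 42;

	example_teardown(&resource);	/* unconditional call site */
	return 0;
}
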
@@ -1739,14 +1804,17 @@ static void cleanup_mapped_device(struct mapped_device *md)
                put_disk(md->disk);
        }
 
-       if (md->queue)
+       if (md->queue) {
+               dm_queue_destroy_keyslot_manager(md->queue);
                blk_cleanup_queue(md->queue);
+       }
 
        cleanup_srcu_struct(&md->io_barrier);
 
        mutex_destroy(&md->suspend_lock);
        mutex_destroy(&md->type_lock);
        mutex_destroy(&md->table_devices_lock);
+       mutex_destroy(&md->swap_bios_lock);
 
        dm_mq_cleanup_mapped_device(md);
 }
@@ -1814,6 +1882,10 @@ static struct mapped_device *alloc_dev(int minor)
        init_waitqueue_head(&md->eventq);
        init_completion(&md->kobj_holder.completion);
 
+       md->swap_bios = get_swap_bios();
+       sema_init(&md->swap_bios_semaphore, md->swap_bios);
+       mutex_init(&md->swap_bios_lock);
+
        md->disk->major = _major;
        md->disk->first_minor = minor;
        md->disk->fops = &dm_blk_dops;
@@ -1959,7 +2031,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
        if (size != dm_get_size(md))
                memset(&md->geometry, 0, sizeof(md->geometry));
 
-       set_capacity_and_notify(md->disk, size);
+       if (!get_capacity(md->disk))
+               set_capacity(md->disk, size);
+       else
+               set_capacity_and_notify(md->disk, size);
 
        dm_table_event_callback(t, event_callback, md);
 
@@ -2849,8 +2924,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
        case DM_TYPE_BIO_BASED:
        case DM_TYPE_DAX_BIO_BASED:
                pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
-               front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-               io_front_pad = roundup(front_pad,  __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
+               front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;
+               io_front_pad = roundup(per_io_data_size,  __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
                ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
                if (ret)
                        goto out;
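
For a sense of what the padding expressions compute: the per-target per-bio data is rounded up to the struct alignment, and the header bytes in front of the embedded clone bio are added on top, for each of the two biosets. A small standalone sketch with made-up numbers; the real values come from the kernel struct layouts, not from this patch:

#include <stdio.h>

/* Same rounding the kernel's roundup() macro performs, for illustration. */
#define ROUNDUP(x, a)	((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	unsigned long per_io_data_size = 100;	/* hypothetical per-bio data size */
	unsigned long align = 8;		/* hypothetical __alignof__(struct dm_target_io) */
	unsigned long bio_offset = 24;		/* hypothetical DM_TARGET_IO_BIO_OFFSET */

	unsigned long front_pad = ROUNDUP(per_io_data_size, align) + bio_offset;

	printf("front_pad = %lu\n", front_pad);	/* roundup(100, 8) + 24 = 128 */
	return 0;
}
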
@@ -3097,6 +3172,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
 module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
 
+module_param(swap_bios, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");