Merge tag 'for-5.5/drivers-20191121' of git://git.kernel.dk/linux-block
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 25 Nov 2019 19:15:41 +0000 (11:15 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 25 Nov 2019 19:15:41 +0000 (11:15 -0800)
Pull block driver updates from Jens Axboe:
 "Here are the main block driver updates for 5.5. Nothing major in here,
  mostly just fixes. This contains:

   - a set of bcache changes via Coly

   - MD changes from Song

   - loop unmap write-zeroes fix (Darrick)

   - spelling fixes (Geert)

   - zoned additions and cleanups to null_blk/dm (Ajay)

   - allow null_blk online submit queue changes (Bart)

   - NVMe changes via Keith, nothing major here either"

* tag 'for-5.5/drivers-20191121' of git://git.kernel.dk/linux-block: (56 commits)
  Revert "bcache: fix fifo index swapping condition in journal_pin_cmp()"
  drivers/md/raid5-ppl.c: use the new spelling of RWH_WRITE_LIFE_NOT_SET
  drivers/md/raid5.c: use the new spelling of RWH_WRITE_LIFE_NOT_SET
  bcache: don't export symbols
  bcache: remove the extra cflags for request.o
  bcache: at least try to shrink 1 node in bch_mca_scan()
  bcache: add idle_max_writeback_rate sysfs interface
  bcache: add code comments in bch_btree_leaf_dirty()
  bcache: fix deadlock in bcache_allocator
  bcache: add code comment bch_keylist_pop() and bch_keylist_pop_front()
  bcache: deleted code comments for dead code in bch_data_insert_keys()
  bcache: add more accurate error messages in read_super()
  bcache: fix static checker warning in bcache_device_free()
  bcache: fix a lost wake-up problem caused by mca_cannibalize_lock
  bcache: fix fifo index swapping condition in journal_pin_cmp()
  md/raid10: prevent access of uninitialized resync_pages offset
  md: avoid invalid memory access for array sb->dev_roles
  md/raid1: avoid soft lockup under high load
  null_blk: add zone open, close, and finish support
  dm: add zone open, close and finish support
  ...

48 files changed:
drivers/block/loop.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/null_blk.h
drivers/block/null_blk_main.c
drivers/block/null_blk_zoned.c
drivers/md/bcache/Makefile
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.c
drivers/md/bcache/btree.c
drivers/md/bcache/closure.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/writeback.c
drivers/md/dm-flakey.c
drivers/md/dm-linear.c
drivers/md/dm.c
drivers/md/md-bitmap.c
drivers/md/md-linear.c
drivers/md/md-multipath.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/discovery.c
drivers/nvme/target/fabrics-cmd.c
drivers/nvme/target/fc.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/io-cmd-file.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/rdma.c
drivers/nvme/target/tcp.c
include/linux/nvme-fc.h
include/linux/nvme.h

index f6f77ea..ef6e251 100644 (file)
@@ -417,18 +417,20 @@ out_free_page:
        return ret;
 }
 
-static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
+static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
+                       int mode)
 {
        /*
-        * We use punch hole to reclaim the free space used by the
-        * image a.k.a. discard. However we do not support discard if
-        * encryption is enabled, because it may give an attacker
-        * useful information.
+        * We use fallocate to manipulate the space mappings used by the image
+        * a.k.a. discard/zerorange. However we do not support this if
+        * encryption is enabled, because it may give an attacker useful
+        * information.
         */
        struct file *file = lo->lo_backing_file;
-       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
        int ret;
 
+       mode |= FALLOC_FL_KEEP_SIZE;
+
        if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
                ret = -EOPNOTSUPP;
                goto out;
@@ -596,9 +598,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
        switch (req_op(rq)) {
        case REQ_OP_FLUSH:
                return lo_req_flush(lo, rq);
-       case REQ_OP_DISCARD:
        case REQ_OP_WRITE_ZEROES:
-               return lo_discard(lo, rq, pos);
+               /*
+                * If the caller doesn't want deallocation, call zeroout to
+                * write zeroes over the range.  Otherwise, punch it out.
+                */
+               return lo_fallocate(lo, rq, pos,
+                       (rq->cmd_flags & REQ_NOUNMAP) ?
+                               FALLOC_FL_ZERO_RANGE :
+                               FALLOC_FL_PUNCH_HOLE);
+       case REQ_OP_DISCARD:
+               return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
        case REQ_OP_WRITE:
                if (lo->transfer)
                        return lo_write_transfer(lo, rq, pos);
index 964f78c..f6bafa9 100644 (file)
@@ -129,7 +129,7 @@ struct mtip_compat_ide_task_request_s {
 /*
  * This function check_for_surprise_removal is called
  * while card is removed from the system and it will
- * read the vendor id from the configration space
+ * read the vendor id from the configuration space
  *
  * @pdev Pointer to the pci_dev structure.
  *
index a235c45..93c2a3d 100644 (file)
@@ -96,6 +96,8 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
 blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
                                enum req_opf op, sector_t sector,
                                sector_t nr_sectors);
+size_t null_zone_valid_read_len(struct nullb *nullb,
+                               sector_t sector, unsigned int len);
 #else
 static inline int null_zone_init(struct nullb_device *dev)
 {
@@ -115,5 +117,11 @@ static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
 {
        return BLK_STS_NOTSUPP;
 }
+static inline size_t null_zone_valid_read_len(struct nullb *nullb,
+                                             sector_t sector,
+                                             unsigned int len)
+{
+       return len;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 #endif /* __NULL_BLK_H */
index 0e7da50..ea7a4d6 100644 (file)
@@ -227,7 +227,7 @@ static ssize_t nullb_device_uint_attr_store(unsigned int *val,
        int result;
 
        result = kstrtouint(page, 0, &tmp);
-       if (result)
+       if (result < 0)
                return result;
 
        *val = tmp;
@@ -241,7 +241,7 @@ static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
        unsigned long tmp;
 
        result = kstrtoul(page, 0, &tmp);
-       if (result)
+       if (result < 0)
                return result;
 
        *val = tmp;
@@ -255,7 +255,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
        int result;
 
        result = kstrtobool(page,  &tmp);
-       if (result)
+       if (result < 0)
                return result;
 
        *val = tmp;
@@ -263,7 +263,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
 }
 
 /* The following macro should only be used with TYPE = {uint, ulong, bool}. */
-#define NULLB_DEVICE_ATTR(NAME, TYPE)                                          \
+#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY)                                   \
 static ssize_t                                                                 \
 nullb_device_##NAME##_show(struct config_item *item, char *page)               \
 {                                                                              \
@@ -274,31 +274,57 @@ static ssize_t                                                                    \
 nullb_device_##NAME##_store(struct config_item *item, const char *page,                \
                            size_t count)                                       \
 {                                                                              \
-       if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags))   \
-               return -EBUSY;                                                  \
-       return nullb_device_##TYPE##_attr_store(                                \
-                       &to_nullb_device(item)->NAME, page, count);             \
+       int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;      \
+       struct nullb_device *dev = to_nullb_device(item);                       \
+       TYPE new_value;                                                         \
+       int ret;                                                                \
+                                                                               \
+       ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);        \
+       if (ret < 0)                                                            \
+               return ret;                                                     \
+       if (apply_fn)                                                           \
+               ret = apply_fn(dev, new_value);                                 \
+       else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags))                \
+               ret = -EBUSY;                                                   \
+       if (ret < 0)                                                            \
+               return ret;                                                     \
+       dev->NAME = new_value;                                                  \
+       return count;                                                           \
 }                                                                              \
 CONFIGFS_ATTR(nullb_device_, NAME);
 
-NULLB_DEVICE_ATTR(size, ulong);
-NULLB_DEVICE_ATTR(completion_nsec, ulong);
-NULLB_DEVICE_ATTR(submit_queues, uint);
-NULLB_DEVICE_ATTR(home_node, uint);
-NULLB_DEVICE_ATTR(queue_mode, uint);
-NULLB_DEVICE_ATTR(blocksize, uint);
-NULLB_DEVICE_ATTR(irqmode, uint);
-NULLB_DEVICE_ATTR(hw_queue_depth, uint);
-NULLB_DEVICE_ATTR(index, uint);
-NULLB_DEVICE_ATTR(blocking, bool);
-NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
-NULLB_DEVICE_ATTR(memory_backed, bool);
-NULLB_DEVICE_ATTR(discard, bool);
-NULLB_DEVICE_ATTR(mbps, uint);
-NULLB_DEVICE_ATTR(cache_size, ulong);
-NULLB_DEVICE_ATTR(zoned, bool);
-NULLB_DEVICE_ATTR(zone_size, ulong);
-NULLB_DEVICE_ATTR(zone_nr_conv, uint);
+static int nullb_apply_submit_queues(struct nullb_device *dev,
+                                    unsigned int submit_queues)
+{
+       struct nullb *nullb = dev->nullb;
+       struct blk_mq_tag_set *set;
+
+       if (!nullb)
+               return 0;
+
+       set = nullb->tag_set;
+       blk_mq_update_nr_hw_queues(set, submit_queues);
+       return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
+}
+
+NULLB_DEVICE_ATTR(size, ulong, NULL);
+NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
+NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
+NULLB_DEVICE_ATTR(home_node, uint, NULL);
+NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
+NULLB_DEVICE_ATTR(blocksize, uint, NULL);
+NULLB_DEVICE_ATTR(irqmode, uint, NULL);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
+NULLB_DEVICE_ATTR(index, uint, NULL);
+NULLB_DEVICE_ATTR(blocking, bool, NULL);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
+NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
+NULLB_DEVICE_ATTR(discard, bool, NULL);
+NULLB_DEVICE_ATTR(mbps, uint, NULL);
+NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zoned, bool, NULL);
+NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -996,6 +1022,16 @@ next:
        return 0;
 }
 
+static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
+                              unsigned int len, unsigned int off)
+{
+       void *dst;
+
+       dst = kmap_atomic(page);
+       memset(dst + off, 0xFF, len);
+       kunmap_atomic(dst);
+}
+
 static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
 {
        size_t temp;
@@ -1036,10 +1072,24 @@ static int null_transfer(struct nullb *nullb, struct page *page,
        unsigned int len, unsigned int off, bool is_write, sector_t sector,
        bool is_fua)
 {
+       struct nullb_device *dev = nullb->dev;
+       unsigned int valid_len = len;
        int err = 0;
 
        if (!is_write) {
-               err = copy_from_nullb(nullb, page, off, sector, len);
+               if (dev->zoned)
+                       valid_len = null_zone_valid_read_len(nullb,
+                               sector, len);
+
+               if (valid_len) {
+                       err = copy_from_nullb(nullb, page, off,
+                               sector, valid_len);
+                       off += valid_len;
+                       len -= valid_len;
+               }
+
+               if (len)
+                       nullb_fill_pattern(nullb, page, len, off);
                flush_dcache_page(page);
        } else {
                flush_dcache_page(page);
index 3d7fdea..02f41a3 100644 (file)
@@ -84,6 +84,24 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
        return 0;
 }
 
+size_t null_zone_valid_read_len(struct nullb *nullb,
+                               sector_t sector, unsigned int len)
+{
+       struct nullb_device *dev = nullb->dev;
+       struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+       unsigned int nr_sectors = len >> SECTOR_SHIFT;
+
+       /* Read must be below the write pointer position */
+       if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
+           sector + nr_sectors <= zone->wp)
+               return len;
+
+       if (sector > zone->wp)
+               return 0;
+
+       return (zone->wp - sector) << SECTOR_SHIFT;
+}
+
 static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                     unsigned int nr_sectors)
 {
@@ -118,14 +136,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
        return BLK_STS_OK;
 }
 
-static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+                                  sector_t sector)
 {
        struct nullb_device *dev = cmd->nq->dev;
-       unsigned int zno = null_zone_no(dev, sector);
-       struct blk_zone *zone = &dev->zones[zno];
+       struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
        size_t i;
 
-       switch (req_op(cmd->rq)) {
+       switch (op) {
        case REQ_OP_ZONE_RESET_ALL:
                for (i = 0; i < dev->nr_zones; i++) {
                        if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
@@ -141,6 +159,29 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
                zone->cond = BLK_ZONE_COND_EMPTY;
                zone->wp = zone->start;
                break;
+       case REQ_OP_ZONE_OPEN:
+               if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+                       return BLK_STS_IOERR;
+               if (zone->cond == BLK_ZONE_COND_FULL)
+                       return BLK_STS_IOERR;
+
+               zone->cond = BLK_ZONE_COND_EXP_OPEN;
+               break;
+       case REQ_OP_ZONE_CLOSE:
+               if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+                       return BLK_STS_IOERR;
+               if (zone->cond == BLK_ZONE_COND_FULL)
+                       return BLK_STS_IOERR;
+
+               zone->cond = BLK_ZONE_COND_CLOSED;
+               break;
+       case REQ_OP_ZONE_FINISH:
+               if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+                       return BLK_STS_IOERR;
+
+               zone->cond = BLK_ZONE_COND_FULL;
+               zone->wp = zone->start + zone->len;
+               break;
        default:
                return BLK_STS_NOTSUPP;
        }
@@ -155,7 +196,10 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op,
                return null_zone_write(cmd, sector, nr_sectors);
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_RESET_ALL:
-               return null_zone_reset(cmd, sector);
+       case REQ_OP_ZONE_OPEN:
+       case REQ_OP_ZONE_CLOSE:
+       case REQ_OP_ZONE_FINISH:
+               return null_zone_mgmt(cmd, op, sector);
        default:
                return BLK_STS_OK;
        }
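
The read-side clamping added above can be illustrated outside the kernel; the
standalone sketch below mirrors the arithmetic of null_zone_valid_read_len()
with hypothetical sector values (the conventional-zone case, which always
returns the full length, is omitted):

#include <stdio.h>

#define SECTOR_SHIFT 9

/*
 * Standalone mirror of the clamping in null_zone_valid_read_len():
 * only data below the zone write pointer (wp) counts as valid.
 */
static unsigned int valid_read_len(unsigned long long wp,
				   unsigned long long sector,
				   unsigned int len)
{
	unsigned int nr_sectors = len >> SECTOR_SHIFT;

	if (sector + nr_sectors <= wp)		/* entirely written */
		return len;
	if (sector > wp)			/* entirely unwritten */
		return 0;
	return (wp - sector) << SECTOR_SHIFT;	/* straddles the wp */
}

int main(void)
{
	/* wp at sector 64: an 8-sector read at sector 60 has 4 valid sectors */
	printf("%u bytes valid\n", valid_read_len(64, 60, 8 << SECTOR_SHIFT));
	return 0;
}

Bytes beyond the valid length are filled with 0xFF by nullb_fill_pattern() in
null_transfer() rather than returning stale data.
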
index d26b351..fd71462 100644 (file)
@@ -5,5 +5,3 @@ obj-$(CONFIG_BCACHE)    += bcache.o
 bcache-y               := alloc.o bset.o btree.o closure.o debug.o extents.o\
        io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
        util.o writeback.o
-
-CFLAGS_request.o       += -Iblock
index 6f77682..a1df0d9 100644 (file)
@@ -377,7 +377,10 @@ retry_invalidate:
                        if (!fifo_full(&ca->free_inc))
                                goto retry_invalidate;
 
-                       bch_prio_write(ca);
+                       if (bch_prio_write(ca, false) < 0) {
+                               ca->invalidate_needs_gc = 1;
+                               wake_up_gc(ca->set);
+                       }
                }
        }
 out:
index 013e35a..9198c1b 100644 (file)
@@ -582,6 +582,7 @@ struct cache_set {
         */
        wait_queue_head_t       btree_cache_wait;
        struct task_struct      *btree_cache_alloc_lock;
+       spinlock_t              btree_cannibalize_lock;
 
        /*
         * When we free a btree node, we increment the gen of the bucket the
@@ -723,6 +724,7 @@ struct cache_set {
        unsigned int            gc_always_rewrite:1;
        unsigned int            shrinker_disabled:1;
        unsigned int            copy_gc_enabled:1;
+       unsigned int            idle_max_writeback_rate_enabled:1;
 
 #define BUCKET_HASH_BITS       12
        struct hlist_head       bucket_hash[1 << BUCKET_HASH_BITS];
@@ -977,7 +979,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
 __printf(2, 3)
 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
 
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
 
 extern struct workqueue_struct *bcache_wq;
index 0876879..cffcdc9 100644 (file)
@@ -155,6 +155,7 @@ int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
        return 0;
 }
 
+/* Pop the top key of keylist by pointing l->top to its previous key */
 struct bkey *bch_keylist_pop(struct keylist *l)
 {
        struct bkey *k = l->keys;
@@ -168,6 +169,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
        return l->top = k;
 }
 
+/* Pop the bottom key of keylist and update l->top_p */
 void bch_keylist_pop_front(struct keylist *l)
 {
        l->top_p -= bkey_u64s(l->keys);
@@ -309,7 +311,6 @@ void bch_btree_keys_free(struct btree_keys *b)
        t->tree = NULL;
        t->data = NULL;
 }
-EXPORT_SYMBOL(bch_btree_keys_free);
 
 int bch_btree_keys_alloc(struct btree_keys *b,
                         unsigned int page_order,
@@ -342,7 +343,6 @@ err:
        bch_btree_keys_free(b);
        return -ENOMEM;
 }
-EXPORT_SYMBOL(bch_btree_keys_alloc);
 
 void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
                         bool *expensive_debug_checks)
@@ -361,7 +361,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
         * any more.
         */
 }
-EXPORT_SYMBOL(bch_btree_keys_init);
 
 /* Binary tree stuff for auxiliary search trees */
 
@@ -678,7 +677,6 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
 
        bch_bset_build_unwritten_tree(b);
 }
-EXPORT_SYMBOL(bch_bset_init_next);
 
 /*
  * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
@@ -732,7 +730,6 @@ void bch_bset_build_written_tree(struct btree_keys *b)
             j = inorder_next(j, t->size))
                make_bfloat(t, j);
 }
-EXPORT_SYMBOL(bch_bset_build_written_tree);
 
 /* Insert */
 
@@ -780,7 +777,6 @@ fix_right:  do {
                        j = j * 2 + 1;
                } while (j < t->size);
 }
-EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
 
 static void bch_bset_fix_lookup_table(struct btree_keys *b,
                                      struct bset_tree *t,
@@ -855,7 +851,6 @@ bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
 
        return b->ops->key_merge(b, l, r);
 }
-EXPORT_SYMBOL(bch_bkey_try_merge);
 
 void bch_bset_insert(struct btree_keys *b, struct bkey *where,
                     struct bkey *insert)
@@ -875,7 +870,6 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
        bkey_copy(where, insert);
        bch_bset_fix_lookup_table(b, t, where);
 }
-EXPORT_SYMBOL(bch_bset_insert);
 
 unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
                              struct bkey *replace_key)
@@ -931,7 +925,6 @@ copy:       bkey_copy(m, k);
 merged:
        return status;
 }
-EXPORT_SYMBOL(bch_btree_insert_key);
 
 /* Lookup */
 
@@ -1077,7 +1070,6 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
 
        return i.l;
 }
-EXPORT_SYMBOL(__bch_bset_search);
 
 /* Btree iterator */
 
@@ -1132,7 +1124,6 @@ struct bkey *bch_btree_iter_init(struct btree_keys *b,
 {
        return __bch_btree_iter_init(b, iter, search, b->set);
 }
-EXPORT_SYMBOL(bch_btree_iter_init);
 
 static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
                                                 btree_iter_cmp_fn *cmp)
@@ -1165,7 +1156,6 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
        return __bch_btree_iter_next(iter, btree_iter_cmp);
 
 }
-EXPORT_SYMBOL(bch_btree_iter_next);
 
 struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
                                        struct btree_keys *b, ptr_filter_fn fn)
@@ -1196,7 +1186,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state,
 
        return mempool_init_page_pool(&state->pool, 1, page_order);
 }
-EXPORT_SYMBOL(bch_bset_sort_state_init);
 
 static void btree_mergesort(struct btree_keys *b, struct bset *out,
                            struct btree_iter *iter,
@@ -1313,7 +1302,6 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
 
        EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
 }
-EXPORT_SYMBOL(bch_btree_sort_partial);
 
 void bch_btree_sort_and_fix_extents(struct btree_keys *b,
                                    struct btree_iter *iter,
@@ -1366,7 +1354,6 @@ void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
 out:
        bch_bset_build_written_tree(b);
 }
-EXPORT_SYMBOL(bch_btree_sort_lazy);
 
 void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
 {
index ba434d9..14d6c33 100644 (file)
@@ -543,6 +543,11 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
 
        set_btree_node_dirty(b);
 
+       /*
+        * w->journal is always the oldest journal pin of all bkeys
+        * in the leaf node, to make sure the oldest jset seq won't
+        * be increased before this btree node is flushed.
+        */
        if (journal_ref) {
                if (w->journal &&
                    journal_pin_cmp(b->c, w->journal, journal_ref)) {
@@ -723,6 +728,8 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
         * IO can always make forward progress:
         */
        nr /= c->btree_pages;
+       if (nr == 0)
+               nr = 1;
        nr = min_t(unsigned long, nr, mca_can_free(c));
 
        i = 0;
@@ -884,15 +891,17 @@ out:
 
 static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
 {
-       struct task_struct *old;
-
-       old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
-       if (old && old != current) {
+       spin_lock(&c->btree_cannibalize_lock);
+       if (likely(c->btree_cache_alloc_lock == NULL)) {
+               c->btree_cache_alloc_lock = current;
+       } else if (c->btree_cache_alloc_lock != current) {
                if (op)
                        prepare_to_wait(&c->btree_cache_wait, &op->wait,
                                        TASK_UNINTERRUPTIBLE);
+               spin_unlock(&c->btree_cannibalize_lock);
                return -EINTR;
        }
+       spin_unlock(&c->btree_cannibalize_lock);
 
        return 0;
 }
@@ -927,10 +936,12 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
  */
 static void bch_cannibalize_unlock(struct cache_set *c)
 {
+       spin_lock(&c->btree_cannibalize_lock);
        if (c->btree_cache_alloc_lock == current) {
                c->btree_cache_alloc_lock = NULL;
                wake_up(&c->btree_cache_wait);
        }
+       spin_unlock(&c->btree_cannibalize_lock);
 }
 
 static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
index c12cd80..0164a1f 100644 (file)
@@ -45,7 +45,6 @@ void closure_sub(struct closure *cl, int v)
 {
        closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
 }
-EXPORT_SYMBOL(closure_sub);
 
 /*
  * closure_put - decrement a closure's refcount
@@ -54,7 +53,6 @@ void closure_put(struct closure *cl)
 {
        closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
 }
-EXPORT_SYMBOL(closure_put);
 
 /*
  * closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -76,7 +74,6 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
                closure_sub(cl, CLOSURE_WAITING + 1);
        }
 }
-EXPORT_SYMBOL(__closure_wake_up);
 
 /**
  * closure_wait - add a closure to a waitlist
@@ -96,7 +93,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
 
        return true;
 }
-EXPORT_SYMBOL(closure_wait);
 
 struct closure_syncer {
        struct task_struct      *task;
@@ -131,7 +127,6 @@ void __sched __closure_sync(struct closure *cl)
 
        __set_current_state(TASK_RUNNING);
 }
-EXPORT_SYMBOL(__closure_sync);
 
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
@@ -149,7 +144,6 @@ void closure_debug_create(struct closure *cl)
        list_add(&cl->all, &closure_list);
        spin_unlock_irqrestore(&closure_list_lock, flags);
 }
-EXPORT_SYMBOL(closure_debug_create);
 
 void closure_debug_destroy(struct closure *cl)
 {
@@ -162,7 +156,6 @@ void closure_debug_destroy(struct closure *cl)
        list_del(&cl->all);
        spin_unlock_irqrestore(&closure_list_lock, flags);
 }
-EXPORT_SYMBOL(closure_debug_destroy);
 
 static struct dentry *closure_debug;
 
index 41adcd1..73478a9 100644 (file)
@@ -62,18 +62,6 @@ static void bch_data_insert_keys(struct closure *cl)
        struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
        int ret;
 
-       /*
-        * If we're looping, might already be waiting on
-        * another journal write - can't wait on more than one journal write at
-        * a time
-        *
-        * XXX: this looks wrong
-        */
-#if 0
-       while (atomic_read(&s->cl.remaining) & CLOSURE_WAITING)
-               closure_sync(&s->cl);
-#endif
-
        if (!op->replace)
                journal_ref = bch_journal(op->c, &op->insert_keys,
                                          op->flush_journal ? cl : NULL);
index 20ed838..77e9869 100644 (file)
@@ -92,10 +92,11 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
        pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
                 sb->version, sb->flags, sb->seq, sb->keys);
 
-       err = "Not a bcache superblock";
+       err = "Not a bcache superblock (bad offset)";
        if (sb->offset != SB_SECTOR)
                goto err;
 
+       err = "Not a bcache superblock (bad magic)";
        if (memcmp(sb->magic, bcache_magic, 16))
                goto err;
 
@@ -529,12 +530,29 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
        closure_sync(cl);
 }
 
-void bch_prio_write(struct cache *ca)
+int bch_prio_write(struct cache *ca, bool wait)
 {
        int i;
        struct bucket *b;
        struct closure cl;
 
+       pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+                fifo_used(&ca->free[RESERVE_PRIO]),
+                fifo_used(&ca->free[RESERVE_NONE]),
+                fifo_used(&ca->free_inc));
+
+       /*
+        * Pre-check if there are enough free buckets. In the non-blocking
+        * scenario it's better to fail early rather than starting to allocate
+        * buckets and do a cleanup later in case of failure.
+        */
+       if (!wait) {
+               size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
+                              fifo_used(&ca->free[RESERVE_NONE]);
+               if (prio_buckets(ca) > avail)
+                       return -ENOMEM;
+       }
+
        closure_init_stack(&cl);
 
        lockdep_assert_held(&ca->set->bucket_lock);
@@ -544,9 +562,6 @@ void bch_prio_write(struct cache *ca)
        atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
                        &ca->meta_sectors_written);
 
-       //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
-       //       fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-
        for (i = prio_buckets(ca) - 1; i >= 0; --i) {
                long bucket;
                struct prio_set *p = ca->disk_buckets;
@@ -564,7 +579,7 @@ void bch_prio_write(struct cache *ca)
                p->magic        = pset_magic(&ca->sb);
                p->csum         = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
 
-               bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
+               bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
                BUG_ON(bucket == -1);
 
                mutex_unlock(&ca->set->bucket_lock);
@@ -593,6 +608,7 @@ void bch_prio_write(struct cache *ca)
 
                ca->prio_last_buckets[i] = ca->prio_buckets[i];
        }
+       return 0;
 }
 
 static void prio_read(struct cache *ca, uint64_t bucket)
@@ -761,20 +777,28 @@ static inline int idx_to_first_minor(int idx)
 
 static void bcache_device_free(struct bcache_device *d)
 {
+       struct gendisk *disk = d->disk;
+
        lockdep_assert_held(&bch_register_lock);
 
-       pr_info("%s stopped", d->disk->disk_name);
+       if (disk)
+               pr_info("%s stopped", disk->disk_name);
+       else
+               pr_err("bcache device (NULL gendisk) stopped");
 
        if (d->c)
                bcache_device_detach(d);
-       if (d->disk && d->disk->flags & GENHD_FL_UP)
-               del_gendisk(d->disk);
-       if (d->disk && d->disk->queue)
-               blk_cleanup_queue(d->disk->queue);
-       if (d->disk) {
+
+       if (disk) {
+               if (disk->flags & GENHD_FL_UP)
+                       del_gendisk(disk);
+
+               if (disk->queue)
+                       blk_cleanup_queue(disk->queue);
+
                ida_simple_remove(&bcache_device_idx,
-                                 first_minor_to_idx(d->disk->first_minor));
-               put_disk(d->disk);
+                                 first_minor_to_idx(disk->first_minor));
+               put_disk(disk);
        }
 
        bioset_exit(&d->bio_split);
@@ -1769,6 +1793,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
        sema_init(&c->sb_write_mutex, 1);
        mutex_init(&c->bucket_lock);
        init_waitqueue_head(&c->btree_cache_wait);
+       spin_lock_init(&c->btree_cannibalize_lock);
        init_waitqueue_head(&c->bucket_wait);
        init_waitqueue_head(&c->gc_wait);
        sema_init(&c->uuid_write_mutex, 1);
@@ -1809,6 +1834,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
        c->congested_read_threshold_us  = 2000;
        c->congested_write_threshold_us = 20000;
        c->error_limit  = DEFAULT_IO_ERROR_LIMIT;
+       c->idle_max_writeback_rate_enabled = 1;
        WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
        return c;
@@ -1954,7 +1980,7 @@ static int run_cache_set(struct cache_set *c)
 
                mutex_lock(&c->bucket_lock);
                for_each_cache(ca, c, i)
-                       bch_prio_write(ca);
+                       bch_prio_write(ca, true);
                mutex_unlock(&c->bucket_lock);
 
                err = "cannot allocate new UUID bucket";
index 627dcea..733e2dd 100644 (file)
@@ -134,6 +134,7 @@ rw_attribute(expensive_debug_checks);
 rw_attribute(cache_replacement_policy);
 rw_attribute(btree_shrinker_disabled);
 rw_attribute(copy_gc_enabled);
+rw_attribute(idle_max_writeback_rate);
 rw_attribute(gc_after_writeback);
 rw_attribute(size);
 
@@ -747,6 +748,8 @@ SHOW(__bch_cache_set)
        sysfs_printf(gc_always_rewrite,         "%i", c->gc_always_rewrite);
        sysfs_printf(btree_shrinker_disabled,   "%i", c->shrinker_disabled);
        sysfs_printf(copy_gc_enabled,           "%i", c->copy_gc_enabled);
+       sysfs_printf(idle_max_writeback_rate,   "%i",
+                    c->idle_max_writeback_rate_enabled);
        sysfs_printf(gc_after_writeback,        "%i", c->gc_after_writeback);
        sysfs_printf(io_disable,                "%i",
                     test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -864,6 +867,9 @@ STORE(__bch_cache_set)
        sysfs_strtoul_bool(gc_always_rewrite,   c->gc_always_rewrite);
        sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
        sysfs_strtoul_bool(copy_gc_enabled,     c->copy_gc_enabled);
+       sysfs_strtoul_bool(idle_max_writeback_rate,
+                          c->idle_max_writeback_rate_enabled);
+
        /*
         * write gc_after_writeback here may overwrite an already set
         * BCH_DO_AUTO_GC, it doesn't matter because this flag will be
@@ -954,6 +960,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
        &sysfs_gc_always_rewrite,
        &sysfs_btree_shrinker_disabled,
        &sysfs_copy_gc_enabled,
+       &sysfs_idle_max_writeback_rate,
        &sysfs_gc_after_writeback,
        &sysfs_io_disable,
        &sysfs_cutoff_writeback,
index d60268f..4a40f9e 100644 (file)
@@ -122,6 +122,10 @@ static void __update_writeback_rate(struct cached_dev *dc)
 static bool set_at_max_writeback_rate(struct cache_set *c,
                                       struct cached_dev *dc)
 {
+       /* Don't set max writeback rate if it is disabled */
+       if (!c->idle_max_writeback_rate_enabled)
+               return false;
+
        /* Don't set max writeback rate if gc is running */
        if (!c->gc_mark_valid)
                return false;
index 2900fbd..76587e9 100644 (file)
@@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
        struct flakey_c *fc = ti->private;
 
        bio_set_dev(bio, fc->dev->bdev);
-       if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+       if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
                bio->bi_iter.bi_sector =
                        flakey_map_sector(ti, bio->bi_iter.bi_sector);
 }
@@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
        pb->bio_submitted = false;
 
-       /* Do not fail reset zone */
-       if (bio_op(bio) == REQ_OP_ZONE_RESET)
+       if (op_is_zone_mgmt(bio_op(bio)))
                goto map_bio;
 
        /* Are we alive ? */
@@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
        struct flakey_c *fc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-       if (bio_op(bio) == REQ_OP_ZONE_RESET)
+       if (op_is_zone_mgmt(bio_op(bio)))
                return DM_ENDIO_DONE;
 
        if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
index ecefe67..97acafd 100644 (file)
@@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
        struct linear_c *lc = ti->private;
 
        bio_set_dev(bio, lc->dev->bdev);
-       if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+       if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
                bio->bi_iter.bi_sector =
                        linear_map_sector(ti, bio->bi_iter.bi_sector);
 }
index 1a5e328..bc143c1 100644 (file)
@@ -1174,7 +1174,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
- * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
+ * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
  *
  * dm_accept_partial_bio informs the dm that the target only wants to process
  * additional n_sectors sectors of the bio and the rest of the data should be
@@ -1627,7 +1628,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
                ci.sector_count = 0;
                error = __send_empty_flush(&ci);
                /* dec_pending submits any data associated with flush */
-       } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
+       } else if (op_is_zone_mgmt(bio_op(bio))) {
                ci.bio = bio;
                ci.sector_count = 0;
                error = __split_and_process_non_flush(&ci);
index b092c7b..3ad1824 100644 (file)
@@ -2139,6 +2139,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                memcpy(page_address(store.sb_page),
                       page_address(bitmap->storage.sb_page),
                       sizeof(bitmap_super_t));
+       spin_lock_irq(&bitmap->counts.lock);
        md_bitmap_file_unmap(&bitmap->storage);
        bitmap->storage = store;
 
@@ -2154,7 +2155,6 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
        blocks = min(old_counts.chunks << old_counts.chunkshift,
                     chunks << chunkshift);
 
-       spin_lock_irq(&bitmap->counts.lock);
        /* For cluster raid, need to pre-allocate bitmap */
        if (mddev_is_clustered(bitmap->mddev)) {
                unsigned long page;
index c766c55..26c75c0 100644 (file)
@@ -244,10 +244,9 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
        sector_t start_sector, end_sector, data_offset;
        sector_t bio_sector = bio->bi_iter.bi_sector;
 
-       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-               md_flush_request(mddev, bio);
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+           && md_flush_request(mddev, bio))
                return true;
-       }
 
        tmp_dev = which_dev(mddev, bio_sector);
        start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
index 6780938..152f9e6 100644 (file)
@@ -104,10 +104,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
        struct multipath_bh * mp_bh;
        struct multipath_info *multipath;
 
-       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-               md_flush_request(mddev, bio);
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+           && md_flush_request(mddev, bio))
                return true;
-       }
 
        mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
 
index 1be7abe..805b33e 100644 (file)
@@ -550,7 +550,13 @@ static void md_submit_flush_data(struct work_struct *ws)
        }
 }
 
-void md_flush_request(struct mddev *mddev, struct bio *bio)
+/*
+ * Manages consolidation of flushes and submitting any flushes needed for
+ * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or is
+ * being finished in another context.  Returns false if the flushing is
+ * complete but still needs the I/O portion of the bio to be processed.
+ */
+bool md_flush_request(struct mddev *mddev, struct bio *bio)
 {
        ktime_t start = ktime_get_boottime();
        spin_lock_irq(&mddev->lock);
@@ -575,9 +581,10 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
                        bio_endio(bio);
                else {
                        bio->bi_opf &= ~REQ_PREFLUSH;
-                       mddev->pers->make_request(mddev, bio);
+                       return false;
                }
        }
+       return true;
 }
 EXPORT_SYMBOL(md_flush_request);
 
@@ -1098,6 +1105,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
        char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
        mdp_super_t *sb;
        int ret;
+       bool spare_disk = true;
 
        /*
         * Calculate the position of the superblock (512byte sectors),
@@ -1148,8 +1156,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
        else
                rdev->desc_nr = sb->this_disk.number;
 
+       /* not spare disk, or LEVEL_MULTIPATH */
+       if (sb->level == LEVEL_MULTIPATH ||
+               (rdev->desc_nr >= 0 &&
+                sb->disks[rdev->desc_nr].state &
+                ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
+               spare_disk = false;
+
        if (!refdev) {
-               ret = 1;
+               if (!spare_disk)
+                       ret = 1;
+               else
+                       ret = 0;
        } else {
                __u64 ev1, ev2;
                mdp_super_t *refsb = page_address(refdev->sb_page);
@@ -1165,7 +1183,8 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
                }
                ev1 = md_event(sb);
                ev2 = md_event(refsb);
-               if (ev1 > ev2)
+
+               if (!spare_disk && ev1 > ev2)
                        ret = 1;
                else
                        ret = 0;
@@ -1525,6 +1544,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
        sector_t sectors;
        char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
        int bmask;
+       bool spare_disk = true;
 
        /*
         * Calculate the position of the superblock in 512byte sectors.
@@ -1658,8 +1678,19 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
            sb->level != 0)
                return -EINVAL;
 
+       /* not spare disk, or LEVEL_MULTIPATH */
+       if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
+               (rdev->desc_nr >= 0 &&
+               rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
+               (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
+                le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
+               spare_disk = false;
+
        if (!refdev) {
-               ret = 1;
+               if (!spare_disk)
+                       ret = 1;
+               else
+                       ret = 0;
        } else {
                __u64 ev1, ev2;
                struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
@@ -1676,7 +1707,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                ev1 = le64_to_cpu(sb->events);
                ev2 = le64_to_cpu(refsb->events);
 
-               if (ev1 > ev2)
+               if (!spare_disk && ev1 > ev2)
                        ret = 1;
                else
                        ret = 0;
@@ -3597,7 +3628,7 @@ abort_free:
  * Check a full RAID array for plausibility
  */
 
-static void analyze_sbs(struct mddev *mddev)
+static int analyze_sbs(struct mddev *mddev)
 {
        int i;
        struct md_rdev *rdev, *freshest, *tmp;
@@ -3618,6 +3649,12 @@ static void analyze_sbs(struct mddev *mddev)
                        md_kick_rdev_from_array(rdev);
                }
 
+       /* Cannot find a valid fresh disk */
+       if (!freshest) {
+               pr_warn("md: cannot find a valid disk\n");
+               return -EINVAL;
+       }
+
        super_types[mddev->major_version].
                validate_super(mddev, freshest);
 
@@ -3652,6 +3689,8 @@ static void analyze_sbs(struct mddev *mddev)
                        clear_bit(In_sync, &rdev->flags);
                }
        }
+
+       return 0;
 }
 
 /* Read a fixed-point number.
@@ -5570,7 +5609,9 @@ int md_run(struct mddev *mddev)
        if (!mddev->raid_disks) {
                if (!mddev->persistent)
                        return -EINVAL;
-               analyze_sbs(mddev);
+               err = analyze_sbs(mddev);
+               if (err)
+                       return -EINVAL;
        }
 
        if (mddev->level != LEVEL_NONE)
index c5e3ff3..5f86f8a 100644 (file)
@@ -550,7 +550,7 @@ struct md_personality
        int level;
        struct list_head list;
        struct module *owner;
-       bool (*make_request)(struct mddev *mddev, struct bio *bio);
+       bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
        /*
         * start up works that do NOT require md_thread. tasks that
         * requires md_thread should go into start()
@@ -703,7 +703,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 
 extern int mddev_congested(struct mddev *mddev, int bits);
-extern void md_flush_request(struct mddev *mddev, struct bio *bio);
+extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
                           sector_t sector, int size, struct page *page);
 extern int md_super_wait(struct mddev *mddev);
index 1e77228..b7c2097 100644 (file)
@@ -575,10 +575,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
        unsigned chunk_sects;
        unsigned sectors;
 
-       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-               md_flush_request(mddev, bio);
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+           && md_flush_request(mddev, bio))
                return true;
-       }
 
        if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
                raid0_handle_discard(mddev, bio);
@@ -615,7 +614,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
                tmp_dev = map_sector(mddev, zone, sector, &sector);
                break;
        default:
-               WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
+               WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
                bio_io_error(bio);
                return true;
        }
index 0466ee2..a409ab6 100644 (file)
@@ -819,6 +819,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
                else
                        generic_make_request(bio);
                bio = next;
+               cond_resched();
        }
 }
 
@@ -1567,10 +1568,9 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
 {
        sector_t sectors;
 
-       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-               md_flush_request(mddev, bio);
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+           && md_flush_request(mddev, bio))
                return true;
-       }
 
        /*
         * There is a limit to the maximum size, but
index 299c7b1..ec136e4 100644 (file)
@@ -191,7 +191,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 out_free_pages:
        while (--j >= 0)
-               resync_free_pages(&rps[j * 2]);
+               resync_free_pages(&rps[j]);
 
        j = 0;
 out_free_bio:
@@ -1525,10 +1525,9 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
        int chunk_sects = chunk_mask + 1;
        int sectors = bio_sectors(bio);
 
-       if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
-               md_flush_request(mddev, bio);
+       if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+           && md_flush_request(mddev, bio))
                return true;
-       }
 
        if (!md_write_start(mddev, bio))
                return false;
index 18a4064..cab5b13 100644 (file)
@@ -1404,7 +1404,7 @@ int ppl_init_log(struct r5conf *conf)
        atomic64_set(&ppl_conf->seq, 0);
        INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
        spin_lock_init(&ppl_conf->no_mem_stripes_lock);
-       ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
+       ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
 
        if (!mddev->external) {
                ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
index 223e97a..f0fc538 100644 (file)
@@ -1134,7 +1134,7 @@ again:
                        bi->bi_iter.bi_size = STRIPE_SIZE;
                        bi->bi_write_hint = sh->dev[i].write_hint;
                        if (!rrdev)
-                               sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+                               sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
@@ -1187,7 +1187,7 @@ again:
                        rbi->bi_io_vec[0].bv_offset = 0;
                        rbi->bi_iter.bi_size = STRIPE_SIZE;
                        rbi->bi_write_hint = sh->dev[i].write_hint;
-                       sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+                       sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
@@ -5592,8 +5592,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
                if (ret == 0)
                        return true;
                if (ret == -ENODEV) {
-                       md_flush_request(mddev, bi);
-                       return true;
+                       if (md_flush_request(mddev, bi))
+                               return true;
                }
                /* ret == -EAGAIN, fallback */
                /*
index fa7ba09..c5e434c 100644 (file)
@@ -283,6 +283,8 @@ void nvme_complete_rq(struct request *req)
 
        trace_nvme_complete_rq(req);
 
+       nvme_cleanup_cmd(req);
+
        if (nvme_req(req)->ctrl->kas)
                nvme_req(req)->ctrl->comp_seen = true;
 
@@ -313,7 +315,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
        if (blk_mq_request_completed(req))
                return true;
 
-       nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
+       nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
        blk_mq_complete_request(req);
        return true;
 }
@@ -626,7 +628,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
        }
 
        __rq_for_each_bio(bio, req) {
-               u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+               u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
                u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
 
                if (n < segments) {
@@ -667,7 +669,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
        cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
        cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
        cmnd->write_zeroes.slba =
-               cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+               cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
        cmnd->write_zeroes.length =
                cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
        cmnd->write_zeroes.control = 0;
@@ -691,7 +693,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
        cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
        cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
-       cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+       cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
        cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
        if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
@@ -1647,7 +1649,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
 
 static void nvme_set_chunk_size(struct nvme_ns *ns)
 {
-       u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+       u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
        blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
 }
 
@@ -1684,8 +1686,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
 
 static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
 {
-       u32 max_sectors;
-       unsigned short bs = 1 << ns->lba_shift;
+       u64 max_blocks;
 
        if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
            (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
@@ -1701,11 +1702,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
         * nvme_init_identify() if available.
         */
        if (ns->ctrl->max_hw_sectors == UINT_MAX)
-               max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9;
+               max_blocks = (u64)USHRT_MAX + 1;
        else
-               max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
+               max_blocks = ns->ctrl->max_hw_sectors + 1;
 
-       blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
+       blk_queue_max_write_zeroes_sectors(disk->queue,
+                                          nvme_lba_to_sect(ns, max_blocks));
 }
 
 static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
@@ -1748,7 +1750,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 static void nvme_update_disk_info(struct gendisk *disk,
                struct nvme_ns *ns, struct nvme_id_ns *id)
 {
-       sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
+       sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
        unsigned short bs = 1 << ns->lba_shift;
        u32 atomic_bs, phys_bs, io_opt;
 
index 265f89e..679a721 100644 (file)
@@ -1224,7 +1224,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
        lsreq->rqstlen = sizeof(*assoc_rqst);
        lsreq->rspaddr = assoc_acc;
        lsreq->rsplen = sizeof(*assoc_acc);
-       lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+       lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
 
        ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
        if (ret)
@@ -1264,7 +1264,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
        if (fcret) {
                ret = -EBADF;
                dev_err(ctrl->dev,
-                       "q %d connect failed: %s\n",
+                       "q %d Create Association LS failed: %s\n",
                        queue->qnum, validation_errors[fcret]);
        } else {
                ctrl->association_id =
@@ -1332,7 +1332,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
        lsreq->rqstlen = sizeof(*conn_rqst);
        lsreq->rspaddr = conn_acc;
        lsreq->rsplen = sizeof(*conn_acc);
-       lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+       lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
 
        ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
        if (ret)
@@ -1363,7 +1363,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
        if (fcret) {
                ret = -EBADF;
                dev_err(ctrl->dev,
-                       "q %d connect failed: %s\n",
+                       "q %d Create I/O Connection LS failed: %s\n",
                        queue->qnum, validation_errors[fcret]);
        } else {
                queue->connection_id =
@@ -1376,7 +1376,7 @@ out_free_buffer:
 out_no_memory:
        if (ret)
                dev_err(ctrl->dev,
-                       "queue %d connect command failed (%d).\n",
+                       "queue %d connect I/O queue failed (%d).\n",
                        queue->qnum, ret);
        return ret;
 }
@@ -1413,8 +1413,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
 static void
 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 {
-       struct fcnvme_ls_disconnect_rqst *discon_rqst;
-       struct fcnvme_ls_disconnect_acc *discon_acc;
+       struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+       struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
        struct nvmefc_ls_req_op *lsop;
        struct nvmefc_ls_req *lsreq;
        int ret;
@@ -1430,11 +1430,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
        lsreq = &lsop->ls_req;
 
        lsreq->private = (void *)&lsop[1];
-       discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
+       discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
                        (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
-       discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
+       discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
 
-       discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
+       discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
        discon_rqst->desc_list_len = cpu_to_be32(
                                sizeof(struct fcnvme_lsdesc_assoc_id) +
                                sizeof(struct fcnvme_lsdesc_disconn_cmd));
@@ -1451,22 +1451,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
        discon_rqst->discon_cmd.desc_len =
                        fcnvme_lsdesc_len(
                                sizeof(struct fcnvme_lsdesc_disconn_cmd));
-       discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
-       discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
 
        lsreq->rqstaddr = discon_rqst;
        lsreq->rqstlen = sizeof(*discon_rqst);
        lsreq->rspaddr = discon_acc;
        lsreq->rsplen = sizeof(*discon_acc);
-       lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+       lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
 
        ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
                                nvme_fc_disconnect_assoc_done);
        if (ret)
                kfree(lsop);
-
-       /* only meaningful part to terminating the association */
-       ctrl->association_id = 0;
 }
 
 
@@ -1662,7 +1657,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                                        (freq->rcv_rsplen / 4) ||
                             be32_to_cpu(op->rsp_iu.xfrd_len) !=
                                        freq->transferred_length ||
-                            op->rsp_iu.status_code ||
+                            op->rsp_iu.ersp_result ||
                             sqe->common.command_id != cqe->command_id)) {
                        status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
                        dev_info(ctrl->ctrl.device,
@@ -1672,7 +1667,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                                ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
                                be32_to_cpu(op->rsp_iu.xfrd_len),
                                freq->transferred_length,
-                               op->rsp_iu.status_code,
+                               op->rsp_iu.ersp_result,
                                sqe->common.command_id,
                                cqe->command_id);
                        goto done;
@@ -1731,9 +1726,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
        op->rq = rq;
        op->rqno = rqno;
 
-       cmdiu->scsi_id = NVME_CMD_SCSI_ID;
+       cmdiu->format_id = NVME_CMD_FORMAT_ID;
        cmdiu->fc_id = NVME_CMD_FC_ID;
        cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
+       if (queue->qnum)
+               cmdiu->rsv_cat = fccmnd_set_cat_css(0,
+                                       (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
+       else
+               cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
 
        op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
                                &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
@@ -2173,8 +2173,6 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
        fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
                        rq_dma_dir(rq));
 
-       nvme_cleanup_cmd(rq);
-
        sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
 
        freq->sg_cnt = 0;
@@ -2305,6 +2303,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                if (!(op->flags & FCOP_FLAGS_AEN))
                        nvme_fc_unmap_data(ctrl, op->rq, op);
 
+               nvme_cleanup_cmd(op->rq);
                nvme_fc_ctrl_put(ctrl);
 
                if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
@@ -2695,7 +2694,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                /* warn if maxcmd is lower than queue_size */
                dev_warn(ctrl->ctrl.device,
                        "queue_size %zu > ctrl maxcmd %u, reducing "
-                       "to queue_size\n",
+                       "to maxcmd\n",
                        opts->queue_size, ctrl->ctrl.maxcmd);
                opts->queue_size = ctrl->ctrl.maxcmd;
        }
@@ -2703,7 +2702,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
                /* warn if sqsize is lower than queue_size */
                dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl sqsize %u, clamping down\n",
+                       "queue_size %zu > ctrl sqsize %u, reducing "
+                       "to sqsize\n",
                        opts->queue_size, ctrl->ctrl.sqsize + 1);
                opts->queue_size = ctrl->ctrl.sqsize + 1;
        }
@@ -2739,6 +2739,7 @@ out_term_aen_ops:
 out_disconnect_admin_queue:
        /* send a Disconnect(association) LS to fc-nvme target */
        nvme_fc_xmt_disconnect_assoc(ctrl);
+       ctrl->association_id = 0;
 out_delete_hw_queue:
        __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
 out_free_queue:
@@ -2830,6 +2831,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
        if (ctrl->association_id)
                nvme_fc_xmt_disconnect_assoc(ctrl);
 
+       ctrl->association_id = 0;
+
        if (ctrl->ctrl.tagset) {
                nvme_fc_delete_hw_io_queues(ctrl);
                nvme_fc_free_io_queues(ctrl);
index e0f064d..797c183 100644 (file)
@@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req)
                }
                break;
        case NVME_SC_HOST_PATH_ERROR:
+       case NVME_SC_HOST_ABORTED_CMD:
                /*
                 * Temporary transport disruption in talking to the controller.
                 * Try to send on a new path.
@@ -446,8 +447,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
 
        for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
                struct nvme_ana_group_desc *desc = base + offset;
-               u32 nr_nsids = le32_to_cpu(desc->nnsids);
-               size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+               u32 nr_nsids;
+               size_t nsid_buf_size;
+
+               if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+                       return -EINVAL;
+
+               nr_nsids = le32_to_cpu(desc->nnsids);
+               nsid_buf_size = nr_nsids * sizeof(__le32);
 
                if (WARN_ON_ONCE(desc->grpid == 0))
                        return -EINVAL;
@@ -467,8 +474,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
                        return error;
 
                offset += nsid_buf_size;
-               if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
-                       return -EINVAL;
        }
 
        return 0;
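
The reordering above ensures a full nvme_ana_group_desc still fits in the remaining log buffer before its nnsids field is read; previously the bound was only checked after the descriptor had already been dereferenced. A standalone sketch of that validate-before-read pattern, with simplified stand-in types rather than the NVMe structures:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct desc_hdr {
	uint32_t nr_entries;	/* count of 32-bit IDs that follow the header */
};

/* Returns 0 if all ngrps descriptors fit inside buf, -1 otherwise. */
static int walk_descs(const uint8_t *buf, size_t len, unsigned int ngrps)
{
	size_t offset = 0;

	for (unsigned int i = 0; i < ngrps; i++) {
		struct desc_hdr hdr;

		/* validate before reading the header at this offset */
		if (len < sizeof(hdr) || offset > len - sizeof(hdr))
			return -1;
		memcpy(&hdr, buf + offset, sizeof(hdr));

		offset += sizeof(hdr) + (size_t)hdr.nr_entries * sizeof(uint32_t);
	}
	return 0;
}

int main(void)
{
	uint8_t logbuf[8] = { 0 };	/* room for two empty descriptors */

	return walk_descs(logbuf, sizeof(logbuf), 2);
}
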
index 22e8401..fdda273 100644 (file)
@@ -419,9 +419,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
        return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
 }
 
-static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
+/*
+ * Convert a 512B sector number to a device logical block number.
+ */
+static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+{
+       return sector >> (ns->lba_shift - SECTOR_SHIFT);
+}
+
+/*
+ * Convert a device logical block number to a 512B sector number.
+ */
+static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
 {
-       return (sector >> (ns->lba_shift - 9));
+       return lba << (ns->lba_shift - SECTOR_SHIFT);
 }
 
 static inline void nvme_end_request(struct request *req, __le16 status,
@@ -446,6 +457,11 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
        put_device(ctrl->device);
 }
 
+static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
+{
+       return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+}
+
 void nvme_complete_rq(struct request *req);
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
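
A standalone illustration of the three helpers introduced above; SECTOR_SHIFT is 9 as in the kernel, while the struct layout and the admin-queue depth value here are simplified stand-ins:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT		9
#define AQ_BLK_MQ_DEPTH		31	/* illustrative stand-in for NVME_AQ_BLK_MQ_DEPTH */

static uint64_t sect_to_lba(unsigned int lba_shift, uint64_t sector)
{
	return sector >> (lba_shift - SECTOR_SHIFT);
}

static uint64_t lba_to_sect(unsigned int lba_shift, uint64_t lba)
{
	return lba << (lba_shift - SECTOR_SHIFT);
}

/* AEN completions are reserved command IDs on the admin queue (qid 0). */
static bool is_aen_req(uint16_t qid, uint16_t command_id)
{
	return !qid && command_id >= AQ_BLK_MQ_DEPTH;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)lba_to_sect(12, 1));	/* 8 */
	printf("%llu\n", (unsigned long long)sect_to_lba(12, 8));	/* 1 */
	printf("%d\n", is_aen_req(0, 40));				/* 1 */
	return 0;
}
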
index 869f462..3aab92b 100644 (file)
@@ -925,7 +925,6 @@ static void nvme_pci_complete_rq(struct request *req)
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_dev *dev = iod->nvmeq->dev;
 
-       nvme_cleanup_cmd(req);
        if (blk_integrity_rq(req))
                dma_unmap_page(dev->dev, iod->meta_dma,
                               rq_integrity_vec(req)->bv_len, rq_data_dir(req));
@@ -968,8 +967,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
         * aborts.  We don't even bother to allocate a struct request
         * for them but rather special case them here.
         */
-       if (unlikely(nvmeq->qid == 0 &&
-                       cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+       if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) {
                nvme_complete_async_event(&nvmeq->dev->ctrl,
                                cqe->status, &cqe->result);
                return;
@@ -2982,7 +2980,7 @@ static int nvme_suspend(struct device *dev)
 
                /*
                 * Clearing npss forces a controller reset on resume. The
-                * correct value will be resdicovered then.
+                * correct value will be rediscovered then.
                 */
                ret = nvme_disable_prepare_reset(ndev, true);
                ctrl->npss = 0;
index cb4c300..dce5945 100644 (file)
@@ -1160,8 +1160,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
        }
 
        ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
-
-       nvme_cleanup_cmd(rq);
        sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
 }
 
@@ -1501,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
         * aborts.  We don't even bother to allocate a struct request
         * for them but rather special case them here.
         */
-       if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
-                       cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+       if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
+                                    cqe->command_id)))
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        else
@@ -1768,7 +1766,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (unlikely(err < 0)) {
                dev_err(queue->ctrl->ctrl.device,
                             "Failed to map data (%d)\n", err);
-               nvme_cleanup_cmd(rq);
                goto err;
        }
 
@@ -1779,18 +1776,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                        req->mr ? &req->reg_wr.wr : NULL);
-       if (unlikely(err)) {
-               nvme_rdma_unmap_data(queue, rq);
-               goto err;
-       }
+       if (unlikely(err))
+               goto err_unmap;
 
        return BLK_STS_OK;
 
+err_unmap:
+       nvme_rdma_unmap_data(queue, rq);
 err:
        if (err == -ENOMEM || err == -EAGAIN)
                ret = BLK_STS_RESOURCE;
        else
                ret = BLK_STS_IOERR;
+       nvme_cleanup_cmd(rq);
 unmap_qe:
        ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
                            DMA_TO_DEVICE);
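
The error paths above now converge so that nvme_cleanup_cmd() runs for every failure after the command has been set up, with a deeper label that undoes the data mapping only when the RDMA post itself fails. A generic sketch of that goto-unwind ordering; names and return values are illustrative, not the driver's:

#include <errno.h>

static int setup_cmd(void)	{ return 0; }
static int map_data(void)	{ return 0; }
static int post_send(void)	{ return -EIO; }	/* pretend the post fails */
static void unmap_data(void)	{ }
static void cleanup_cmd(void)	{ }

static int queue_rq(void)
{
	int err;

	err = setup_cmd();
	if (err)
		goto out;

	err = map_data();
	if (err)
		goto out_cleanup;	/* nothing mapped yet */

	err = post_send();
	if (err)
		goto out_unmap;		/* undo the mapping first */

	return 0;

out_unmap:
	unmap_data();
out_cleanup:
	cleanup_cmd();
out:
	return err;
}

int main(void)
{
	return queue_rq() ? 1 : 0;
}
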
index 7544be8..6d43b23 100644 (file)
@@ -491,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
         * aborts.  We don't even bother to allocate a struct request
         * for them but rather special case them here.
         */
-       if (unlikely(nvme_tcp_queue_id(queue) == 0 &&
-           cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+       if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
+                                    cqe->command_id)))
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        else
index 831a062..56c21b5 100644 (file)
@@ -31,7 +31,7 @@ u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
 
 static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
 {
-       nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+       nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len));
 }
 
 static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
@@ -134,7 +134,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
        u16 status = NVME_SC_INTERNAL;
        unsigned long flags;
 
-       if (req->data_len != sizeof(*log))
+       if (req->transfer_len != sizeof(*log))
                goto out;
 
        log = kzalloc(sizeof(*log), GFP_KERNEL);
@@ -196,7 +196,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
        u16 status = NVME_SC_INTERNAL;
        size_t len;
 
-       if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
+       if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
                goto out;
 
        mutex_lock(&ctrl->lock);
@@ -206,7 +206,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
                len = ctrl->nr_changed_ns * sizeof(__le32);
        status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
        if (!status)
-               status = nvmet_zero_sgl(req, len, req->data_len - len);
+               status = nvmet_zero_sgl(req, len, req->transfer_len - len);
        ctrl->nr_changed_ns = 0;
        nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
        mutex_unlock(&ctrl->lock);
@@ -282,6 +282,36 @@ out:
        nvmet_req_complete(req, status);
 }
 
+static void nvmet_execute_get_log_page(struct nvmet_req *req)
+{
+       if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+               return;
+
+       switch (req->cmd->get_log_page.lid) {
+       case NVME_LOG_ERROR:
+               return nvmet_execute_get_log_page_error(req);
+       case NVME_LOG_SMART:
+               return nvmet_execute_get_log_page_smart(req);
+       case NVME_LOG_FW_SLOT:
+               /*
+                * We only support a single firmware slot which always is
+                * active, so we can zero out the whole firmware slot log and
+                * still claim to fully implement this mandatory log page.
+                */
+               return nvmet_execute_get_log_page_noop(req);
+       case NVME_LOG_CHANGED_NS:
+               return nvmet_execute_get_log_changed_ns(req);
+       case NVME_LOG_CMD_EFFECTS:
+               return nvmet_execute_get_log_cmd_effects_ns(req);
+       case NVME_LOG_ANA:
+               return nvmet_execute_get_log_page_ana(req);
+       }
+       pr_err("unhandled lid %d on qid %d\n",
+              req->cmd->get_log_page.lid, req->sq->qid);
+       req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
+       nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -565,6 +595,28 @@ out:
        nvmet_req_complete(req, status);
 }
 
+static void nvmet_execute_identify(struct nvmet_req *req)
+{
+       if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+               return;
+
+       switch (req->cmd->identify.cns) {
+       case NVME_ID_CNS_NS:
+               return nvmet_execute_identify_ns(req);
+       case NVME_ID_CNS_CTRL:
+               return nvmet_execute_identify_ctrl(req);
+       case NVME_ID_CNS_NS_ACTIVE_LIST:
+               return nvmet_execute_identify_nslist(req);
+       case NVME_ID_CNS_NS_DESC_LIST:
+               return nvmet_execute_identify_desclist(req);
+       }
+
+       pr_err("unhandled identify cns %d on qid %d\n",
+              req->cmd->identify.cns, req->sq->qid);
+       req->error_loc = offsetof(struct nvme_identify, cns);
+       nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
 /*
  * A "minimum viable" abort implementation: the command is mandatory in the
  * spec, but we are not required to do any useful work.  We couldn't really
@@ -574,6 +626,8 @@ out:
  */
 static void nvmet_execute_abort(struct nvmet_req *req)
 {
+       if (!nvmet_check_data_len(req, 0))
+               return;
        nvmet_set_result(req, 1);
        nvmet_req_complete(req, 0);
 }
@@ -658,6 +712,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
        u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        switch (cdw10 & 0xff) {
        case NVME_FEAT_NUM_QUEUES:
                nvmet_set_result(req,
@@ -721,6 +778,9 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
        u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        switch (cdw10 & 0xff) {
        /*
         * These features are mandatory in the spec, but we don't
@@ -785,6 +845,9 @@ void nvmet_execute_async_event(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        mutex_lock(&ctrl->lock);
        if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
                mutex_unlock(&ctrl->lock);
@@ -801,6 +864,9 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        pr_debug("ctrl %d update keep-alive timer for %d secs\n",
                ctrl->cntlid, ctrl->kato);
 
@@ -813,77 +879,36 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
        struct nvme_command *cmd = req->cmd;
        u16 ret;
 
+       if (nvme_is_fabrics(cmd))
+               return nvmet_parse_fabrics_cmd(req);
+       if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
+               return nvmet_parse_discovery_cmd(req);
+
        ret = nvmet_check_ctrl_status(req, cmd);
        if (unlikely(ret))
                return ret;
 
        switch (cmd->common.opcode) {
        case nvme_admin_get_log_page:
-               req->data_len = nvmet_get_log_page_len(cmd);
-
-               switch (cmd->get_log_page.lid) {
-               case NVME_LOG_ERROR:
-                       req->execute = nvmet_execute_get_log_page_error;
-                       return 0;
-               case NVME_LOG_SMART:
-                       req->execute = nvmet_execute_get_log_page_smart;
-                       return 0;
-               case NVME_LOG_FW_SLOT:
-                       /*
-                        * We only support a single firmware slot which always
-                        * is active, so we can zero out the whole firmware slot
-                        * log and still claim to fully implement this mandatory
-                        * log page.
-                        */
-                       req->execute = nvmet_execute_get_log_page_noop;
-                       return 0;
-               case NVME_LOG_CHANGED_NS:
-                       req->execute = nvmet_execute_get_log_changed_ns;
-                       return 0;
-               case NVME_LOG_CMD_EFFECTS:
-                       req->execute = nvmet_execute_get_log_cmd_effects_ns;
-                       return 0;
-               case NVME_LOG_ANA:
-                       req->execute = nvmet_execute_get_log_page_ana;
-                       return 0;
-               }
-               break;
+               req->execute = nvmet_execute_get_log_page;
+               return 0;
        case nvme_admin_identify:
-               req->data_len = NVME_IDENTIFY_DATA_SIZE;
-               switch (cmd->identify.cns) {
-               case NVME_ID_CNS_NS:
-                       req->execute = nvmet_execute_identify_ns;
-                       return 0;
-               case NVME_ID_CNS_CTRL:
-                       req->execute = nvmet_execute_identify_ctrl;
-                       return 0;
-               case NVME_ID_CNS_NS_ACTIVE_LIST:
-                       req->execute = nvmet_execute_identify_nslist;
-                       return 0;
-               case NVME_ID_CNS_NS_DESC_LIST:
-                       req->execute = nvmet_execute_identify_desclist;
-                       return 0;
-               }
-               break;
+               req->execute = nvmet_execute_identify;
+               return 0;
        case nvme_admin_abort_cmd:
                req->execute = nvmet_execute_abort;
-               req->data_len = 0;
                return 0;
        case nvme_admin_set_features:
                req->execute = nvmet_execute_set_features;
-               req->data_len = 0;
                return 0;
        case nvme_admin_get_features:
                req->execute = nvmet_execute_get_features;
-               req->data_len = 0;
                return 0;
        case nvme_admin_async_event:
                req->execute = nvmet_execute_async_event;
-               req->data_len = 0;
                return 0;
        case nvme_admin_keep_alive:
                req->execute = nvmet_execute_keep_alive;
-               req->data_len = 0;
                return 0;
        }
 
index 3a67e24..28438b8 100644 (file)
@@ -892,14 +892,10 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
        }
 
        if (unlikely(!req->sq->ctrl))
-               /* will return an error for any Non-connect command: */
+               /* will return an error for any non-connect command: */
                status = nvmet_parse_connect_cmd(req);
        else if (likely(req->sq->qid != 0))
                status = nvmet_parse_io_cmd(req);
-       else if (nvme_is_fabrics(req->cmd))
-               status = nvmet_parse_fabrics_cmd(req);
-       else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
-               status = nvmet_parse_discovery_cmd(req);
        else
                status = nvmet_parse_admin_cmd(req);
 
@@ -930,15 +926,17 @@ void nvmet_req_uninit(struct nvmet_req *req)
 }
 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
 
-void nvmet_req_execute(struct nvmet_req *req)
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
 {
-       if (unlikely(req->data_len != req->transfer_len)) {
+       if (unlikely(data_len != req->transfer_len)) {
                req->error_loc = offsetof(struct nvme_common_command, dptr);
                nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
-       } else
-               req->execute(req);
+               return false;
+       }
+
+       return true;
 }
-EXPORT_SYMBOL_GPL(nvmet_req_execute);
+EXPORT_SYMBOL_GPL(nvmet_check_data_len);
 
 int nvmet_req_alloc_sgl(struct nvmet_req *req)
 {
@@ -966,7 +964,7 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req)
        }
 
        req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
-       if (!req->sg)
+       if (unlikely(!req->sg))
                return -ENOMEM;
 
        return 0;
index 3764a89..0c2274b 100644 (file)
@@ -157,7 +157,7 @@ static size_t discovery_log_entries(struct nvmet_req *req)
        return entries;
 }
 
-static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
+static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
 {
        const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -171,6 +171,16 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
        u16 status = 0;
        void *buffer;
 
+       if (!nvmet_check_data_len(req, data_len))
+               return;
+
+       if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
+               req->error_loc =
+                       offsetof(struct nvme_get_log_page_command, lid);
+               status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+               goto out;
+       }
+
        /* Spec requires dword aligned offsets */
        if (offset & 0x3) {
                status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
@@ -227,20 +237,35 @@ out:
        nvmet_req_complete(req, status);
 }
 
-static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
+static void nvmet_execute_disc_identify(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvme_id_ctrl *id;
+       const char model[] = "Linux";
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+               return;
+
+       if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
+               req->error_loc = offsetof(struct nvme_identify, cns);
+               status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+               goto out;
+       }
+
        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id) {
                status = NVME_SC_INTERNAL;
                goto out;
        }
 
+       memset(id->sn, ' ', sizeof(id->sn));
+       bin2hex(id->sn, &ctrl->subsys->serial,
+               min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
        memset(id->fr, ' ', sizeof(id->fr));
-       strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
+       memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+       memcpy_and_pad(id->fr, sizeof(id->fr),
+                      UTS_RELEASE, strlen(UTS_RELEASE), ' ');
 
        /* no limit on data transfer sizes for now */
        id->mdts = 0;
@@ -273,6 +298,9 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
        u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
        u16 stat;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        switch (cdw10 & 0xff) {
        case NVME_FEAT_KATO:
                stat = nvmet_set_feat_kato(req);
@@ -296,6 +324,9 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
        u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
        u16 stat = 0;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        switch (cdw10 & 0xff) {
        case NVME_FEAT_KATO:
                nvmet_get_feat_kato(req);
@@ -328,47 +359,22 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
        switch (cmd->common.opcode) {
        case nvme_admin_set_features:
                req->execute = nvmet_execute_disc_set_features;
-               req->data_len = 0;
                return 0;
        case nvme_admin_get_features:
                req->execute = nvmet_execute_disc_get_features;
-               req->data_len = 0;
                return 0;
        case nvme_admin_async_event:
                req->execute = nvmet_execute_async_event;
-               req->data_len = 0;
                return 0;
        case nvme_admin_keep_alive:
                req->execute = nvmet_execute_keep_alive;
-               req->data_len = 0;
                return 0;
        case nvme_admin_get_log_page:
-               req->data_len = nvmet_get_log_page_len(cmd);
-
-               switch (cmd->get_log_page.lid) {
-               case NVME_LOG_DISC:
-                       req->execute = nvmet_execute_get_disc_log_page;
-                       return 0;
-               default:
-                       pr_err("unsupported get_log_page lid %d\n",
-                              cmd->get_log_page.lid);
-                       req->error_loc =
-                               offsetof(struct nvme_get_log_page_command, lid);
-                       return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-               }
+               req->execute = nvmet_execute_disc_get_log_page;
+               return 0;
        case nvme_admin_identify:
-               req->data_len = NVME_IDENTIFY_DATA_SIZE;
-               switch (cmd->identify.cns) {
-               case NVME_ID_CNS_CTRL:
-                       req->execute =
-                               nvmet_execute_identify_disc_ctrl;
-                       return 0;
-               default:
-                       pr_err("unsupported identify cns %d\n",
-                              cmd->identify.cns);
-                       req->error_loc = offsetof(struct nvme_identify, cns);
-                       return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-               }
+               req->execute = nvmet_execute_disc_identify;
+               return 0;
        default:
                pr_err("unhandled cmd %d\n", cmd->common.opcode);
                req->error_loc = offsetof(struct nvme_common_command, opcode);
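
The discovery Identify handler above now reports a serial number, a "Linux" model string, and the kernel release as firmware, all space-padded to their fixed field widths and never NUL-terminated. A standalone sketch of that fixed-width padding; the field widths (20/40/8) follow the Identify Controller layout, while the helpers and serial value are local stand-ins for bin2hex()/memcpy_and_pad():

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static void copy_and_pad(char *dst, size_t dst_len,
			 const char *src, size_t src_len, char pad)
{
	size_t n = src_len < dst_len ? src_len : dst_len;

	memcpy(dst, src, n);
	memset(dst + n, pad, dst_len - n);
}

int main(void)
{
	char sn[20], mn[40], fr[8];
	uint64_t serial = 0x1234abcd5678ef00ULL;	/* illustrative subsystem serial */

	memset(sn, ' ', sizeof(sn));
	snprintf(sn, sizeof(sn), "%016llx", (unsigned long long)serial);
	sn[16] = ' ';					/* drop snprintf's NUL, keep padding */

	copy_and_pad(mn, sizeof(mn), "Linux", 5, ' ');
	copy_and_pad(fr, sizeof(fr), "5.5.0", 5, ' ');

	printf("%.20s|%.40s|%.8s|\n", sn, mn, fr);
	return 0;
}
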
index d16b55f..f729747 100644 (file)
@@ -12,6 +12,9 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
        u64 val = le64_to_cpu(req->cmd->prop_set.value);
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        if (req->cmd->prop_set.attrib & 1) {
                req->error_loc =
                        offsetof(struct nvmf_property_set_command, attrib);
@@ -38,6 +41,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
        u16 status = 0;
        u64 val = 0;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        if (req->cmd->prop_get.attrib & 1) {
                switch (le32_to_cpu(req->cmd->prop_get.offset)) {
                case NVME_REG_CAP:
@@ -82,11 +88,9 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
 
        switch (cmd->fabrics.fctype) {
        case nvme_fabrics_type_property_set:
-               req->data_len = 0;
                req->execute = nvmet_execute_prop_set;
                break;
        case nvme_fabrics_type_property_get:
-               req->data_len = 0;
                req->execute = nvmet_execute_prop_get;
                break;
        default:
@@ -147,6 +151,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
        struct nvmet_ctrl *ctrl = NULL;
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+               return;
+
        d = kmalloc(sizeof(*d), GFP_KERNEL);
        if (!d) {
                status = NVME_SC_INTERNAL;
@@ -211,6 +218,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
        u16 qid = le16_to_cpu(c->qid);
        u16 status = 0;
 
+       if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+               return;
+
        d = kmalloc(sizeof(*d), GFP_KERNEL);
        if (!d) {
                status = NVME_SC_INTERNAL;
@@ -281,7 +291,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
 
-       req->data_len = sizeof(struct nvmf_connect_data);
        if (cmd->connect.qid == 0)
                req->execute = nvmet_execute_admin_connect;
        else
index ce8d819..a0db637 100644 (file)
@@ -1495,20 +1495,20 @@ static void
 nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                        struct nvmet_fc_ls_iod *iod)
 {
-       struct fcnvme_ls_disconnect_rqst *rqst =
-                       (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
-       struct fcnvme_ls_disconnect_acc *acc =
-                       (struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
+       struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+                       (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf;
+       struct fcnvme_ls_disconnect_assoc_acc *acc =
+                       (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf;
        struct nvmet_fc_tgt_assoc *assoc;
        int ret = 0;
 
        memset(acc, 0, sizeof(*acc));
 
-       if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst))
+       if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
                ret = VERR_DISCONN_LEN;
        else if (rqst->desc_list_len !=
                        fcnvme_lsdesc_len(
-                               sizeof(struct fcnvme_ls_disconnect_rqst)))
+                               sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
                ret = VERR_DISCONN_RQST_LEN;
        else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
                ret = VERR_ASSOC_ID;
@@ -1523,8 +1523,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
                        fcnvme_lsdesc_len(
                                sizeof(struct fcnvme_lsdesc_disconn_cmd)))
                ret = VERR_DISCONN_CMD_LEN;
-       else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) &&
-                       (rqst->discon_cmd.scope != FCNVME_DISCONN_CONNECTION))
+       /*
+        * As the standard changed on the LS, check if old format and scope
+        * The standard changed this LS; check for an old-format request with
+        * a scope other than Association (nonzero now-reserved bytes).
+        */
+       else if (rqst->discon_cmd.rsvd8[0])
                ret = VERR_DISCONN_SCOPE;
        else {
                /* match an active association */
@@ -1556,8 +1559,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
 
        nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
                        fcnvme_lsdesc_len(
-                               sizeof(struct fcnvme_ls_disconnect_acc)),
-                       FCNVME_LS_DISCONNECT);
+                               sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+                       FCNVME_LS_DISCONNECT_ASSOC);
 
        /* release get taken in nvmet_fc_find_target_assoc */
        nvmet_fc_tgt_a_put(iod->assoc);
@@ -1632,7 +1635,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
                /* Creates an IO Queue/Connection */
                nvmet_fc_ls_create_connection(tgtport, iod);
                break;
-       case FCNVME_LS_DISCONNECT:
+       case FCNVME_LS_DISCONNECT_ASSOC:
                /* Terminate a Queue/Connection or the Association */
                nvmet_fc_ls_disconnect(tgtport, iod);
                break;
@@ -2015,7 +2018,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
                }
 
                /* data transfer complete, resume with nvmet layer */
-               nvmet_req_execute(&fod->req);
+               fod->req.execute(&fod->req);
                break;
 
        case NVMET_FCOP_READDATA:
@@ -2231,7 +2234,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
         * can invoke the nvmet_layer now. If read data, cmd completion will
         * push the data
         */
-       nvmet_req_execute(&fod->req);
+       fod->req.execute(&fod->req);
        return;
 
 transport_error:
@@ -2299,7 +2302,7 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 
        /* validate iu, so the connection id can be used to find the queue */
        if ((cmdiubuf_len != sizeof(*cmdiu)) ||
-                       (cmdiu->scsi_id != NVME_CMD_SCSI_ID) ||
+                       (cmdiu->format_id != NVME_CMD_FORMAT_ID) ||
                        (cmdiu->fc_id != NVME_CMD_FC_ID) ||
                        (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
                return -EIO;
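
The Cmd IU sanity check above now looks for a format identifier (0xFD) in the first byte rather than the old "SCSI id", and iu_len still counts 32-bit words. A standalone sketch of that header validation; the trimmed struct, the 96-byte IU size, and the FC_TYPE_NVME value (assumed 0x28) are illustrative, not the full nvme_fc_cmd_iu:

#include <endian.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define CMD_FORMAT_ID	0xFD
#define CMD_FC_ID	0x28	/* assumed FC_TYPE_NVME value */

struct cmd_iu_hdr {
	uint8_t  format_id;
	uint8_t  fc_id;
	uint16_t iu_len_be;	/* big-endian, in 32-bit words */
};

static bool cmd_iu_valid(const void *buf, size_t buflen, size_t full_iu_len)
{
	struct cmd_iu_hdr hdr;

	if (buflen != full_iu_len)
		return false;
	memcpy(&hdr, buf, sizeof(hdr));
	if (hdr.format_id != CMD_FORMAT_ID || hdr.fc_id != CMD_FC_ID)
		return false;
	return be16toh(hdr.iu_len_be) == full_iu_len / 4;
}

int main(void)
{
	uint8_t iu[96] = { CMD_FORMAT_ID, CMD_FC_ID, 0x00, 96 / 4 };

	return cmd_iu_valid(iu, sizeof(iu), sizeof(iu)) ? 0 : 1;
}
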
index 32008d8..b6fca0e 100644 (file)
@@ -147,8 +147,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
        int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
+       struct blk_plug plug;
        sector_t sector;
-       int op, op_flags = 0, i;
+       int op, i;
+
+       if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+               return;
 
        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
@@ -156,21 +160,20 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
        }
 
        if (req->cmd->rw.opcode == nvme_cmd_write) {
-               op = REQ_OP_WRITE;
-               op_flags = REQ_SYNC | REQ_IDLE;
+               op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
-                       op_flags |= REQ_FUA;
+                       op |= REQ_FUA;
        } else {
                op = REQ_OP_READ;
        }
 
        if (is_pci_p2pdma_page(sg_page(req->sg)))
-               op_flags |= REQ_NOMERGE;
+               op |= REQ_NOMERGE;
 
        sector = le64_to_cpu(req->cmd->rw.slba);
        sector <<= (req->ns->blksize_shift - 9);
 
-       if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+       if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
@@ -180,8 +183,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;
-       bio_set_op_attrs(bio, op, op_flags);
+       bio->bi_opf = op;
 
+       blk_start_plug(&plug);
        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
@@ -190,7 +194,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
                        bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
                        bio_set_dev(bio, req->ns->bdev);
                        bio->bi_iter.bi_sector = sector;
-                       bio_set_op_attrs(bio, op, op_flags);
+                       bio->bi_opf = op;
 
                        bio_chain(bio, prev);
                        submit_bio(prev);
@@ -201,12 +205,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
        }
 
        submit_bio(bio);
+       blk_finish_plug(&plug);
 }
 
 static void nvmet_bdev_execute_flush(struct nvmet_req *req)
 {
        struct bio *bio = &req->b.inline_bio;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        bio_set_dev(bio, req->ns->bdev);
        bio->bi_private = req;
@@ -261,12 +269,10 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
-               if (status) {
-                       bio->bi_status = BLK_STS_IOERR;
-                       bio_endio(bio);
-               } else {
+               if (status)
+                       bio_io_error(bio);
+               else
                        submit_bio(bio);
-               }
        } else {
                nvmet_req_complete(req, status);
        }
@@ -274,6 +280,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
 
 static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
 {
+       if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
+               return;
+
        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
@@ -295,6 +304,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
        sector_t nr_sector;
        int ret;
 
+       if (!nvmet_check_data_len(req, 0))
+               return;
+
        sector = le64_to_cpu(write_zeroes->slba) <<
                (req->ns->blksize_shift - 9);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
@@ -319,20 +331,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
-               req->data_len = nvmet_rw_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
-               req->data_len = 0;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
-               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
-                       sizeof(struct nvme_dsm_range);
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
-               req->data_len = 0;
                return 0;
        default:
                pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
index 05453f5..caebfce 100644 (file)
@@ -126,7 +126,7 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
                        mempool_free(req->f.bvec, req->ns->bvec_pool);
        }
 
-       if (unlikely(ret != req->data_len))
+       if (unlikely(ret != req->transfer_len))
                status = errno_to_nvme_status(req, ret);
        nvmet_req_complete(req, status);
 }
@@ -146,7 +146,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
                is_sync = true;
 
        pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
-       if (unlikely(pos + req->data_len > req->ns->size)) {
+       if (unlikely(pos + req->transfer_len > req->ns->size)) {
                nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
                return true;
        }
@@ -173,7 +173,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
                nr_bvec--;
        }
 
-       if (WARN_ON_ONCE(total_len != req->data_len)) {
+       if (WARN_ON_ONCE(total_len != req->transfer_len)) {
                ret = -EIO;
                goto complete;
        }
@@ -232,6 +232,9 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 {
        ssize_t nr_bvec = req->sg_cnt;
 
+       if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+               return;
+
        if (!req->sg_cnt || !nr_bvec) {
                nvmet_req_complete(req, 0);
                return;
@@ -273,6 +276,8 @@ static void nvmet_file_flush_work(struct work_struct *w)
 
 static void nvmet_file_execute_flush(struct nvmet_req *req)
 {
+       if (!nvmet_check_data_len(req, 0))
+               return;
        INIT_WORK(&req->f.work, nvmet_file_flush_work);
        schedule_work(&req->f.work);
 }
@@ -331,6 +336,8 @@ static void nvmet_file_dsm_work(struct work_struct *w)
 
 static void nvmet_file_execute_dsm(struct nvmet_req *req)
 {
+       if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
+               return;
        INIT_WORK(&req->f.work, nvmet_file_dsm_work);
        schedule_work(&req->f.work);
 }
@@ -359,6 +366,8 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
 
 static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
 {
+       if (!nvmet_check_data_len(req, 0))
+               return;
        INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
        schedule_work(&req->f.work);
 }
@@ -371,20 +380,15 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_file_execute_rw;
-               req->data_len = nvmet_rw_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_file_execute_flush;
-               req->data_len = 0;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_file_execute_dsm;
-               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
-                       sizeof(struct nvme_dsm_range);
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_file_execute_write_zeroes;
-               req->data_len = 0;
                return 0;
        default:
                pr_err("unhandled cmd for file ns %d on qid %d\n",
index 11f5aea..a758bb3 100644 (file)
@@ -76,7 +76,6 @@ static void nvme_loop_complete_rq(struct request *req)
 {
        struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 
-       nvme_cleanup_cmd(req);
        sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
        nvme_complete_rq(req);
 }
@@ -102,8 +101,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
         * aborts.  We don't even bother to allocate a struct request
         * for them but rather special case them here.
         */
-       if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
-                       cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+       if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
+                                    cqe->command_id))) {
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        } else {
@@ -126,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
        struct nvme_loop_iod *iod =
                container_of(work, struct nvme_loop_iod, work);
 
-       nvmet_req_execute(&iod->req);
+       iod->req.execute(&iod->req);
 }
 
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
index c51f8dd..46df45e 100644 (file)
@@ -304,8 +304,6 @@ struct nvmet_req {
                } f;
        };
        int                     sg_cnt;
-       /* data length as parsed from the command: */
-       size_t                  data_len;
        /* data length as parsed from the SGL descriptor: */
        size_t                  transfer_len;
 
@@ -375,7 +373,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
 void nvmet_req_uninit(struct nvmet_req *req);
-void nvmet_req_execute(struct nvmet_req *req);
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
 void nvmet_req_complete(struct nvmet_req *req, u16 status);
 int nvmet_req_alloc_sgl(struct nvmet_req *req);
 void nvmet_req_free_sgl(struct nvmet_req *req);
@@ -495,6 +493,12 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
                        req->ns->blksize_shift;
 }
 
+static inline u32 nvmet_dsm_len(struct nvmet_req *req)
+{
+       return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
+               sizeof(struct nvme_dsm_range);
+}
+
 u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
 
 /* Convert a 32-bit number to a 16-bit 0's based number */
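
A standalone check of the nvmet_dsm_len() arithmetic added above: dsm.nr is zero's based, and each range descriptor is 16 bytes (two 32-bit fields plus a 64-bit starting LBA), so nr == 0 still means one 16-byte range. The struct is an illustrative mirror of nvme_dsm_range:

#include <stdint.h>
#include <stdio.h>

struct dsm_range {
	uint32_t cattr;
	uint32_t nlb;
	uint64_t slba;
};

int main(void)
{
	uint32_t nr = 0;	/* zero's based: 0 means one range */
	size_t len = ((size_t)nr + 1) * sizeof(struct dsm_range);

	printf("%zu bytes\n", len);	/* 16 */
	return 0;
}
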
index 36d906a..37d262a 100644 (file)
@@ -603,7 +603,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
                return;
        }
 
-       nvmet_req_execute(&rsp->req);
+       rsp->req.execute(&rsp->req);
 }
 
 static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -672,13 +672,13 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
                return 0;
 
        ret = nvmet_req_alloc_sgl(&rsp->req);
-       if (ret < 0)
+       if (unlikely(ret < 0))
                goto error_out;
 
        ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
                        rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
                        nvmet_data_dir(&rsp->req));
-       if (ret < 0)
+       if (unlikely(ret < 0))
                goto error_out;
        rsp->n_rdma += ret;
 
@@ -746,7 +746,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
                                queue->cm_id->port_num, &rsp->read_cqe, NULL))
                        nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
        } else {
-               nvmet_req_execute(&rsp->req);
+               rsp->req.execute(&rsp->req);
        }
 
        return true;
index d535080..af674fc 100644 (file)
@@ -320,7 +320,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
        struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
        u32 len = le32_to_cpu(sgl->length);
 
-       if (!cmd->req.data_len)
+       if (!len)
                return 0;
 
        if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
@@ -813,13 +813,11 @@ free_crypto:
 static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
                struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
 {
+       size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
        int ret;
 
-       /* recover the expected data transfer length */
-       req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
-
        if (!nvme_is_write(cmd->req.cmd) ||
-           req->data_len > cmd->req.port->inline_data_size) {
+           data_len > cmd->req.port->inline_data_size) {
                nvmet_prepare_receive_pdu(queue);
                return;
        }
@@ -932,7 +930,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
                goto out;
        }
 
-       nvmet_req_execute(&queue->cmd->req);
+       queue->cmd->req.execute(&queue->cmd->req);
 out:
        nvmet_prepare_receive_pdu(queue);
        return ret;
@@ -1052,7 +1050,7 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
                        nvmet_tcp_prep_recv_ddgst(cmd);
                        return 0;
                }
-               nvmet_req_execute(&cmd->req);
+               cmd->req.execute(&cmd->req);
        }
 
        nvmet_prepare_receive_pdu(queue);
@@ -1092,7 +1090,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
 
        if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
            cmd->rbytes_done == cmd->req.transfer_len)
-               nvmet_req_execute(&cmd->req);
+               cmd->req.execute(&cmd->req);
        ret = 0;
 out:
        nvmet_prepare_receive_pdu(queue);
index 067c9fe..e8c30b3 100644 (file)
@@ -4,33 +4,60 @@
  */
 
 /*
- * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
- * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
+ * This file contains definitions relative to FC-NVME-2 r1.06
+ * (T11-2019-00210-v001).
  */
 
 #ifndef _NVME_FC_H
 #define _NVME_FC_H 1
 
+#include <uapi/scsi/fc/fc_fs.h>
 
-#define NVME_CMD_SCSI_ID               0xFD
+#define NVME_CMD_FORMAT_ID             0xFD
 #define NVME_CMD_FC_ID                 FC_TYPE_NVME
 
 /* FC-NVME Cmd IU Flags */
-#define FCNVME_CMD_FLAGS_DIRMASK       0x03
-#define FCNVME_CMD_FLAGS_WRITE         0x01
-#define FCNVME_CMD_FLAGS_READ          0x02
+enum {
+       FCNVME_CMD_FLAGS_DIRMASK        = 0x03,
+       FCNVME_CMD_FLAGS_WRITE          = (1 << 0),
+       FCNVME_CMD_FLAGS_READ           = (1 << 1),
+
+       FCNVME_CMD_FLAGS_PICWP          = (1 << 2),
+};
+
+enum {
+       FCNVME_CMD_CAT_MASK             = 0x0F,
+       FCNVME_CMD_CAT_ADMINQ           = 0x01,
+       FCNVME_CMD_CAT_CSSMASK          = 0x07,
+       FCNVME_CMD_CAT_CSSFLAG          = 0x08,
+};
+
+static inline __u8 fccmnd_set_cat_admin(__u8 rsv_cat)
+{
+       return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_ADMINQ;
+}
+
+static inline __u8 fccmnd_set_cat_css(__u8 rsv_cat, __u8 css)
+{
+       return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_CSSFLAG |
+               (css & FCNVME_CMD_CAT_CSSMASK);
+}
 
 struct nvme_fc_cmd_iu {
-       __u8                    scsi_id;
+       __u8                    format_id;
        __u8                    fc_id;
        __be16                  iu_len;
-       __u8                    rsvd4[3];
+       __u8                    rsvd4[2];
+       __u8                    rsv_cat;
        __u8                    flags;
        __be64                  connection_id;
        __be32                  csn;
        __be32                  data_len;
        struct nvme_command     sqe;
-       __be32                  rsvd88[2];
+       __u8                    dps;
+       __u8                    lbads;
+       __be16                  ms;
+       __be32                  rsvd92;
 };
 
 #define NVME_FC_SIZEOF_ZEROS_RSP       12
@@ -38,11 +65,12 @@ struct nvme_fc_cmd_iu {
 enum {
        FCNVME_SC_SUCCESS               = 0,
        FCNVME_SC_INVALID_FIELD         = 1,
-       FCNVME_SC_INVALID_CONNID        = 2,
+       /* reserved                       2 */
+       FCNVME_SC_ILL_CONN_PARAMS       = 3,
 };
 
 struct nvme_fc_ersp_iu {
-       __u8                    status_code;
+       __u8                    ersp_result;
        __u8                    rsvd1;
        __be16                  iu_len;
        __be32                  rsn;
@@ -53,14 +81,44 @@ struct nvme_fc_ersp_iu {
 };
 
 
-/* FC-NVME Link Services */
+#define FCNVME_NVME_SR_OPCODE  0x01
+
+struct nvme_fc_nvme_sr_iu {
+       __u8                    fc_id;
+       __u8                    opcode;
+       __u8                    rsvd2;
+       __u8                    retry_rctl;
+       __be32                  rsvd4;
+};
+
+
+enum {
+       FCNVME_SRSTAT_ACC               = 0x0,
+       FCNVME_SRSTAT_INV_FCID          = 0x1,
+       /* reserved                       0x2 */
+       FCNVME_SRSTAT_LOGICAL_ERR       = 0x3,
+       FCNVME_SRSTAT_INV_QUALIF        = 0x4,
+       FCNVME_SRSTAT_UNABL2PERFORM     = 0x9,
+};
+
+struct nvme_fc_nvme_sr_rsp_iu {
+       __u8                    fc_id;
+       __u8                    opcode;
+       __u8                    rsvd2;
+       __u8                    status;
+       __be32                  rsvd4;
+};
+
+
+/* FC-NVME Link Services - LS cmd values (w0 bits 31:24) */
 enum {
        FCNVME_LS_RSVD                  = 0,
        FCNVME_LS_RJT                   = 1,
        FCNVME_LS_ACC                   = 2,
-       FCNVME_LS_CREATE_ASSOCIATION    = 3,
-       FCNVME_LS_CREATE_CONNECTION     = 4,
-       FCNVME_LS_DISCONNECT            = 5,
+       FCNVME_LS_CREATE_ASSOCIATION    = 3,    /* Create Association */
+       FCNVME_LS_CREATE_CONNECTION     = 4,    /* Create I/O Connection */
+       FCNVME_LS_DISCONNECT_ASSOC      = 5,    /* Disconnect Association */
+       FCNVME_LS_DISCONNECT_CONN       = 6,    /* Disconnect Connection */
 };
 
 /* FC-NVME Link Service Descriptors */
@@ -117,14 +175,17 @@ enum fcnvme_ls_rjt_reason {
        FCNVME_RJT_RC_UNSUP             = 0x0b,
        /* command not supported */
 
-       FCNVME_RJT_RC_INPROG            = 0x0e,
-       /* command already in progress */
-
        FCNVME_RJT_RC_INV_ASSOC         = 0x40,
-       /* Invalid Association ID*/
+       /* Invalid Association ID */
 
        FCNVME_RJT_RC_INV_CONN          = 0x41,
-       /* Invalid Connection ID*/
+       /* Invalid Connection ID */
+
+       FCNVME_RJT_RC_INV_PARAM         = 0x42,
+       /* Invalid Parameters */
+
+       FCNVME_RJT_RC_INSUF_RES         = 0x43,
+       /* Insufficient Resources */
 
        FCNVME_RJT_RC_VENDOR            = 0xff,
        /* vendor specific error */
@@ -138,14 +199,32 @@ enum fcnvme_ls_rjt_explan {
        FCNVME_RJT_EXP_OXID_RXID        = 0x17,
        /* invalid OX_ID-RX_ID combination */
 
-       FCNVME_RJT_EXP_INSUF_RES        = 0x29,
-       /* insufficient resources */
-
        FCNVME_RJT_EXP_UNAB_DATA        = 0x2a,
        /* unable to supply requested data */
 
        FCNVME_RJT_EXP_INV_LEN          = 0x2d,
        /* Invalid payload length */
+
+       FCNVME_RJT_EXP_INV_ERSP_RAT     = 0x40,
+       /* Invalid NVMe_ERSP Ratio */
+
+       FCNVME_RJT_EXP_INV_CTLR_ID      = 0x41,
+       /* Invalid Controller ID */
+
+       FCNVME_RJT_EXP_INV_QUEUE_ID     = 0x42,
+       /* Invalid Queue ID */
+
+       FCNVME_RJT_EXP_INV_SQSIZE       = 0x43,
+       /* Invalid Submission Queue Size */
+
+       FCNVME_RJT_EXP_INV_HOSTID       = 0x44,
+       /* Invalid HOST ID */
+
+       FCNVME_RJT_EXP_INV_HOSTNQN      = 0x45,
+       /* Invalid HOSTNQN */
+
+       FCNVME_RJT_EXP_INV_SUBNQN       = 0x46,
+       /* Invalid SUBNQN */
 };
 
 /* FCNVME_LSDESC_RJT */
@@ -209,21 +288,11 @@ struct fcnvme_lsdesc_cr_conn_cmd {
        __be32  rsvd52;
 };
 
-/* Disconnect Scope Values */
-enum {
-       FCNVME_DISCONN_ASSOCIATION      = 0,
-       FCNVME_DISCONN_CONNECTION       = 1,
-};
-
 /* FCNVME_LSDESC_DISCONN_CMD */
 struct fcnvme_lsdesc_disconn_cmd {
        __be32  desc_tag;               /* FCNVME_LSDESC_xxx */
        __be32  desc_len;
-       u8      rsvd8[3];
-       /* note: scope is really a 1 bit field */
-       u8      scope;                  /* FCNVME_DISCONN_xxx */
-       __be32  rsvd12;
-       __be64  id;
+       __be32  rsvd8[4];
 };
 
 /* FCNVME_LSDESC_CONN_ID */
@@ -242,9 +311,14 @@ struct fcnvme_lsdesc_assoc_id {
 
 /* r_ctl values */
 enum {
-       FCNVME_RS_RCTL_DATA             = 1,
-       FCNVME_RS_RCTL_XFER_RDY         = 5,
-       FCNVME_RS_RCTL_RSP              = 8,
+       FCNVME_RS_RCTL_CMND             = 0x6,
+       FCNVME_RS_RCTL_DATA             = 0x1,
+       FCNVME_RS_RCTL_CONF             = 0x3,
+       FCNVME_RS_RCTL_SR               = 0x9,
+       FCNVME_RS_RCTL_XFER_RDY         = 0x5,
+       FCNVME_RS_RCTL_RSP              = 0x7,
+       FCNVME_RS_RCTL_ERSP             = 0x8,
+       FCNVME_RS_RCTL_SR_RSP           = 0xA,
 };
 
 
@@ -264,7 +338,10 @@ struct fcnvme_ls_acc_hdr {
        struct fcnvme_ls_rqst_w0                w0;
        __be32                                  desc_list_len;
        struct fcnvme_lsdesc_rqst               rqst;
-       /* Followed by cmd-specific ACC descriptors, see next definitions */
+       /*
+        * Followed by cmd-specific ACCEPT descriptors, see xxx_acc
+        * definitions below
+        */
 };
 
 /* FCNVME_LS_CREATE_ASSOCIATION */
@@ -302,25 +379,39 @@ struct fcnvme_ls_cr_conn_acc {
        struct fcnvme_lsdesc_conn_id            connectid;
 };
 
-/* FCNVME_LS_DISCONNECT */
-struct fcnvme_ls_disconnect_rqst {
+/* FCNVME_LS_DISCONNECT_ASSOC */
+struct fcnvme_ls_disconnect_assoc_rqst {
        struct fcnvme_ls_rqst_w0                w0;
        __be32                                  desc_list_len;
        struct fcnvme_lsdesc_assoc_id           associd;
        struct fcnvme_lsdesc_disconn_cmd        discon_cmd;
 };
 
-struct fcnvme_ls_disconnect_acc {
+struct fcnvme_ls_disconnect_assoc_acc {
+       struct fcnvme_ls_acc_hdr                hdr;
+};
+
+
+/* FCNVME_LS_DISCONNECT_CONN */
+struct fcnvme_ls_disconnect_conn_rqst {
+       struct fcnvme_ls_rqst_w0                w0;
+       __be32                                  desc_list_len;
+       struct fcnvme_lsdesc_assoc_id           associd;
+       struct fcnvme_lsdesc_disconn_cmd        connectid;
+};
+
+struct fcnvme_ls_disconnect_conn_acc {
        struct fcnvme_ls_acc_hdr                hdr;
 };
 
 
 /*
- * Yet to be defined in FC-NVME:
+ * Default R_A_TOV is pulled in from fc_fs.h but needs conversion
+ * from ms to seconds for our use.
  */
-#define NVME_FC_CONNECT_TIMEOUT_SEC    2               /* 2 seconds */
-#define NVME_FC_LS_TIMEOUT_SEC         2               /* 2 seconds */
-#define NVME_FC_TGTOP_TIMEOUT_SEC      2               /* 2 seconds */
+#define FC_TWO_TIMES_R_A_TOV           (2 * (FC_DEF_R_A_TOV / 1000))
+#define NVME_FC_LS_TIMEOUT_SEC         FC_TWO_TIMES_R_A_TOV
+#define NVME_FC_TGTOP_TIMEOUT_SEC      FC_TWO_TIMES_R_A_TOV
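Assuming the stock fc_fs.h default of FC_DEF_R_A_TOV = 10000 ms, the new value works out to FC_TWO_TIMES_R_A_TOV = 2 * (10000 / 1000) = 20, so the LS and target-op timeouts grow from the previous hard-coded 2 seconds to 20 seconds.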
 
 /*
  * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>"
@@ -328,6 +419,7 @@ struct fcnvme_ls_disconnect_acc {
  * in front of the <16hexdigits>.  Without is considered the "min" string
  * and with is considered the "max" string. The hexdigits may be upper
  * or lower case.
+ * Note: FC-NVME-2 standard requires a "0x" prefix.
  */
 #define NVME_FC_TRADDR_NNLEN           3       /* "?n-" */
 #define NVME_FC_TRADDR_OXNNLEN         5       /* "?n-0x" */
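As an illustration only (the address below is made up, not taken from the patch), a "max"-form transport address matching this layout would be nn-0x200000e08b0a1234:pn-0x100000e08b0a1234; the corresponding "min" form is the same string with both 0x prefixes dropped.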
index f61d690..3eca4f7 100644 (file)
@@ -107,8 +107,22 @@ enum {
        NVME_REG_AQA    = 0x0024,       /* Admin Queue Attributes */
        NVME_REG_ASQ    = 0x0028,       /* Admin SQ Base Address */
        NVME_REG_ACQ    = 0x0030,       /* Admin CQ Base Address */
-       NVME_REG_CMBLOC = 0x0038,       /* Controller Memory Buffer Location */
+       NVME_REG_CMBLOC = 0x0038,       /* Controller Memory Buffer Location */
        NVME_REG_CMBSZ  = 0x003c,       /* Controller Memory Buffer Size */
+       NVME_REG_BPINFO = 0x0040,       /* Boot Partition Information */
+       NVME_REG_BPRSEL = 0x0044,       /* Boot Partition Read Select */
+       NVME_REG_BPMBL  = 0x0048,       /* Boot Partition Memory Buffer
+                                        * Location
+                                        */
+       NVME_REG_PMRCAP = 0x0e00,       /* Persistent Memory Capabilities */
+       NVME_REG_PMRCTL = 0x0e04,       /* Persistent Memory Region Control */
+       NVME_REG_PMRSTS = 0x0e08,       /* Persistent Memory Region Status */
+       NVME_REG_PMREBS = 0x0e0c,       /* Persistent Memory Region Elasticity
+                                        * Buffer Size
+                                        */
+       NVME_REG_PMRSWTP = 0x0e10,      /* Persistent Memory Region Sustained
+                                        * Write Throughput
+                                        */
        NVME_REG_DBS    = 0x1000,       /* SQ 0 Tail Doorbell */
 };
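The Boot Partition and Persistent Memory Region registers added above are ordinary MMIO offsets from the controller BAR. A minimal sketch of reading one of them, assuming "bar" is the ioremapped BAR0 and that linux/io.h and linux/nvme.h are included (the helper name is hypothetical, not from this patch):

	static u32 nvme_read_pmrcap(void __iomem *bar)
	{
		/* Persistent Memory Capabilities sits at offset 0x0e00 */
		return readl(bar + NVME_REG_PMRCAP);
	}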
 
@@ -295,6 +309,14 @@ enum {
        NVME_CTRL_OACS_DIRECTIVES               = 1 << 5,
        NVME_CTRL_OACS_DBBUF_SUPP               = 1 << 8,
        NVME_CTRL_LPA_CMD_EFFECTS_LOG           = 1 << 1,
+       NVME_CTRL_CTRATT_128_ID                 = 1 << 0,
+       NVME_CTRL_CTRATT_NON_OP_PSP             = 1 << 1,
+       NVME_CTRL_CTRATT_NVM_SETS               = 1 << 2,
+       NVME_CTRL_CTRATT_READ_RECV_LVLS         = 1 << 3,
+       NVME_CTRL_CTRATT_ENDURANCE_GROUPS       = 1 << 4,
+       NVME_CTRL_CTRATT_PREDICTABLE_LAT        = 1 << 5,
+       NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY  = 1 << 7,
+       NVME_CTRL_CTRATT_UUID_LIST              = 1 << 9,
 };
 
 struct nvme_lbaf {
@@ -352,6 +374,9 @@ enum {
        NVME_ID_CNS_NS_PRESENT          = 0x11,
        NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
        NVME_ID_CNS_CTRL_LIST           = 0x13,
+       NVME_ID_CNS_SCNDRY_CTRL_LIST    = 0x15,
+       NVME_ID_CNS_NS_GRANULARITY      = 0x16,
+       NVME_ID_CNS_UUID_LIST           = 0x17,
 };
 
 enum {
@@ -409,7 +434,8 @@ struct nvme_smart_log {
        __u8                    avail_spare;
        __u8                    spare_thresh;
        __u8                    percent_used;
-       __u8                    rsvd6[26];
+       __u8                    endu_grp_crit_warn_sumry;
+       __u8                    rsvd7[25];
        __u8                    data_units_read[16];
        __u8                    data_units_written[16];
        __u8                    host_reads[16];
@@ -423,7 +449,11 @@ struct nvme_smart_log {
        __le32                  warning_temp_time;
        __le32                  critical_comp_time;
        __le16                  temp_sensor[8];
-       __u8                    rsvd216[296];
+       __le32                  thm_temp1_trans_count;
+       __le32                  thm_temp2_trans_count;
+       __le32                  thm_temp1_total_time;
+       __le32                  thm_temp2_total_time;
+       __u8                    rsvd232[280];
 };
 
 struct nvme_fw_slot_info_log {
@@ -440,6 +470,7 @@ enum {
        NVME_CMD_EFFECTS_NIC            = 1 << 3,
        NVME_CMD_EFFECTS_CCC            = 1 << 4,
        NVME_CMD_EFFECTS_CSE_MASK       = 3 << 16,
+       NVME_CMD_EFFECTS_UUID_SEL       = 1 << 19,
 };
 
 struct nvme_effects_log {
@@ -563,6 +594,7 @@ enum nvme_opcode {
        nvme_cmd_compare        = 0x05,
        nvme_cmd_write_zeroes   = 0x08,
        nvme_cmd_dsm            = 0x09,
+       nvme_cmd_verify         = 0x0c,
        nvme_cmd_resv_register  = 0x0d,
        nvme_cmd_resv_report    = 0x0e,
        nvme_cmd_resv_acquire   = 0x11,
@@ -806,10 +838,14 @@ enum nvme_admin_opcode {
        nvme_admin_ns_mgmt              = 0x0d,
        nvme_admin_activate_fw          = 0x10,
        nvme_admin_download_fw          = 0x11,
+       nvme_admin_dev_self_test        = 0x14,
        nvme_admin_ns_attach            = 0x15,
        nvme_admin_keep_alive           = 0x18,
        nvme_admin_directive_send       = 0x19,
        nvme_admin_directive_recv       = 0x1a,
+       nvme_admin_virtual_mgmt         = 0x1c,
+       nvme_admin_nvme_mi_send         = 0x1d,
+       nvme_admin_nvme_mi_recv         = 0x1e,
        nvme_admin_dbbuf                = 0x7C,
        nvme_admin_format_nvm           = 0x80,
        nvme_admin_security_send        = 0x81,
@@ -873,6 +909,7 @@ enum {
        NVME_FEAT_PLM_CONFIG    = 0x13,
        NVME_FEAT_PLM_WINDOW    = 0x14,
        NVME_FEAT_HOST_BEHAVIOR = 0x16,
+       NVME_FEAT_SANITIZE      = 0x17,
        NVME_FEAT_SW_PROGRESS   = 0x80,
        NVME_FEAT_HOST_ID       = 0x81,
        NVME_FEAT_RESV_MASK     = 0x82,
@@ -883,6 +920,10 @@ enum {
        NVME_LOG_FW_SLOT        = 0x03,
        NVME_LOG_CHANGED_NS     = 0x04,
        NVME_LOG_CMD_EFFECTS    = 0x05,
+       NVME_LOG_DEVICE_SELF_TEST = 0x06,
+       NVME_LOG_TELEMETRY_HOST = 0x07,
+       NVME_LOG_TELEMETRY_CTRL = 0x08,
+       NVME_LOG_ENDURANCE_GROUP = 0x09,
        NVME_LOG_ANA            = 0x0c,
        NVME_LOG_DISC           = 0x70,
        NVME_LOG_RESERVATION    = 0x80,
@@ -1290,7 +1331,11 @@ enum {
        NVME_SC_SGL_INVALID_OFFSET      = 0x16,
        NVME_SC_SGL_INVALID_SUBTYPE     = 0x17,
 
+       NVME_SC_SANITIZE_FAILED         = 0x1C,
+       NVME_SC_SANITIZE_IN_PROGRESS    = 0x1D,
+
        NVME_SC_NS_WRITE_PROTECTED      = 0x20,
+       NVME_SC_CMD_INTERRUPTED         = 0x21,
 
        NVME_SC_LBA_RANGE               = 0x80,
        NVME_SC_CAP_EXCEEDED            = 0x81,
@@ -1328,6 +1373,8 @@ enum {
        NVME_SC_NS_NOT_ATTACHED         = 0x11a,
        NVME_SC_THIN_PROV_NOT_SUPP      = 0x11b,
        NVME_SC_CTRL_LIST_INVALID       = 0x11c,
+       NVME_SC_BP_WRITE_PROHIBITED     = 0x11e,
+       NVME_SC_PMR_SAN_PROHIBITED      = 0x123,
 
        /*
         * I/O Command Set Specific - NVM commands:
@@ -1368,6 +1415,7 @@ enum {
        NVME_SC_ANA_INACCESSIBLE        = 0x302,
        NVME_SC_ANA_TRANSITION          = 0x303,
        NVME_SC_HOST_PATH_ERROR         = 0x370,
+       NVME_SC_HOST_ABORTED_CMD        = 0x371,
 
        NVME_SC_CRD                     = 0x1800,
        NVME_SC_DNR                     = 0x4000,