drivers/nvme/target/passthru.c
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target Passthrough command implementation.
 *
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 * Copyright (c) 2019-2020, Eideticom Inc.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>

#include "../host/nvme.h"
#include "nvmet.h"

MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);

/*
 * xarray to maintain one passthru subsystem per nvme controller.
 */
static DEFINE_XARRAY(passthru_subsystems);

void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl)
{
        /*
         * Multiple command set support can only be declared if the underlying
         * controller actually supports it.
         */
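        /* Bit 43 of CAP is CSS bit 6: support for multiple I/O command sets. */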
        if (!nvme_multi_css(ctrl->subsys->passthru_ctrl))
                ctrl->cap &= ~(1ULL << 43);
}

static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
        unsigned int max_hw_sectors;
        int page_shift;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
        if (status)
                goto out_free;

        id->cntlid = cpu_to_le16(ctrl->cntlid);
        id->ver = cpu_to_le32(ctrl->subsys->ver);

        /*
         * The passthru NVMe driver may have a limit on the number of segments
         * which depends on the host's memory fragmentation. To solve this,
         * ensure mdts is limited to a number of pages equal to the segment
         * limit.
         */
        max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
                                      pctrl->max_hw_sectors);

        /*
         * nvmet_passthru_map_sg is limited to using a single bio, so limit
         * the mdts based on BIO_MAX_VECS as well.
         */
        max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
                                      max_hw_sectors);

        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;

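        /*
         * MDTS is expressed as a power of two of the minimum memory page
         * size (2^page_shift bytes), so convert the limit from 512-byte
         * sectors into that unit before taking the log.
         */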
        id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

        id->acl = 3;
        /*
         * We export the aerl limit of the fabrics controller; update this
         * when passthru-based aerl support is added.
         */
        id->aerl = NVMET_ASYNC_EVENTS - 1;

        /* emulate kas since most PCIe ctrls don't support kas */
        id->kas = cpu_to_le16(NVMET_KAS);

        /* don't support host memory buffer */
        id->hmpre = 0;
        id->hmmin = 0;

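        /*
         * Constrain the SQ and CQ entry sizes to the fabrics values:
         * 64-byte submission queue entries and 16-byte completion queue
         * entries.
         */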
        id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
        id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
        id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

        /* don't support fused commands */
        id->fuses = 0;

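        /*
         * Advertise SGL support in the SGLS field: keyed SGL data block
         * descriptors when the transport uses keyed SGLs, and in-capsule
         * (inline) data when the port has a non-zero inline_data_size.
         */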
        id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
        if (ctrl->ops->flags & NVMF_KEYED_SGLS)
                id->sgls |= cpu_to_le32(1 << 2);
        if (req->port->inline_data_size)
                id->sgls |= cpu_to_le32(1 << 20);

        /*
         * When the passthru controller is set up using the nvme-loop transport
         * it will export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will
         * fail in the nvme_init_subsystem()->nvme_active_ctrl() code path in
         * nvme/host/core.c with a duplicate ctrl subsysnqn. In order to
         * prevent that we mask the passthru-ctrl subsysnqn with the target
         * ctrl subsysnqn.
         */
        memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));

        /* use fabric id-ctrl values */
        id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
                                req->port->inline_data_size) / 16);
        id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);

        id->msdbd = ctrl->ops->msdbd;

        /* Support multipath connections with fabrics */
        id->cmic |= 1 << 1;

        /* Disable reservations, see nvmet_parse_passthru_io_cmd() */
        id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));

out_free:
        kfree(id);
        return status;
}

static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
{
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ns *id;
        int i;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
        if (status)
                goto out_free;

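        /*
         * Zero out any LBA format that uses metadata so that it is not
         * advertised to the host, and clear the "metadata at the end of
         * the LBA" flag in FLBAS.
         */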
        for (i = 0; i < (id->nlbaf + 1); i++)
                if (id->lbaf[i].ms)
                        memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));

        id->flbas = id->flbas & ~(1 << 4);

        /*
         * Presently the NVMe-oF target code does not support sending
         * metadata, so we must disable it here. This should be updated
         * once the target starts supporting metadata.
         */
        id->mc = 0;

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

out_free:
        kfree(id);
        return status;
}

static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
        struct request *rq = req->p.rq;
        int status;

        status = nvme_execute_passthru_rq(rq);

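        /*
         * Rewrite Identify results so that the data returned to the host
         * reflects this fabrics target rather than the underlying passthru
         * controller.
         */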
        if (status == NVME_SC_SUCCESS &&
            req->cmd->common.opcode == nvme_admin_identify) {
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        nvmet_passthru_override_id_ctrl(req);
                        break;
                case NVME_ID_CNS_NS:
                        nvmet_passthru_override_id_ns(req);
                        break;
                }
        } else if (status < 0)
                status = NVME_SC_INTERNAL;

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, status);
        blk_mq_free_request(rq);
}

static void nvmet_passthru_req_done(struct request *rq,
                                    blk_status_t blk_status)
{
        struct nvmet_req *req = rq->end_io_data;

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, nvme_req(rq)->status);
        blk_mq_free_request(rq);
}

static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
        struct scatterlist *sg;
        struct bio *bio;
        int i;

        if (req->sg_cnt > BIO_MAX_VECS)
                return -EINVAL;

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->p.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
                bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
                bio->bi_end_io = bio_put;
        }
        bio->bi_opf = req_op(rq);

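        /*
         * Map each scatterlist entry of the target request into the bio;
         * if a page cannot be added in full, bail out so the caller fails
         * the command instead of issuing a short transfer.
         */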
        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
                                    sg->offset) < sg->length) {
                        nvmet_req_bio_put(req, bio);
                        return -EINVAL;
                }
        }

        blk_rq_bio_prep(rq, bio, req->sg_cnt);

        return 0;
}

static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
        struct request_queue *q = ctrl->admin_q;
        struct nvme_ns *ns = NULL;
        struct request *rq = NULL;
        unsigned int timeout;
        u32 effects;
        u16 status;
        int ret;

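        /*
         * I/O commands are issued on the queue of the namespace they
         * address and use the subsystem's I/O timeout; admin commands
         * (qid 0) go to the passthru controller's admin queue.
         */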
        if (likely(req->sq->qid != 0)) {
                u32 nsid = le32_to_cpu(req->cmd->common.nsid);

                ns = nvme_find_get_ns(ctrl, nsid);
                if (unlikely(!ns)) {
                        pr_err("failed to get passthru ns nsid:%u\n", nsid);
                        status = NVME_SC_INVALID_NS | NVME_SC_DNR;
                        goto out;
                }

                q = ns->queue;
                timeout = nvmet_req_subsys(req)->io_timeout;
        } else {
                timeout = nvmet_req_subsys(req)->admin_timeout;
        }

        rq = nvme_alloc_request(q, req->cmd, 0);
        if (IS_ERR(rq)) {
                status = NVME_SC_INTERNAL;
                goto out_put_ns;
        }

        if (timeout)
                rq->timeout = timeout;

        if (req->sg_cnt) {
                ret = nvmet_passthru_map_sg(req, rq);
                if (unlikely(ret)) {
                        status = NVME_SC_INTERNAL;
                        goto out_put_req;
                }
        }

        /*
         * If there are effects for the command we are about to execute, or
         * an end_req function, we need to use nvme_execute_passthru_rq()
         * synchronously in a work item, since the end_req function and
         * nvme_passthru_end() can't be called in the request done callback,
         * which is typically in interrupt context.
         */
        effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
        if (req->p.use_workqueue || effects) {
                INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
                req->p.rq = rq;
                schedule_work(&req->p.work);
        } else {
                rq->end_io_data = req;
                blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
                                      nvmet_passthru_req_done);
        }

        if (ns)
                nvme_put_ns(ns);

        return;

out_put_req:
        blk_mq_free_request(rq);
out_put_ns:
        if (ns)
                nvme_put_ns(ns);
out:
        nvmet_req_complete(req, status);
}

/*
 * We need to emulate the set host behaviour feature so that the behaviour
 * requested by the target's host matches the behaviour already requested by
 * the device's host, and fail otherwise.
 */
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
        struct nvme_feat_host_behavior *host;
        u16 status = NVME_SC_INTERNAL;
        int ret;

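        /*
         * Allocate two copies: host[0] receives the value currently set on
         * the passthru controller, host[1] the value requested by the
         * remote host. The two must be identical.
         */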
        host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
        if (!host)
                goto out_complete_req;

        ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
                                host, sizeof(*host), NULL);
        if (ret)
                goto out_free_host;

        status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
        if (status)
                goto out_free_host;

        if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
                pr_warn("target host has requested different behaviour from the local host\n");
                status = NVME_SC_INTERNAL;
        }

out_free_host:
        kfree(host);
out_complete_req:
        nvmet_req_complete(req, status);
}

static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
{
        req->p.use_workqueue = false;
        req->execute = nvmet_passthru_execute_cmd;
        return NVME_SC_SUCCESS;
}

u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        switch (req->cmd->common.opcode) {
        case nvme_cmd_resv_register:
        case nvme_cmd_resv_report:
        case nvme_cmd_resv_acquire:
        case nvme_cmd_resv_release:
                /*
                 * Reservations cannot be supported properly because the
                 * underlying device has no way of differentiating different
                 * hosts that connect via fabrics. This could potentially be
                 * emulated in the future if regular targets grow support for
                 * this feature.
                 */
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }

        return nvmet_setup_passthru_command(req);
}

/*
 * Only features that are emulated or specifically allowed in the list are
 * passed down to the controller. This function implements the allow list for
 * both get and set features.
 */
static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
{
        switch (le32_to_cpu(req->cmd->features.fid)) {
        case NVME_FEAT_ARBITRATION:
        case NVME_FEAT_POWER_MGMT:
        case NVME_FEAT_LBA_RANGE:
        case NVME_FEAT_TEMP_THRESH:
        case NVME_FEAT_ERR_RECOVERY:
        case NVME_FEAT_VOLATILE_WC:
        case NVME_FEAT_WRITE_ATOMIC:
        case NVME_FEAT_AUTO_PST:
        case NVME_FEAT_TIMESTAMP:
        case NVME_FEAT_HCTM:
        case NVME_FEAT_NOPSC:
        case NVME_FEAT_RRL:
        case NVME_FEAT_PLM_CONFIG:
        case NVME_FEAT_PLM_WINDOW:
        case NVME_FEAT_HOST_BEHAVIOR:
        case NVME_FEAT_SANITIZE:
        case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
                return nvmet_setup_passthru_command(req);

        case NVME_FEAT_ASYNC_EVENT:
                /* There is no support for forwarding ASYNC events */
        case NVME_FEAT_IRQ_COALESCE:
        case NVME_FEAT_IRQ_CONFIG:
                /* The IRQ settings will not apply to the target controller */
        case NVME_FEAT_HOST_MEM_BUF:
                /*
                 * Any HMB that's set will not be passed through and will
                 * not work as expected
                 */
        case NVME_FEAT_SW_PROGRESS:
                /*
                 * The Pre-Boot Software Load Count doesn't make much
                 * sense for a target to export
                 */
        case NVME_FEAT_RESV_MASK:
        case NVME_FEAT_RESV_PERSIST:
                /* No reservations, see nvmet_parse_passthru_io_cmd() */
        default:
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
}

u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        /*
         * Pass through all vendor-specific commands.
         */
        if (req->cmd->common.opcode >= nvme_admin_vendor_start)
                return nvmet_setup_passthru_command(req);

        switch (req->cmd->common.opcode) {
        case nvme_admin_async_event:
                req->execute = nvmet_execute_async_event;
                return NVME_SC_SUCCESS;
        case nvme_admin_keep_alive:
                /*
                 * Most PCIe ctrls don't support the keep alive cmd, so we
                 * route keep alive to the non-passthru mode. In the future,
                 * change this code when PCIe ctrls with keep alive support
                 * become available.
                 */
                req->execute = nvmet_execute_keep_alive;
                return NVME_SC_SUCCESS;
        case nvme_admin_set_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_set_features;
                        return NVME_SC_SUCCESS;
                case NVME_FEAT_HOST_BEHAVIOR:
                        req->execute = nvmet_passthru_set_host_behaviour;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_get_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_get_features;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_identify:
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_CTRL:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                case NVME_ID_CNS_NS:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_NS:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                default:
                        return nvmet_setup_passthru_command(req);
                }
        case nvme_admin_get_log_page:
                return nvmet_setup_passthru_command(req);
        default:
                /* Reject commands not in the allowlist above */
                return nvmet_report_invalid_opcode(req);
        }
}

int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
        struct nvme_ctrl *ctrl;
        struct file *file;
        int ret = -EINVAL;
        void *old;

        mutex_lock(&subsys->lock);
        if (!subsys->passthru_ctrl_path)
                goto out_unlock;
        if (subsys->passthru_ctrl)
                goto out_unlock;

        if (subsys->nr_namespaces) {
                pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
                goto out_unlock;
        }

        file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto out_unlock;
        }

        ctrl = nvme_ctrl_from_file(file);
        if (!ctrl) {
                pr_err("failed to open nvme controller %s\n",
                       subsys->passthru_ctrl_path);

                goto out_put_file;
        }

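        /*
         * Register the controller in the passthru xarray keyed by cntlid;
         * an existing entry means the controller already backs another
         * passthru subsystem, in which case we bail out.
         */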
        old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
                         subsys, GFP_KERNEL);
        if (xa_is_err(old)) {
                ret = xa_err(old);
                goto out_put_file;
        }

        if (old)
                goto out_put_file;

        subsys->passthru_ctrl = ctrl;
        subsys->ver = ctrl->vs;

        if (subsys->ver < NVME_VS(1, 2, 1)) {
                pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
                        NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
                        NVME_TERTIARY(subsys->ver));
                subsys->ver = NVME_VS(1, 2, 1);
        }
        nvme_get_ctrl(ctrl);
        __module_get(subsys->passthru_ctrl->ops->module);
        ret = 0;

out_put_file:
        filp_close(file, NULL);
out_unlock:
        mutex_unlock(&subsys->lock);
        return ret;
}

static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        if (subsys->passthru_ctrl) {
                xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
                module_put(subsys->passthru_ctrl->ops->module);
                nvme_put_ctrl(subsys->passthru_ctrl);
        }
        subsys->passthru_ctrl = NULL;
        subsys->ver = NVMET_DEFAULT_VS;
}

void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
}

void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
        kfree(subsys->passthru_ctrl_path);
}