drivers/nvme/target/passthru.c
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target Passthrough command implementation.
 *
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 * Copyright (c) 2019-2020, Eideticom Inc.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>

#include "../host/nvme.h"
#include "nvmet.h"

MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);

/*
 * xarray to maintain one passthru subsystem per nvme controller.
 */
static DEFINE_XARRAY(passthru_subsystems);

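/*
 * Rework the Identify Controller data returned by the passthru controller so
 * that it reflects the fabrics target: override cntlid, version, mdts, the
 * emulated/limited feature fields, SGL support and the subsystem NQN before
 * copying the structure back to the host.
 */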
static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
        unsigned int max_hw_sectors;
        int page_shift;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(*id));
        if (status)
                goto out_free;

        id->cntlid = cpu_to_le16(ctrl->cntlid);
        id->ver = cpu_to_le32(ctrl->subsys->ver);

        /*
         * The passthru NVMe driver may have a limit on the number of segments
         * which depends on the host's memory fragmentation. To solve this,
         * ensure mdts is limited to the number of pages equal to the number
         * of segments.
         */
        max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
                                      pctrl->max_hw_sectors);

        /*
         * nvmet_passthru_map_sg is limited to using a single bio so limit
         * the mdts based on BIO_MAX_VECS as well
         */
        max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
                                      max_hw_sectors);

        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;

        id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;

        id->acl = 3;
        /*
         * We expose the aerl limit of the fabrics controller; update this
         * when passthru based aerl support is added.
         */
        id->aerl = NVMET_ASYNC_EVENTS - 1;

        /* emulate kas as most PCIe ctrls don't have support for kas */
        id->kas = cpu_to_le16(NVMET_KAS);

        /* don't support host memory buffer */
        id->hmpre = 0;
        id->hmmin = 0;

        id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
        id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
        id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

        /* don't support fuse commands */
        id->fuses = 0;

        id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
        if (ctrl->ops->flags & NVMF_KEYED_SGLS)
                id->sgls |= cpu_to_le32(1 << 2);
        if (req->port->inline_data_size)
                id->sgls |= cpu_to_le32(1 << 20);

        /*
         * When a passthru controller is set up using the nvme-loop transport
         * it will export the passthru ctrl's subsysnqn (PCIe NVMe ctrl) and
         * will fail in nvme/host/core.c in the
         * nvme_init_subsystem()->nvme_active_ctrl() code path with a
         * duplicate ctrl subsysnqn. In order to prevent that we mask the
         * passthru-ctrl subsysnqn with the target ctrl subsysnqn.
         */
        memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn));

        /* use fabric id-ctrl values */
        id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
                                req->port->inline_data_size) / 16);
        id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);

        id->msdbd = ctrl->ops->msdbd;

        /* Support multipath connections with fabrics */
        id->cmic |= 1 << 1;

        /* Disable reservations, see nvmet_parse_passthru_io_cmd() */
        id->oncs &= cpu_to_le16(~NVME_CTRL_ONCS_RESERVATIONS);

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(struct nvme_id_ctrl));

out_free:
        kfree(id);
        return status;
}

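/*
 * Rework the Identify Namespace data returned by the passthru controller:
 * clear any LBA formats that carry metadata and the metadata capability
 * fields, since the target does not support transferring metadata.
 */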
static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req)
{
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ns *id;
        int i;

        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id)
                return NVME_SC_INTERNAL;

        status = nvmet_copy_from_sgl(req, 0, id, sizeof(struct nvme_id_ns));
        if (status)
                goto out_free;

        for (i = 0; i < (id->nlbaf + 1); i++)
                if (id->lbaf[i].ms)
                        memset(&id->lbaf[i], 0, sizeof(id->lbaf[i]));

        id->flbas = id->flbas & ~(1 << 4);

        /*
         * Presently the NVMe-oF target code does not support sending
         * metadata, so we must disable it here. This should be updated
         * once the target starts supporting metadata.
         */
        id->mc = 0;

        status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

out_free:
        kfree(id);
        return status;
}

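/*
 * Work item used when a command has effects or needs post-processing: execute
 * the passthru request synchronously, apply the identify overrides on
 * success, then complete the nvmet request and free the block layer request.
 */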
static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
        struct request *rq = req->p.rq;
        u16 status;

        nvme_execute_passthru_rq(rq);

        status = nvme_req(rq)->status;
        if (status == NVME_SC_SUCCESS &&
            req->cmd->common.opcode == nvme_admin_identify) {
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        nvmet_passthru_override_id_ctrl(req);
                        break;
                case NVME_ID_CNS_NS:
                        nvmet_passthru_override_id_ns(req);
                        break;
                }
        }

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, status);
        blk_mq_free_request(rq);
}

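/* end_io callback for requests issued without the workqueue path */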
static void nvmet_passthru_req_done(struct request *rq,
                                    blk_status_t blk_status)
{
        struct nvmet_req *req = rq->end_io_data;

        req->cqe->result = nvme_req(rq)->result;
        nvmet_req_complete(req, nvme_req(rq)->status);
        blk_mq_free_request(rq);
}

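/*
 * Map the nvmet request's scatterlist into a single bio (using the inline
 * bio/bvecs for small transfers) and attach it to the block layer request.
 */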
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
        struct scatterlist *sg;
        struct bio *bio;
        int i;

        if (req->sg_cnt > BIO_MAX_VECS)
                return -EINVAL;

        if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
                bio = &req->p.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
                bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
                bio->bi_end_io = bio_put;
        }
        bio->bi_opf = req_op(rq);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
                                    sg->offset) < sg->length) {
                        if (bio != &req->p.inline_bio)
                                bio_put(bio);
                        return -EINVAL;
                }
        }

        blk_rq_bio_prep(rq, bio, req->sg_cnt);

        return 0;
}

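/*
 * Build and issue the passthru request: look up the namespace for I/O
 * commands, allocate a block layer request on the appropriate queue, map the
 * data, then either execute it from a work item (if the command has effects)
 * or asynchronously with nvmet_passthru_req_done() as the completion callback.
 */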
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
        struct request_queue *q = ctrl->admin_q;
        struct nvme_ns *ns = NULL;
        struct request *rq = NULL;
        unsigned int timeout;
        u32 effects;
        u16 status;
        int ret;

        if (likely(req->sq->qid != 0)) {
                u32 nsid = le32_to_cpu(req->cmd->common.nsid);

                ns = nvme_find_get_ns(ctrl, nsid);
                if (unlikely(!ns)) {
                        pr_err("failed to get passthru ns nsid:%u\n", nsid);
                        status = NVME_SC_INVALID_NS | NVME_SC_DNR;
                        goto out;
                }

                q = ns->queue;
                timeout = nvmet_req_subsys(req)->io_timeout;
        } else {
                timeout = nvmet_req_subsys(req)->admin_timeout;
        }

        rq = nvme_alloc_request(q, req->cmd, 0);
        if (IS_ERR(rq)) {
                status = NVME_SC_INTERNAL;
                goto out_put_ns;
        }

        if (timeout)
                rq->timeout = timeout;

        if (req->sg_cnt) {
                ret = nvmet_passthru_map_sg(req, rq);
                if (unlikely(ret)) {
                        status = NVME_SC_INTERNAL;
                        goto out_put_req;
                }
        }

        /*
         * If there are effects for the command we are about to execute, or
         * an end_req function, we need to use nvme_execute_passthru_rq()
         * synchronously in a work item, since the end_req function and
         * nvme_passthru_end() can't be called in the request done callback,
         * which is typically in interrupt context.
         */
        effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
        if (req->p.use_workqueue || effects) {
                INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
                req->p.rq = rq;
                schedule_work(&req->p.work);
        } else {
                rq->end_io_data = req;
                blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
                                      nvmet_passthru_req_done);
        }

        if (ns)
                nvme_put_ns(ns);

        return;

out_put_req:
        blk_mq_free_request(rq);
out_put_ns:
        if (ns)
                nvme_put_ns(ns);
out:
        nvmet_req_complete(req, status);
}

/*
 * We need to emulate the set-host-behaviour feature to ensure that the
 * behaviour requested by the target's host matches the behaviour already
 * requested by the device's host, and fail otherwise.
 */
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
        struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
        struct nvme_feat_host_behavior *host;
        u16 status = NVME_SC_INTERNAL;
        int ret;

        host = kzalloc(sizeof(*host) * 2, GFP_KERNEL);
        if (!host)
                goto out_complete_req;

        ret = nvme_get_features(ctrl, NVME_FEAT_HOST_BEHAVIOR, 0,
                                host, sizeof(*host), NULL);
        if (ret)
                goto out_free_host;

        status = nvmet_copy_from_sgl(req, 0, &host[1], sizeof(*host));
        if (status)
                goto out_free_host;

        if (memcmp(&host[0], &host[1], sizeof(host[0]))) {
                pr_warn("target host has requested different behaviour from the local host\n");
                status = NVME_SC_INTERNAL;
        }

out_free_host:
        kfree(host);
out_complete_req:
        nvmet_req_complete(req, status);
}

static u16 nvmet_setup_passthru_command(struct nvmet_req *req)
{
        req->p.use_workqueue = false;
        req->execute = nvmet_passthru_execute_cmd;
        return NVME_SC_SUCCESS;
}

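/*
 * Parse an I/O command for a passthru subsystem. Everything except
 * reservation commands is passed straight through to the underlying
 * controller.
 */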
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        switch (req->cmd->common.opcode) {
        case nvme_cmd_resv_register:
        case nvme_cmd_resv_report:
        case nvme_cmd_resv_acquire:
        case nvme_cmd_resv_release:
                /*
                 * Reservations cannot be supported properly because the
                 * underlying device has no way of differentiating different
                 * hosts that connect via fabrics. This could potentially be
                 * emulated in the future if regular targets grow support for
                 * this feature.
                 */
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }

        return nvmet_setup_passthru_command(req);
}

/*
 * Only features that are emulated or specifically allowed in the list below
 * are passed down to the controller. This function implements the allow list
 * for both get and set features.
 */
static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
{
        switch (le32_to_cpu(req->cmd->features.fid)) {
        case NVME_FEAT_ARBITRATION:
        case NVME_FEAT_POWER_MGMT:
        case NVME_FEAT_LBA_RANGE:
        case NVME_FEAT_TEMP_THRESH:
        case NVME_FEAT_ERR_RECOVERY:
        case NVME_FEAT_VOLATILE_WC:
        case NVME_FEAT_WRITE_ATOMIC:
        case NVME_FEAT_AUTO_PST:
        case NVME_FEAT_TIMESTAMP:
        case NVME_FEAT_HCTM:
        case NVME_FEAT_NOPSC:
        case NVME_FEAT_RRL:
        case NVME_FEAT_PLM_CONFIG:
        case NVME_FEAT_PLM_WINDOW:
        case NVME_FEAT_HOST_BEHAVIOR:
        case NVME_FEAT_SANITIZE:
        case NVME_FEAT_VENDOR_START ... NVME_FEAT_VENDOR_END:
                return nvmet_setup_passthru_command(req);

        case NVME_FEAT_ASYNC_EVENT:
                /* There is no support for forwarding ASYNC events */
        case NVME_FEAT_IRQ_COALESCE:
        case NVME_FEAT_IRQ_CONFIG:
                /* The IRQ settings will not apply to the target controller */
        case NVME_FEAT_HOST_MEM_BUF:
                /*
                 * Any HMB that's set will not be passed through and will
                 * not work as expected
                 */
        case NVME_FEAT_SW_PROGRESS:
                /*
                 * The Pre-Boot Software Load Count doesn't make much
                 * sense for a target to export
                 */
        case NVME_FEAT_RESV_MASK:
        case NVME_FEAT_RESV_PERSIST:
                /* No reservations, see nvmet_parse_passthru_io_cmd() */
        default:
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
}

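/*
 * Parse an admin command for a passthru subsystem: vendor specific commands
 * are passed straight through, commands that must be handled by the fabrics
 * target (async events, keep alive, selected features) are emulated, and
 * everything else goes through the allowlist below.
 */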
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
        /* Reject any commands with non-sgl flags set (i.e. fused commands) */
        if (req->cmd->common.flags & ~NVME_CMD_SGL_ALL)
                return NVME_SC_INVALID_FIELD;

        /*
         * Pass through all vendor specific commands
         */
        if (req->cmd->common.opcode >= nvme_admin_vendor_start)
                return nvmet_setup_passthru_command(req);

        switch (req->cmd->common.opcode) {
        case nvme_admin_async_event:
                req->execute = nvmet_execute_async_event;
                return NVME_SC_SUCCESS;
        case nvme_admin_keep_alive:
                /*
                 * Most PCIe ctrls don't support the keep alive cmd, so we
                 * route keep alive to the non-passthru mode. In the future,
                 * change this code once PCIe ctrls with keep alive support
                 * are available.
                 */
                req->execute = nvmet_execute_keep_alive;
                return NVME_SC_SUCCESS;
        case nvme_admin_set_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_set_features;
                        return NVME_SC_SUCCESS;
                case NVME_FEAT_HOST_BEHAVIOR:
                        req->execute = nvmet_passthru_set_host_behaviour;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_get_features:
                switch (le32_to_cpu(req->cmd->features.fid)) {
                case NVME_FEAT_ASYNC_EVENT:
                case NVME_FEAT_KATO:
                case NVME_FEAT_NUM_QUEUES:
                case NVME_FEAT_HOST_ID:
                        req->execute = nvmet_execute_get_features;
                        return NVME_SC_SUCCESS;
                default:
                        return nvmet_passthru_get_set_features(req);
                }
                break;
        case nvme_admin_identify:
                switch (req->cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_CTRL:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                case NVME_ID_CNS_NS:
                        req->execute = nvmet_passthru_execute_cmd;
                        req->p.use_workqueue = true;
                        return NVME_SC_SUCCESS;
                case NVME_ID_CNS_CS_NS:
                        switch (req->cmd->identify.csi) {
                        case NVME_CSI_ZNS:
                                req->execute = nvmet_passthru_execute_cmd;
                                req->p.use_workqueue = true;
                                return NVME_SC_SUCCESS;
                        }
                        return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                default:
                        return nvmet_setup_passthru_command(req);
                }
        case nvme_admin_get_log_page:
                return nvmet_setup_passthru_command(req);
        default:
                /* Reject commands not in the allowlist above */
                return nvmet_report_invalid_opcode(req);
        }
}

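/*
 * Enable passthru for a subsystem: open the configured controller device
 * node, register the subsystem in the passthru_subsystems xarray (one
 * passthru subsystem per controller) and take references on the controller
 * and its transport module.
 */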
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
        struct nvme_ctrl *ctrl;
        struct file *file;
        int ret = -EINVAL;
        void *old;

        mutex_lock(&subsys->lock);
        if (!subsys->passthru_ctrl_path)
                goto out_unlock;
        if (subsys->passthru_ctrl)
                goto out_unlock;

        if (subsys->nr_namespaces) {
                pr_info("cannot enable both passthru and regular namespaces for a single subsystem\n");
                goto out_unlock;
        }

        file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto out_unlock;
        }

        ctrl = nvme_ctrl_from_file(file);
        if (!ctrl) {
                pr_err("failed to open nvme controller %s\n",
                       subsys->passthru_ctrl_path);

                goto out_put_file;
        }

        old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
                         subsys, GFP_KERNEL);
        if (xa_is_err(old)) {
                ret = xa_err(old);
                goto out_put_file;
        }

        if (old)
                goto out_put_file;

        subsys->passthru_ctrl = ctrl;
        subsys->ver = ctrl->vs;

        if (subsys->ver < NVME_VS(1, 2, 1)) {
                pr_warn("nvme controller version is too old: %llu.%llu.%llu, advertising 1.2.1\n",
                        NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver),
                        NVME_TERTIARY(subsys->ver));
                subsys->ver = NVME_VS(1, 2, 1);
        }
        nvme_get_ctrl(ctrl);
        __module_get(subsys->passthru_ctrl->ops->module);
        ret = 0;

out_put_file:
        filp_close(file, NULL);
out_unlock:
        mutex_unlock(&subsys->lock);
        return ret;
}

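/* Tear down the passthru controller binding; the caller holds subsys->lock */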
static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        if (subsys->passthru_ctrl) {
                xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
                module_put(subsys->passthru_ctrl->ops->module);
                nvme_put_ctrl(subsys->passthru_ctrl);
        }
        subsys->passthru_ctrl = NULL;
        subsys->ver = NVMET_DEFAULT_VS;
}

void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
}

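/* Called when the subsystem is freed: disable passthru and release the path */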
void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
        mutex_lock(&subsys->lock);
        __nvmet_passthru_ctrl_disable(subsys);
        mutex_unlock(&subsys->lock);
        kfree(subsys->passthru_ctrl_path);
}