drivers/nvme/target/io-cmd-file.c (linux-2.6-microblaze.git)
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target File I/O commands implementation.
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
#include "nvmet.h"

#define NVMET_MAX_MPOOL_BVEC            16
#define NVMET_MIN_MPOOL_OBJ             16

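/*
 * Refresh the cached namespace size from the backing file.  Uses
 * vfs_getattr() with AT_STATX_FORCE_SYNC so the reported size reflects
 * the current state of the file before it is exposed to the host.
 */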
int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
{
        struct kstat stat;
        int ret;

        ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
                          AT_STATX_FORCE_SYNC);
        if (!ret)
                ns->size = stat.size;
        return ret;
}

void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
        if (ns->file) {
                if (ns->buffered_io)
                        flush_workqueue(buffered_io_wq);
                mempool_destroy(ns->bvec_pool);
                ns->bvec_pool = NULL;
                kmem_cache_destroy(ns->bvec_cache);
                ns->bvec_cache = NULL;
                fput(ns->file);
                ns->file = NULL;
        }
}

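/*
 * Open the backing file for a file-backed namespace.  Buffered-io
 * namespaces go through the page cache; otherwise the file is opened
 * with O_DIRECT.  Also sets up the per-namespace bio_vec slab cache and
 * mempool that serve as a fallback allocation under memory pressure.
 */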
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
        int flags = O_RDWR | O_LARGEFILE;
        int ret;

        if (!ns->buffered_io)
                flags |= O_DIRECT;

        ns->file = filp_open(ns->device_path, flags, 0);
        if (IS_ERR(ns->file)) {
                ret = PTR_ERR(ns->file);
                pr_err("failed to open file %s: (%d)\n",
                        ns->device_path, ret);
                ns->file = NULL;
                return ret;
        }

        ret = nvmet_file_ns_revalidate(ns);
        if (ret)
                goto err;

        /*
         * i_blkbits can be greater than the universally accepted upper bound,
         * so make sure we export a sane namespace lba_shift.
         */
        ns->blksize_shift = min_t(u8,
                        file_inode(ns->file)->i_blkbits, 12);

        ns->bvec_cache = kmem_cache_create("nvmet-bvec",
                        NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
                        0, SLAB_HWCACHE_ALIGN, NULL);
        if (!ns->bvec_cache) {
                ret = -ENOMEM;
                goto err;
        }

        ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
                        mempool_free_slab, ns->bvec_cache);

        if (!ns->bvec_pool) {
                ret = -ENOMEM;
                goto err;
        }

        return ret;
err:
        ns->size = 0;
        ns->blksize_shift = 0;
        nvmet_file_ns_disable(ns);
        return ret;
}

static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
        bv->bv_page = sg_page(sg);
        bv->bv_offset = sg->offset;
        bv->bv_len = sg->length;
}

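/*
 * Build an iov_iter over the request's bio_vec array and submit it
 * through the backing file's ->read_iter or ->write_iter.  Writes with
 * the FUA bit set are submitted with IOCB_DSYNC to honor the
 * forced-unit-access semantics.
 */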
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
                unsigned long nr_segs, size_t count, int ki_flags)
{
        struct kiocb *iocb = &req->f.iocb;
        ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
        struct iov_iter iter;
        int rw;

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        ki_flags |= IOCB_DSYNC;
                call_iter = req->ns->file->f_op->write_iter;
                rw = WRITE;
        } else {
                call_iter = req->ns->file->f_op->read_iter;
                rw = READ;
        }

        iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);

        iocb->ki_pos = pos;
        iocb->ki_filp = req->ns->file;
        iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);

        return call_iter(iocb, &iter);
}

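/*
 * AIO completion handler, also called directly for synchronous
 * submissions.  Releases the bio_vec array, maps a short or failed
 * transfer to an NVMe status code, and completes the request.
 */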
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
        struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
        u16 status = NVME_SC_SUCCESS;

        if (req->f.bvec != req->inline_bvec) {
                if (likely(req->f.mpool_alloc == false))
                        kfree(req->f.bvec);
                else
                        mempool_free(req->f.bvec, req->ns->bvec_pool);
        }

        if (unlikely(ret != req->transfer_len))
                status = errno_to_nvme_status(req, ret);
        nvmet_req_complete(req, status);
}

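/*
 * Core read/write path.  Translates the scatterlist into bio_vecs and
 * submits the whole transfer as one asynchronous iter call.  When the
 * bvec array came from the mempool and the request needs more than
 * NVMET_MAX_MPOOL_BVEC segments, the transfer is instead issued
 * synchronously in NVMET_MAX_MPOOL_BVEC-sized chunks.  Returns false
 * when the caller should retry from the buffered-io workqueue.
 */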
static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
        ssize_t nr_bvec = req->sg_cnt;
        unsigned long bv_cnt = 0;
        bool is_sync = false;
        size_t len = 0, total_len = 0;
        ssize_t ret = 0;
        loff_t pos;
        int i;
        struct scatterlist *sg;

        if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
                is_sync = true;

        pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
        if (unlikely(pos + req->transfer_len > req->ns->size)) {
                nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
                return true;
        }

        memset(&req->f.iocb, 0, sizeof(struct kiocb));
        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
                len += req->f.bvec[bv_cnt].bv_len;
                total_len += req->f.bvec[bv_cnt].bv_len;
                bv_cnt++;

                WARN_ON_ONCE((nr_bvec - 1) < 0);

                if (unlikely(is_sync) &&
                    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
                        ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
                        if (ret < 0)
                                goto complete;

                        pos += len;
                        bv_cnt = 0;
                        len = 0;
                }
                nr_bvec--;
        }

        if (WARN_ON_ONCE(total_len != req->transfer_len)) {
                ret = -EIO;
                goto complete;
        }

        if (unlikely(is_sync)) {
                ret = total_len;
                goto complete;
        }

        /*
         * A NULL ki_complete asks for synchronous execution, which we want
         * for the IOCB_NOWAIT case.
         */
        if (!(ki_flags & IOCB_NOWAIT))
                req->f.iocb.ki_complete = nvmet_file_io_done;

        ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);

        switch (ret) {
        case -EIOCBQUEUED:
                return true;
        case -EAGAIN:
                if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
                        goto complete;
                return false;
        case -EOPNOTSUPP:
                /*
                 * For file systems returning error -EOPNOTSUPP, handle
                 * IOCB_NOWAIT error case separately and retry without
                 * IOCB_NOWAIT.
                 */
                if ((ki_flags & IOCB_NOWAIT))
                        return false;
                break;
        }

complete:
        nvmet_file_io_done(&req->f.iocb, ret, 0);
        return true;
}

static void nvmet_file_buffered_io_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

        nvmet_file_execute_io(req, 0);
}

static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
{
        INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
        queue_work(buffered_io_wq, &req->f.work);
}

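/*
 * Entry point for read/write commands.  Picks the bio_vec array
 * (inline, kmalloc_array(), or mempool fallback) and then dispatches:
 * buffered-io namespaces first try a non-blocking IOCB_NOWAIT
 * submission and fall back to the buffered_io_wq workqueue, while
 * O_DIRECT namespaces submit asynchronously from the caller's context.
 */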
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
        ssize_t nr_bvec = req->sg_cnt;

        if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
                return;

        if (!req->sg_cnt || !nr_bvec) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
                req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
                                GFP_KERNEL);
        else
                req->f.bvec = req->inline_bvec;

        if (unlikely(!req->f.bvec)) {
                /* fallback under memory pressure */
                req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
                req->f.mpool_alloc = true;
        } else
                req->f.mpool_alloc = false;

        if (req->ns->buffered_io) {
                if (likely(!req->f.mpool_alloc) &&
                                nvmet_file_execute_io(req, IOCB_NOWAIT))
                        return;
                nvmet_file_submit_buffered_io(req);
        } else
                nvmet_file_execute_io(req, 0);
}

u16 nvmet_file_flush(struct nvmet_req *req)
{
        return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
}

static void nvmet_file_flush_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

        nvmet_req_complete(req, nvmet_file_flush(req));
}

static void nvmet_file_execute_flush(struct nvmet_req *req)
{
        if (!nvmet_check_transfer_len(req, 0))
                return;
        INIT_WORK(&req->f.work, nvmet_file_flush_work);
        schedule_work(&req->f.work);
}

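/*
 * Deallocate (discard) handling: each DSM range is punched out of the
 * backing file with vfs_fallocate(FALLOC_FL_PUNCH_HOLE), and the first
 * failing range is reported back through error_slba.
 */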
static void nvmet_file_execute_discard(struct nvmet_req *req)
{
        int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
        struct nvme_dsm_range range;
        loff_t offset, len;
        u16 status = 0;
        int ret;
        int i;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                        sizeof(range));
                if (status)
                        break;

                offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
                len = le32_to_cpu(range.nlb);
                len <<= req->ns->blksize_shift;
                if (offset + len > req->ns->size) {
                        req->error_slba = le64_to_cpu(range.slba);
                        status = errno_to_nvme_status(req, -ENOSPC);
                        break;
                }

                ret = vfs_fallocate(req->ns->file, mode, offset, len);
                if (ret && ret != -EOPNOTSUPP) {
                        req->error_slba = le64_to_cpu(range.slba);
                        status = errno_to_nvme_status(req, ret);
                        break;
                }
        }

        nvmet_req_complete(req, status);
}

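/*
 * Dataset Management worker: only the attribute-deallocate (AD) case is
 * implemented; the integral read/write hints complete successfully
 * without taking any action.
 */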
static void nvmet_file_dsm_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_file_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;
        INIT_WORK(&req->f.work, nvmet_file_dsm_work);
        schedule_work(&req->f.work);
}

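/*
 * Write Zeroes worker: zeroes the LBA range with
 * vfs_fallocate(FALLOC_FL_ZERO_RANGE).  The NVMe length field is
 * zero-based, hence the "+ 1" when computing the byte length.
 */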
static void nvmet_file_write_zeroes_work(struct work_struct *w)
{
        struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
        loff_t offset;
        loff_t len;
        int ret;

        offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
        len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                        req->ns->blksize_shift);

        if (unlikely(offset + len > req->ns->size)) {
                nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
                return;
        }

        ret = vfs_fallocate(req->ns->file, mode, offset, len);
        nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}

static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
        if (!nvmet_check_transfer_len(req, 0))
                return;
        INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
        schedule_work(&req->f.work);
}

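/*
 * I/O command parser for file-backed namespaces: installs the execute
 * handler for the supported opcodes and rejects everything else as an
 * invalid opcode.
 */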
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
{
        struct nvme_command *cmd = req->cmd;

        switch (cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_file_execute_rw;
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_file_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_file_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_file_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}