// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

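/*
 * Device features: the common simulator features plus the virtio-blk
 * features this simulator implements (flush, discard, write-zeroes,
 * and the size/segment/topology limits advertised in the config space).
 */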
#define VDPASIM_BLK_FEATURES    (VDPASIM_FEATURES | \
                                 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
                                 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
                                 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
                                 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
                                 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
                                 (1ULL << VIRTIO_BLK_F_MQ)       | \
                                 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
                                 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

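/*
 * Backing store geometry: 0x40000 sectors of SECTOR_SIZE (512) bytes,
 * i.e. a 128 MiB buffer. DWZ = discard/write-zeroes.
 */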
#define VDPASIM_BLK_CAPACITY    0x40000
#define VDPASIM_BLK_SIZE_MAX    0x1000
#define VDPASIM_BLK_SEG_MAX     32
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM      1
#define VDPASIM_BLK_AS_NUM      1
#define VDPASIM_BLK_GROUP_NUM   1

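/*
 * Per-device state: embeds the generic simulator instance and points to
 * the backing buffer (private, or the module-wide shared one).
 */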
struct vdpasim_blk {
	struct vdpasim vdpasim;
	void *buffer;
	bool shared_backend;
};

static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
}

static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");

static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);

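/*
 * Requests are handled under the per-device mutex in the work function,
 * so a private buffer needs no extra locking; only the buffer shared
 * between devices must be serialized with shared_buffer_mutex.
 */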
static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_lock(&shared_buffer_mutex);
}

static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_unlock(&shared_buffer_mutex);
}

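/*
 * Check that the range [start_sector, start_sector + num_sectors) fits
 * within the device capacity and does not exceed the per-request limit.
 */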
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}

/* Returns true if the request was handled (with or without an I/O error)
 * and the status was written to the last byte of the 'in' iov,
 * false otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

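	/*
	 * Fetch the next available descriptor chain: out_iov carries
	 * driver-to-device data (header plus write payload), in_iov carries
	 * device-to-driver data (read payload plus the final status byte).
	 */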
	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

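	/*
	 * Dispatch on the request type. On failure, set the status byte and
	 * break out of the switch: the status must still reach the driver
	 * at err_status below.
	 */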
	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      blk->buffer + offset, to_push);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      blk->buffer + offset, to_pull);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			vdpasim_blk_buffer_lock(blk);
			memset(blk->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
			vdpasim_blk_buffer_unlock(blk);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If an operation failed or pushed less than expected, skip the
	 * remaining bytes so the status lands in the last byte.
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing the index. */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

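/*
 * Work function: drain each ready virtqueue, notifying the driver after
 * every completed request and rescheduling after a small batch so one
 * device cannot monopolize the workqueue.
 */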
static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}

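/*
 * Fill the virtio-blk config space; the cpu_to_vdpasim*() helpers
 * convert each field to the byte order negotiated with the driver.
 */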
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

static void vdpasim_blk_free(struct vdpasim *vdpasim)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);

	if (!blk->shared_backend)
		kvfree(blk->buffer);
}

static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

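/*
 * Management-device callback: create a simulator instance, attach a
 * private or shared backing buffer, and register the vDPA device.
 */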
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_blk *blk;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.free = vdpasim_blk_free;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	blk = sim_to_blk(simdev);
	blk->shared_backend = shared_backend;

	if (blk->shared_backend) {
		blk->buffer = shared_buffer;
	} else {
		blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
				       GFP_KERNEL);
		if (!blk->buffer) {
			ret = -ENOMEM;
			goto put_dev;
		}
	}

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};

static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	if (shared_backend) {
		shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
					 GFP_KERNEL);
		if (!shared_buffer) {
			ret = -ENOMEM;
			goto mgmt_dev_err;
		}
	}

	return 0;
mgmt_dev_err:
	vdpa_mgmtdev_unregister(&mgmt_dev);
parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	kvfree(shared_buffer);
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)
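
/*
 * Once the module is loaded, devices can be created through the vDPA
 * management API, for example with the iproute2 "vdpa" tool (a sketch,
 * assuming the tool is available; "blk0" is an arbitrary name):
 *
 *   vdpa dev add mgmtdev vdpasim_blk name blk0
 */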

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);