drivers/virtio/virtio_ring.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/spinlock.h>
15 #include <xen/xen.h>
16
17 #ifdef DEBUG
18 /* For development, we want to crash whenever the ring is screwed. */
19 #define BAD_RING(_vq, fmt, args...)                             \
20         do {                                                    \
21                 dev_err(&(_vq)->vq.vdev->dev,                   \
22                         "%s:"fmt, (_vq)->vq.name, ##args);      \
23                 BUG();                                          \
24         } while (0)
25 /* Caller is supposed to guarantee no reentry. */
26 #define START_USE(_vq)                                          \
27         do {                                                    \
28                 if ((_vq)->in_use)                              \
29                         panic("%s:in_use = %i\n",               \
30                               (_vq)->vq.name, (_vq)->in_use);   \
31                 (_vq)->in_use = __LINE__;                       \
32         } while (0)
33 #define END_USE(_vq) \
34         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
35 #define LAST_ADD_TIME_UPDATE(_vq)                               \
36         do {                                                    \
37                 ktime_t now = ktime_get();                      \
38                                                                 \
39                 /* No kick or get, with 0.1 second between?  Warn. */ \
40                 if ((_vq)->last_add_time_valid)                 \
41                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
42                                 (_vq)->last_add_time)) > 100);  \
43                 (_vq)->last_add_time = now;                     \
44                 (_vq)->last_add_time_valid = true;              \
45         } while (0)
46 #define LAST_ADD_TIME_CHECK(_vq)                                \
47         do {                                                    \
48                 if ((_vq)->last_add_time_valid) {               \
49                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50                                       (_vq)->last_add_time)) > 100); \
51                 }                                               \
52         } while (0)
53 #define LAST_ADD_TIME_INVALID(_vq)                              \
54         ((_vq)->last_add_time_valid = false)
55 #else
56 #define BAD_RING(_vq, fmt, args...)                             \
57         do {                                                    \
58                 dev_err(&_vq->vq.vdev->dev,                     \
59                         "%s:"fmt, (_vq)->vq.name, ##args);      \
60                 (_vq)->broken = true;                           \
61         } while (0)
62 #define START_USE(vq)
63 #define END_USE(vq)
64 #define LAST_ADD_TIME_UPDATE(vq)
65 #define LAST_ADD_TIME_CHECK(vq)
66 #define LAST_ADD_TIME_INVALID(vq)
67 #endif
68
69 struct vring_desc_state_split {
70         void *data;                     /* Data for callback. */
71         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
72 };
73
74 struct vring_desc_state_packed {
75         void *data;                     /* Data for callback. */
76         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77         u16 num;                        /* Descriptor list length. */
78         u16 last;                       /* The last desc state in a list. */
79 };
80
81 struct vring_desc_extra {
82         dma_addr_t addr;                /* Descriptor DMA addr. */
83         u32 len;                        /* Descriptor length. */
84         u16 flags;                      /* Descriptor flags. */
85         u16 next;                       /* The next desc state in a list. */
86 };
87
88 struct vring_virtqueue {
89         struct virtqueue vq;
90
91         /* Is this a packed ring? */
92         bool packed_ring;
93
94         /* Is DMA API used? */
95         bool use_dma_api;
96
97         /* Can we use weak barriers? */
98         bool weak_barriers;
99
100         /* Other side has made a mess, don't try any more. */
101         bool broken;
102
103         /* Host supports indirect buffers */
104         bool indirect;
105
106         /* Host publishes avail event idx */
107         bool event;
108
109         /* Head of free buffer list. */
110         unsigned int free_head;
111         /* Number we've added since last sync. */
112         unsigned int num_added;
113
114         /* Last used index we've seen. */
115         u16 last_used_idx;
116
117         /* Hint for event idx: already triggered no need to disable. */
118         bool event_triggered;
119
120         union {
121                 /* Available for split ring */
122                 struct {
123                         /* Actual memory layout for this queue. */
124                         struct vring vring;
125
126                         /* Last written value to avail->flags */
127                         u16 avail_flags_shadow;
128
129                         /*
130                          * Last written value to avail->idx in
131                          * guest byte order.
132                          */
133                         u16 avail_idx_shadow;
134
135                         /* Per-descriptor state. */
136                         struct vring_desc_state_split *desc_state;
137                         struct vring_desc_extra *desc_extra;
138
139                         /* DMA address and size information */
140                         dma_addr_t queue_dma_addr;
141                         size_t queue_size_in_bytes;
142                 } split;
143
144                 /* Available for packed ring */
145                 struct {
146                         /* Actual memory layout for this queue. */
147                         struct {
148                                 unsigned int num;
149                                 struct vring_packed_desc *desc;
150                                 struct vring_packed_desc_event *driver;
151                                 struct vring_packed_desc_event *device;
152                         } vring;
153
154                         /* Driver ring wrap counter. */
155                         bool avail_wrap_counter;
156
157                         /* Device ring wrap counter. */
158                         bool used_wrap_counter;
159
160                         /* Avail used flags. */
161                         u16 avail_used_flags;
162
163                         /* Index of the next avail descriptor. */
164                         u16 next_avail_idx;
165
166                         /*
167                          * Last written value to driver->flags in
168                          * guest byte order.
169                          */
170                         u16 event_flags_shadow;
171
172                         /* Per-descriptor state. */
173                         struct vring_desc_state_packed *desc_state;
174                         struct vring_desc_extra *desc_extra;
175
176                         /* DMA address and size information */
177                         dma_addr_t ring_dma_addr;
178                         dma_addr_t driver_event_dma_addr;
179                         dma_addr_t device_event_dma_addr;
180                         size_t ring_size_in_bytes;
181                         size_t event_size_in_bytes;
182                 } packed;
183         };
184
185         /* How to notify other side. FIXME: commonalize hcalls! */
186         bool (*notify)(struct virtqueue *vq);
187
188         /* DMA, allocation, and size information */
189         bool we_own_ring;
190
191 #ifdef DEBUG
192         /* They're supposed to lock for us. */
193         unsigned int in_use;
194
195         /* Figure out if their kicks are too delayed. */
196         bool last_add_time_valid;
197         ktime_t last_add_time;
198 #endif
199 };
200
201
202 /*
203  * Helpers.
204  */
205
206 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
207
208 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
209                                           unsigned int total_sg)
210 {
211         /*
212          * If the host supports indirect descriptor tables, and we have multiple
213          * buffers, then go indirect. FIXME: tune this threshold
214          */
215         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
216 }
217
218 /*
219  * Modern virtio devices have feature bits to specify whether they need a
220  * quirk and bypass the IOMMU. If not there, just use the DMA API.
221  *
222  * If there, the interaction between virtio and DMA API is messy.
223  *
224  * On most systems with virtio, physical addresses match bus addresses,
225  * and it doesn't particularly matter whether we use the DMA API.
226  *
227  * On some systems, including Xen and any system with a physical device
228  * that speaks virtio behind a physical IOMMU, we must use the DMA API
229  * for virtio DMA to work at all.
230  *
231  * On other systems, including SPARC and PPC64, virtio-pci devices are
232  * enumerated as though they are behind an IOMMU, but the virtio host
233  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
234  * there or somehow map everything as the identity.
235  *
236  * For the time being, we preserve historic behavior and bypass the DMA
237  * API.
238  *
239  * TODO: install a per-device DMA ops structure that does the right thing
240  * taking into account all the above quirks, and use the DMA API
241  * unconditionally on data path.
242  */
243
244 static bool vring_use_dma_api(struct virtio_device *vdev)
245 {
246         if (!virtio_has_dma_quirk(vdev))
247                 return true;
248
249         /* Otherwise, we are left to guess. */
250         /*
251          * In theory, it's possible to have a buggy QEMU-supplied
252          * emulated Q35 IOMMU and Xen enabled at the same time.  On
253          * such a configuration, virtio has never worked and will
254          * not work without an even larger kludge.  Instead, enable
255          * the DMA API if we're a Xen guest, which at least allows
256          * all of the sensible Xen configurations to work correctly.
257          */
258         if (xen_domain())
259                 return true;
260
261         return false;
262 }
263
264 size_t virtio_max_dma_size(struct virtio_device *vdev)
265 {
266         size_t max_segment_size = SIZE_MAX;
267
268         if (vring_use_dma_api(vdev))
269                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
270
271         return max_segment_size;
272 }
273 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
274
275 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
276                               dma_addr_t *dma_handle, gfp_t flag)
277 {
278         if (vring_use_dma_api(vdev)) {
279                 return dma_alloc_coherent(vdev->dev.parent, size,
280                                           dma_handle, flag);
281         } else {
282                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
283
284                 if (queue) {
285                         phys_addr_t phys_addr = virt_to_phys(queue);
286                         *dma_handle = (dma_addr_t)phys_addr;
287
288                         /*
289                          * Sanity check: make sure we didn't truncate
290                          * the address.  The only arches I can find that
291                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
292                          * are certain non-highmem MIPS and x86
293                          * configurations, but these configurations
294                          * should never allocate physical pages above 32
295                          * bits, so this is fine.  Just in case, throw a
296                          * warning and abort if we end up with an
297                          * unrepresentable address.
298                          */
299                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
300                                 free_pages_exact(queue, PAGE_ALIGN(size));
301                                 return NULL;
302                         }
303                 }
304                 return queue;
305         }
306 }
307
308 static void vring_free_queue(struct virtio_device *vdev, size_t size,
309                              void *queue, dma_addr_t dma_handle)
310 {
311         if (vring_use_dma_api(vdev))
312                 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
313         else
314                 free_pages_exact(queue, PAGE_ALIGN(size));
315 }
316
317 /*
318  * The DMA ops on various arches are rather gnarly right now, and
319  * making all of the arch DMA ops work on the vring device itself
320  * is a mess.  For now, we use the parent device for DMA ops.
321  */
322 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
323 {
324         return vq->vq.vdev->dev.parent;
325 }
326
327 /* Map one sg entry. */
328 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
329                                    struct scatterlist *sg,
330                                    enum dma_data_direction direction)
331 {
332         if (!vq->use_dma_api)
333                 return (dma_addr_t)sg_phys(sg);
334
335         /*
336          * We can't use dma_map_sg, because we don't use scatterlists in
337          * the way it expects (we don't guarantee that the scatterlist
338          * will exist for the lifetime of the mapping).
339          */
340         return dma_map_page(vring_dma_dev(vq),
341                             sg_page(sg), sg->offset, sg->length,
342                             direction);
343 }
344
345 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
346                                    void *cpu_addr, size_t size,
347                                    enum dma_data_direction direction)
348 {
349         if (!vq->use_dma_api)
350                 return (dma_addr_t)virt_to_phys(cpu_addr);
351
352         return dma_map_single(vring_dma_dev(vq),
353                               cpu_addr, size, direction);
354 }
355
356 static int vring_mapping_error(const struct vring_virtqueue *vq,
357                                dma_addr_t addr)
358 {
359         if (!vq->use_dma_api)
360                 return 0;
361
362         return dma_mapping_error(vring_dma_dev(vq), addr);
363 }
364
365
366 /*
367  * Split ring specific functions - *_split().
368  */
369
370 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
371                                            struct vring_desc *desc)
372 {
373         u16 flags;
374
375         if (!vq->use_dma_api)
376                 return;
377
378         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
379
380         dma_unmap_page(vring_dma_dev(vq),
381                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
382                        virtio32_to_cpu(vq->vq.vdev, desc->len),
383                        (flags & VRING_DESC_F_WRITE) ?
384                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
385 }
386
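/*
 * Unmap the descriptor at index i using the DMA address, length and flags
 * shadowed in desc_extra, so the device-visible descriptor ring never has
 * to be read back, and return the index of the next descriptor in the
 * chain.
 */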
387 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
388                                           unsigned int i)
389 {
390         struct vring_desc_extra *extra = vq->split.desc_extra;
391         u16 flags;
392
393         if (!vq->use_dma_api)
394                 goto out;
395
396         flags = extra[i].flags;
397
398         if (flags & VRING_DESC_F_INDIRECT) {
399                 dma_unmap_single(vring_dma_dev(vq),
400                                  extra[i].addr,
401                                  extra[i].len,
402                                  (flags & VRING_DESC_F_WRITE) ?
403                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
404         } else {
405                 dma_unmap_page(vring_dma_dev(vq),
406                                extra[i].addr,
407                                extra[i].len,
408                                (flags & VRING_DESC_F_WRITE) ?
409                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
410         }
411
412 out:
413         return extra[i].next;
414 }
415
416 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
417                                                unsigned int total_sg,
418                                                gfp_t gfp)
419 {
420         struct vring_desc *desc;
421         unsigned int i;
422
423         /*
424          * We require lowmem mappings for the descriptors because
425          * otherwise virt_to_phys will give us bogus addresses in the
426          * virtqueue.
427          */
428         gfp &= ~__GFP_HIGHMEM;
429
430         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
431         if (!desc)
432                 return NULL;
433
434         for (i = 0; i < total_sg; i++)
435                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
436         return desc;
437 }
438
439 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
440                                                     struct vring_desc *desc,
441                                                     unsigned int i,
442                                                     dma_addr_t addr,
443                                                     unsigned int len,
444                                                     u16 flags,
445                                                     bool indirect)
446 {
447         struct vring_virtqueue *vring = to_vvq(vq);
448         struct vring_desc_extra *extra = vring->split.desc_extra;
449         u16 next;
450
451         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
452         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
453         desc[i].len = cpu_to_virtio32(vq->vdev, len);
454
455         if (!indirect) {
456                 next = extra[i].next;
457                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
458
459                 extra[i].addr = addr;
460                 extra[i].len = len;
461                 extra[i].flags = flags;
462         } else
463                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
464
465         return next;
466 }
467
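/*
 * Core add path for the split ring: map each scatterlist entry, chain the
 * descriptors (directly in the ring or in an indirect table), then publish
 * the chain head in the available ring.  avail->idx is only bumped after a
 * write barrier so the device never sees a partially written chain.
 */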
468 static inline int virtqueue_add_split(struct virtqueue *_vq,
469                                       struct scatterlist *sgs[],
470                                       unsigned int total_sg,
471                                       unsigned int out_sgs,
472                                       unsigned int in_sgs,
473                                       void *data,
474                                       void *ctx,
475                                       gfp_t gfp)
476 {
477         struct vring_virtqueue *vq = to_vvq(_vq);
478         struct scatterlist *sg;
479         struct vring_desc *desc;
480         unsigned int i, n, avail, descs_used, prev, err_idx;
481         int head;
482         bool indirect;
483
484         START_USE(vq);
485
486         BUG_ON(data == NULL);
487         BUG_ON(ctx && vq->indirect);
488
489         if (unlikely(vq->broken)) {
490                 END_USE(vq);
491                 return -EIO;
492         }
493
494         LAST_ADD_TIME_UPDATE(vq);
495
496         BUG_ON(total_sg == 0);
497
498         head = vq->free_head;
499
500         if (virtqueue_use_indirect(vq, total_sg))
501                 desc = alloc_indirect_split(_vq, total_sg, gfp);
502         else {
503                 desc = NULL;
504                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
505         }
506
507         if (desc) {
508                 /* Use a single buffer which doesn't continue */
509                 indirect = true;
510                 /* Set up rest to use this indirect table. */
511                 i = 0;
512                 descs_used = 1;
513         } else {
514                 indirect = false;
515                 desc = vq->split.vring.desc;
516                 i = head;
517                 descs_used = total_sg;
518         }
519
520         if (unlikely(vq->vq.num_free < descs_used)) {
521                 pr_debug("Can't add buf len %i - avail = %i\n",
522                          descs_used, vq->vq.num_free);
523                 /* FIXME: for historical reasons, we force a notify here if
524                  * there are outgoing parts to the buffer.  Presumably the
525                  * host should service the ring ASAP. */
526                 if (out_sgs)
527                         vq->notify(&vq->vq);
528                 if (indirect)
529                         kfree(desc);
530                 END_USE(vq);
531                 return -ENOSPC;
532         }
533
534         for (n = 0; n < out_sgs; n++) {
535                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
536                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
537                         if (vring_mapping_error(vq, addr))
538                                 goto unmap_release;
539
540                         prev = i;
541                         /* Note that we trust the indirect descriptor
542                          * table since it uses streaming DMA mapping.
543                          */
544                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
545                                                      VRING_DESC_F_NEXT,
546                                                      indirect);
547                 }
548         }
549         for (; n < (out_sgs + in_sgs); n++) {
550                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
551                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
552                         if (vring_mapping_error(vq, addr))
553                                 goto unmap_release;
554
555                         prev = i;
556                         /* Note that we trust the indirect descriptor
557                          * table since it uses streaming DMA mapping.
558                          */
559                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
560                                                      sg->length,
561                                                      VRING_DESC_F_NEXT |
562                                                      VRING_DESC_F_WRITE,
563                                                      indirect);
564                 }
565         }
566         /* Last one doesn't continue. */
567         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
568         if (!indirect && vq->use_dma_api)
569                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
570                         ~VRING_DESC_F_NEXT;
571
572         if (indirect) {
573                 /* Now that the indirect table is filled in, map it. */
574                 dma_addr_t addr = vring_map_single(
575                         vq, desc, total_sg * sizeof(struct vring_desc),
576                         DMA_TO_DEVICE);
577                 if (vring_mapping_error(vq, addr))
578                         goto unmap_release;
579
580                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
581                                          head, addr,
582                                          total_sg * sizeof(struct vring_desc),
583                                          VRING_DESC_F_INDIRECT,
584                                          false);
585         }
586
587         /* We're using some buffers from the free list. */
588         vq->vq.num_free -= descs_used;
589
590         /* Update free pointer */
591         if (indirect)
592                 vq->free_head = vq->split.desc_extra[head].next;
593         else
594                 vq->free_head = i;
595
596         /* Store token and indirect buffer state. */
597         vq->split.desc_state[head].data = data;
598         if (indirect)
599                 vq->split.desc_state[head].indir_desc = desc;
600         else
601                 vq->split.desc_state[head].indir_desc = ctx;
602
603         /* Put entry in available array (but don't update avail->idx until they
604          * do sync). */
605         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
606         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
607
608         /* Descriptors and available array need to be set before we expose the
609          * new available array entries. */
610         virtio_wmb(vq->weak_barriers);
611         vq->split.avail_idx_shadow++;
612         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
613                                                 vq->split.avail_idx_shadow);
614         vq->num_added++;
615
616         pr_debug("Added buffer head %i to %p\n", head, vq);
617         END_USE(vq);
618
619         /* This is very unlikely, but theoretically possible.  Kick
620          * just in case. */
621         if (unlikely(vq->num_added == (1 << 16) - 1))
622                 virtqueue_kick(_vq);
623
624         return 0;
625
626 unmap_release:
627         err_idx = i;
628
629         if (indirect)
630                 i = 0;
631         else
632                 i = head;
633
634         for (n = 0; n < total_sg; n++) {
635                 if (i == err_idx)
636                         break;
637                 if (indirect) {
638                         vring_unmap_one_split_indirect(vq, &desc[i]);
639                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
640                 } else
641                         i = vring_unmap_one_split(vq, i);
642         }
643
644         if (indirect)
645                 kfree(desc);
646
647         END_USE(vq);
648         return -ENOMEM;
649 }
650
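/*
 * Decide whether the device needs to be notified.  With EVENT_IDX the
 * device publishes the avail index it wants to be kicked at and
 * vring_need_event() checks whether we crossed it; otherwise the kick is
 * only suppressed when the device has set VRING_USED_F_NO_NOTIFY.
 */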
651 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
652 {
653         struct vring_virtqueue *vq = to_vvq(_vq);
654         u16 new, old;
655         bool needs_kick;
656
657         START_USE(vq);
658         /* We need to expose available array entries before checking avail
659          * event. */
660         virtio_mb(vq->weak_barriers);
661
662         old = vq->split.avail_idx_shadow - vq->num_added;
663         new = vq->split.avail_idx_shadow;
664         vq->num_added = 0;
665
666         LAST_ADD_TIME_CHECK(vq);
667         LAST_ADD_TIME_INVALID(vq);
668
669         if (vq->event) {
670                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
671                                         vring_avail_event(&vq->split.vring)),
672                                               new, old);
673         } else {
674                 needs_kick = !(vq->split.vring.used->flags &
675                                         cpu_to_virtio16(_vq->vdev,
676                                                 VRING_USED_F_NO_NOTIFY));
677         }
678         END_USE(vq);
679         return needs_kick;
680 }
681
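/*
 * Return a completed descriptor chain to the free list: walk the NEXT
 * links, unmap every descriptor, and free (or hand back via ctx) the
 * indirect table that was allocated in virtqueue_add_split(), if any.
 */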
682 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
683                              void **ctx)
684 {
685         unsigned int i, j;
686         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
687
688         /* Clear data ptr. */
689         vq->split.desc_state[head].data = NULL;
690
691         /* Put back on free list: unmap first-level descriptors and find end */
692         i = head;
693
694         while (vq->split.vring.desc[i].flags & nextflag) {
695                 vring_unmap_one_split(vq, i);
696                 i = vq->split.desc_extra[i].next;
697                 vq->vq.num_free++;
698         }
699
700         vring_unmap_one_split(vq, i);
701         vq->split.desc_extra[i].next = vq->free_head;
702         vq->free_head = head;
703
704         /* Plus final descriptor */
705         vq->vq.num_free++;
706
707         if (vq->indirect) {
708                 struct vring_desc *indir_desc =
709                                 vq->split.desc_state[head].indir_desc;
710                 u32 len;
711
712                 /* Free the indirect table, if any, now that it's unmapped. */
713                 if (!indir_desc)
714                         return;
715
716                 len = vq->split.desc_extra[head].len;
717
718                 BUG_ON(!(vq->split.desc_extra[head].flags &
719                                 VRING_DESC_F_INDIRECT));
720                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
721
722                 for (j = 0; j < len / sizeof(struct vring_desc); j++)
723                         vring_unmap_one_split_indirect(vq, &indir_desc[j]);
724
725                 kfree(indir_desc);
726                 vq->split.desc_state[head].indir_desc = NULL;
727         } else if (ctx) {
728                 *ctx = vq->split.desc_state[head].indir_desc;
729         }
730 }
731
732 static inline bool more_used_split(const struct vring_virtqueue *vq)
733 {
734         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
735                         vq->split.vring.used->idx);
736 }
737
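/*
 * Fetch the next completed buffer from the used ring.  virtio_rmb()
 * ensures the used element is only read after the device's used->idx
 * update is visible; if callbacks are enabled, used_event is advanced so
 * a device using EVENT_IDX knows how far we have consumed.
 */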
738 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
739                                          unsigned int *len,
740                                          void **ctx)
741 {
742         struct vring_virtqueue *vq = to_vvq(_vq);
743         void *ret;
744         unsigned int i;
745         u16 last_used;
746
747         START_USE(vq);
748
749         if (unlikely(vq->broken)) {
750                 END_USE(vq);
751                 return NULL;
752         }
753
754         if (!more_used_split(vq)) {
755                 pr_debug("No more buffers in queue\n");
756                 END_USE(vq);
757                 return NULL;
758         }
759
760         /* Only get used array entries after they have been exposed by host. */
761         virtio_rmb(vq->weak_barriers);
762
763         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
764         i = virtio32_to_cpu(_vq->vdev,
765                         vq->split.vring.used->ring[last_used].id);
766         *len = virtio32_to_cpu(_vq->vdev,
767                         vq->split.vring.used->ring[last_used].len);
768
769         if (unlikely(i >= vq->split.vring.num)) {
770                 BAD_RING(vq, "id %u out of range\n", i);
771                 return NULL;
772         }
773         if (unlikely(!vq->split.desc_state[i].data)) {
774                 BAD_RING(vq, "id %u is not a head!\n", i);
775                 return NULL;
776         }
777
778         /* detach_buf_split clears data, so grab it now. */
779         ret = vq->split.desc_state[i].data;
780         detach_buf_split(vq, i, ctx);
781         vq->last_used_idx++;
782         /* If we expect an interrupt for the next entry, tell host
783          * by writing event index and flush out the write before
784          * the read in the next get_buf call. */
785         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
786                 virtio_store_mb(vq->weak_barriers,
787                                 &vring_used_event(&vq->split.vring),
788                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
789
790         LAST_ADD_TIME_INVALID(vq);
791
792         END_USE(vq);
793         return ret;
794 }
795
796 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
797 {
798         struct vring_virtqueue *vq = to_vvq(_vq);
799
800         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
801                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
802                 if (vq->event)
803                         /* TODO: this is a hack. Figure out a cleaner value to write. */
804                         vring_used_event(&vq->split.vring) = 0x0;
805                 else
806                         vq->split.vring.avail->flags =
807                                 cpu_to_virtio16(_vq->vdev,
808                                                 vq->split.avail_flags_shadow);
809         }
810 }
811
812 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
813 {
814         struct vring_virtqueue *vq = to_vvq(_vq);
815         u16 last_used_idx;
816
817         START_USE(vq);
818
819         /* We optimistically turn back on interrupts, then check if there was
820          * more to do. */
821         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
822          * either clear the flags bit or point the event index at the next
823          * entry. Always do both to keep code simple. */
824         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
825                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
826                 if (!vq->event)
827                         vq->split.vring.avail->flags =
828                                 cpu_to_virtio16(_vq->vdev,
829                                                 vq->split.avail_flags_shadow);
830         }
831         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
832                         last_used_idx = vq->last_used_idx);
833         END_USE(vq);
834         return last_used_idx;
835 }
836
837 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
838 {
839         struct vring_virtqueue *vq = to_vvq(_vq);
840
841         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
842                         vq->split.vring.used->idx);
843 }
844
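/*
 * Re-enable callbacks, but ask the device (via used_event) to delay the
 * interrupt until roughly 3/4 of the outstanding buffers have been used.
 * Returns false if that many buffers were already consumed, in which case
 * the caller should poll instead of waiting for an interrupt.
 */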
845 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
846 {
847         struct vring_virtqueue *vq = to_vvq(_vq);
848         u16 bufs;
849
850         START_USE(vq);
851
852         /* We optimistically turn back on interrupts, then check if there was
853          * more to do. */
854         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
855          * either clear the flags bit or point the event index at the next
856          * entry. Always update the event index to keep code simple. */
857         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
858                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
859                 if (!vq->event)
860                         vq->split.vring.avail->flags =
861                                 cpu_to_virtio16(_vq->vdev,
862                                                 vq->split.avail_flags_shadow);
863         }
864         /* TODO: tune this threshold */
865         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
866
867         virtio_store_mb(vq->weak_barriers,
868                         &vring_used_event(&vq->split.vring),
869                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
870
871         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
872                                         - vq->last_used_idx) > bufs)) {
873                 END_USE(vq);
874                 return false;
875         }
876
877         END_USE(vq);
878         return true;
879 }
880
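/*
 * Used during device cleanup: detach a buffer the device never consumed
 * and hand it back to the caller, rolling back the avail index that had
 * been published for it.
 */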
881 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
882 {
883         struct vring_virtqueue *vq = to_vvq(_vq);
884         unsigned int i;
885         void *buf;
886
887         START_USE(vq);
888
889         for (i = 0; i < vq->split.vring.num; i++) {
890                 if (!vq->split.desc_state[i].data)
891                         continue;
892                 /* detach_buf_split clears data, so grab it now. */
893                 buf = vq->split.desc_state[i].data;
894                 detach_buf_split(vq, i, NULL);
895                 vq->split.avail_idx_shadow--;
896                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
897                                 vq->split.avail_idx_shadow);
898                 END_USE(vq);
899                 return buf;
900         }
901         /* That should have freed everything. */
902         BUG_ON(vq->vq.num_free != vq->split.vring.num);
903
904         END_USE(vq);
905         return NULL;
906 }
907
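/*
 * Allocate the ring memory and build a split virtqueue around it.  num
 * must be a power of two.  While the ring is larger than a page, the
 * allocation is retried at halved queue sizes (when may_reduce_num allows
 * it); a final attempt is made at the remaining size before giving up.
 */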
908 static struct virtqueue *vring_create_virtqueue_split(
909         unsigned int index,
910         unsigned int num,
911         unsigned int vring_align,
912         struct virtio_device *vdev,
913         bool weak_barriers,
914         bool may_reduce_num,
915         bool context,
916         bool (*notify)(struct virtqueue *),
917         void (*callback)(struct virtqueue *),
918         const char *name)
919 {
920         struct virtqueue *vq;
921         void *queue = NULL;
922         dma_addr_t dma_addr;
923         size_t queue_size_in_bytes;
924         struct vring vring;
925
926         /* We assume num is a power of 2. */
927         if (num & (num - 1)) {
928                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
929                 return NULL;
930         }
931
932         /* TODO: allocate each queue chunk individually */
933         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
934                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
935                                           &dma_addr,
936                                           GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
937                 if (queue)
938                         break;
939                 if (!may_reduce_num)
940                         return NULL;
941         }
942
943         if (!num)
944                 return NULL;
945
946         if (!queue) {
947                 /* Try to get a single page. You are my only hope! */
948                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
949                                           &dma_addr, GFP_KERNEL|__GFP_ZERO);
950         }
951         if (!queue)
952                 return NULL;
953
954         queue_size_in_bytes = vring_size(num, vring_align);
955         vring_init(&vring, num, queue, vring_align);
956
957         vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
958                                    notify, callback, name);
959         if (!vq) {
960                 vring_free_queue(vdev, queue_size_in_bytes, queue,
961                                  dma_addr);
962                 return NULL;
963         }
964
965         to_vvq(vq)->split.queue_dma_addr = dma_addr;
966         to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
967         to_vvq(vq)->we_own_ring = true;
968
969         return vq;
970 }
971
972
973 /*
974  * Packed ring specific functions - *_packed().
975  */
976
977 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
978                                      struct vring_desc_extra *extra)
979 {
980         u16 flags;
981
982         if (!vq->use_dma_api)
983                 return;
984
985         flags = extra->flags;
986
987         if (flags & VRING_DESC_F_INDIRECT) {
988                 dma_unmap_single(vring_dma_dev(vq),
989                                  extra->addr, extra->len,
990                                  (flags & VRING_DESC_F_WRITE) ?
991                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
992         } else {
993                 dma_unmap_page(vring_dma_dev(vq),
994                                extra->addr, extra->len,
995                                (flags & VRING_DESC_F_WRITE) ?
996                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
997         }
998 }
999
1000 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1001                                    struct vring_packed_desc *desc)
1002 {
1003         u16 flags;
1004
1005         if (!vq->use_dma_api)
1006                 return;
1007
1008         flags = le16_to_cpu(desc->flags);
1009
1010         dma_unmap_page(vring_dma_dev(vq),
1011                        le64_to_cpu(desc->addr),
1012                        le32_to_cpu(desc->len),
1013                        (flags & VRING_DESC_F_WRITE) ?
1014                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1015 }
1016
1017 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1018                                                        gfp_t gfp)
1019 {
1020         struct vring_packed_desc *desc;
1021
1022         /*
1023          * We require lowmem mappings for the descriptors because
1024          * otherwise virt_to_phys will give us bogus addresses in the
1025          * virtqueue.
1026          */
1027         gfp &= ~__GFP_HIGHMEM;
1028
1029         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1030
1031         return desc;
1032 }
1033
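/*
 * Add a buffer to the packed ring through an indirect table: the whole
 * scatterlist is described in a separately allocated table and only one
 * ring slot is consumed.  The head descriptor's flags (including the
 * AVAIL/USED bits) are written last, after a write barrier, so the device
 * cannot see a half-initialized descriptor.
 */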
1034 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1035                                          struct scatterlist *sgs[],
1036                                          unsigned int total_sg,
1037                                          unsigned int out_sgs,
1038                                          unsigned int in_sgs,
1039                                          void *data,
1040                                          gfp_t gfp)
1041 {
1042         struct vring_packed_desc *desc;
1043         struct scatterlist *sg;
1044         unsigned int i, n, err_idx;
1045         u16 head, id;
1046         dma_addr_t addr;
1047
1048         head = vq->packed.next_avail_idx;
1049         desc = alloc_indirect_packed(total_sg, gfp);
1050         if (!desc)
1051                 return -ENOMEM;
1052
1053         if (unlikely(vq->vq.num_free < 1)) {
1054                 pr_debug("Can't add buf len 1 - avail = 0\n");
1055                 kfree(desc);
1056                 END_USE(vq);
1057                 return -ENOSPC;
1058         }
1059
1060         i = 0;
1061         id = vq->free_head;
1062         BUG_ON(id == vq->packed.vring.num);
1063
1064         for (n = 0; n < out_sgs + in_sgs; n++) {
1065                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1066                         addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1067                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1068                         if (vring_mapping_error(vq, addr))
1069                                 goto unmap_release;
1070
1071                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1072                                                 0 : VRING_DESC_F_WRITE);
1073                         desc[i].addr = cpu_to_le64(addr);
1074                         desc[i].len = cpu_to_le32(sg->length);
1075                         i++;
1076                 }
1077         }
1078
1079         /* Now that the indirect table is filled in, map it. */
1080         addr = vring_map_single(vq, desc,
1081                         total_sg * sizeof(struct vring_packed_desc),
1082                         DMA_TO_DEVICE);
1083         if (vring_mapping_error(vq, addr))
1084                 goto unmap_release;
1085
1086         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1087         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1088                                 sizeof(struct vring_packed_desc));
1089         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1090
1091         if (vq->use_dma_api) {
1092                 vq->packed.desc_extra[id].addr = addr;
1093                 vq->packed.desc_extra[id].len = total_sg *
1094                                 sizeof(struct vring_packed_desc);
1095                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1096                                                   vq->packed.avail_used_flags;
1097         }
1098
1099         /*
1100          * A driver MUST NOT make the first descriptor in the list
1101          * available before all subsequent descriptors comprising
1102          * the list are made available.
1103          */
1104         virtio_wmb(vq->weak_barriers);
1105         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1106                                                 vq->packed.avail_used_flags);
1107
1108         /* We're using some buffers from the free list. */
1109         vq->vq.num_free -= 1;
1110
1111         /* Update free pointer */
1112         n = head + 1;
1113         if (n >= vq->packed.vring.num) {
1114                 n = 0;
1115                 vq->packed.avail_wrap_counter ^= 1;
1116                 vq->packed.avail_used_flags ^=
1117                                 1 << VRING_PACKED_DESC_F_AVAIL |
1118                                 1 << VRING_PACKED_DESC_F_USED;
1119         }
1120         vq->packed.next_avail_idx = n;
1121         vq->free_head = vq->packed.desc_extra[id].next;
1122
1123         /* Store token and indirect buffer state. */
1124         vq->packed.desc_state[id].num = 1;
1125         vq->packed.desc_state[id].data = data;
1126         vq->packed.desc_state[id].indir_desc = desc;
1127         vq->packed.desc_state[id].last = id;
1128
1129         vq->num_added += 1;
1130
1131         pr_debug("Added buffer head %i to %p\n", head, vq);
1132         END_USE(vq);
1133
1134         return 0;
1135
1136 unmap_release:
1137         err_idx = i;
1138
1139         for (i = 0; i < err_idx; i++)
1140                 vring_unmap_desc_packed(vq, &desc[i]);
1141
1142         kfree(desc);
1143
1144         END_USE(vq);
1145         return -ENOMEM;
1146 }
1147
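/*
 * Direct add path for the packed ring.  Descriptors are written in place
 * starting at next_avail_idx, wrapping (and toggling the avail wrap
 * counter) when the end of the ring is reached.  The flags of the first
 * descriptor are held back in head_flags and only stored after a write
 * barrier, which is what makes the whole chain visible to the device.
 */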
1148 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1149                                        struct scatterlist *sgs[],
1150                                        unsigned int total_sg,
1151                                        unsigned int out_sgs,
1152                                        unsigned int in_sgs,
1153                                        void *data,
1154                                        void *ctx,
1155                                        gfp_t gfp)
1156 {
1157         struct vring_virtqueue *vq = to_vvq(_vq);
1158         struct vring_packed_desc *desc;
1159         struct scatterlist *sg;
1160         unsigned int i, n, c, descs_used, err_idx;
1161         __le16 head_flags, flags;
1162         u16 head, id, prev, curr, avail_used_flags;
1163         int err;
1164
1165         START_USE(vq);
1166
1167         BUG_ON(data == NULL);
1168         BUG_ON(ctx && vq->indirect);
1169
1170         if (unlikely(vq->broken)) {
1171                 END_USE(vq);
1172                 return -EIO;
1173         }
1174
1175         LAST_ADD_TIME_UPDATE(vq);
1176
1177         BUG_ON(total_sg == 0);
1178
1179         if (virtqueue_use_indirect(vq, total_sg)) {
1180                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1181                                                     in_sgs, data, gfp);
1182                 if (err != -ENOMEM) {
1183                         END_USE(vq);
1184                         return err;
1185                 }
1186
1187                 /* fall back on direct */
1188         }
1189
1190         head = vq->packed.next_avail_idx;
1191         avail_used_flags = vq->packed.avail_used_flags;
1192
1193         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1194
1195         desc = vq->packed.vring.desc;
1196         i = head;
1197         descs_used = total_sg;
1198
1199         if (unlikely(vq->vq.num_free < descs_used)) {
1200                 pr_debug("Can't add buf len %i - avail = %i\n",
1201                          descs_used, vq->vq.num_free);
1202                 END_USE(vq);
1203                 return -ENOSPC;
1204         }
1205
1206         id = vq->free_head;
1207         BUG_ON(id == vq->packed.vring.num);
1208
1209         curr = id;
1210         c = 0;
1211         for (n = 0; n < out_sgs + in_sgs; n++) {
1212                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1213                         dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1214                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1215                         if (vring_mapping_error(vq, addr))
1216                                 goto unmap_release;
1217
1218                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1219                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1220                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1221                         if (i == head)
1222                                 head_flags = flags;
1223                         else
1224                                 desc[i].flags = flags;
1225
1226                         desc[i].addr = cpu_to_le64(addr);
1227                         desc[i].len = cpu_to_le32(sg->length);
1228                         desc[i].id = cpu_to_le16(id);
1229
1230                         if (unlikely(vq->use_dma_api)) {
1231                                 vq->packed.desc_extra[curr].addr = addr;
1232                                 vq->packed.desc_extra[curr].len = sg->length;
1233                                 vq->packed.desc_extra[curr].flags =
1234                                         le16_to_cpu(flags);
1235                         }
1236                         prev = curr;
1237                         curr = vq->packed.desc_extra[curr].next;
1238
1239                         if ((unlikely(++i >= vq->packed.vring.num))) {
1240                                 i = 0;
1241                                 vq->packed.avail_used_flags ^=
1242                                         1 << VRING_PACKED_DESC_F_AVAIL |
1243                                         1 << VRING_PACKED_DESC_F_USED;
1244                         }
1245                 }
1246         }
1247
1248         if (i < head)
1249                 vq->packed.avail_wrap_counter ^= 1;
1250
1251         /* We're using some buffers from the free list. */
1252         vq->vq.num_free -= descs_used;
1253
1254         /* Update free pointer */
1255         vq->packed.next_avail_idx = i;
1256         vq->free_head = curr;
1257
1258         /* Store token. */
1259         vq->packed.desc_state[id].num = descs_used;
1260         vq->packed.desc_state[id].data = data;
1261         vq->packed.desc_state[id].indir_desc = ctx;
1262         vq->packed.desc_state[id].last = prev;
1263
1264         /*
1265          * A driver MUST NOT make the first descriptor in the list
1266          * available before all subsequent descriptors comprising
1267          * the list are made available.
1268          */
1269         virtio_wmb(vq->weak_barriers);
1270         vq->packed.vring.desc[head].flags = head_flags;
1271         vq->num_added += descs_used;
1272
1273         pr_debug("Added buffer head %i to %p\n", head, vq);
1274         END_USE(vq);
1275
1276         return 0;
1277
1278 unmap_release:
1279         err_idx = i;
1280         i = head;
1281         curr = vq->free_head;
1282
1283         vq->packed.avail_used_flags = avail_used_flags;
1284
1285         for (n = 0; n < total_sg; n++) {
1286                 if (i == err_idx)
1287                         break;
1288                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1289                 curr = vq->packed.desc_extra[curr].next;
1290                 i++;
1291                 if (i >= vq->packed.vring.num)
1292                         i = 0;
1293         }
1294
1295         END_USE(vq);
1296         return -EIO;
1297 }
1298
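/*
 * The device event suppression structure (off_wrap + flags) is read as a
 * single 32-bit load via the snapshot union so both 16-bit fields are seen
 * consistently.  With VRING_PACKED_EVENT_FLAG_DESC the device asks to be
 * kicked only once a specific descriptor index (qualified by the wrap
 * counter) has been made available.
 */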
1299 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1300 {
1301         struct vring_virtqueue *vq = to_vvq(_vq);
1302         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1303         bool needs_kick;
1304         union {
1305                 struct {
1306                         __le16 off_wrap;
1307                         __le16 flags;
1308                 };
1309                 u32 u32;
1310         } snapshot;
1311
1312         START_USE(vq);
1313
1314         /*
1315          * We need to expose the new flags value before checking notification
1316          * suppressions.
1317          */
1318         virtio_mb(vq->weak_barriers);
1319
1320         old = vq->packed.next_avail_idx - vq->num_added;
1321         new = vq->packed.next_avail_idx;
1322         vq->num_added = 0;
1323
1324         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1325         flags = le16_to_cpu(snapshot.flags);
1326
1327         LAST_ADD_TIME_CHECK(vq);
1328         LAST_ADD_TIME_INVALID(vq);
1329
1330         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1331                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1332                 goto out;
1333         }
1334
1335         off_wrap = le16_to_cpu(snapshot.off_wrap);
1336
1337         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1338         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1339         if (wrap_counter != vq->packed.avail_wrap_counter)
1340                 event_idx -= vq->packed.vring.num;
1341
1342         needs_kick = vring_need_event(event_idx, new, old);
1343 out:
1344         END_USE(vq);
1345         return needs_kick;
1346 }
1347
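/*
 * Return a used packed-ring chain to the free list, unmapping each element
 * from the desc_extra shadow and freeing the indirect table if one was
 * used.
 */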
1348 static void detach_buf_packed(struct vring_virtqueue *vq,
1349                               unsigned int id, void **ctx)
1350 {
1351         struct vring_desc_state_packed *state = NULL;
1352         struct vring_packed_desc *desc;
1353         unsigned int i, curr;
1354
1355         state = &vq->packed.desc_state[id];
1356
1357         /* Clear data ptr. */
1358         state->data = NULL;
1359
1360         vq->packed.desc_extra[state->last].next = vq->free_head;
1361         vq->free_head = id;
1362         vq->vq.num_free += state->num;
1363
1364         if (unlikely(vq->use_dma_api)) {
1365                 curr = id;
1366                 for (i = 0; i < state->num; i++) {
1367                         vring_unmap_extra_packed(vq,
1368                                                  &vq->packed.desc_extra[curr]);
1369                         curr = vq->packed.desc_extra[curr].next;
1370                 }
1371         }
1372
1373         if (vq->indirect) {
1374                 u32 len;
1375
1376                 /* Free the indirect table, if any, now that it's unmapped. */
1377                 desc = state->indir_desc;
1378                 if (!desc)
1379                         return;
1380
1381                 if (vq->use_dma_api) {
1382                         len = vq->packed.desc_extra[id].len;
1383                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1384                                         i++)
1385                                 vring_unmap_desc_packed(vq, &desc[i]);
1386                 }
1387                 kfree(desc);
1388                 state->indir_desc = NULL;
1389         } else if (ctx) {
1390                 *ctx = state->indir_desc;
1391         }
1392 }
1393
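/*
 * In a packed ring a descriptor has been used by the device when its AVAIL
 * and USED bits are equal and match the wrap counter we expect; the device
 * writes both bits equal to its current used wrap counter.
 */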
1394 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1395                                        u16 idx, bool used_wrap_counter)
1396 {
1397         bool avail, used;
1398         u16 flags;
1399
1400         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1401         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1402         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1403
1404         return avail == used && used == used_wrap_counter;
1405 }
1406
1407 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1408 {
1409         return is_used_desc_packed(vq, vq->last_used_idx,
1410                         vq->packed.used_wrap_counter);
1411 }
1412
1413 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1414                                           unsigned int *len,
1415                                           void **ctx)
1416 {
1417         struct vring_virtqueue *vq = to_vvq(_vq);
1418         u16 last_used, id;
1419         void *ret;
1420
1421         START_USE(vq);
1422
1423         if (unlikely(vq->broken)) {
1424                 END_USE(vq);
1425                 return NULL;
1426         }
1427
1428         if (!more_used_packed(vq)) {
1429                 pr_debug("No more buffers in queue\n");
1430                 END_USE(vq);
1431                 return NULL;
1432         }
1433
1434         /* Only get used elements after they have been exposed by host. */
1435         virtio_rmb(vq->weak_barriers);
1436
1437         last_used = vq->last_used_idx;
1438         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1439         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1440
1441         if (unlikely(id >= vq->packed.vring.num)) {
1442                 BAD_RING(vq, "id %u out of range\n", id);
1443                 return NULL;
1444         }
1445         if (unlikely(!vq->packed.desc_state[id].data)) {
1446                 BAD_RING(vq, "id %u is not a head!\n", id);
1447                 return NULL;
1448         }
1449
1450         /* detach_buf_packed clears data, so grab it now. */
1451         ret = vq->packed.desc_state[id].data;
1452         detach_buf_packed(vq, id, ctx);
1453
1454         vq->last_used_idx += vq->packed.desc_state[id].num;
1455         if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
1456                 vq->last_used_idx -= vq->packed.vring.num;
1457                 vq->packed.used_wrap_counter ^= 1;
1458         }
1459
1460         /*
1461          * If we expect an interrupt for the next entry, tell host
1462          * by writing event index and flush out the write before
1463          * the read in the next get_buf call.
1464          */
1465         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1466                 virtio_store_mb(vq->weak_barriers,
1467                                 &vq->packed.vring.driver->off_wrap,
1468                                 cpu_to_le16(vq->last_used_idx |
1469                                         (vq->packed.used_wrap_counter <<
1470                                          VRING_PACKED_EVENT_F_WRAP_CTR)));
1471
1472         LAST_ADD_TIME_INVALID(vq);
1473
1474         END_USE(vq);
1475         return ret;
1476 }
1477
1478 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1479 {
1480         struct vring_virtqueue *vq = to_vvq(_vq);
1481
1482         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1483                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1484                 vq->packed.vring.driver->flags =
1485                         cpu_to_le16(vq->packed.event_flags_shadow);
1486         }
1487 }
1488
1489 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1490 {
1491         struct vring_virtqueue *vq = to_vvq(_vq);
1492
1493         START_USE(vq);
1494
1495         /*
1496          * We optimistically turn back on interrupts, then check if there was
1497          * more to do.
1498          */
1499
1500         if (vq->event) {
1501                 vq->packed.vring.driver->off_wrap =
1502                         cpu_to_le16(vq->last_used_idx |
1503                                 (vq->packed.used_wrap_counter <<
1504                                  VRING_PACKED_EVENT_F_WRAP_CTR));
1505                 /*
1506                  * We need to update event offset and event wrap
1507                  * counter first before updating event flags.
1508                  */
1509                 virtio_wmb(vq->weak_barriers);
1510         }
1511
1512         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1513                 vq->packed.event_flags_shadow = vq->event ?
1514                                 VRING_PACKED_EVENT_FLAG_DESC :
1515                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1516                 vq->packed.vring.driver->flags =
1517                                 cpu_to_le16(vq->packed.event_flags_shadow);
1518         }
1519
1520         END_USE(vq);
1521         return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
1522                         VRING_PACKED_EVENT_F_WRAP_CTR);
1523 }
1524
1525 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1526 {
1527         struct vring_virtqueue *vq = to_vvq(_vq);
1528         bool wrap_counter;
1529         u16 used_idx;
1530
1531         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1532         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1533
1534         return is_used_desc_packed(vq, used_idx, wrap_counter);
1535 }
1536
1537 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1538 {
1539         struct vring_virtqueue *vq = to_vvq(_vq);
1540         u16 used_idx, wrap_counter;
1541         u16 bufs;
1542
1543         START_USE(vq);
1544
1545         /*
1546          * We optimistically turn back on interrupts, then check if there was
1547          * more to do.
1548          */
1549
1550         if (vq->event) {
1551                 /* TODO: tune this threshold */
1552                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1553                 wrap_counter = vq->packed.used_wrap_counter;
1554
1555                 used_idx = vq->last_used_idx + bufs;
1556                 if (used_idx >= vq->packed.vring.num) {
1557                         used_idx -= vq->packed.vring.num;
1558                         wrap_counter ^= 1;
1559                 }
1560
1561                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1562                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1563
1564                 /*
1565                  * We need to update event offset and event wrap
1566                  * counter first before updating event flags.
1567                  */
1568                 virtio_wmb(vq->weak_barriers);
1569         }
1570
1571         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1572                 vq->packed.event_flags_shadow = vq->event ?
1573                                 VRING_PACKED_EVENT_FLAG_DESC :
1574                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1575                 vq->packed.vring.driver->flags =
1576                                 cpu_to_le16(vq->packed.event_flags_shadow);
1577         }
1578
1579         /*
1580          * We need to update event suppression structure first
1581          * before re-checking for more used buffers.
1582          */
1583         virtio_mb(vq->weak_barriers);
1584
1585         if (is_used_desc_packed(vq,
1586                                 vq->last_used_idx,
1587                                 vq->packed.used_wrap_counter)) {
1588                 END_USE(vq);
1589                 return false;
1590         }
1591
1592         END_USE(vq);
1593         return true;
1594 }
1595
1596 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1597 {
1598         struct vring_virtqueue *vq = to_vvq(_vq);
1599         unsigned int i;
1600         void *buf;
1601
1602         START_USE(vq);
1603
1604         for (i = 0; i < vq->packed.vring.num; i++) {
1605                 if (!vq->packed.desc_state[i].data)
1606                         continue;
1607                 /* detach_buf clears data, so grab it now. */
1608                 buf = vq->packed.desc_state[i].data;
1609                 detach_buf_packed(vq, i, NULL);
1610                 END_USE(vq);
1611                 return buf;
1612         }
1613         /* That should have freed everything. */
1614         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1615
1616         END_USE(vq);
1617         return NULL;
1618 }
1619
1620 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1621                                                        unsigned int num)
1622 {
1623         struct vring_desc_extra *desc_extra;
1624         unsigned int i;
1625
1626         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1627                                    GFP_KERNEL);
1628         if (!desc_extra)
1629                 return NULL;
1630
1631         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1632
1633         for (i = 0; i < num - 1; i++)
1634                 desc_extra[i].next = i + 1;
1635
1636         return desc_extra;
1637 }
1638
1639 static struct virtqueue *vring_create_virtqueue_packed(
1640         unsigned int index,
1641         unsigned int num,
1642         unsigned int vring_align,
1643         struct virtio_device *vdev,
1644         bool weak_barriers,
1645         bool may_reduce_num,
1646         bool context,
1647         bool (*notify)(struct virtqueue *),
1648         void (*callback)(struct virtqueue *),
1649         const char *name)
1650 {
1651         struct vring_virtqueue *vq;
1652         struct vring_packed_desc *ring;
1653         struct vring_packed_desc_event *driver, *device;
1654         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1655         size_t ring_size_in_bytes, event_size_in_bytes;
1656
1657         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1658
1659         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1660                                  &ring_dma_addr,
1661                                  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1662         if (!ring)
1663                 goto err_ring;
1664
1665         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1666
1667         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1668                                    &driver_event_dma_addr,
1669                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1670         if (!driver)
1671                 goto err_driver;
1672
1673         device = vring_alloc_queue(vdev, event_size_in_bytes,
1674                                    &device_event_dma_addr,
1675                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1676         if (!device)
1677                 goto err_device;
1678
1679         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1680         if (!vq)
1681                 goto err_vq;
1682
1683         vq->vq.callback = callback;
1684         vq->vq.vdev = vdev;
1685         vq->vq.name = name;
1686         vq->vq.num_free = num;
1687         vq->vq.index = index;
1688         vq->we_own_ring = true;
1689         vq->notify = notify;
1690         vq->weak_barriers = weak_barriers;
1691         vq->broken = true;
1692         vq->last_used_idx = 0;
1693         vq->event_triggered = false;
1694         vq->num_added = 0;
1695         vq->packed_ring = true;
1696         vq->use_dma_api = vring_use_dma_api(vdev);
1697 #ifdef DEBUG
1698         vq->in_use = false;
1699         vq->last_add_time_valid = false;
1700 #endif
1701
1702         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1703                 !context;
1704         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1705
1706         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1707                 vq->weak_barriers = false;
1708
1709         vq->packed.ring_dma_addr = ring_dma_addr;
1710         vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1711         vq->packed.device_event_dma_addr = device_event_dma_addr;
1712
1713         vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1714         vq->packed.event_size_in_bytes = event_size_in_bytes;
1715
1716         vq->packed.vring.num = num;
1717         vq->packed.vring.desc = ring;
1718         vq->packed.vring.driver = driver;
1719         vq->packed.vring.device = device;
1720
1721         vq->packed.next_avail_idx = 0;
1722         vq->packed.avail_wrap_counter = 1;
1723         vq->packed.used_wrap_counter = 1;
1724         vq->packed.event_flags_shadow = 0;
1725         vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1726
1727         vq->packed.desc_state = kmalloc_array(num,
1728                         sizeof(struct vring_desc_state_packed),
1729                         GFP_KERNEL);
1730         if (!vq->packed.desc_state)
1731                 goto err_desc_state;
1732
1733         memset(vq->packed.desc_state, 0,
1734                 num * sizeof(struct vring_desc_state_packed));
1735
1736         /* Put everything in free lists. */
1737         vq->free_head = 0;
1738
1739         vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1740         if (!vq->packed.desc_extra)
1741                 goto err_desc_extra;
1742
1743         /* No callback?  Tell other side not to bother us. */
1744         if (!callback) {
1745                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1746                 vq->packed.vring.driver->flags =
1747                         cpu_to_le16(vq->packed.event_flags_shadow);
1748         }
1749
1750         spin_lock(&vdev->vqs_list_lock);
1751         list_add_tail(&vq->vq.list, &vdev->vqs);
1752         spin_unlock(&vdev->vqs_list_lock);
1753         return &vq->vq;
1754
1755 err_desc_extra:
1756         kfree(vq->packed.desc_state);
1757 err_desc_state:
1758         kfree(vq);
1759 err_vq:
1760         vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1761 err_device:
1762         vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1763 err_driver:
1764         vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1765 err_ring:
1766         return NULL;
1767 }
1768
1770 /*
1771  * Generic functions and exported symbols.
1772  */
1773
1774 static inline int virtqueue_add(struct virtqueue *_vq,
1775                                 struct scatterlist *sgs[],
1776                                 unsigned int total_sg,
1777                                 unsigned int out_sgs,
1778                                 unsigned int in_sgs,
1779                                 void *data,
1780                                 void *ctx,
1781                                 gfp_t gfp)
1782 {
1783         struct vring_virtqueue *vq = to_vvq(_vq);
1784
1785         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1786                                         out_sgs, in_sgs, data, ctx, gfp) :
1787                                  virtqueue_add_split(_vq, sgs, total_sg,
1788                                         out_sgs, in_sgs, data, ctx, gfp);
1789 }
1790
1791 /**
1792  * virtqueue_add_sgs - expose buffers to other end
1793  * @_vq: the struct virtqueue we're talking about.
1794  * @sgs: array of terminated scatterlists.
1795  * @out_sgs: the number of scatterlists readable by other side
1796  * @in_sgs: the number of scatterlists which are writable (after readable ones)
1797  * @data: the token identifying the buffer.
1798  * @gfp: how to do memory allocations (if necessary).
1799  *
1800  * Caller must ensure we don't call this with other virtqueue operations
1801  * at the same time (except where noted).
1802  *
1803  * Returns zero or a negative error (e.g. ENOSPC, ENOMEM, EIO).
1804  */
1805 int virtqueue_add_sgs(struct virtqueue *_vq,
1806                       struct scatterlist *sgs[],
1807                       unsigned int out_sgs,
1808                       unsigned int in_sgs,
1809                       void *data,
1810                       gfp_t gfp)
1811 {
1812         unsigned int i, total_sg = 0;
1813
1814         /* Count them first. */
1815         for (i = 0; i < out_sgs + in_sgs; i++) {
1816                 struct scatterlist *sg;
1817
1818                 for (sg = sgs[i]; sg; sg = sg_next(sg))
1819                         total_sg++;
1820         }
1821         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1822                              data, NULL, gfp);
1823 }
1824 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
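
/*
 * A minimal usage sketch, assuming a driver-owned "vq" plus DMA-capable
 * "req" and "status" buffers (all names here are illustrative, not part of
 * this file): queue one device-readable request followed by one
 * device-writable status buffer, then kick.
 *
 *	struct scatterlist req_sg, status_sg, *sgs[2];
 *	int err;
 *
 *	sg_init_one(&req_sg, req, sizeof(*req));
 *	sg_init_one(&status_sg, status, sizeof(*status));
 *	sgs[0] = &req_sg;
 *	sgs[1] = &status_sg;
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_KERNEL);
 *	if (err)
 *		return err;
 *	virtqueue_kick(vq);
 */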
1825
1826 /**
1827  * virtqueue_add_outbuf - expose output buffers to other end
1828  * @vq: the struct virtqueue we're talking about.
1829  * @sg: scatterlist (must be well-formed and terminated!)
1830  * @num: the number of entries in @sg readable by other side
1831  * @data: the token identifying the buffer.
1832  * @gfp: how to do memory allocations (if necessary).
1833  *
1834  * Caller must ensure we don't call this with other virtqueue operations
1835  * at the same time (except where noted).
1836  *
1837  * Returns zero or a negative error (e.g. ENOSPC, ENOMEM, EIO).
1838  */
1839 int virtqueue_add_outbuf(struct virtqueue *vq,
1840                          struct scatterlist *sg, unsigned int num,
1841                          void *data,
1842                          gfp_t gfp)
1843 {
1844         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1845 }
1846 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
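
/*
 * A minimal transmit sketch, assuming a driver-owned "tx_vq" and a
 * DMA-capable "buf" of "len" bytes (illustrative names only):
 *
 *	struct scatterlist sg;
 *	int err;
 *
 *	sg_init_one(&sg, buf, len);
 *	err = virtqueue_add_outbuf(tx_vq, &sg, 1, buf, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(tx_vq);
 */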
1847
1848 /**
1849  * virtqueue_add_inbuf - expose input buffers to other end
1850  * @vq: the struct virtqueue we're talking about.
1851  * @sg: scatterlist (must be well-formed and terminated!)
1852  * @num: the number of entries in @sg writable by other side
1853  * @data: the token identifying the buffer.
1854  * @gfp: how to do memory allocations (if necessary).
1855  *
1856  * Caller must ensure we don't call this with other virtqueue operations
1857  * at the same time (except where noted).
1858  *
1859  * Returns zero or a negative error (e.g. ENOSPC, ENOMEM, EIO).
1860  */
1861 int virtqueue_add_inbuf(struct virtqueue *vq,
1862                         struct scatterlist *sg, unsigned int num,
1863                         void *data,
1864                         gfp_t gfp)
1865 {
1866         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1867 }
1868 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
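
/*
 * A minimal receive-refill sketch, assuming a driver-owned "rx_vq" and a
 * freshly allocated, DMA-capable "buf" of "buf_len" bytes (illustrative
 * names only); the buffer comes back via virtqueue_get_buf() once the
 * device has written into it.
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	if (!virtqueue_add_inbuf(rx_vq, &sg, 1, buf, GFP_KERNEL))
 *		virtqueue_kick(rx_vq);
 */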
1869
1870 /**
1871  * virtqueue_add_inbuf_ctx - expose input buffers to other end
1872  * @vq: the struct virtqueue we're talking about.
1873  * @sg: scatterlist (must be well-formed and terminated!)
1874  * @num: the number of entries in @sg writable by other side
1875  * @data: the token identifying the buffer.
1876  * @ctx: extra context for the token
1877  * @gfp: how to do memory allocations (if necessary).
1878  *
1879  * Caller must ensure we don't call this with other virtqueue operations
1880  * at the same time (except where noted).
1881  *
1882  * Returns zero or a negative error (e.g. ENOSPC, ENOMEM, EIO).
1883  */
1884 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1885                         struct scatterlist *sg, unsigned int num,
1886                         void *data,
1887                         void *ctx,
1888                         gfp_t gfp)
1889 {
1890         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1891 }
1892 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
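
/*
 * Same pattern as above, but with a per-buffer context pointer: whatever is
 * passed as @ctx here is handed back by virtqueue_get_buf_ctx() together
 * with the data token ("rx_vq", "buf" and "my_ctx" are illustrative only).
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	err = virtqueue_add_inbuf_ctx(rx_vq, &sg, 1, buf, my_ctx, GFP_KERNEL);
 */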
1893
1894 /**
1895  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1896  * @_vq: the struct virtqueue
1897  *
1898  * Instead of virtqueue_kick(), you can do:
1899  *      if (virtqueue_kick_prepare(vq))
1900  *              virtqueue_notify(vq);
1901  *
1902  * This is sometimes useful because virtqueue_kick_prepare() needs to be
1903  * serialized, but the actual virtqueue_notify() call does not.
1904  */
1905 bool virtqueue_kick_prepare(struct virtqueue *_vq)
1906 {
1907         struct vring_virtqueue *vq = to_vvq(_vq);
1908
1909         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1910                                  virtqueue_kick_prepare_split(_vq);
1911 }
1912 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
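
/*
 * A sketch of why the split is useful: the notification itself can be
 * expensive (often a trap into the hypervisor), so a driver may drop its
 * own lock before notifying ("lock", "flags", "sg" and "buf" are
 * illustrative only):
 *
 *	bool kick;
 *
 *	spin_lock_irqsave(&lock, flags);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&lock, flags);
 *	if (kick)
 *		virtqueue_notify(vq);
 */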
1913
1914 /**
1915  * virtqueue_notify - second half of split virtqueue_kick call.
1916  * @_vq: the struct virtqueue
1917  *
1918  * This does not need to be serialized.
1919  *
1920  * Returns false if host notify failed or queue is broken, otherwise true.
1921  */
1922 bool virtqueue_notify(struct virtqueue *_vq)
1923 {
1924         struct vring_virtqueue *vq = to_vvq(_vq);
1925
1926         if (unlikely(vq->broken))
1927                 return false;
1928
1929         /* Prod other side to tell it about changes. */
1930         if (!vq->notify(_vq)) {
1931                 vq->broken = true;
1932                 return false;
1933         }
1934         return true;
1935 }
1936 EXPORT_SYMBOL_GPL(virtqueue_notify);
1937
1938 /**
1939  * virtqueue_kick - update after add_buf
1940  * @vq: the struct virtqueue
1941  *
1942  * After one or more virtqueue_add_* calls, invoke this to kick
1943  * the other side.
1944  *
1945  * Caller must ensure we don't call this with other virtqueue
1946  * operations at the same time (except where noted).
1947  *
1948  * Returns false if kick failed, otherwise true.
1949  */
1950 bool virtqueue_kick(struct virtqueue *vq)
1951 {
1952         if (virtqueue_kick_prepare(vq))
1953                 return virtqueue_notify(vq);
1954         return true;
1955 }
1956 EXPORT_SYMBOL_GPL(virtqueue_kick);
1957
1958 /**
1959  * virtqueue_get_buf_ctx - get the next used buffer
1960  * @_vq: the struct virtqueue we're talking about.
1961  * @len: the length written into the buffer
1962  * @ctx: extra context for the token
1963  *
1964  * If the device wrote data into the buffer, @len will be set to the
1965  * amount written.  This means you don't need to clear the buffer
1966  * beforehand to ensure there's no data leakage in the case of short
1967  * writes.
1968  *
1969  * Caller must ensure we don't call this with other virtqueue
1970  * operations at the same time (except where noted).
1971  *
1972  * Returns NULL if there are no used buffers, or the "data" token
1973  * handed to virtqueue_add_*().
1974  */
1975 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1976                             void **ctx)
1977 {
1978         struct vring_virtqueue *vq = to_vvq(_vq);
1979
1980         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
1981                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
1982 }
1983 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
1984
1985 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
1986 {
1987         return virtqueue_get_buf_ctx(_vq, len, NULL);
1988 }
1989 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
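
/*
 * Typical completion handling in a driver's virtqueue callback, as a sketch
 * ("consume()" is a placeholder for driver-specific buffer handling):
 *
 *	unsigned int len;
 *	void *buf;
 *
 *	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *		consume(buf, len);
 */
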
1990 /**
1991  * virtqueue_disable_cb - disable callbacks
1992  * @_vq: the struct virtqueue we're talking about.
1993  *
1994  * Note that this is not necessarily synchronous, hence unreliable and only
1995  * useful as an optimization.
1996  *
1997  * Unlike other operations, this need not be serialized.
1998  */
1999 void virtqueue_disable_cb(struct virtqueue *_vq)
2000 {
2001         struct vring_virtqueue *vq = to_vvq(_vq);
2002
2003         /* If device triggered an event already it won't trigger one again:
2004          * no need to disable.
2005          */
2006         if (vq->event_triggered)
2007                 return;
2008
2009         if (vq->packed_ring)
2010                 virtqueue_disable_cb_packed(_vq);
2011         else
2012                 virtqueue_disable_cb_split(_vq);
2013 }
2014 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2015
2016 /**
2017  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2018  * @_vq: the struct virtqueue we're talking about.
2019  *
2020  * This re-enables callbacks; it returns the current queue state
2021  * in an opaque unsigned value. This value should later be tested by
2022  * virtqueue_poll, to detect a possible race between the driver checking for
2023  * more work, and enabling callbacks.
2024  *
2025  * Caller must ensure we don't call this with other virtqueue
2026  * operations at the same time (except where noted).
2027  */
2028 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2029 {
2030         struct vring_virtqueue *vq = to_vvq(_vq);
2031
2032         if (vq->event_triggered)
2033                 vq->event_triggered = false;
2034
2035         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2036                                  virtqueue_enable_cb_prepare_split(_vq);
2037 }
2038 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
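
/*
 * A sketch of the race-free re-enable pattern built on
 * virtqueue_enable_cb_prepare()/virtqueue_poll(); "process_used()" is a
 * placeholder for draining the queue with virtqueue_get_buf():
 *
 *	unsigned int opaque;
 *
 *	virtqueue_disable_cb(vq);
 *	process_used(vq);
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	if (virtqueue_poll(vq, opaque)) {
 *		// a buffer slipped in after the check: keep processing
 *		virtqueue_disable_cb(vq);
 *		process_used(vq);
 *	}
 */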
2039
2040 /**
2041  * virtqueue_poll - query pending used buffers
2042  * @_vq: the struct virtqueue we're talking about.
2043  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2044  *
2045  * Returns "true" if there are pending used buffers in the queue.
2046  *
2047  * This does not need to be serialized.
2048  */
2049 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2050 {
2051         struct vring_virtqueue *vq = to_vvq(_vq);
2052
2053         if (unlikely(vq->broken))
2054                 return false;
2055
2056         virtio_mb(vq->weak_barriers);
2057         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2058                                  virtqueue_poll_split(_vq, last_used_idx);
2059 }
2060 EXPORT_SYMBOL_GPL(virtqueue_poll);
2061
2062 /**
2063  * virtqueue_enable_cb - restart callbacks after disable_cb.
2064  * @_vq: the struct virtqueue we're talking about.
2065  *
2066  * This re-enables callbacks; it returns "false" if there are pending
2067  * buffers in the queue, to detect a possible race between the driver
2068  * checking for more work, and enabling callbacks.
2069  *
2070  * Caller must ensure we don't call this with other virtqueue
2071  * operations at the same time (except where noted).
2072  */
2073 bool virtqueue_enable_cb(struct virtqueue *_vq)
2074 {
2075         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2076
2077         return !virtqueue_poll(_vq, last_used_idx);
2078 }
2079 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2080
2081 /**
2082  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2083  * @_vq: the struct virtqueue we're talking about.
2084  *
2085  * This re-enables callbacks but hints to the other side to delay
2086  * interrupts until most of the available buffers have been processed;
2087  * it returns "false" if there are many pending buffers in the queue,
2088  * to detect a possible race between the driver checking for more work,
2089  * and enabling callbacks.
2090  *
2091  * Caller must ensure we don't call this with other virtqueue
2092  * operations at the same time (except where noted).
2093  */
2094 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2095 {
2096         struct vring_virtqueue *vq = to_vvq(_vq);
2097
2098         if (vq->event_triggered)
2099                 vq->event_triggered = false;
2100
2101         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2102                                  virtqueue_enable_cb_delayed_split(_vq);
2103 }
2104 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
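
/*
 * A transmit-completion sketch: when the ring runs low on free entries, ask
 * for an interrupt only once most in-flight buffers have been used, and
 * re-check once to close the race ("LOW_WATERMARK", "stop_tx()", "wake_tx()"
 * and "reclaim()" are placeholders):
 *
 *	if (tx_vq->num_free < LOW_WATERMARK) {
 *		stop_tx();
 *		if (!virtqueue_enable_cb_delayed(tx_vq)) {
 *			// buffers were already used: reclaim and restart now
 *			reclaim(tx_vq);
 *			wake_tx();
 *		}
 *	}
 */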
2105
2106 /**
2107  * virtqueue_detach_unused_buf - detach first unused buffer
2108  * @_vq: the struct virtqueue we're talking about.
2109  *
2110  * Returns NULL or the "data" token handed to virtqueue_add_*().
2111  * This is not valid on an active queue; it is useful only for device
2112  * shutdown.
2113  */
2114 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2115 {
2116         struct vring_virtqueue *vq = to_vvq(_vq);
2117
2118         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2119                                  virtqueue_detach_unused_buf_split(_vq);
2120 }
2121 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
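
/*
 * A shutdown-time sketch: once the device has been reset (so the queue is no
 * longer active), reclaim and free any buffers that were never used
 * ("free_buf()" is a placeholder):
 *
 *	void *buf;
 *
 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *		free_buf(buf);
 */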
2122
2123 static inline bool more_used(const struct vring_virtqueue *vq)
2124 {
2125         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2126 }
2127
2128 irqreturn_t vring_interrupt(int irq, void *_vq)
2129 {
2130         struct vring_virtqueue *vq = to_vvq(_vq);
2131
2132         if (!more_used(vq)) {
2133                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2134                 return IRQ_NONE;
2135         }
2136
2137         if (unlikely(vq->broken)) {
2138                 dev_warn_once(&vq->vq.vdev->dev,
2139                               "virtio vring IRQ raised before DRIVER_OK");
2140                 return IRQ_NONE;
2141         }
2142
2143         /* Just a hint for performance, so it's OK that this can be racy! */
2144         if (vq->event)
2145                 vq->event_triggered = true;
2146
2147         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2148         if (vq->vq.callback)
2149                 vq->vq.callback(&vq->vq);
2150
2151         return IRQ_HANDLED;
2152 }
2153 EXPORT_SYMBOL_GPL(vring_interrupt);
2154
2155 /* Only available for split ring */
2156 struct virtqueue *__vring_new_virtqueue(unsigned int index,
2157                                         struct vring vring,
2158                                         struct virtio_device *vdev,
2159                                         bool weak_barriers,
2160                                         bool context,
2161                                         bool (*notify)(struct virtqueue *),
2162                                         void (*callback)(struct virtqueue *),
2163                                         const char *name)
2164 {
2165         struct vring_virtqueue *vq;
2166
2167         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2168                 return NULL;
2169
2170         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2171         if (!vq)
2172                 return NULL;
2173
2174         vq->packed_ring = false;
2175         vq->vq.callback = callback;
2176         vq->vq.vdev = vdev;
2177         vq->vq.name = name;
2178         vq->vq.num_free = vring.num;
2179         vq->vq.index = index;
2180         vq->we_own_ring = false;
2181         vq->notify = notify;
2182         vq->weak_barriers = weak_barriers;
2183         vq->broken = true;
2184         vq->last_used_idx = 0;
2185         vq->event_triggered = false;
2186         vq->num_added = 0;
2187         vq->use_dma_api = vring_use_dma_api(vdev);
2188 #ifdef DEBUG
2189         vq->in_use = false;
2190         vq->last_add_time_valid = false;
2191 #endif
2192
2193         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2194                 !context;
2195         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2196
2197         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2198                 vq->weak_barriers = false;
2199
2200         vq->split.queue_dma_addr = 0;
2201         vq->split.queue_size_in_bytes = 0;
2202
2203         vq->split.vring = vring;
2204         vq->split.avail_flags_shadow = 0;
2205         vq->split.avail_idx_shadow = 0;
2206
2207         /* No callback?  Tell other side not to bother us. */
2208         if (!callback) {
2209                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2210                 if (!vq->event)
2211                         vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2212                                         vq->split.avail_flags_shadow);
2213         }
2214
2215         vq->split.desc_state = kmalloc_array(vring.num,
2216                         sizeof(struct vring_desc_state_split), GFP_KERNEL);
2217         if (!vq->split.desc_state)
2218                 goto err_state;
2219
2220         vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2221         if (!vq->split.desc_extra)
2222                 goto err_extra;
2223
2224         /* Put everything in free lists. */
2225         vq->free_head = 0;
2226         memset(vq->split.desc_state, 0, vring.num *
2227                         sizeof(struct vring_desc_state_split));
2228
2229         spin_lock(&vdev->vqs_list_lock);
2230         list_add_tail(&vq->vq.list, &vdev->vqs);
2231         spin_unlock(&vdev->vqs_list_lock);
2232         return &vq->vq;
2233
2234 err_extra:
2235         kfree(vq->split.desc_state);
2236 err_state:
2237         kfree(vq);
2238         return NULL;
2239 }
2240 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2241
2242 struct virtqueue *vring_create_virtqueue(
2243         unsigned int index,
2244         unsigned int num,
2245         unsigned int vring_align,
2246         struct virtio_device *vdev,
2247         bool weak_barriers,
2248         bool may_reduce_num,
2249         bool context,
2250         bool (*notify)(struct virtqueue *),
2251         void (*callback)(struct virtqueue *),
2252         const char *name)
2253 {
2255         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2256                 return vring_create_virtqueue_packed(index, num, vring_align,
2257                                 vdev, weak_barriers, may_reduce_num,
2258                                 context, notify, callback, name);
2259
2260         return vring_create_virtqueue_split(index, num, vring_align,
2261                         vdev, weak_barriers, may_reduce_num,
2262                         context, notify, callback, name);
2263 }
2264 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2265
2266 /* Only available for split ring */
2267 struct virtqueue *vring_new_virtqueue(unsigned int index,
2268                                       unsigned int num,
2269                                       unsigned int vring_align,
2270                                       struct virtio_device *vdev,
2271                                       bool weak_barriers,
2272                                       bool context,
2273                                       void *pages,
2274                                       bool (*notify)(struct virtqueue *vq),
2275                                       void (*callback)(struct virtqueue *vq),
2276                                       const char *name)
2277 {
2278         struct vring vring;
2279
2280         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2281                 return NULL;
2282
2283         vring_init(&vring, num, pages, vring_align);
2284         return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2285                                      notify, callback, name);
2286 }
2287 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2288
2289 void vring_del_virtqueue(struct virtqueue *_vq)
2290 {
2291         struct vring_virtqueue *vq = to_vvq(_vq);
2292
2293         spin_lock(&vq->vq.vdev->vqs_list_lock);
2294         list_del(&_vq->list);
2295         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2296
2297         if (vq->we_own_ring) {
2298                 if (vq->packed_ring) {
2299                         vring_free_queue(vq->vq.vdev,
2300                                          vq->packed.ring_size_in_bytes,
2301                                          vq->packed.vring.desc,
2302                                          vq->packed.ring_dma_addr);
2303
2304                         vring_free_queue(vq->vq.vdev,
2305                                          vq->packed.event_size_in_bytes,
2306                                          vq->packed.vring.driver,
2307                                          vq->packed.driver_event_dma_addr);
2308
2309                         vring_free_queue(vq->vq.vdev,
2310                                          vq->packed.event_size_in_bytes,
2311                                          vq->packed.vring.device,
2312                                          vq->packed.device_event_dma_addr);
2313
2314                         kfree(vq->packed.desc_state);
2315                         kfree(vq->packed.desc_extra);
2316                 } else {
2317                         vring_free_queue(vq->vq.vdev,
2318                                          vq->split.queue_size_in_bytes,
2319                                          vq->split.vring.desc,
2320                                          vq->split.queue_dma_addr);
2321                 }
2322         }
2323         if (!vq->packed_ring) {
2324                 kfree(vq->split.desc_state);
2325                 kfree(vq->split.desc_extra);
2326         }
2327         kfree(vq);
2328 }
2329 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2330
2331 /* Manipulates transport-specific feature bits. */
2332 void vring_transport_features(struct virtio_device *vdev)
2333 {
2334         unsigned int i;
2335
2336         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2337                 switch (i) {
2338                 case VIRTIO_RING_F_INDIRECT_DESC:
2339                         break;
2340                 case VIRTIO_RING_F_EVENT_IDX:
2341                         break;
2342                 case VIRTIO_F_VERSION_1:
2343                         break;
2344                 case VIRTIO_F_ACCESS_PLATFORM:
2345                         break;
2346                 case VIRTIO_F_RING_PACKED:
2347                         break;
2348                 case VIRTIO_F_ORDER_PLATFORM:
2349                         break;
2350                 default:
2351                         /* We don't understand this bit. */
2352                         __virtio_clear_bit(vdev, i);
2353                 }
2354         }
2355 }
2356 EXPORT_SYMBOL_GPL(vring_transport_features);
2357
2358 /**
2359  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2360  * @_vq: the struct virtqueue containing the vring of interest.
2361  *
2362  * Returns the size of the vring.  This is mainly used for boasting to
2363  * userspace.  Unlike other operations, this need not be serialized.
2364  */
2365 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2366 {
2368         struct vring_virtqueue *vq = to_vvq(_vq);
2369
2370         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2371 }
2372 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2373
2374 bool virtqueue_is_broken(struct virtqueue *_vq)
2375 {
2376         struct vring_virtqueue *vq = to_vvq(_vq);
2377
2378         return READ_ONCE(vq->broken);
2379 }
2380 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2381
2382 /*
2383  * This should prevent the device from being used, allowing drivers to
2384  * recover.  You may need to grab appropriate locks to flush.
2385  */
2386 void virtio_break_device(struct virtio_device *dev)
2387 {
2388         struct virtqueue *_vq;
2389
2390         spin_lock(&dev->vqs_list_lock);
2391         list_for_each_entry(_vq, &dev->vqs, list) {
2392                 struct vring_virtqueue *vq = to_vvq(_vq);
2393
2394                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2395                 WRITE_ONCE(vq->broken, true);
2396         }
2397         spin_unlock(&dev->vqs_list_lock);
2398 }
2399 EXPORT_SYMBOL_GPL(virtio_break_device);
2400
2401 /*
2402  * This should allow the device to be used by the driver. You may
2403  * need to grab appropriate locks to flush the write to
2404  * vq->broken. This should only be used in specific cases, e.g.
2405  * probing and restoring. This function should only be called by the
2406  * core, not directly by the driver.
2407  */
2408 void __virtio_unbreak_device(struct virtio_device *dev)
2409 {
2410         struct virtqueue *_vq;
2411
2412         spin_lock(&dev->vqs_list_lock);
2413         list_for_each_entry(_vq, &dev->vqs, list) {
2414                 struct vring_virtqueue *vq = to_vvq(_vq);
2415
2416                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2417                 WRITE_ONCE(vq->broken, false);
2418         }
2419         spin_unlock(&dev->vqs_list_lock);
2420 }
2421 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2422
2423 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2424 {
2425         struct vring_virtqueue *vq = to_vvq(_vq);
2426
2427         BUG_ON(!vq->we_own_ring);
2428
2429         if (vq->packed_ring)
2430                 return vq->packed.ring_dma_addr;
2431
2432         return vq->split.queue_dma_addr;
2433 }
2434 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2435
2436 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2437 {
2438         struct vring_virtqueue *vq = to_vvq(_vq);
2439
2440         BUG_ON(!vq->we_own_ring);
2441
2442         if (vq->packed_ring)
2443                 return vq->packed.driver_event_dma_addr;
2444
2445         return vq->split.queue_dma_addr +
2446                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2447 }
2448 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2449
2450 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2451 {
2452         struct vring_virtqueue *vq = to_vvq(_vq);
2453
2454         BUG_ON(!vq->we_own_ring);
2455
2456         if (vq->packed_ring)
2457                 return vq->packed.device_event_dma_addr;
2458
2459         return vq->split.queue_dma_addr +
2460                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2461 }
2462 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2463
2464 /* Only available for split ring */
2465 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2466 {
2467         return &to_vvq(vq)->split.vring;
2468 }
2469 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2470
2471 MODULE_LICENSE("GPL");