// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtio PCI driver - modern (virtio 1.0) device support
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 */

17 #include <linux/delay.h>
18 #define VIRTIO_PCI_NO_LEGACY
19 #define VIRTIO_RING_NO_LEGACY
20 #include "virtio_pci_common.h"
/*
 * Type-safe wrappers for io accesses.
 * Use these to enforce at compile time the following spec requirement:
 *
 * The driver MUST access each field using the “natural” access
 * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
 * for 16-bit fields and 8-bit accesses for 8-bit fields.
 */
30 static inline u8 vp_ioread8(u8 __iomem *addr)
34 static inline u16 vp_ioread16 (__le16 __iomem *addr)
36 return ioread16(addr);
39 static inline u32 vp_ioread32(__le32 __iomem *addr)
41 return ioread32(addr);
44 static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
46 iowrite8(value, addr);
49 static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
51 iowrite16(value, addr);
54 static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
56 iowrite32(value, addr);
59 static void vp_iowrite64_twopart(u64 val,
60 __le32 __iomem *lo, __le32 __iomem *hi)
62 vp_iowrite32((u32)val, lo);
63 vp_iowrite32(val >> 32, hi);
66 static void __iomem *map_capability(struct pci_dev *dev, int off,
76 pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
79 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
81 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
84 if (length <= start) {
86 "virtio_pci: bad capability len %u (>%u expected)\n",
91 if (length - start < minlen) {
93 "virtio_pci: bad capability len %u (>=%zu expected)\n",
100 if (start + offset < offset) {
102 "virtio_pci: map wrap-around %u+%u\n",
109 if (offset & (align - 1)) {
111 "virtio_pci: offset %u not aligned to %u\n",
122 if (minlen + offset < minlen ||
123 minlen + offset > pci_resource_len(dev, bar)) {
125 "virtio_pci: map virtio %zu@%u "
126 "out of range on bar %i length %lu\n",
128 bar, (unsigned long)pci_resource_len(dev, bar));
132 p = pci_iomap_range(dev, bar, offset, length);
135 "virtio_pci: unable to map virtio %u@%u on bar %i\n",
136 length, offset, bar);
140 /* virtio config->get_features() implementation */
141 static u64 vp_get_features(struct virtio_device *vdev)
143 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
146 vp_iowrite32(0, &vp_dev->common->device_feature_select);
147 features = vp_ioread32(&vp_dev->common->device_feature);
148 vp_iowrite32(1, &vp_dev->common->device_feature_select);
149 features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
154 static void vp_transport_features(struct virtio_device *vdev, u64 features)
156 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
157 struct pci_dev *pci_dev = vp_dev->pci_dev;
159 if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
160 pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
161 __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
164 /* virtio config->finalize_features() implementation */
165 static int vp_finalize_features(struct virtio_device *vdev)
167 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
168 u64 features = vdev->features;
170 /* Give virtio_ring a chance to accept features. */
171 vring_transport_features(vdev);
173 /* Give virtio_pci a chance to accept features. */
174 vp_transport_features(vdev, features);
176 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
177 dev_err(&vdev->dev, "virtio: device uses modern interface "
178 "but does not have VIRTIO_F_VERSION_1\n");
182 vp_iowrite32(0, &vp_dev->common->guest_feature_select);
183 vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
184 vp_iowrite32(1, &vp_dev->common->guest_feature_select);
185 vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
190 /* virtio config->get() implementation */
191 static void vp_get(struct virtio_device *vdev, unsigned offset,
192 void *buf, unsigned len)
194 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
199 BUG_ON(offset + len > vp_dev->device_len);
203 b = ioread8(vp_dev->device + offset);
204 memcpy(buf, &b, sizeof b);
207 w = cpu_to_le16(ioread16(vp_dev->device + offset));
208 memcpy(buf, &w, sizeof w);
211 l = cpu_to_le32(ioread32(vp_dev->device + offset));
212 memcpy(buf, &l, sizeof l);
215 l = cpu_to_le32(ioread32(vp_dev->device + offset));
216 memcpy(buf, &l, sizeof l);
217 l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
218 memcpy(buf + sizeof l, &l, sizeof l);
225 /* the config->set() implementation. it's symmetric to the config->get()
227 static void vp_set(struct virtio_device *vdev, unsigned offset,
228 const void *buf, unsigned len)
230 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
235 BUG_ON(offset + len > vp_dev->device_len);
239 memcpy(&b, buf, sizeof b);
240 iowrite8(b, vp_dev->device + offset);
243 memcpy(&w, buf, sizeof w);
244 iowrite16(le16_to_cpu(w), vp_dev->device + offset);
247 memcpy(&l, buf, sizeof l);
248 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
251 memcpy(&l, buf, sizeof l);
252 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
253 memcpy(&l, buf + sizeof l, sizeof l);
254 iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
261 static u32 vp_generation(struct virtio_device *vdev)
263 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
264 return vp_ioread8(&vp_dev->common->config_generation);
267 /* config->{get,set}_status() implementations */
268 static u8 vp_get_status(struct virtio_device *vdev)
270 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
271 return vp_ioread8(&vp_dev->common->device_status);
274 static void vp_set_status(struct virtio_device *vdev, u8 status)
276 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
277 /* We should never be setting status to 0. */
279 vp_iowrite8(status, &vp_dev->common->device_status);
282 static void vp_reset(struct virtio_device *vdev)
284 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
285 /* 0 status means a reset. */
286 vp_iowrite8(0, &vp_dev->common->device_status);
287 /* After writing 0 to device_status, the driver MUST wait for a read of
288 * device_status to return 0 before reinitializing the device.
289 * This will flush out the status write, and flush in device writes,
290 * including MSI-X interrupts, if any.
292 while (vp_ioread8(&vp_dev->common->device_status))
294 /* Flush pending VQ/configuration callbacks. */
295 vp_synchronize_vectors(vdev);
298 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
300 /* Setup the vector used for configuration events */
301 vp_iowrite16(vector, &vp_dev->common->msix_config);
302 /* Verify we had enough resources to assign the vector */
303 /* Will also flush the write out to device */
304 return vp_ioread16(&vp_dev->common->msix_config);
307 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
308 struct virtio_pci_vq_info *info,
310 void (*callback)(struct virtqueue *vq),
315 struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
316 struct virtqueue *vq;
320 if (index >= vp_ioread16(&cfg->num_queues))
321 return ERR_PTR(-ENOENT);
323 /* Select the queue we're interested in */
324 vp_iowrite16(index, &cfg->queue_select);
326 /* Check if queue is either not available or already active. */
327 num = vp_ioread16(&cfg->queue_size);
328 if (!num || vp_ioread16(&cfg->queue_enable))
329 return ERR_PTR(-ENOENT);
331 if (num & (num - 1)) {
332 dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
333 return ERR_PTR(-EINVAL);
336 /* get offset of notification word for this vq */
337 off = vp_ioread16(&cfg->queue_notify_off);
339 info->msix_vector = msix_vec;
341 /* create the vring */
342 vq = vring_create_virtqueue(index, num,
343 SMP_CACHE_BYTES, &vp_dev->vdev,
345 vp_notify, callback, name);
347 return ERR_PTR(-ENOMEM);
349 /* activate the queue */
350 vp_iowrite16(virtqueue_get_vring_size(vq), &cfg->queue_size);
351 vp_iowrite64_twopart(virtqueue_get_desc_addr(vq),
352 &cfg->queue_desc_lo, &cfg->queue_desc_hi);
353 vp_iowrite64_twopart(virtqueue_get_avail_addr(vq),
354 &cfg->queue_avail_lo, &cfg->queue_avail_hi);
355 vp_iowrite64_twopart(virtqueue_get_used_addr(vq),
356 &cfg->queue_used_lo, &cfg->queue_used_hi);
358 if (vp_dev->notify_base) {
359 /* offset should not wrap */
360 if ((u64)off * vp_dev->notify_offset_multiplier + 2
361 > vp_dev->notify_len) {
362 dev_warn(&vp_dev->pci_dev->dev,
363 "bad notification offset %u (x %u) "
364 "for queue %u > %zd",
365 off, vp_dev->notify_offset_multiplier,
366 index, vp_dev->notify_len);
370 vq->priv = (void __force *)vp_dev->notify_base +
371 off * vp_dev->notify_offset_multiplier;
373 vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
374 vp_dev->notify_map_cap, 2, 2,
375 off * vp_dev->notify_offset_multiplier, 2,
384 if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
385 vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
386 msix_vec = vp_ioread16(&cfg->queue_msix_vector);
387 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
389 goto err_assign_vector;
396 if (!vp_dev->notify_base)
397 pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
399 vring_del_virtqueue(vq);
403 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
404 struct virtqueue *vqs[],
405 vq_callback_t *callbacks[],
406 const char * const names[], const bool *ctx,
407 struct irq_affinity *desc)
409 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
410 struct virtqueue *vq;
411 int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
416 /* Select and activate all queues. Has to be done last: once we do
417 * this, there's no way to go back except reset.
419 list_for_each_entry(vq, &vdev->vqs, list) {
420 vp_iowrite16(vq->index, &vp_dev->common->queue_select);
421 vp_iowrite16(1, &vp_dev->common->queue_enable);
427 static void del_vq(struct virtio_pci_vq_info *info)
429 struct virtqueue *vq = info->vq;
430 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
432 vp_iowrite16(vq->index, &vp_dev->common->queue_select);
434 if (vp_dev->msix_enabled) {
435 vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
436 &vp_dev->common->queue_msix_vector);
437 /* Flush the write out to device */
438 vp_ioread16(&vp_dev->common->queue_msix_vector);
441 if (!vp_dev->notify_base)
442 pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
444 vring_del_virtqueue(vq);
447 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
450 .generation = vp_generation,
451 .get_status = vp_get_status,
452 .set_status = vp_set_status,
454 .find_vqs = vp_modern_find_vqs,
455 .del_vqs = vp_del_vqs,
456 .get_features = vp_get_features,
457 .finalize_features = vp_finalize_features,
458 .bus_name = vp_bus_name,
459 .set_vq_affinity = vp_set_vq_affinity,
460 .get_vq_affinity = vp_get_vq_affinity,
463 static const struct virtio_config_ops virtio_pci_config_ops = {
466 .generation = vp_generation,
467 .get_status = vp_get_status,
468 .set_status = vp_set_status,
470 .find_vqs = vp_modern_find_vqs,
471 .del_vqs = vp_del_vqs,
472 .get_features = vp_get_features,
473 .finalize_features = vp_finalize_features,
474 .bus_name = vp_bus_name,
475 .set_vq_affinity = vp_set_vq_affinity,
476 .get_vq_affinity = vp_get_vq_affinity,
480 * virtio_pci_find_capability - walk capabilities to find device info.
481 * @dev: the pci device
482 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
483 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
485 * Returns offset of the capability, or 0.
487 static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
488 u32 ioresource_types, int *bars)
492 for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
494 pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
496 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
499 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
503 /* Ignore structures with reserved BAR values */
507 if (type == cfg_type) {
508 if (pci_resource_len(dev, bar) &&
509 pci_resource_flags(dev, bar) & ioresource_types) {
518 /* This is part of the ABI. Don't screw with it. */
519 static inline void check_offsets(void)
521 /* Note: disk space was harmed in compilation of this function. */
522 BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
523 offsetof(struct virtio_pci_cap, cap_vndr));
524 BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
525 offsetof(struct virtio_pci_cap, cap_next));
526 BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
527 offsetof(struct virtio_pci_cap, cap_len));
528 BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
529 offsetof(struct virtio_pci_cap, cfg_type));
530 BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
531 offsetof(struct virtio_pci_cap, bar));
532 BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
533 offsetof(struct virtio_pci_cap, offset));
534 BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
535 offsetof(struct virtio_pci_cap, length));
536 BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
537 offsetof(struct virtio_pci_notify_cap,
538 notify_off_multiplier));
539 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
540 offsetof(struct virtio_pci_common_cfg,
541 device_feature_select));
542 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
543 offsetof(struct virtio_pci_common_cfg, device_feature));
544 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
545 offsetof(struct virtio_pci_common_cfg,
546 guest_feature_select));
547 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
548 offsetof(struct virtio_pci_common_cfg, guest_feature));
549 BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
550 offsetof(struct virtio_pci_common_cfg, msix_config));
551 BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
552 offsetof(struct virtio_pci_common_cfg, num_queues));
553 BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
554 offsetof(struct virtio_pci_common_cfg, device_status));
555 BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
556 offsetof(struct virtio_pci_common_cfg, config_generation));
557 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
558 offsetof(struct virtio_pci_common_cfg, queue_select));
559 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
560 offsetof(struct virtio_pci_common_cfg, queue_size));
561 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
562 offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
563 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
564 offsetof(struct virtio_pci_common_cfg, queue_enable));
565 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
566 offsetof(struct virtio_pci_common_cfg, queue_notify_off));
567 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
568 offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
569 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
570 offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
571 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
572 offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
573 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
574 offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
575 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
576 offsetof(struct virtio_pci_common_cfg, queue_used_lo));
577 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
578 offsetof(struct virtio_pci_common_cfg, queue_used_hi));
581 /* the PCI probing function */
582 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
584 struct pci_dev *pci_dev = vp_dev->pci_dev;
585 int err, common, isr, notify, device;
591 /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
592 if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
595 if (pci_dev->device < 0x1040) {
596 /* Transitional devices: use the PCI subsystem device id as
597 * virtio device id, same as legacy driver always did.
599 vp_dev->vdev.id.device = pci_dev->subsystem_device;
601 /* Modern devices: simply use PCI device id, but start from 0x1040. */
602 vp_dev->vdev.id.device = pci_dev->device - 0x1040;
604 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
606 /* check for a common config: if not, use legacy mode (bar 0). */
607 common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
608 IORESOURCE_IO | IORESOURCE_MEM,
609 &vp_dev->modern_bars);
611 dev_info(&pci_dev->dev,
612 "virtio_pci: leaving for legacy driver\n");
616 /* If common is there, these should be too... */
617 isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
618 IORESOURCE_IO | IORESOURCE_MEM,
619 &vp_dev->modern_bars);
620 notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
621 IORESOURCE_IO | IORESOURCE_MEM,
622 &vp_dev->modern_bars);
623 if (!isr || !notify) {
624 dev_err(&pci_dev->dev,
625 "virtio_pci: missing capabilities %i/%i/%i\n",
626 common, isr, notify);
630 err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
632 err = dma_set_mask_and_coherent(&pci_dev->dev,
635 dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
637 /* Device capability is only mandatory for devices that have
638 * device-specific configuration.
640 device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
641 IORESOURCE_IO | IORESOURCE_MEM,
642 &vp_dev->modern_bars);
644 err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
645 "virtio-pci-modern");
650 vp_dev->common = map_capability(pci_dev, common,
651 sizeof(struct virtio_pci_common_cfg), 4,
652 0, sizeof(struct virtio_pci_common_cfg),
656 vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
662 /* Read notify_off_multiplier from config space. */
663 pci_read_config_dword(pci_dev,
664 notify + offsetof(struct virtio_pci_notify_cap,
665 notify_off_multiplier),
666 &vp_dev->notify_offset_multiplier);
667 /* Read notify length and offset from config space. */
668 pci_read_config_dword(pci_dev,
669 notify + offsetof(struct virtio_pci_notify_cap,
673 pci_read_config_dword(pci_dev,
674 notify + offsetof(struct virtio_pci_notify_cap,
678 /* We don't know how many VQs we'll map, ahead of the time.
679 * If notify length is small, map it all now.
680 * Otherwise, map each VQ individually later.
682 if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
683 vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
685 &vp_dev->notify_len);
686 if (!vp_dev->notify_base)
689 vp_dev->notify_map_cap = notify;
692 /* Again, we don't know how much we should map, but PAGE_SIZE
693 * is more than enough for all existing devices.
696 vp_dev->device = map_capability(pci_dev, device, 0, 4,
698 &vp_dev->device_len);
702 vp_dev->vdev.config = &virtio_pci_config_ops;
704 vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
707 vp_dev->config_vector = vp_config_vector;
708 vp_dev->setup_vq = setup_vq;
709 vp_dev->del_vq = del_vq;
714 if (vp_dev->notify_base)
715 pci_iounmap(pci_dev, vp_dev->notify_base);
717 pci_iounmap(pci_dev, vp_dev->isr);
719 pci_iounmap(pci_dev, vp_dev->common);
724 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
726 struct pci_dev *pci_dev = vp_dev->pci_dev;
729 pci_iounmap(pci_dev, vp_dev->device);
730 if (vp_dev->notify_base)
731 pci_iounmap(pci_dev, vp_dev->notify_base);
732 pci_iounmap(pci_dev, vp_dev->isr);
733 pci_iounmap(pci_dev, vp_dev->common);
734 pci_release_selected_regions(pci_dev, vp_dev->modern_bars);