#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
+/*
+ * Look up the device_domain_info stored in @dev's archdata.
+ *
+ * Returns NULL when @dev is NULL or when the slot only holds one of the
+ * DUMMY/DEFER placeholder values instead of a real pointer.
+ */
+struct device_domain_info *get_domain_info(struct device *dev)
+{
+	struct device_domain_info *domain_info;
+
+	if (!dev)
+		return NULL;
+
+	domain_info = dev->archdata.iommu;
+
+	/* Placeholder values must never be dereferenced by callers. */
+	if (unlikely(domain_info == DUMMY_DEVICE_DOMAIN_INFO))
+		return NULL;
+	if (unlikely(domain_info == DEFER_DEVICE_DOMAIN_INFO))
+		return NULL;
+
+	return domain_info;
+}
+
DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
+ if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
+ ioasid_unregister_allocator(&iommu->pasid_allocator);
+
#endif
}
return 0;
}
-static void domain_reserve_special_ranges(struct dmar_domain *domain)
-{
- copy_reserved_iova(&reserved_iova_list, &domain->iovad);
-}
-
static inline int guestwidth_to_adjustwidth(int gaw)
{
int agaw;
domain_remove_dev_info(domain);
/* destroy iovas */
- put_iova_domain(&domain->iovad);
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ put_iova_domain(&domain->iovad);
if (domain->pgd) {
struct page *freelist;
dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
/* No lock here, assumes no domain exit in normal case */
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (likely(info))
return info->domain;
flags);
}
+/*
+ * True when @dev is a PCI device for which pci_real_dma_dev() reports a
+ * pci_dev other than the device itself. NULL and non-PCI devices yield false.
+ */
+static bool dev_is_real_dma_subdevice(struct device *dev)
+{
+	struct pci_dev *pdev;
+
+	if (!dev || !dev_is_pci(dev))
+		return false;
+
+	pdev = to_pci_dev(dev);
+
+	return pci_real_dma_dev(pdev) != pdev;
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
}
static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
+ unsigned long first_vpfn,
+ unsigned long last_vpfn)
{
- unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
- unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
-
- if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
- dma_to_mm_pfn(last_vpfn))) {
- pr_err("Reserving iova failed\n");
- return -ENOMEM;
- }
-
- pr_debug("Mapping reserved region %llx-%llx\n", start, end);
/*
* RMRR range might have overlap with physical memory range,
* clear it first
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
ret = iommu_domain_identity_map(si_domain,
- PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+ mm_to_dma_pfn(start_pfn),
+ mm_to_dma_pfn(end_pfn));
if (ret)
return ret;
}
return ret;
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+/*
+ * Custom ioasid allocation callback backed by the virtual command interface.
+ *
+ * @min, @max: requested PASID range
+ * @data: the intel_iommu this allocator was registered for
+ *
+ * Returns the PASID handed out by vcmd_alloc_pasid(), or INVALID_IOASID when
+ * @data is NULL, the range is outside [PASID_MIN, intel_pasid_max_id], or the
+ * virtual command fails.
+ */
+static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+	struct intel_iommu *iommu = data;
+	ioasid_t pasid;
+
+	/*
+	 * VT-d virtual command interface always uses the full 20 bit
+	 * PASID range. Host can partition guest PASID range based on
+	 * policies but it is out of guest's control.
+	 */
+	if (!iommu || min < PASID_MIN || max > intel_pasid_max_id)
+		return INVALID_IOASID;
+
+	return vcmd_alloc_pasid(iommu, &pasid) ? INVALID_IOASID : pasid;
+}
+
+/*
+ * Custom ioasid free callback backed by the virtual command interface.
+ *
+ * @ioasid: PASID to release
+ * @data: the intel_iommu this allocator was registered for
+ *
+ * Does nothing when @data is NULL. An IOASID that ioasid_find() still
+ * reports is left allocated and an alert is logged instead.
+ */
+static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
+{
+	struct intel_iommu *iommu = data;
+
+	if (!iommu)
+		return;
+
+	/*
+	 * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
+	 * We can only free the PASID when all the devices are unbound.
+	 */
+	if (!ioasid_find(NULL, ioasid, NULL)) {
+		vcmd_free_pasid(iommu, ioasid);
+		return;
+	}
+
+	pr_alert("Cannot free active IOASID %d\n", ioasid);
+}
+
+/*
+ * Install the virtual-command based PASID allocator for @iommu when needed.
+ *
+ * Nothing is registered unless caching mode is reported, scalable mode is
+ * supported, and both the virtual command and PASID allocation capabilities
+ * are present. If registration fails, scalable mode is switched off globally
+ * via intel_iommu_sm.
+ */
+static void register_pasid_allocator(struct intel_iommu *iommu)
+{
+	/*
+	 * If we are running in the host, no need for custom allocator
+	 * in that PASIDs are allocated from the host system-wide.
+	 */
+	if (!cap_caching_mode(iommu->cap))
+		return;
+
+	if (!sm_supported(iommu)) {
+		pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
+		return;
+	}
+
+	/*
+	 * Register a custom PASID allocator if we are running in a guest,
+	 * guest PASID must be obtained via virtual command interface.
+	 * There can be multiple vIOMMUs in each guest but only one allocator
+	 * is active. All vIOMMU allocators will eventually be calling the same
+	 * host allocator.
+	 */
+	if (!(ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)))
+		return;
+
+	pr_info("Register custom PASID allocator\n");
+	iommu->pasid_allocator.pdata = iommu;
+	iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
+	iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
+
+	if (!ioasid_register_allocator(&iommu->pasid_allocator))
+		return;
+
+	/*
+	 * Disable scalable mode on this IOMMU if there
+	 * is no custom allocator. Mixing SM capable vIOMMU
+	 * and non-SM vIOMMU are not supported.
+	 */
+	pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
+	intel_iommu_sm = 0;
+}
+#endif
+
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ register_pasid_allocator(iommu);
+#endif
iommu_set_root_entry(iommu);
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
unsigned long val, void *v)
{
struct memory_notify *mhp = v;
- unsigned long long start, end;
- unsigned long start_vpfn, last_vpfn;
+ unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+ unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+ mhp->nr_pages - 1);
switch (val) {
case MEM_GOING_ONLINE:
- start = mhp->start_pfn << PAGE_SHIFT;
- end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
- if (iommu_domain_identity_map(si_domain, start, end)) {
- pr_warn("Failed to build identity map for [%llx-%llx]\n",
- start, end);
+ if (iommu_domain_identity_map(si_domain,
+ start_vpfn, last_vpfn)) {
+ pr_warn("Failed to build identity map for [%lx-%lx]\n",
+ start_vpfn, last_vpfn);
return NOTIFY_BAD;
}
break;
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
- start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
- last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
- while (start_vpfn <= last_vpfn) {
- struct iova *iova;
+ {
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
struct page *freelist;
- iova = find_iova(&si_domain->iovad, start_vpfn);
- if (iova == NULL) {
- pr_debug("Failed get IOVA for PFN %lx\n",
- start_vpfn);
- break;
- }
-
- iova = split_and_remove_iova(&si_domain->iovad, iova,
- start_vpfn, last_vpfn);
- if (iova == NULL) {
- pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
-
- freelist = domain_unmap(si_domain, iova->pfn_lo,
- iova->pfn_hi);
+ freelist = domain_unmap(si_domain,
+ start_vpfn, last_vpfn);
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
iommu_flush_iotlb_psi(iommu, si_domain,
- iova->pfn_lo, iova_size(iova),
+ start_vpfn, mhp->nr_pages,
!freelist, 0);
rcu_read_unlock();
dma_free_pagelist(freelist);
-
- start_vpfn = iova->pfn_hi + 1;
- free_iova_mem(iova);
}
break;
}
for (did = 0; did < cap_ndoms(iommu->cap); did++) {
domain = get_iommu_domain(iommu, (u16)did);
- if (!domain)
+ if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
continue;
+
free_cpu_cached_iovas(cpu, &domain->iovad);
}
}
if (info->dev) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
- PASID_RID2PASID);
+ PASID_RID2PASID, false);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ if (!dev_is_real_dma_subdevice(info->dev))
+ domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
}
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
- if (info && info != DEFER_DEVICE_DOMAIN_INFO
- && info != DUMMY_DEVICE_DOMAIN_INFO)
+ info = get_domain_info(dev);
+ if (info)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
{
int adjust_width;
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
-
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
return 0;
}
+/*
+ * Prepare the IOVA allocator of @dmar_domain: initialise it with the VT-d
+ * page size, copy in the reserved ranges and, unless strict mode is
+ * requested, set up the IOVA flush queue. A flush queue failure is only
+ * logged.
+ */
+static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
+{
+	struct iova_domain *iovad = &dmar_domain->iovad;
+
+	init_iova_domain(iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+	copy_reserved_iova(&reserved_iova_list, iovad);
+
+	if (intel_iommu_strict)
+		return;
+
+	if (init_iova_flush_queue(iovad, iommu_flush_iova, iova_entry_free))
+		pr_info("iova flush queue initialization failed\n");
+}
+
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
- int ret;
switch (type) {
case IOMMU_DOMAIN_DMA:
return NULL;
}
- if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
- ret = init_iova_flush_queue(&dmar_domain->iovad,
- iommu_flush_iova,
- iova_entry_free);
- if (ret)
- pr_info("iova flush queue initialization failed\n");
- }
+ if (type == IOMMU_DOMAIN_DMA)
+ intel_init_iova_domain(dmar_domain);
domain_update_iommu_cap(dmar_domain);
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
return info && info->auxd_enabled &&
domain->type == IOMMU_DOMAIN_UNMANAGED;
static void auxiliary_link_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
static void auxiliary_unlink_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
return;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
iommu = info->iommu;
auxiliary_unlink_device(domain, dev);
spin_lock(&iommu->lock);
- intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+ intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
domain_detach_iommu(domain, iommu);
spin_unlock(&iommu->lock);
aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices guest
+ * owns the first level page tables. Invalidations of translation caches in the
+ * guest are trapped and passed down to the host.
+ *
+ * vIOMMU in the guest will only expose first level page tables, therefore
+ * we do not support IOTLB granularity for request without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by the bit position of the iommu cache type flag
+ * Y: indexed by enum iommu_inv_granularity
+ * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+ /*
+ * PASID based IOTLB invalidation: PASID selective (per PASID),
+ * page selective (address granularity).
+ * An -EINVAL entry marks a cache type / granularity combination
+ * that has no VT-d encoding; the lookup's caller treats it as invalid.
+ */
+ {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+ /* PASID based dev TLBs: only the last granularity column is encoded */
+ {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+ /* PASID cache: no granularity is supported, every column is -EINVAL */
+ {-EINVAL, -EINVAL, -EINVAL}
+};
+
+/*
+ * Translate a (cache type index, generic granularity) pair into the VT-d
+ * granularity encoding stored in inv_type_granu_table.
+ *
+ * @type: row index, the bit position of the cache type
+ * @granu: column index, the generic invalidation granularity
+ *
+ * Returns the VT-d encoding, or -EINVAL when the combination is unsupported
+ * or either index lies outside the table. The granularity comes straight
+ * from the invalidation request, so it is range-checked here rather than
+ * trusted as an array index.
+ */
+static inline int to_vtd_granularity(int type, int granu)
+{
+	if (type < 0 || type >= IOMMU_CACHE_INV_TYPE_NR ||
+	    granu < 0 || granu >= IOMMU_INV_GRANU_NR)
+		return -EINVAL;
+
+	return inv_type_granu_table[type][granu];
+}
+
+/*
+ * Convert an invalidation range into the VT-d size encoding.
+ *
+ * @granu_size: size of one granule in bytes
+ * @nr_granules: number of contiguous granules
+ *
+ * VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+ * The byte length is reduced to a count of VT-d pages and the base-2 order
+ * of that count is returned.
+ */
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+	u64 total_bytes = granu_size * nr_granules;
+	u64 vtd_pages = total_bytes >> VTD_PAGE_SHIFT;
+
+	return order_base_2(vtd_pages);
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+/*
+ * Handle a cache invalidation request passed down for @dev in a nested
+ * @domain, issuing the matching PASID-based IOTLB and device-TLB flushes.
+ *
+ * @domain: domain the device is attached to; must have nesting mode set
+ * @dev: target device; must be a PCI device behind a known IOMMU
+ * @inv_info: invalidation request (cache type bitmap, granularity, and the
+ *            PASID or address details matching that granularity)
+ *
+ * Returns 0 on success, -EINVAL for a malformed request, a non-nested
+ * domain, a device without domain info or an unsupported cache type,
+ * -ENODEV when the device or its IOMMU cannot be resolved, and -ERANGE when
+ * the address is not aligned to the requested size order.
+ */
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+			   struct iommu_cache_invalidate_info *inv_info)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct device_domain_info *info;
+	struct intel_iommu *iommu;
+	unsigned long flags;
+	int cache_type;
+	u8 bus, devfn;
+	u16 did, sid;
+	int ret = 0;
+	u64 size = 0;
+
+	if (!inv_info || !dmar_domain ||
+	    inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+		return -EINVAL;
+
+	if (!dev || !dev_is_pci(dev))
+		return -ENODEV;
+
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (!iommu)
+		return -ENODEV;
+
+	if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+		return -EINVAL;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	spin_lock(&iommu->lock);
+	info = get_domain_info(dev);
+	if (!info) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	did = dmar_domain->iommu_did[iommu->seq_id];
+	sid = PCI_DEVID(bus, devfn);
+
+	/*
+	 * Size is only valid in address selective invalidation, so only
+	 * read the address info when that granularity was requested.
+	 */
+	if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
+		size = to_vtd_size(inv_info->addr_info.granule_size,
+				   inv_info->addr_info.nb_granules);
+
+	for_each_set_bit(cache_type,
+			 (unsigned long *)&inv_info->cache,
+			 IOMMU_CACHE_INV_TYPE_NR) {
+		int granu = 0;
+		u64 pasid = 0;
+
+		granu = to_vtd_granularity(cache_type, inv_info->granularity);
+		if (granu == -EINVAL) {
+			/*
+			 * NOTE(review): this path leaves ret untouched, so a
+			 * request rejected here still returns the current ret
+			 * (0 unless an earlier cache type failed) - confirm
+			 * that is intended.
+			 */
+			pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+					   cache_type, inv_info->granularity);
+			break;
+		}
+
+		/*
+		 * PASID is stored in different locations based on the
+		 * granularity.
+		 */
+		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+		    (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+			pasid = inv_info->pasid_info.pasid;
+		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+			 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+			pasid = inv_info->addr_info.pasid;
+
+		switch (BIT(cache_type)) {
+		case IOMMU_CACHE_INV_TYPE_IOTLB:
+			/* The address must be aligned to the size order. */
+			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+			    size &&
+			    (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+				pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+						   inv_info->addr_info.addr, size);
+				ret = -ERANGE;
+				goto out_unlock;
+			}
+
+			/*
+			 * If granu is PASID-selective, address is ignored.
+			 * We use npages = -1 to indicate that.
+			 * The page count is shifted as an unsigned long so a
+			 * large size order cannot overflow a signed int.
+			 */
+			qi_flush_piotlb(iommu, did, pasid,
+					mm_to_dma_pfn(inv_info->addr_info.addr),
+					(granu == QI_GRAN_NONG_PASID) ? -1 : 1UL << size,
+					inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+			/*
+			 * Always flush device IOTLB if ATS is enabled. vIOMMU
+			 * in the guest may assume IOTLB flush is inclusive,
+			 * which is more efficient.
+			 */
+			if (info->ats_enabled)
+				qi_flush_dev_iotlb_pasid(iommu, sid,
+						info->pfsid, pasid,
+						info->ats_qdep,
+						inv_info->addr_info.addr,
+						size, granu);
+			break;
+		case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+			if (info->ats_enabled)
+				qi_flush_dev_iotlb_pasid(iommu, sid,
+						info->pfsid, pasid,
+						info->ats_qdep,
+						inv_info->addr_info.addr,
+						size, granu);
+			else
+				pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+			break;
+		default:
+			/* Record the error but keep processing other types. */
+			dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+					    cache_type);
+			ret = -EINVAL;
+		}
+	}
+out_unlock:
+	spin_unlock(&iommu->lock);
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	return ret;
+}
+#endif
+
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
size_t size, int iommu_prot, gfp_t gfp)
spin_lock(&iommu->lock);
ret = -EINVAL;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_supported)
goto out;
return -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
info->auxd_enabled = 1;
spin_unlock_irqrestore(&device_domain_lock, flags);
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!WARN_ON(!info))
info->auxd_enabled = 0;
spin_unlock_irqrestore(&device_domain_lock, flags);
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
+ info->pasid_supported && info->pri_supported &&
+ info->ats_supported;
+ }
+
return false;
}
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ if (!info)
+ return -EINVAL;
+
+ if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
+ return 0;
+ }
+
return -ENODEV;
}
static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
if (feat == IOMMU_DEV_FEAT_AUX)
return scalable_mode_support() && info && info->auxd_enabled;
.def_domain_type = device_def_domain_type,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
#ifdef CONFIG_INTEL_IOMMU_SVM
+ .cache_invalidate = intel_iommu_sva_invalidate,
.sva_bind_gpasid = intel_svm_bind_gpasid,
.sva_unbind_gpasid = intel_svm_unbind_gpasid,
+ .sva_bind = intel_svm_bind,
+ .sva_unbind = intel_svm_unbind,
+ .sva_get_pasid = intel_svm_get_pasid,
#endif
};