// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2014 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>,
 *          Ashok Raj <ashok.raj@intel.com>,
 *          Shaohua Li <shaohua.li@intel.com>,
 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
 *          Fenghua Yu <fenghua.yu@intel.com>
 *          Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-map-ops.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/cpu.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/dma-iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/crash_dump.h>
#include <linux/numa.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "../irq_remapping.h"
#include "../iommu-sva-lib.h"
#include "cap_audit.h"
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57

#define MAX_AGAW_WIDTH 64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
84 /* page table handling */
85 #define LEVEL_STRIDE (9)
86 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
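/*
 * Note: each page-table level decodes LEVEL_STRIDE (9) bits of the page
 * frame number, i.e. every table holds 512 eight-byte entries and is
 * exactly one 4KiB VT-d page in size.
 */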
/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physically contiguous memory regions it is mapping into page sizes
 * that we support.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size is an order of a 4KiB page and that the
 * mapping has natural alignment.
 *
 * To retain this behavior, we currently advertise that we support
 * all page sizes that are an order of 4KiB.
 *
 * If at some point we'd like to utilize the IOMMU core's new behavior,
 * we could change this to advertise the real page sizes we support.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
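/*
 * With a 4KiB VTD_PAGE_SIZE, ~0xFFFUL sets every bit from 12 upwards,
 * i.e. 4KiB, 8KiB, 16KiB, ... are all advertised as supported sizes.
 */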
static inline int agaw_to_level(int agaw)
	return agaw + 2;

static inline int agaw_to_width(int agaw)
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);

static inline int width_to_agaw(int width)
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
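/*
 * For reference: agaw 1/2/3 correspond to 39/48/57-bit address widths,
 * i.e. 3/4/5-level page tables (agaw_to_level() == agaw + 2).
 */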
static inline unsigned int level_to_offset_bits(int level)
	return (level - 1) * LEVEL_STRIDE;

static inline int pfn_level_offset(u64 pfn, int level)
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;

static inline u64 level_mask(int level)
	return -1ULL << level_to_offset_bits(level);

static inline u64 level_size(int level)
	return 1ULL << level_to_offset_bits(level);

static inline u64 align_to_level(u64 pfn, int level)
	return (pfn + level_size(level) - 1) & level_mask(level);

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
	return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
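/*
 * lvl_to_nr_pages() examples: level 1 covers a single 4KiB page, level 2
 * covers 512 pages (2MiB), level 3 covers 512 * 512 pages (1GiB).
 */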
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);

static inline unsigned long page_to_dma_pfn(struct page *pg)
	return mm_to_dma_pfn(page_to_pfn(pg));

static inline unsigned long virt_to_dma_pfn(void *p)
	return page_to_dma_pfn(virt_to_page(p));
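/*
 * On x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so the conversions above are
 * effectively no-ops; the shifts only matter on configurations where MM
 * pages are larger than the 4KiB VT-d page.
 */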
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic the kernel if VT-d can't be enabled successfully
 * (used when the kernel is launched w/ TXT)
 */
static int force_on = 0;
static int intel_iommu_tboot_noforce;
static int no_platform_optin;

#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
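/*
 * With 16-byte root entries and a 4KiB root table this works out to 256
 * entries, one per PCI bus number.
 */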
/* Take a root_entry and return the Lower Context Table Pointer (LCTP) */
static phys_addr_t root_entry_lctp(struct root_entry *re)
	return re->lo & VTD_PAGE_MASK;

/* Take a root_entry and return the Upper Context Table Pointer (UCTP) */
static phys_addr_t root_entry_uctp(struct root_entry *re)
	return re->hi & VTD_PAGE_MASK;
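/*
 * The helpers below poke at individual fields of a legacy context entry
 * (see the VT-d spec): lo bit 0 is Present, bit 1 is Fault Processing
 * Disable, bits 3:2 select the Translation Type and bits 63:12 hold the
 * second-level page-table (or pass-through) pointer; hi bits 2:0 encode
 * the Address Width and bits 23:8 the Domain ID.
 */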
static inline void context_clear_pasid_enable(struct context_entry *context)
	context->lo &= ~(1ULL << 11);

static inline bool context_pasid_enabled(struct context_entry *context)
	return !!(context->lo & (1ULL << 11));

static inline void context_set_copied(struct context_entry *context)
	context->hi |= (1ull << 3);

static inline bool context_copied(struct context_entry *context)
	return !!(context->hi & (1ULL << 3));

static inline bool __context_present(struct context_entry *context)
	return (context->lo & 1);

bool context_present(struct context_entry *context)
	return context_pasid_enabled(context) ?
		__context_present(context) :
		__context_present(context) && !context_copied(context);

static inline void context_set_present(struct context_entry *context)
	context->lo |= 1;

static inline void context_set_fault_enable(struct context_entry *context)
	context->lo &= (((u64)-1) << 2) | 1;

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
	context->hi |= value & 7;

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
	context->hi |= (value & ((1 << 16) - 1)) << 8;

static inline int context_domain_id(struct context_entry *c)
	return (c->hi >> 8) & 0xffff;

static inline void context_clear_entry(struct context_entry *context)
	context->lo = 0;
	context->hi = 0;
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
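/*
 * for_each_domain_iommu() walks every IOMMU index but only visits those
 * on which @domain currently holds a reference (taken in
 * domain_attach_iommu()).
 */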
#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])
struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	u64 base_address;		/* reserved base address */
	u64 end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

struct dmar_satc_unit {
	struct list_head list;		/* list of SATC units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	struct intel_iommu *iommu;	/* the corresponding iommu */
	int devices_cnt;		/* target device count */
	u8 atc_required:1;		/* ATS is required */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);
static LIST_HEAD(dmar_satc_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;
static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
int intel_iommu_sm = 1;
#else
int intel_iommu_sm;
#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;

#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
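/*
 * Sentinel stored in the per-device IOMMU private data to mark devices
 * whose domain attachment is deferred until first use (see
 * attach_deferred()).
 */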
struct device_domain_info *get_domain_info(struct device *dev)
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))

DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/*
 * Iterate over elements in device_domain_list and call the specified
 * callback @fn against each element.
 */
int for_each_device_domain(int (*fn)(struct device_domain_info *info,
				     void *data), void *data)
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &device_domain_list, global) {
		ret = fn(info, data);
			spin_unlock_irqrestore(&device_domain_lock, flags);

	spin_unlock_irqrestore(&device_domain_lock, flags);
const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;

static void init_translation_status(struct intel_iommu *iommu)
	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
static int __init intel_iommu_setup(char *str)
	if (!strncmp(str, "on", 2)) {
		pr_info("IOMMU enabled\n");
	} else if (!strncmp(str, "off", 3)) {
		no_platform_optin = 1;
		pr_info("IOMMU disabled\n");
	} else if (!strncmp(str, "igfx_off", 8)) {
		pr_info("Disable GFX device mapping\n");
	} else if (!strncmp(str, "forcedac", 8)) {
		pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
		iommu_dma_forcedac = true;
	} else if (!strncmp(str, "strict", 6)) {
		pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
		pr_info("Disable batched IOTLB flush\n");
		intel_iommu_strict = 1;
	} else if (!strncmp(str, "sp_off", 6)) {
		pr_info("Disable supported super page\n");
		intel_iommu_superpage = 0;
	} else if (!strncmp(str, "sm_on", 5)) {
		pr_info("Intel-IOMMU: scalable mode supported\n");
	} else if (!strncmp(str, "tboot_noforce", 13)) {
		pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
		intel_iommu_tboot_noforce = 1;
	}

	str += strcspn(str, ",");

__setup("intel_iommu=", intel_iommu_setup);
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
	struct dmar_domain **domains;

	domains = iommu->domains[idx];

	return domains[did & 0xff];

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
	struct dmar_domain **domains;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))

	domains[did & 0xff] = domain;
void *alloc_pgtable_page(int node)
	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	vaddr = page_address(page);

void free_pgtable_page(void *vaddr)
	free_page((unsigned long)vaddr);

static inline void *alloc_domain_mem(void)
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);

static void free_domain_mem(void *vaddr)
	kmem_cache_free(iommu_domain_cache, vaddr);

static inline void *alloc_devinfo_mem(void)
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);

static inline void free_devinfo_mem(void *vaddr)
	kmem_cache_free(iommu_devinfo_cache, vaddr);

static inline int domain_type_is_si(struct dmar_domain *domain)
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;

static inline bool domain_use_first_level(struct dmar_domain *domain)
	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;

static inline int domain_pfn_supported(struct dmar_domain *domain,
				       unsigned long pfn)
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
		if (test_bit(agaw, &sagaw))

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, so use a default agaw and fall
 * back to a smaller supported agaw for iommus that don't support the
 * default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
/* This function only returns a single iommu in a domain */
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
	/* si_domain and vm domain should not get here. */
	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))

	for_each_domain_iommu(iommu_id, domain)

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)

	return g_iommus[iommu_id];

static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
	return sm_supported(iommu) ?
		ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
static void domain_update_iommu_coherency(struct dmar_domain *domain)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	domain->iommu_coherency = true;

	for_each_domain_iommu(i, domain) {
		if (!iommu_paging_structure_coherency(g_iommus[i])) {
			domain->iommu_coherency = false;

	/* No hardware attached; use lowest common denominator */
	for_each_active_iommu(iommu, drhd) {
		if (!iommu_paging_structure_coherency(iommu)) {
			domain->iommu_coherency = false;
static bool domain_update_iommu_snooping(struct intel_iommu *skip)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	for_each_active_iommu(iommu, drhd) {
		/*
		 * If the hardware is operating in the scalable mode,
		 * the snooping control is always supported since we
		 * always set PASID-table-entry.PGSNP bit if the domain
		 * is managed outside (UNMANAGED).
		 */
		if (!sm_supported(iommu) &&
		    !ecap_sc_support(iommu->ecap)) {
static int domain_update_iommu_superpage(struct dmar_domain *domain,
					 struct intel_iommu *skip)
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	if (!intel_iommu_superpage)

	/* set iommu_superpage to the smallest common denominator */
	for_each_active_iommu(iommu, drhd) {
		if (domain && domain_use_first_level(domain)) {
			if (!cap_fl1gp_support(iommu->cap))

		mask &= cap_super_page_val(iommu->cap);
static int domain_update_device_node(struct dmar_domain *domain)
	struct device_domain_info *info;
	int nid = NUMA_NO_NODE;

	assert_spin_locked(&device_domain_lock);

	if (list_empty(&domain->devices))

	list_for_each_entry(info, &domain->devices, link) {
		/*
		 * There could possibly be multiple device numa nodes as devices
		 * within the same domain may sit behind different IOMMUs. There
		 * isn't a perfect answer in such a situation, so we use a
		 * first come, first served policy.
		 */
		nid = dev_to_node(info->dev);
		if (nid != NUMA_NO_NODE)

static void domain_update_iotlb(struct dmar_domain *domain);
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);

	/*
	 * If RHSA is missing, we should default to the device numa domain
	 * as well.
	 */
	if (domain->nid == NUMA_NO_NODE)
		domain->nid = domain_update_device_node(domain);

	/*
	 * First-level translation restricts the input-address to a
	 * canonical address (i.e., address bits 63:N have the same
	 * value as address bit [N-1], where N is 48-bits with 4-level
	 * paging and 57-bits with 5-level paging). Hence, skip bit
	 * [N-1].
	 */
	if (domain_use_first_level(domain))
		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
	else
		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);

	domain_update_iotlb(domain);
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
					 u8 devfn, int alloc)
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;

	if (sm_supported(iommu)) {

	context = phys_to_virt(*entry & VTD_PAGE_MASK);

		unsigned long phy_addr;

		context = alloc_pgtable_page(iommu->node);

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));

	return &context[devfn];
static bool attach_deferred(struct device *dev)
	return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;

/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
static bool
is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
	struct pci_dev *pdev, *pbridge;

	if (!dev_is_pci(dev) || !dev_is_pci(bridge))

	pdev = to_pci_dev(dev);
	pbridge = to_pci_dev(bridge);

	if (pbridge->subordinate &&
	    pbridge->subordinate->number <= pdev->bus->number &&
	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
	struct dmar_drhd_unit *drhd;

	/* We know that this device on this chipset has its own IOMMU.
	 * If we find it under a different IOMMU, then the BIOS is lying
	 * to us. Hope that the IOMMU for this device is actually
	 * disabled, and it needs no translation...
	 */
	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
		dev_info(&pdev->dev, "failed to run vt-d quirk\n");

	/* we know that this iommu should be at offset 0xa000 from vtbar */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
	if (!iommu || iommu->drhd->ignored)

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
		    pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
		    quirk_ioat_snb_local_iommu(pdev))
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
	struct dmar_drhd_unit *drhd = NULL;
	struct pci_dev *pdev = NULL;
	struct intel_iommu *iommu;

	if (dev_is_pci(dev)) {
		struct pci_dev *pf_pdev;

		pdev = pci_real_dma_dev(to_pci_dev(dev));

		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
		pf_pdev = pci_physfn(pdev);

		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	for_each_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {

			/* For a VF use its original BDF# not that of the PF
			 * which we used for the IOMMU lookup. Strictly speaking
			 * we could do this for all PCI devices; we only need to
			 * get the BDF# from the scope table for ACPI matches. */
			if (pdev && pdev->is_virtfn)

			*bus = drhd->devices[i].bus;
			*devfn = drhd->devices[i].devfn;

			if (is_downstream_to_pci_bridge(dev, tmp))

		if (pdev && drhd->include_all) {

			*bus = pdev->bus->number;
			*devfn = pdev->devfn;

	if (iommu_is_dummy(iommu, dev))
static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
static void free_context_table(struct intel_iommu *iommu)
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
			free_pgtable_page(context);

		if (!sm_supported(iommu))

		context = iommu_context_addr(iommu, i, 0x80, 0);
			free_pgtable_page(context);

	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;

	spin_unlock_irqrestore(&iommu->lock, flags);
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
	struct dma_pte *parent, *pte;
	int level = agaw_to_level(domain->agaw);

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */

	parent = domain->pgd;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
		if (level == *target_level)

		if (!dma_pte_present(pte)) {
			tmp_page = alloc_pgtable_page(domain->nid);

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (domain_use_first_level(domain)) {
				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
				if (domain->domain.type == IOMMU_DOMAIN_DMA)
					pteval |= DMA_FL_PTE_ACCESS;
			}
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);

			domain_flush_cache(domain, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(pte));

	*target_level = level;
/* return the pte for an address at a specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
	struct dma_pte *parent, *pte;
	int total = agaw_to_level(domain->agaw);

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];

		if (!dma_pte_present(pte)) {
			*large_page = total;

		if (dma_pte_superpage(pte)) {
			*large_page = total;

		parent = phys_to_virt(dma_pte_addr(pte));
	}
/* clear last level pte, a tlb flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
	unsigned int large_page;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);

			start_pfn += lvl_to_nr_pages(large_page);
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       int retain_level, struct dma_pte *pte,
			       unsigned long pfn, unsigned long start_pfn,
			       unsigned long last_pfn)
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))

		level_pfn = pfn & level_mask(level);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		dma_pte_free_level(domain, level - 1, retain_level,
				   level_pte, level_pfn, start_pfn,
				   last_pfn);

		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
		if (level < retain_level && !(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}

		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
/*
 * clear last level (leaf) ptes and free page table pages below the
 * level we wish to keep intact.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn,
				   int retain_level)
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
			   domain->pgd, 0, start_pfn, last_pfn);

	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
	} while (!first_pte_in_page(pte));
static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away entirely. Don't
			   bother to clear them; we're just going to *free* them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}

		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	domain_flush_cache(domain, first_pte,
			   (void *)++last_pte - (void *)first_pte);
/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn,
				 struct page *freelist)
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn,
				       freelist);

	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;
static void dma_free_pagelist(struct page *freelist)
	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
		pr_err("Allocating root entry for %s failed\n",
		       iommu->name);

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);
static void iommu_set_root_entry(struct intel_iommu *iommu)
	addr = virt_to_phys(iommu->root_entry);
	if (sm_supported(iommu))
		addr |= DMA_RTADDR_SMT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
	if (sm_supported(iommu))
		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
void iommu_flush_write_buffer(struct intel_iommu *iommu)
	if (!rwbf_quirk && !cap_rwbf(iommu->cap))

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
/* return value determines whether we need a write buffer flush */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
/* return value determines whether we need a write buffer flush */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
	}

	/* Note: set drain read/write */
	/*
	 * This is probably meant to be extra safe. It looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;

	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			 (unsigned long long)DMA_TLB_IIRG(type),
			 (unsigned long long)DMA_TLB_IAIG(val));
static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
		}

static void domain_update_iotlb(struct dmar_domain *domain)
	struct device_domain_info *info;
	bool has_iotlb_device = false;

	assert_spin_locked(&device_domain_lock);

	list_for_each_entry(info, &domain->devices, link)
		if (info->ats_enabled) {
			has_iotlb_device = true;
		}

	if (!has_iotlb_device) {
		struct subdev_domain_info *sinfo;

		list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
			info = get_domain_info(sinfo->pdev);
			if (info && info->ats_enabled) {
				has_iotlb_device = true;
			}
		}
	}

	domain->has_iotlb_device = has_iotlb_device;
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!info || !dev_is_pci(info->dev))

	pdev = to_pci_dev(info->dev);

	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
	if (!ecap_dit(info->iommu->ecap))
		struct pci_dev *pf_pdev;

		/* pdev will be returned if device is not a VF */
		pf_pdev = pci_physfn(pdev);
		info->pfsid = pci_dev_id(pf_pdev);

#ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported &&
	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;

	if (info->ats_supported && pci_ats_page_aligned(pdev) &&
	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		domain_update_iotlb(info->domain);
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
static void iommu_disable_dev_iotlb(struct device_domain_info *info)
	struct pci_dev *pdev;

	assert_spin_locked(&device_domain_lock);

	if (!dev_is_pci(info->dev))

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
		domain_update_iotlb(info->domain);
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
				    u64 addr, unsigned int mask)
	if (!info || !info->ats_enabled)

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
			   qdep, addr, mask);

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
	unsigned long flags;
	struct device_domain_info *info;
	struct subdev_domain_info *sinfo;

	if (!domain->has_iotlb_device)

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link)
		__iommu_flush_dev_iotlb(info, addr, mask);

	list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
		info = get_domain_info(sinfo->pdev);
		__iommu_flush_dev_iotlb(info, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
static void domain_flush_piotlb(struct intel_iommu *iommu,
				struct dmar_domain *domain,
				u64 addr, unsigned long npages, bool ih)
	u16 did = domain->iommu_did[iommu->seq_id];

	if (domain->default_pasid)
		qi_flush_piotlb(iommu, did, domain->default_pasid,
				addr, npages, ih);

	if (!list_empty(&domain->devices))
		qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
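	/*
	 * e.g. pages == 9 gives mask == 4: a naturally aligned, 16-page
	 * region is invalidated, which is the smallest power-of-two PSI
	 * request that covers the range.
	 */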
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	if (domain_use_first_level(domain)) {
		domain_flush_piotlb(iommu, domain, addr, pages, ih);

	/*
	 * Fallback to domain selective flush if no PSI support or
	 * the size is too big. PSI requires page size to be 2 ^ x,
	 * and the base address is naturally aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
					 DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(domain, addr, mask);
/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
	/*
	 * It's a non-present to present mapping. Only flush if caching mode
	 * and second level.
	 */
	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
static void intel_flush_iotlb_all(struct iommu_domain *domain)
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);

	for_each_domain_iommu(idx, dmar_domain) {
		struct intel_iommu *iommu = g_iommus[idx];
		u16 did = dmar_domain->iommu_did[iommu->seq_id];

		if (domain_use_first_level(dmar_domain))
			domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
		else
			iommu->flush.flush_iotlb(iommu, did, 0, 0,
						 DMA_TLB_GLOBAL_FLUSH);

		if (!cap_caching_mode(iommu->cap))
			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
					      0, MAX_AGAW_PFN_WIDTH);
	}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
	unsigned long flags;

	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
static void iommu_enable_translation(struct intel_iommu *iommu)
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

static void iommu_disable_translation(struct intel_iommu *iommu)
	if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
	    (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
static int iommu_init_domains(struct intel_iommu *iommu)
	u32 ndomains, nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
	}

	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains = NULL;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
	if (sm_supported(iommu))
		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
static void disable_dmar_iommu(struct intel_iommu *iommu)
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		if (info->iommu != iommu)

		if (!info->dev || !info->domain)

		__dmar_remove_one_dev_info(info);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
static void free_dmar_iommu(struct intel_iommu *iommu)
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_supported(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);

		if (vccap_pasid(iommu->vccap))
			ioasid_unregister_allocator(&iommu->pasid_allocator);
	}
#endif
/*
 * Check and return whether first level is used by default for
 * DMA translation.
 */
static bool first_level_by_default(void)
	return scalable_mode_support() && intel_cap_flts_sanity();

static struct dmar_domain *alloc_domain(int flags)
	struct dmar_domain *domain;

	domain = alloc_domain_mem();

	memset(domain, 0, sizeof(*domain));
	domain->nid = NUMA_NO_NODE;
	domain->flags = flags;
	if (first_level_by_default())
		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
	domain->has_iotlb_device = false;
	INIT_LIST_HEAD(&domain->devices);
	INIT_LIST_HEAD(&domain->subdevices);
/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
	unsigned long ndomains;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid = iommu->node;

		domain_update_iommu_cap(domain);
	}
static void domain_detach_iommu(struct dmar_domain *domain,
				struct intel_iommu *iommu)
	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

static inline int guestwidth_to_adjustwidth(int gaw)
	int r = (gaw - 12) % 9;
static void domain_exit(struct dmar_domain *domain)
	/* Remove associated devices and clear attached or cached domains */
	domain_remove_dev_info(domain);

	if (domain->domain.type == IOMMU_DOMAIN_DMA)
		iommu_put_dma_cookie(&domain->domain);

		struct page *freelist;

		freelist = domain_unmap(domain, 0,
					DOMAIN_MAX_PFN(domain->gaw), NULL);
		dma_free_pagelist(freelist);

	free_domain_mem(domain);
/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
static inline unsigned long context_get_sm_pds(struct pasid_table *table)
	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);

/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
static inline void
context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
	context->hi |= pasid & ((1 << 20) - 1);

/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_dte(struct context_entry *context)
	context->lo |= (1 << 2);

/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
static inline void context_set_sm_pre(struct context_entry *context)
	context->lo |= (1 << 4);

/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds)	(((pds) & 0x7) << 9)
static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      struct pasid_table *table,
				      u8 bus, u8 devfn)
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	context = iommu_context_addr(iommu, bus, devfn, 1);

	if (context_present(context))

	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to finish reset at its driver probe stage, so no
	 * in-flight DMA will exist, and we don't need to worry anymore
	 * hereafter.
	 */
	if (context_copied(context)) {
		u16 did_old = context_domain_id(context);

		if (did_old < cap_ndoms(iommu->cap)) {
			iommu->flush.flush_context(iommu, did_old,
						   (((u16)bus) << 8) | devfn,
						   DMA_CCMD_MASK_NOBIT,
						   DMA_CCMD_DEVICE_INVL);
			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
						 DMA_TLB_DSI_FLUSH);
		}
	}

	context_clear_entry(context);
	if (sm_supported(iommu)) {
		/* Setup the PASID DIR pointer: */
		pds = context_get_sm_pds(table);
		context->lo = (u64)virt_to_phys(table->table) |
				context_pdts(pds);

		/* Setup the RID_PASID field: */
		context_set_sm_rid2pasid(context, PASID_RID2PASID);

		/*
		 * Setup the Device-TLB enable bit and Page request
		 * enable bit:
		 */
		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			context_set_sm_dte(context);
		if (info && info->pri_supported)
			context_set_sm_pre(context);
	} else {
		struct dma_pte *pgd = domain->pgd;

		context_set_domain_id(context, did);

		if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd))
			}

			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
			if (info && info->ats_supported)
				translation = CONTEXT_TT_DEV_IOTLB;
			else
				translation = CONTEXT_TT_MULTI_LEVEL;

			context_set_address_root(context, virt_to_phys(pgd));
			context_set_address_width(context, agaw);
		} else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
			context_set_address_width(context, iommu->msagaw);
		}

		context_set_translation_type(context, translation);
	}

	context_set_fault_enable(context);
	context_set_present(context);
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entry we only need to flush the write-buffer. If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);

	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
struct domain_context_mapping_data {
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct pasid_table *table;
};

static int domain_context_mapping_cb(struct pci_dev *pdev,
				     u16 alias, void *opaque)
	struct domain_context_mapping_data *data = opaque;

	return domain_context_mapping_one(data->domain, data->iommu,
					  data->table, PCI_BUS_NUM(alias),
					  alias & 0xff);

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
	struct domain_context_mapping_data data;
	struct pasid_table *table;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);

	table = intel_pasid_get_table(dev);

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, table,
						  bus, devfn);

	data.domain = domain;

	return pci_for_each_dma_alias(to_pci_dev(dev),
				      &domain_context_mapping_cb, &data);
static int domain_context_mapped_cb(struct pci_dev *pdev,
				    u16 alias, void *opaque)
	struct intel_iommu *iommu = opaque;

	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);

static int domain_context_mapped(struct device *dev)
	struct intel_iommu *iommu;

	iommu = device_to_iommu(dev, &bus, &devfn);

	if (!dev_is_pci(dev))
		return device_context_mapped(iommu, bus, devfn);

	return !pci_for_each_dma_alias(to_pci_dev(dev),
				       domain_context_mapped_cb, iommu);
/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
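/*
 * Example: host_addr offset 0x800 within a page and size 0x1000 gives
 * PAGE_ALIGN(0x1800) == 0x2000, i.e. two 4KiB VT-d pages.
 */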
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		pfnmerge >>= VTD_STRIDE_SHIFT;
	}
/*
 * Ensure that old small page tables are removed to make room for superpage(s).
 * We're going to add new large pages, so make sure we don't remove their parent
 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
 */
static void switch_to_super_page(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long end_pfn, int level)
	unsigned long lvl_pages = lvl_to_nr_pages(level);
	struct dma_pte *pte = NULL;

	while (start_pfn <= end_pfn) {
		pte = pfn_to_dma_pte(domain, start_pfn, &level);

		if (dma_pte_present(pte)) {
			dma_pte_free_pagetable(domain, start_pfn,
					       start_pfn + lvl_pages - 1,
					       level + 1);

			for_each_domain_iommu(i, domain)
				iommu_flush_iotlb_psi(g_iommus[i], domain,
						      start_pfn, lvl_pages,
						      0, 0);
		}

		start_pfn += lvl_pages;
		if (first_pte_in_page(pte))
	}
2335 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2336 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2338 unsigned int largepage_lvl = 0;
2339 unsigned long lvl_pages = 0;
2340 struct dma_pte *pte = NULL;
2344 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2346 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2349 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2350 attr |= DMA_FL_PTE_PRESENT;
2351 if (domain_use_first_level(domain)) {
2352 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
2354 if (domain->domain.type == IOMMU_DOMAIN_DMA) {
2355 attr |= DMA_FL_PTE_ACCESS;
2356 if (prot & DMA_PTE_WRITE)
2357 attr |= DMA_FL_PTE_DIRTY;
2361 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2363 while (nr_pages > 0) {
2367 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2368 phys_pfn, nr_pages);
2370 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2373 /* It is a large page */
2374 if (largepage_lvl > 1) {
2375 unsigned long end_pfn;
2377 pteval |= DMA_PTE_LARGE_PAGE;
2378 end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
2379 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2381 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2385 /* We don't need a lock here; nobody else
2386 * touches this IOVA range
2388 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2390 static int dumps = 5;
2391 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2392 iov_pfn, tmp, (unsigned long long)pteval);
2395 debug_dma_dump_mappings(NULL);
2400 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2402 BUG_ON(nr_pages < lvl_pages);
2404 nr_pages -= lvl_pages;
2405 iov_pfn += lvl_pages;
2406 phys_pfn += lvl_pages;
2407 pteval += lvl_pages * VTD_PAGE_SIZE;
2409 /* If the next PTE would be the first in a new page, then we
2410 * need to flush the cache on the entries we've just written.
2411 * And then we'll need to recalculate 'pte', so clear it and
2412 * let it get set again in the if (!pte) block above.
2414 * If we're done (!nr_pages) we need to flush the cache too.
2416 * Also if we've been setting superpages, we may need to
2417 * recalculate 'pte' and switch back to smaller pages for the
2418 * end of the mapping, if the trailing size is not enough to
2419 * use another superpage (i.e. nr_pages < lvl_pages).
2421 * We leave clflush for the leaf pte changes to iotlb_sync_map()
2425 if (!nr_pages || first_pte_in_page(pte) ||
2426 (largepage_lvl > 1 && nr_pages < lvl_pages))
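/*
 * Worked example of the accounting in __domain_mapping() above
 * (illustrative only, assuming 2MiB superpage support and an IOVA/phys
 * pair that is 2MiB aligned): mapping nr_pages = 1300 installs two
 * level-2 PTEs of lvl_pages = 512 each, advancing pteval by
 * 512 * VTD_PAGE_SIZE = 2MiB per step. The remaining 276 pages are
 * smaller than a superpage, so the "nr_pages < lvl_pages" case above
 * forces 'pte' to be recalculated and the tail is mapped with 4KiB PTEs.
 */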
2433 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2435 struct intel_iommu *iommu = info->iommu;
2436 struct context_entry *context;
2437 unsigned long flags;
2443 spin_lock_irqsave(&iommu->lock, flags);
2444 context = iommu_context_addr(iommu, bus, devfn, 0);
2446 spin_unlock_irqrestore(&iommu->lock, flags);
2450 if (sm_supported(iommu)) {
2451 if (hw_pass_through && domain_type_is_si(info->domain))
2452 did_old = FLPT_DEFAULT_DID;
2454 did_old = info->domain->iommu_did[iommu->seq_id];
2456 did_old = context_domain_id(context);
2459 context_clear_entry(context);
2460 __iommu_flush_cache(iommu, context, sizeof(*context));
2461 spin_unlock_irqrestore(&iommu->lock, flags);
2462 iommu->flush.flush_context(iommu,
2464 (((u16)bus) << 8) | devfn,
2465 DMA_CCMD_MASK_NOBIT,
2466 DMA_CCMD_DEVICE_INVL);
2468 if (sm_supported(iommu))
2469 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2471 iommu->flush.flush_iotlb(iommu,
2477 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
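/*
 * Note on the source-id used in the context-cache flush above
 * (illustrative example): the 16-bit SID is just bus:devfn, e.g. a
 * hypothetical device at bus 0x3a, device 2, function 0 has
 * devfn = (2 << 3) | 0 = 0x10 and SID = (0x3a << 8) | 0x10 = 0x3a10.
 */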
2480 static inline void unlink_domain_info(struct device_domain_info *info)
2482 assert_spin_locked(&device_domain_lock);
2483 list_del(&info->link);
2484 list_del(&info->global);
2486 dev_iommu_priv_set(info->dev, NULL);
2489 static void domain_remove_dev_info(struct dmar_domain *domain)
2491 struct device_domain_info *info, *tmp;
2492 unsigned long flags;
2494 spin_lock_irqsave(&device_domain_lock, flags);
2495 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2496 __dmar_remove_one_dev_info(info);
2497 spin_unlock_irqrestore(&device_domain_lock, flags);
2500 struct dmar_domain *find_domain(struct device *dev)
2502 struct device_domain_info *info;
2504 if (unlikely(!dev || !dev->iommu))
2507 if (unlikely(attach_deferred(dev)))
2510 /* No lock here, assumes no domain exit in normal case */
2511 info = get_domain_info(dev);
2513 return info->domain;
2518 static inline struct device_domain_info *
2519 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2521 struct device_domain_info *info;
2523 list_for_each_entry(info, &device_domain_list, global)
2524 if (info->segment == segment && info->bus == bus &&
2525 info->devfn == devfn)
2531 static int domain_setup_first_level(struct intel_iommu *iommu,
2532 struct dmar_domain *domain,
2536 struct dma_pte *pgd = domain->pgd;
2541 * Skip top levels of page tables for an IOMMU that has
2542 * less agaw than the default. Unnecessary for PT mode.
2544 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2545 pgd = phys_to_virt(dma_pte_addr(pgd));
2546 if (!dma_pte_present(pgd))
2550 level = agaw_to_level(agaw);
2551 if (level != 4 && level != 5)
2554 if (pasid != PASID_RID2PASID)
2555 flags |= PASID_FLAG_SUPERVISOR_MODE;
2557 flags |= PASID_FLAG_FL5LP;
2559 if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
2560 flags |= PASID_FLAG_PAGE_SNOOP;
2562 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2563 domain->iommu_did[iommu->seq_id],
2567 static bool dev_is_real_dma_subdevice(struct device *dev)
2569 return dev && dev_is_pci(dev) &&
2570 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2573 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2576 struct dmar_domain *domain)
2578 struct dmar_domain *found = NULL;
2579 struct device_domain_info *info;
2580 unsigned long flags;
2583 info = alloc_devinfo_mem();
2587 if (!dev_is_real_dma_subdevice(dev)) {
2589 info->devfn = devfn;
2590 info->segment = iommu->segment;
2592 struct pci_dev *pdev = to_pci_dev(dev);
2594 info->bus = pdev->bus->number;
2595 info->devfn = pdev->devfn;
2596 info->segment = pci_domain_nr(pdev->bus);
2599 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2600 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2603 info->domain = domain;
2604 info->iommu = iommu;
2605 info->pasid_table = NULL;
2606 info->auxd_enabled = 0;
2607 INIT_LIST_HEAD(&info->subdevices);
2609 if (dev && dev_is_pci(dev)) {
2610 struct pci_dev *pdev = to_pci_dev(info->dev);
2612 if (ecap_dev_iotlb_support(iommu->ecap) &&
2613 pci_ats_supported(pdev) &&
2614 dmar_find_matched_atsr_unit(pdev))
2615 info->ats_supported = 1;
2617 if (sm_supported(iommu)) {
2618 if (pasid_supported(iommu)) {
2619 int features = pci_pasid_features(pdev);
2621 info->pasid_supported = features | 1;
2624 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2625 pci_pri_supported(pdev))
2626 info->pri_supported = 1;
2630 spin_lock_irqsave(&device_domain_lock, flags);
2632 found = find_domain(dev);
2635 struct device_domain_info *info2;
2636 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2639 found = info2->domain;
2645 spin_unlock_irqrestore(&device_domain_lock, flags);
2646 free_devinfo_mem(info);
2647 /* Caller must free the original domain */
2651 spin_lock(&iommu->lock);
2652 ret = domain_attach_iommu(domain, iommu);
2653 spin_unlock(&iommu->lock);
2656 spin_unlock_irqrestore(&device_domain_lock, flags);
2657 free_devinfo_mem(info);
2661 list_add(&info->link, &domain->devices);
2662 list_add(&info->global, &device_domain_list);
2664 dev_iommu_priv_set(dev, info);
2665 spin_unlock_irqrestore(&device_domain_lock, flags);
2667 /* PASID table is mandatory for a PCI device in scalable mode. */
2668 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2669 ret = intel_pasid_alloc_table(dev);
2671 dev_err(dev, "PASID table allocation failed\n");
2672 dmar_remove_one_dev_info(dev);
2676 /* Set up the PASID entry for requests without PASID: */
2677 spin_lock_irqsave(&iommu->lock, flags);
2678 if (hw_pass_through && domain_type_is_si(domain))
2679 ret = intel_pasid_setup_pass_through(iommu, domain,
2680 dev, PASID_RID2PASID);
2681 else if (domain_use_first_level(domain))
2682 ret = domain_setup_first_level(iommu, domain, dev,
2685 ret = intel_pasid_setup_second_level(iommu, domain,
2686 dev, PASID_RID2PASID);
2687 spin_unlock_irqrestore(&iommu->lock, flags);
2689 dev_err(dev, "Setup RID2PASID failed\n");
2690 dmar_remove_one_dev_info(dev);
2695 if (dev && domain_context_mapping(domain, dev)) {
2696 dev_err(dev, "Domain context map failed\n");
2697 dmar_remove_one_dev_info(dev);
2704 static int iommu_domain_identity_map(struct dmar_domain *domain,
2705 unsigned long first_vpfn,
2706 unsigned long last_vpfn)
2709 * An RMRR range might overlap with a physical memory range,
2712 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2714 return __domain_mapping(domain, first_vpfn,
2715 first_vpfn, last_vpfn - first_vpfn + 1,
2716 DMA_PTE_READ|DMA_PTE_WRITE);
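/*
 * Illustrative use of iommu_domain_identity_map() (not driver code, the
 * values are hypothetical and assume 4KiB pages): a 128KiB reserved
 * region at 0xe0000..0xfffff becomes DMA PFNs 0xe0..0xff, i.e. 32 pages
 * mapped 1:1 with read/write permission.
 */
#if 0
	unsigned long first_vpfn = mm_to_dma_pfn(0xe0000UL >> PAGE_SHIFT);	/* 0xe0 */
	unsigned long last_vpfn  = mm_to_dma_pfn(0xfffffUL >> PAGE_SHIFT);	/* 0xff */
	int ret = iommu_domain_identity_map(si_domain, first_vpfn, last_vpfn);
#endif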
2719 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2721 static int __init si_domain_init(int hw)
2723 struct dmar_rmrr_unit *rmrr;
2727 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2731 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2732 domain_exit(si_domain);
2739 for_each_online_node(nid) {
2740 unsigned long start_pfn, end_pfn;
2743 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2744 ret = iommu_domain_identity_map(si_domain,
2745 mm_to_dma_pfn(start_pfn),
2746 mm_to_dma_pfn(end_pfn));
2753 * Identity map the RMRRs so that devices with RMRRs could also use
2756 for_each_rmrr_units(rmrr) {
2757 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2759 unsigned long long start = rmrr->base_address;
2760 unsigned long long end = rmrr->end_address;
2762 if (WARN_ON(end < start ||
2763 end >> agaw_to_width(si_domain->agaw)))
2766 ret = iommu_domain_identity_map(si_domain,
2767 mm_to_dma_pfn(start >> PAGE_SHIFT),
2768 mm_to_dma_pfn(end >> PAGE_SHIFT));
2777 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2779 struct dmar_domain *ndomain;
2780 struct intel_iommu *iommu;
2783 iommu = device_to_iommu(dev, &bus, &devfn);
2787 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2788 if (ndomain != domain)
2794 static bool device_has_rmrr(struct device *dev)
2796 struct dmar_rmrr_unit *rmrr;
2801 for_each_rmrr_units(rmrr) {
2803 * Return TRUE if this RMRR contains the device that
2806 for_each_active_dev_scope(rmrr->devices,
2807 rmrr->devices_cnt, i, tmp)
2809 is_downstream_to_pci_bridge(dev, tmp)) {
2819 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2820 * is relaxable (i.e. is allowed not to be enforced under some conditions)
2821 * @dev: device handle
2823 * We assume that PCI USB devices with RMRRs have them largely
2824 * for historical reasons and that the RMRR space is not actively used post
2825 * boot. This exclusion may change if vendors begin to abuse it.
2827 * The same exception is made for graphics devices, with the requirement that
2828 * any use of the RMRR regions will be torn down before assigning the device
2831 * Return: true if the RMRR is relaxable, false otherwise
2833 static bool device_rmrr_is_relaxable(struct device *dev)
2835 struct pci_dev *pdev;
2837 if (!dev_is_pci(dev))
2840 pdev = to_pci_dev(dev);
2841 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2848 * There are a couple cases where we need to restrict the functionality of
2849 * devices associated with RMRRs. The first is when evaluating a device for
2850 * identity mapping because problems exist when devices are moved in and out
2851 * of domains and their respective RMRR information is lost. This means that
2852 * a device with associated RMRRs will never be in a "passthrough" domain.
2853 * The second is use of the device through the IOMMU API. This interface
2854 * expects to have full control of the IOVA space for the device. We cannot
2855 * satisfy both the requirement that RMRR access is maintained and have an
2856 * unencumbered IOVA space. We also have no ability to quiesce the device's
2857 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2858 * We therefore prevent devices associated with an RMRR from participating in
2859 * the IOMMU API, which eliminates them from device assignment.
2861 * In both cases, devices which have relaxable RMRRs are not concerned by this
2862 * restriction. See device_rmrr_is_relaxable comment.
2864 static bool device_is_rmrr_locked(struct device *dev)
2866 if (!device_has_rmrr(dev))
2869 if (device_rmrr_is_relaxable(dev))
2876 * Return the required default domain type for a specific device.
2878 * @dev: the device in query
2882 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2883 - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2884 * - 0: both identity and dynamic domains work for this device
2886 static int device_def_domain_type(struct device *dev)
2888 if (dev_is_pci(dev)) {
2889 struct pci_dev *pdev = to_pci_dev(dev);
2891 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2892 return IOMMU_DOMAIN_IDENTITY;
2894 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2895 return IOMMU_DOMAIN_IDENTITY;
2901 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2904 * Start from a sane IOMMU hardware state.
2905 * If queued invalidation was already initialized by us
2906 * (for example, while enabling interrupt remapping), then
2907 * things are already rolling from a sane state.
2911 * Clear any previous faults.
2913 dmar_fault(-1, iommu);
2915 * Disable queued invalidation if supported and already enabled
2916 * before OS handover.
2918 dmar_disable_qi(iommu);
2921 if (dmar_enable_qi(iommu)) {
2923 * Queued Invalidate not enabled, use Register Based Invalidate
2925 iommu->flush.flush_context = __iommu_flush_context;
2926 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2927 pr_info("%s: Using Register based invalidation\n",
2930 iommu->flush.flush_context = qi_flush_context;
2931 iommu->flush.flush_iotlb = qi_flush_iotlb;
2932 pr_info("%s: Using Queued invalidation\n", iommu->name);
2936 static int copy_context_table(struct intel_iommu *iommu,
2937 struct root_entry *old_re,
2938 struct context_entry **tbl,
2941 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2942 struct context_entry *new_ce = NULL, ce;
2943 struct context_entry *old_ce = NULL;
2944 struct root_entry re;
2945 phys_addr_t old_ce_phys;
2947 tbl_idx = ext ? bus * 2 : bus;
2948 memcpy(&re, old_re, sizeof(re));
2950 for (devfn = 0; devfn < 256; devfn++) {
2951 /* First calculate the correct index */
2952 idx = (ext ? devfn * 2 : devfn) % 256;
2955 /* First save what we may have and clean up */
2957 tbl[tbl_idx] = new_ce;
2958 __iommu_flush_cache(iommu, new_ce,
2968 old_ce_phys = root_entry_lctp(&re);
2970 old_ce_phys = root_entry_uctp(&re);
2973 if (ext && devfn == 0) {
2974 /* No LCTP, try UCTP */
2983 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2988 new_ce = alloc_pgtable_page(iommu->node);
2995 /* Now copy the context entry */
2996 memcpy(&ce, old_ce + idx, sizeof(ce));
2998 if (!__context_present(&ce))
3001 did = context_domain_id(&ce);
3002 if (did >= 0 && did < cap_ndoms(iommu->cap))
3003 set_bit(did, iommu->domain_ids);
3006 * We need a marker for copied context entries. This
3007 * marker needs to work for the old format as well as
3008 * for extended context entries.
3010 * Bit 67 of the context entry is used. In the old
3011 * format this bit is available to software, in the
3012 * extended format it is the PGE bit, but PGE is ignored
3013 * by HW if PASIDs are disabled (and thus still
3016 * So disable PASIDs first and then mark the entry
3017 * copied. This means that we don't copy PASID
3018 * translations from the old kernel, but this is fine as
3019 * faults there are not fatal.
3021 context_clear_pasid_enable(&ce);
3022 context_set_copied(&ce);
3027 tbl[tbl_idx + pos] = new_ce;
3029 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3038 static int copy_translation_tables(struct intel_iommu *iommu)
3040 struct context_entry **ctxt_tbls;
3041 struct root_entry *old_rt;
3042 phys_addr_t old_rt_phys;
3043 int ctxt_table_entries;
3044 unsigned long flags;
3049 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3050 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3051 new_ext = !!ecap_ecs(iommu->ecap);
3054 * The RTT bit can only be changed when translation is disabled,
3055 * but disabling translation would open a window for data
3056 * corruption. So bail out and don't copy anything if we would
3057 * have to change the bit.
3062 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3066 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3070 /* This is too big for the stack - allocate it from slab */
3071 ctxt_table_entries = ext ? 512 : 256;
3073 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3077 for (bus = 0; bus < 256; bus++) {
3078 ret = copy_context_table(iommu, &old_rt[bus],
3079 ctxt_tbls, bus, ext);
3081 pr_err("%s: Failed to copy context table for bus %d\n",
3087 spin_lock_irqsave(&iommu->lock, flags);
3089 /* Context tables are copied, now write them to the root_entry table */
3090 for (bus = 0; bus < 256; bus++) {
3091 int idx = ext ? bus * 2 : bus;
3094 if (ctxt_tbls[idx]) {
3095 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3096 iommu->root_entry[bus].lo = val;
3099 if (!ext || !ctxt_tbls[idx + 1])
3102 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3103 iommu->root_entry[bus].hi = val;
3106 spin_unlock_irqrestore(&iommu->lock, flags);
3110 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
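/*
 * Worked example for the extended (ECS) root/context layout used by
 * copy_context_table() and the loop above (illustrative only): extended
 * context entries are twice the size, so each bus needs two 4KiB tables.
 * For bus 3, ctxt_tbls[6] holds the lower table (devfn 0x00-0x7f, copied
 * from the old LCTP) and ctxt_tbls[7] the upper table (devfn 0x80-0xff,
 * copied from the old UCTP); a hypothetical devfn 0x85 lands at index
 * (0x85 * 2) % 256 = 10 in the upper table. The loop above then writes
 * root_entry[3].lo from ctxt_tbls[6] and root_entry[3].hi from
 * ctxt_tbls[7], each with the present bit set.
 */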
3120 #ifdef CONFIG_INTEL_IOMMU_SVM
3121 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3123 struct intel_iommu *iommu = data;
3127 return INVALID_IOASID;
3129 * The VT-d virtual command interface always uses the full 20-bit
3130 * PASID range. The host can partition the guest PASID range based on
3131 * policy, but that is out of the guest's control.
3133 if (min < PASID_MIN || max > intel_pasid_max_id)
3134 return INVALID_IOASID;
3136 if (vcmd_alloc_pasid(iommu, &ioasid))
3137 return INVALID_IOASID;
3142 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3144 struct intel_iommu *iommu = data;
3149 * The sanity check of the IOASID owner is done at the upper layer, e.g. VFIO.
3150 * We can only free the PASID when all the devices are unbound.
3152 if (ioasid_find(NULL, ioasid, NULL)) {
3153 pr_alert("Cannot free active IOASID %d\n", ioasid);
3156 vcmd_free_pasid(iommu, ioasid);
3159 static void register_pasid_allocator(struct intel_iommu *iommu)
3162 * If we are running in the host, there is no need for a custom allocator
3163 * since PASIDs are allocated system-wide by the host.
3165 if (!cap_caching_mode(iommu->cap))
3168 if (!sm_supported(iommu)) {
3169 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3174 * Register a custom PASID allocator if we are running in a guest;
3175 * guest PASIDs must be obtained via the virtual command interface.
3176 * There can be multiple vIOMMUs in each guest but only one allocator
3177 * is active. All vIOMMU allocators will eventually be calling the same
3180 if (!vccap_pasid(iommu->vccap))
3183 pr_info("Register custom PASID allocator\n");
3184 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3185 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3186 iommu->pasid_allocator.pdata = (void *)iommu;
3187 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3188 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3190 * Disable scalable mode on this IOMMU if there
3191 * is no custom allocator. Mixing SM-capable vIOMMUs
3192 * and non-SM vIOMMUs is not supported.
3199 static int __init init_dmars(void)
3201 struct dmar_drhd_unit *drhd;
3202 struct intel_iommu *iommu;
3208 * initialize and program root entry to not present
3211 for_each_drhd_unit(drhd) {
3213 * lock not needed as this is only incremented in the single-threaded
3214 * kernel __init code path; all other accesses are read
3217 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3221 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3224 /* Preallocate enough resources for IOMMU hot-addition */
3225 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3226 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3228 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3231 pr_err("Allocating global iommu array failed\n");
3236 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
3240 for_each_iommu(iommu, drhd) {
3241 if (drhd->ignored) {
3242 iommu_disable_translation(iommu);
3247 * Find the max pasid size of all IOMMUs in the system.
3248 * We need to ensure the system pasid table is no bigger
3249 * than the smallest supported.
3251 if (pasid_supported(iommu)) {
3252 u32 temp = 2 << ecap_pss(iommu->ecap);
3254 intel_pasid_max_id = min_t(u32, temp,
3255 intel_pasid_max_id);
3258 g_iommus[iommu->seq_id] = iommu;
3260 intel_iommu_init_qi(iommu);
3262 ret = iommu_init_domains(iommu);
3266 init_translation_status(iommu);
3268 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3269 iommu_disable_translation(iommu);
3270 clear_translation_pre_enabled(iommu);
3271 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3277 * we could share the same root & context tables
3278 * among all IOMMUs. Need to split it later.
3280 ret = iommu_alloc_root_entry(iommu);
3284 if (translation_pre_enabled(iommu)) {
3285 pr_info("Translation already enabled - trying to copy translation structures\n");
3287 ret = copy_translation_tables(iommu);
3290 * We found the IOMMU with translation
3291 * enabled - but failed to copy over the
3292 * old root-entry table. Try to proceed
3293 * by disabling translation now and
3294 * allocating a clean root-entry table.
3295 * This might cause DMAR faults, but
3296 * probably the dump will still succeed.
3298 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3300 iommu_disable_translation(iommu);
3301 clear_translation_pre_enabled(iommu);
3303 pr_info("Copied translation tables from previous kernel for %s\n",
3308 if (!ecap_pass_through(iommu->ecap))
3309 hw_pass_through = 0;
3310 intel_svm_check(iommu);
3314 * Now that qi is enabled on all iommus, set the root entry and flush
3315 * caches. This is required on some Intel X58 chipsets, otherwise the
3316 * flush_context function will loop forever and the boot hangs.
3318 for_each_active_iommu(iommu, drhd) {
3319 iommu_flush_write_buffer(iommu);
3320 #ifdef CONFIG_INTEL_IOMMU_SVM
3321 register_pasid_allocator(iommu);
3323 iommu_set_root_entry(iommu);
3326 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3331 iommu_identity_mapping |= IDENTMAP_GFX;
3333 check_tylersburg_isoch();
3335 ret = si_domain_init(hw_pass_through);
3342 * global invalidate context cache
3343 * global invalidate iotlb
3344 * enable translation
3346 for_each_iommu(iommu, drhd) {
3347 if (drhd->ignored) {
3349 * we always have to disable PMRs or DMA may fail on
3353 iommu_disable_protect_mem_regions(iommu);
3357 iommu_flush_write_buffer(iommu);
3359 #ifdef CONFIG_INTEL_IOMMU_SVM
3360 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3362 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3363 * could cause a lock race condition.
3365 up_write(&dmar_global_lock);
3366 ret = intel_svm_enable_prq(iommu);
3367 down_write(&dmar_global_lock);
3372 ret = dmar_set_interrupt(iommu);
3380 for_each_active_iommu(iommu, drhd) {
3381 disable_dmar_iommu(iommu);
3382 free_dmar_iommu(iommu);
3391 static inline int iommu_domain_cache_init(void)
3395 iommu_domain_cache = kmem_cache_create("iommu_domain",
3396 sizeof(struct dmar_domain),
3401 if (!iommu_domain_cache) {
3402 pr_err("Couldn't create iommu_domain cache\n");
3409 static inline int iommu_devinfo_cache_init(void)
3413 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3414 sizeof(struct device_domain_info),
3418 if (!iommu_devinfo_cache) {
3419 pr_err("Couldn't create devinfo cache\n");
3426 static int __init iommu_init_mempool(void)
3429 ret = iova_cache_get();
3433 ret = iommu_domain_cache_init();
3437 ret = iommu_devinfo_cache_init();
3441 kmem_cache_destroy(iommu_domain_cache);
3448 static void __init iommu_exit_mempool(void)
3450 kmem_cache_destroy(iommu_devinfo_cache);
3451 kmem_cache_destroy(iommu_domain_cache);
3455 static void __init init_no_remapping_devices(void)
3457 struct dmar_drhd_unit *drhd;
3461 for_each_drhd_unit(drhd) {
3462 if (!drhd->include_all) {
3463 for_each_active_dev_scope(drhd->devices,
3464 drhd->devices_cnt, i, dev)
3466 /* ignore DMAR unit if no devices exist */
3467 if (i == drhd->devices_cnt)
3472 for_each_active_drhd_unit(drhd) {
3473 if (drhd->include_all)
3476 for_each_active_dev_scope(drhd->devices,
3477 drhd->devices_cnt, i, dev)
3478 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3480 if (i < drhd->devices_cnt)
3483 /* This IOMMU has *only* gfx devices. Either bypass it or
3484 set the gfx_mapped flag, as appropriate */
3485 drhd->gfx_dedicated = 1;
3491 #ifdef CONFIG_SUSPEND
3492 static int init_iommu_hw(void)
3494 struct dmar_drhd_unit *drhd;
3495 struct intel_iommu *iommu = NULL;
3497 for_each_active_iommu(iommu, drhd)
3499 dmar_reenable_qi(iommu);
3501 for_each_iommu(iommu, drhd) {
3502 if (drhd->ignored) {
3504 * we always have to disable PMRs or DMA may fail on
3508 iommu_disable_protect_mem_regions(iommu);
3512 iommu_flush_write_buffer(iommu);
3513 iommu_set_root_entry(iommu);
3514 iommu_enable_translation(iommu);
3515 iommu_disable_protect_mem_regions(iommu);
3521 static void iommu_flush_all(void)
3523 struct dmar_drhd_unit *drhd;
3524 struct intel_iommu *iommu;
3526 for_each_active_iommu(iommu, drhd) {
3527 iommu->flush.flush_context(iommu, 0, 0, 0,
3528 DMA_CCMD_GLOBAL_INVL);
3529 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3530 DMA_TLB_GLOBAL_FLUSH);
3534 static int iommu_suspend(void)
3536 struct dmar_drhd_unit *drhd;
3537 struct intel_iommu *iommu = NULL;
3540 for_each_active_iommu(iommu, drhd) {
3541 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3543 if (!iommu->iommu_state)
3549 for_each_active_iommu(iommu, drhd) {
3550 iommu_disable_translation(iommu);
3552 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3554 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3555 readl(iommu->reg + DMAR_FECTL_REG);
3556 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3557 readl(iommu->reg + DMAR_FEDATA_REG);
3558 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3559 readl(iommu->reg + DMAR_FEADDR_REG);
3560 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3561 readl(iommu->reg + DMAR_FEUADDR_REG);
3563 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3568 for_each_active_iommu(iommu, drhd)
3569 kfree(iommu->iommu_state);
3574 static void iommu_resume(void)
3576 struct dmar_drhd_unit *drhd;
3577 struct intel_iommu *iommu = NULL;
3580 if (init_iommu_hw()) {
3582 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3584 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3588 for_each_active_iommu(iommu, drhd) {
3590 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3592 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3593 iommu->reg + DMAR_FECTL_REG);
3594 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3595 iommu->reg + DMAR_FEDATA_REG);
3596 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3597 iommu->reg + DMAR_FEADDR_REG);
3598 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3599 iommu->reg + DMAR_FEUADDR_REG);
3601 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3604 for_each_active_iommu(iommu, drhd)
3605 kfree(iommu->iommu_state);
3608 static struct syscore_ops iommu_syscore_ops = {
3609 .resume = iommu_resume,
3610 .suspend = iommu_suspend,
3613 static void __init init_iommu_pm_ops(void)
3615 register_syscore_ops(&iommu_syscore_ops);
3619 static inline void init_iommu_pm_ops(void) {}
3620 #endif /* CONFIG_PM */
3622 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3624 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3625 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3626 rmrr->end_address <= rmrr->base_address ||
3627 arch_rmrr_sanity_check(rmrr))
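/*
 * Worked example for rmrr_sanity_check() (illustrative, assuming 4KiB
 * PAGE_SIZE and hypothetical addresses): [0xa8000000, 0xa87fffff] passes
 * because the base is page aligned, end + 1 = 0xa8800000 is page aligned
 * and end > base; [0xa8000000, 0xa87ffe00] fails the end + 1 alignment
 * test, and any range with end <= base is rejected outright.
 */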
3633 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3635 struct acpi_dmar_reserved_memory *rmrr;
3636 struct dmar_rmrr_unit *rmrru;
3638 rmrr = (struct acpi_dmar_reserved_memory *)header;
3639 if (rmrr_sanity_check(rmrr)) {
3641 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3642 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3643 rmrr->base_address, rmrr->end_address,
3644 dmi_get_system_info(DMI_BIOS_VENDOR),
3645 dmi_get_system_info(DMI_BIOS_VERSION),
3646 dmi_get_system_info(DMI_PRODUCT_VERSION));
3647 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3650 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3654 rmrru->hdr = header;
3656 rmrru->base_address = rmrr->base_address;
3657 rmrru->end_address = rmrr->end_address;
3659 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3660 ((void *)rmrr) + rmrr->header.length,
3661 &rmrru->devices_cnt);
3662 if (rmrru->devices_cnt && rmrru->devices == NULL)
3665 list_add(&rmrru->list, &dmar_rmrr_units);
3674 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3676 struct dmar_atsr_unit *atsru;
3677 struct acpi_dmar_atsr *tmp;
3679 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3681 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3682 if (atsr->segment != tmp->segment)
3684 if (atsr->header.length != tmp->header.length)
3686 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3693 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3695 struct acpi_dmar_atsr *atsr;
3696 struct dmar_atsr_unit *atsru;
3698 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3701 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3702 atsru = dmar_find_atsr(atsr);
3706 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3711 * If the memory is allocated from the slab by the ACPI _DSM method, we need
3712 * to copy the memory content because the memory buffer will be freed
3715 atsru->hdr = (void *)(atsru + 1);
3716 memcpy(atsru->hdr, hdr, hdr->length);
3717 atsru->include_all = atsr->flags & 0x1;
3718 if (!atsru->include_all) {
3719 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3720 (void *)atsr + atsr->header.length,
3721 &atsru->devices_cnt);
3722 if (atsru->devices_cnt && atsru->devices == NULL) {
3728 list_add_rcu(&atsru->list, &dmar_atsr_units);
3733 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3735 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3739 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3741 struct acpi_dmar_atsr *atsr;
3742 struct dmar_atsr_unit *atsru;
3744 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3745 atsru = dmar_find_atsr(atsr);
3747 list_del_rcu(&atsru->list);
3749 intel_iommu_free_atsr(atsru);
3755 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3759 struct acpi_dmar_atsr *atsr;
3760 struct dmar_atsr_unit *atsru;
3762 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3763 atsru = dmar_find_atsr(atsr);
3767 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3768 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3776 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3778 struct dmar_satc_unit *satcu;
3779 struct acpi_dmar_satc *tmp;
3781 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3783 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3784 if (satc->segment != tmp->segment)
3786 if (satc->header.length != tmp->header.length)
3788 if (memcmp(satc, tmp, satc->header.length) == 0)
3795 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3797 struct acpi_dmar_satc *satc;
3798 struct dmar_satc_unit *satcu;
3800 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3803 satc = container_of(hdr, struct acpi_dmar_satc, header);
3804 satcu = dmar_find_satc(satc);
3808 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3812 satcu->hdr = (void *)(satcu + 1);
3813 memcpy(satcu->hdr, hdr, hdr->length);
3814 satcu->atc_required = satc->flags & 0x1;
3815 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3816 (void *)satc + satc->header.length,
3817 &satcu->devices_cnt);
3818 if (satcu->devices_cnt && !satcu->devices) {
3822 list_add_rcu(&satcu->list, &dmar_satc_units);
3827 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3830 struct intel_iommu *iommu = dmaru->iommu;
3832 if (g_iommus[iommu->seq_id])
3835 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3839 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3840 pr_warn("%s: Doesn't support hardware pass through.\n",
3844 if (!ecap_sc_support(iommu->ecap) &&
3845 domain_update_iommu_snooping(iommu)) {
3846 pr_warn("%s: Doesn't support snooping.\n",
3850 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3851 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3852 pr_warn("%s: Doesn't support large page.\n",
3858 * Disable translation if already enabled prior to OS handover.
3860 if (iommu->gcmd & DMA_GCMD_TE)
3861 iommu_disable_translation(iommu);
3863 g_iommus[iommu->seq_id] = iommu;
3864 ret = iommu_init_domains(iommu);
3866 ret = iommu_alloc_root_entry(iommu);
3870 intel_svm_check(iommu);
3872 if (dmaru->ignored) {
3874 * we always have to disable PMRs or DMA may fail on this device
3877 iommu_disable_protect_mem_regions(iommu);
3881 intel_iommu_init_qi(iommu);
3882 iommu_flush_write_buffer(iommu);
3884 #ifdef CONFIG_INTEL_IOMMU_SVM
3885 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3886 ret = intel_svm_enable_prq(iommu);
3891 ret = dmar_set_interrupt(iommu);
3895 iommu_set_root_entry(iommu);
3896 iommu_enable_translation(iommu);
3898 iommu_disable_protect_mem_regions(iommu);
3902 disable_dmar_iommu(iommu);
3904 free_dmar_iommu(iommu);
3908 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3911 struct intel_iommu *iommu = dmaru->iommu;
3913 if (!intel_iommu_enabled)
3919 ret = intel_iommu_add(dmaru);
3921 disable_dmar_iommu(iommu);
3922 free_dmar_iommu(iommu);
3928 static void intel_iommu_free_dmars(void)
3930 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3931 struct dmar_atsr_unit *atsru, *atsr_n;
3932 struct dmar_satc_unit *satcu, *satc_n;
3934 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3935 list_del(&rmrru->list);
3936 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3940 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3941 list_del(&atsru->list);
3942 intel_iommu_free_atsr(atsru);
3944 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3945 list_del(&satcu->list);
3946 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3951 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3954 struct pci_bus *bus;
3955 struct pci_dev *bridge = NULL;
3957 struct acpi_dmar_atsr *atsr;
3958 struct dmar_atsr_unit *atsru;
3960 dev = pci_physfn(dev);
3961 for (bus = dev->bus; bus; bus = bus->parent) {
3963 /* If it's an integrated device, allow ATS */
3966 /* Connected via non-PCIe: no ATS */
3967 if (!pci_is_pcie(bridge) ||
3968 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3970 /* If we found the root port, look it up in the ATSR */
3971 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3976 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3977 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3978 if (atsr->segment != pci_domain_nr(dev->bus))
3981 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3982 if (tmp == &bridge->dev)
3985 if (atsru->include_all)
3995 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3998 struct dmar_rmrr_unit *rmrru;
3999 struct dmar_atsr_unit *atsru;
4000 struct dmar_satc_unit *satcu;
4001 struct acpi_dmar_atsr *atsr;
4002 struct acpi_dmar_reserved_memory *rmrr;
4003 struct acpi_dmar_satc *satc;
4005 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4008 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4009 rmrr = container_of(rmrru->hdr,
4010 struct acpi_dmar_reserved_memory, header);
4011 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4012 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4013 ((void *)rmrr) + rmrr->header.length,
4014 rmrr->segment, rmrru->devices,
4015 rmrru->devices_cnt);
4018 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4019 dmar_remove_dev_scope(info, rmrr->segment,
4020 rmrru->devices, rmrru->devices_cnt);
4024 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4025 if (atsru->include_all)
4028 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4029 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4030 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4031 (void *)atsr + atsr->header.length,
4032 atsr->segment, atsru->devices,
4033 atsru->devices_cnt);
4038 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4039 if (dmar_remove_dev_scope(info, atsr->segment,
4040 atsru->devices, atsru->devices_cnt))
4044 list_for_each_entry(satcu, &dmar_satc_units, list) {
4045 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
4046 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4047 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
4048 (void *)satc + satc->header.length,
4049 satc->segment, satcu->devices,
4050 satcu->devices_cnt);
4055 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4056 if (dmar_remove_dev_scope(info, satc->segment,
4057 satcu->devices, satcu->devices_cnt))
4065 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4066 unsigned long val, void *v)
4068 struct memory_notify *mhp = v;
4069 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4070 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4074 case MEM_GOING_ONLINE:
4075 if (iommu_domain_identity_map(si_domain,
4076 start_vpfn, last_vpfn)) {
4077 pr_warn("Failed to build identity map for [%lx-%lx]\n",
4078 start_vpfn, last_vpfn);
4084 case MEM_CANCEL_ONLINE:
4086 struct dmar_drhd_unit *drhd;
4087 struct intel_iommu *iommu;
4088 struct page *freelist;
4090 freelist = domain_unmap(si_domain,
4091 start_vpfn, last_vpfn,
4095 for_each_active_iommu(iommu, drhd)
4096 iommu_flush_iotlb_psi(iommu, si_domain,
4097 start_vpfn, mhp->nr_pages,
4100 dma_free_pagelist(freelist);
4108 static struct notifier_block intel_iommu_memory_nb = {
4109 .notifier_call = intel_iommu_memory_notifier,
4113 static void intel_disable_iommus(void)
4115 struct intel_iommu *iommu = NULL;
4116 struct dmar_drhd_unit *drhd;
4118 for_each_iommu(iommu, drhd)
4119 iommu_disable_translation(iommu);
4122 void intel_iommu_shutdown(void)
4124 struct dmar_drhd_unit *drhd;
4125 struct intel_iommu *iommu = NULL;
4127 if (no_iommu || dmar_disabled)
4130 down_write(&dmar_global_lock);
4132 /* Disable PMRs explicitly here. */
4133 for_each_iommu(iommu, drhd)
4134 iommu_disable_protect_mem_regions(iommu);
4136 /* Make sure the IOMMUs are switched off */
4137 intel_disable_iommus();
4139 up_write(&dmar_global_lock);
4142 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4144 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4146 return container_of(iommu_dev, struct intel_iommu, iommu);
4149 static ssize_t version_show(struct device *dev,
4150 struct device_attribute *attr, char *buf)
4152 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4153 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4154 return sprintf(buf, "%d:%d\n",
4155 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4157 static DEVICE_ATTR_RO(version);
4159 static ssize_t address_show(struct device *dev,
4160 struct device_attribute *attr, char *buf)
4162 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4163 return sprintf(buf, "%llx\n", iommu->reg_phys);
4165 static DEVICE_ATTR_RO(address);
4167 static ssize_t cap_show(struct device *dev,
4168 struct device_attribute *attr, char *buf)
4170 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4171 return sprintf(buf, "%llx\n", iommu->cap);
4173 static DEVICE_ATTR_RO(cap);
4175 static ssize_t ecap_show(struct device *dev,
4176 struct device_attribute *attr, char *buf)
4178 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4179 return sprintf(buf, "%llx\n", iommu->ecap);
4181 static DEVICE_ATTR_RO(ecap);
4183 static ssize_t domains_supported_show(struct device *dev,
4184 struct device_attribute *attr, char *buf)
4186 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4187 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4189 static DEVICE_ATTR_RO(domains_supported);
4191 static ssize_t domains_used_show(struct device *dev,
4192 struct device_attribute *attr, char *buf)
4194 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4195 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4196 cap_ndoms(iommu->cap)));
4198 static DEVICE_ATTR_RO(domains_used);
4200 static struct attribute *intel_iommu_attrs[] = {
4201 &dev_attr_version.attr,
4202 &dev_attr_address.attr,
4204 &dev_attr_ecap.attr,
4205 &dev_attr_domains_supported.attr,
4206 &dev_attr_domains_used.attr,
4210 static struct attribute_group intel_iommu_group = {
4211 .name = "intel-iommu",
4212 .attrs = intel_iommu_attrs,
4215 const struct attribute_group *intel_iommu_groups[] = {
4220 static inline bool has_external_pci(void)
4222 struct pci_dev *pdev = NULL;
4224 for_each_pci_dev(pdev)
4225 if (pdev->external_facing)
4231 static int __init platform_optin_force_iommu(void)
4233 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
4236 if (no_iommu || dmar_disabled)
4237 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4240 * If Intel-IOMMU is disabled by default, we will apply identity
4241 * map for all devices except those marked as being untrusted.
4244 iommu_set_default_passthrough(false);
4252 static int __init probe_acpi_namespace_devices(void)
4254 struct dmar_drhd_unit *drhd;
4255 /* To avoid a -Wunused-but-set-variable warning. */
4256 struct intel_iommu *iommu __maybe_unused;
4260 for_each_active_iommu(iommu, drhd) {
4261 for_each_active_dev_scope(drhd->devices,
4262 drhd->devices_cnt, i, dev) {
4263 struct acpi_device_physical_node *pn;
4264 struct iommu_group *group;
4265 struct acpi_device *adev;
4267 if (dev->bus != &acpi_bus_type)
4270 adev = to_acpi_device(dev);
4271 mutex_lock(&adev->physical_node_lock);
4272 list_for_each_entry(pn,
4273 &adev->physical_node_list, node) {
4274 group = iommu_group_get(pn->dev);
4276 iommu_group_put(group);
4280 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4281 ret = iommu_probe_device(pn->dev);
4285 mutex_unlock(&adev->physical_node_lock);
4295 int __init intel_iommu_init(void)
4298 struct dmar_drhd_unit *drhd;
4299 struct intel_iommu *iommu;
4302 * Intel IOMMU is required for a TXT/tboot launch or platform
4303 * opt in, so enforce that.
4305 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4306 platform_optin_force_iommu();
4308 if (iommu_init_mempool()) {
4310 panic("tboot: Failed to initialize iommu memory\n");
4314 down_write(&dmar_global_lock);
4315 if (dmar_table_init()) {
4317 panic("tboot: Failed to initialize DMAR table\n");
4321 if (dmar_dev_scope_init() < 0) {
4323 panic("tboot: Failed to initialize DMAR device scope\n");
4327 up_write(&dmar_global_lock);
4330 * The bus notifier takes the dmar_global_lock, so lockdep will
4331 * complain later when we register it under the lock.
4333 dmar_register_bus_notifier();
4335 down_write(&dmar_global_lock);
4338 intel_iommu_debugfs_init();
4340 if (no_iommu || dmar_disabled) {
4342 * We exit the function here to ensure IOMMU's remapping and
4343 * mempool aren't set up, which means that the IOMMU's PMRs
4344 * won't be disabled via the call to init_dmars(). So disable
4345 * them explicitly here. The PMRs were set up by tboot prior to
4346 * calling SENTER, but the kernel is expected to reset/tear
4349 if (intel_iommu_tboot_noforce) {
4350 for_each_iommu(iommu, drhd)
4351 iommu_disable_protect_mem_regions(iommu);
4355 * Make sure the IOMMUs are switched off, even when we
4356 * boot into a kexec kernel and the previous kernel left
4359 intel_disable_iommus();
4363 if (list_empty(&dmar_rmrr_units))
4364 pr_info("No RMRR found\n");
4366 if (list_empty(&dmar_atsr_units))
4367 pr_info("No ATSR found\n");
4369 if (list_empty(&dmar_satc_units))
4370 pr_info("No SATC found\n");
4373 intel_iommu_gfx_mapped = 1;
4375 init_no_remapping_devices();
4380 panic("tboot: Failed to initialize DMARs\n");
4381 pr_err("Initialization failed\n");
4384 up_write(&dmar_global_lock);
4386 init_iommu_pm_ops();
4388 down_read(&dmar_global_lock);
4389 for_each_active_iommu(iommu, drhd) {
4391 * The flush queue implementation does not perform
4392 * page-selective invalidations that are required for efficient
4393 * TLB flushes in virtual environments. The benefit of batching
4394 * is likely to be much lower than the overhead of synchronizing
4395 * the virtual and physical IOMMU page-tables.
4397 if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
4398 pr_warn("IOMMU batching is disabled due to virtualization");
4399 intel_iommu_strict = 1;
4401 iommu_device_sysfs_add(&iommu->iommu, NULL,
4404 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4406 up_read(&dmar_global_lock);
4408 iommu_set_dma_strict(intel_iommu_strict);
4409 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4410 if (si_domain && !hw_pass_through)
4411 register_memory_notifier(&intel_iommu_memory_nb);
4413 down_read(&dmar_global_lock);
4414 if (probe_acpi_namespace_devices())
4415 pr_warn("ACPI name space devices didn't probe correctly\n");
4417 /* Finally, we enable the DMA remapping hardware. */
4418 for_each_iommu(iommu, drhd) {
4419 if (!drhd->ignored && !translation_pre_enabled(iommu))
4420 iommu_enable_translation(iommu);
4422 iommu_disable_protect_mem_regions(iommu);
4424 up_read(&dmar_global_lock);
4426 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4428 intel_iommu_enabled = 1;
4433 intel_iommu_free_dmars();
4434 up_write(&dmar_global_lock);
4435 iommu_exit_mempool();
4439 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4441 struct device_domain_info *info = opaque;
4443 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4448 * NB - intel-iommu lacks any sort of reference counting for the users of
4449 * dependent devices. If multiple endpoints have intersecting dependent
4450 * devices, unbinding the driver from any one of them will possibly leave
4451 * the others unable to operate.
4453 static void domain_context_clear(struct device_domain_info *info)
4455 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4458 pci_for_each_dma_alias(to_pci_dev(info->dev),
4459 &domain_context_clear_one_cb, info);
4462 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4464 struct dmar_domain *domain;
4465 struct intel_iommu *iommu;
4466 unsigned long flags;
4468 assert_spin_locked(&device_domain_lock);
4473 iommu = info->iommu;
4474 domain = info->domain;
4476 if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
4477 if (dev_is_pci(info->dev) && sm_supported(iommu))
4478 intel_pasid_tear_down_entry(iommu, info->dev,
4479 PASID_RID2PASID, false);
4481 iommu_disable_dev_iotlb(info);
4482 domain_context_clear(info);
4483 intel_pasid_free_table(info->dev);
4486 unlink_domain_info(info);
4488 spin_lock_irqsave(&iommu->lock, flags);
4489 domain_detach_iommu(domain, iommu);
4490 spin_unlock_irqrestore(&iommu->lock, flags);
4492 free_devinfo_mem(info);
4495 static void dmar_remove_one_dev_info(struct device *dev)
4497 struct device_domain_info *info;
4498 unsigned long flags;
4500 spin_lock_irqsave(&device_domain_lock, flags);
4501 info = get_domain_info(dev);
4503 __dmar_remove_one_dev_info(info);
4504 spin_unlock_irqrestore(&device_domain_lock, flags);
4507 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4511 /* calculate AGAW */
4512 domain->gaw = guest_width;
4513 adjust_width = guestwidth_to_adjustwidth(guest_width);
4514 domain->agaw = width_to_agaw(adjust_width);
4516 domain->iommu_coherency = false;
4517 domain->iommu_snooping = false;
4518 domain->iommu_superpage = 0;
4519 domain->max_addr = 0;
4521 /* always allocate the top pgd */
4522 domain->pgd = alloc_pgtable_page(domain->nid);
4525 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
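/*
 * Worked example for the width adjustment above (illustrative only): with
 * 4KiB pages and a 9-bit stride per level, usable widths are 39, 48 and
 * 57 bits (3-, 4- and 5-level tables). A guest_width of 48 already has
 * (48 - 12) as a whole number of levels and maps to a 4-level table; a
 * width such as 50 would be rounded up by guestwidth_to_adjustwidth() to
 * the next full level (57 bits, 5 levels).
 */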
4529 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4531 struct dmar_domain *dmar_domain;
4532 struct iommu_domain *domain;
4535 case IOMMU_DOMAIN_DMA:
4536 case IOMMU_DOMAIN_UNMANAGED:
4537 dmar_domain = alloc_domain(0);
4539 pr_err("Can't allocate dmar_domain\n");
4542 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4543 pr_err("Domain initialization failed\n");
4544 domain_exit(dmar_domain);
4548 if (type == IOMMU_DOMAIN_DMA &&
4549 iommu_get_dma_cookie(&dmar_domain->domain))
4552 domain = &dmar_domain->domain;
4553 domain->geometry.aperture_start = 0;
4554 domain->geometry.aperture_end =
4555 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4556 domain->geometry.force_aperture = true;
4559 case IOMMU_DOMAIN_IDENTITY:
4560 return &si_domain->domain;
4568 static void intel_iommu_domain_free(struct iommu_domain *domain)
4570 if (domain != &si_domain->domain)
4571 domain_exit(to_dmar_domain(domain));
4575 * Check whether a @domain could be attached to the @dev through the
4576 * aux-domain attach/detach APIs.
4579 is_aux_domain(struct device *dev, struct iommu_domain *domain)
4581 struct device_domain_info *info = get_domain_info(dev);
4583 return info && info->auxd_enabled &&
4584 domain->type == IOMMU_DOMAIN_UNMANAGED;
4587 static inline struct subdev_domain_info *
4588 lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
4590 struct subdev_domain_info *sinfo;
4592 if (!list_empty(&domain->subdevices)) {
4593 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
4594 if (sinfo->pdev == dev)
4602 static int auxiliary_link_device(struct dmar_domain *domain,
4605 struct device_domain_info *info = get_domain_info(dev);
4606 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4608 assert_spin_locked(&device_domain_lock);
4613 sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
4616 sinfo->domain = domain;
4618 list_add(&sinfo->link_phys, &info->subdevices);
4619 list_add(&sinfo->link_domain, &domain->subdevices);
4622 return ++sinfo->users;
4625 static int auxiliary_unlink_device(struct dmar_domain *domain,
4628 struct device_domain_info *info = get_domain_info(dev);
4629 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4632 assert_spin_locked(&device_domain_lock);
4633 if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
4636 ret = --sinfo->users;
4638 list_del(&sinfo->link_phys);
4639 list_del(&sinfo->link_domain);
4646 static int aux_domain_add_dev(struct dmar_domain *domain,
4650 unsigned long flags;
4651 struct intel_iommu *iommu;
4653 iommu = device_to_iommu(dev, NULL, NULL);
4657 if (domain->default_pasid <= 0) {
4660 /* No private data needed for the default pasid */
4661 pasid = ioasid_alloc(NULL, PASID_MIN,
4662 pci_max_pasids(to_pci_dev(dev)) - 1,
4664 if (pasid == INVALID_IOASID) {
4665 pr_err("Can't allocate default pasid\n");
4668 domain->default_pasid = pasid;
4671 spin_lock_irqsave(&device_domain_lock, flags);
4672 ret = auxiliary_link_device(domain, dev);
4677 * Subdevices from the same physical device can be attached to the
4678 * same domain. For such cases, only the first subdevice attachment
4679 * needs to go through the full steps in this function. So if ret >
4686 * iommu->lock must be held to attach the domain to the iommu and set up
4687 * the pasid entry for second-level translation.
4689 spin_lock(&iommu->lock);
4690 ret = domain_attach_iommu(domain, iommu);
4694 /* Set up the PASID entry for mediated devices: */
4695 if (domain_use_first_level(domain))
4696 ret = domain_setup_first_level(iommu, domain, dev,
4697 domain->default_pasid);
4699 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4700 domain->default_pasid);
4704 spin_unlock(&iommu->lock);
4706 spin_unlock_irqrestore(&device_domain_lock, flags);
4711 domain_detach_iommu(domain, iommu);
4713 spin_unlock(&iommu->lock);
4714 auxiliary_unlink_device(domain, dev);
4716 spin_unlock_irqrestore(&device_domain_lock, flags);
4717 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4718 ioasid_put(domain->default_pasid);
4723 static void aux_domain_remove_dev(struct dmar_domain *domain,
4726 struct device_domain_info *info;
4727 struct intel_iommu *iommu;
4728 unsigned long flags;
4730 if (!is_aux_domain(dev, &domain->domain))
4733 spin_lock_irqsave(&device_domain_lock, flags);
4734 info = get_domain_info(dev);
4735 iommu = info->iommu;
4737 if (!auxiliary_unlink_device(domain, dev)) {
4738 spin_lock(&iommu->lock);
4739 intel_pasid_tear_down_entry(iommu, dev,
4740 domain->default_pasid, false);
4741 domain_detach_iommu(domain, iommu);
4742 spin_unlock(&iommu->lock);
4745 spin_unlock_irqrestore(&device_domain_lock, flags);
4747 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4748 ioasid_put(domain->default_pasid);
4751 static int prepare_domain_attach_device(struct iommu_domain *domain,
4754 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4755 struct intel_iommu *iommu;
4758 iommu = device_to_iommu(dev, NULL, NULL);
4762 if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
4763 !ecap_nest(iommu->ecap)) {
4764 dev_err(dev, "%s: iommu does not support nested translation\n",
4769 /* check if this iommu agaw is sufficient for max mapped address */
4770 addr_width = agaw_to_width(iommu->agaw);
4771 if (addr_width > cap_mgaw(iommu->cap))
4772 addr_width = cap_mgaw(iommu->cap);
4774 if (dmar_domain->max_addr > (1LL << addr_width)) {
4775 dev_err(dev, "%s: iommu width (%d) is not "
4776 "sufficient for the mapped address (%llx)\n",
4777 __func__, addr_width, dmar_domain->max_addr);
4780 dmar_domain->gaw = addr_width;
4783 * Knock out extra levels of page tables if necessary
4785 while (iommu->agaw < dmar_domain->agaw) {
4786 struct dma_pte *pte;
4788 pte = dmar_domain->pgd;
4789 if (dma_pte_present(pte)) {
4790 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4791 free_pgtable_page(pte);
4793 dmar_domain->agaw--;
4799 static int intel_iommu_attach_device(struct iommu_domain *domain,
4804 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4805 device_is_rmrr_locked(dev)) {
4806 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4810 if (is_aux_domain(dev, domain))
4813 /* normally dev is not mapped */
4814 if (unlikely(domain_context_mapped(dev))) {
4815 struct dmar_domain *old_domain;
4817 old_domain = find_domain(dev);
4819 dmar_remove_one_dev_info(dev);
4822 ret = prepare_domain_attach_device(domain, dev);
4826 return domain_add_dev_info(to_dmar_domain(domain), dev);
4829 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
4834 if (!is_aux_domain(dev, domain))
4837 ret = prepare_domain_attach_device(domain, dev);
4841 return aux_domain_add_dev(to_dmar_domain(domain), dev);
4844 static void intel_iommu_detach_device(struct iommu_domain *domain,
4847 dmar_remove_one_dev_info(dev);
4850 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
4853 aux_domain_remove_dev(to_dmar_domain(domain), dev);
4856 #ifdef CONFIG_INTEL_IOMMU_SVM
4858 * 2D array for converting and sanitizing IOMMU generic TLB granularity to
4859 * VT-d granularity. Invalidation is typically included in the unmap operation
4860 * as a result of DMA or VFIO unmap. However, for assigned devices the guest
4861 * owns the first-level page tables. Invalidations of translation caches in the
4862 * guest are trapped and passed down to the host.
4864 * vIOMMU in the guest will only expose first level page tables, therefore
4865 * we do not support IOTLB granularity for requests without PASID (second level).
4867 * For example, to find the VT-d granularity encoding for IOTLB
4868 * type and page selective granularity within PASID:
4869 * X: indexed by iommu cache type
4870 * Y: indexed by enum iommu_inv_granularity
4871 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
4875 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
4877 * PASID based IOTLB invalidation: PASID selective (per PASID),
4878 * page selective (address granularity)
4880 {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
4881 /* PASID based dev TLBs */
4882 {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
4884 {-EINVAL, -EINVAL, -EINVAL}
4887 static inline int to_vtd_granularity(int type, int granu)
4889 return inv_type_granu_table[type][granu];
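/*
 * Example lookups in the table above (illustrative, assuming the uapi
 * granularity order domain, PASID, address): an IOTLB invalidation with
 * IOMMU_INV_GRANU_ADDR maps to QI_GRAN_PSI_PASID, an IOTLB invalidation
 * with IOMMU_INV_GRANU_PASID maps to QI_GRAN_NONG_PASID, and a device
 * IOTLB request is only valid with IOMMU_INV_GRANU_ADDR, where it maps
 * to QI_DEV_IOTLB_GRAN_PASID_SEL; every other combination yields -EINVAL.
 */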
4892 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
4894 u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
4896 /* VT-d size is encoded as 2^size 4K pages: 0 for 4K, 9 for 2MB, etc.
4897 * The IOMMU cache invalidate API passes granu_size in bytes and the number
4898 * of granules of that size in contiguous memory.
4900 return order_base_2(nr_pages);
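/*
 * Worked example for to_vtd_size() (illustrative only): granu_size = 4096
 * with nr_granules = 512 gives nr_pages = 512 and order_base_2(512) = 9,
 * the 2MiB encoding mentioned above; a single 2MiB granule gives the same
 * result. Counts that are not a power of two round up, e.g. 300 pages
 * also encode as order 9.
 */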
4904 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
4905 struct iommu_cache_invalidate_info *inv_info)
4907 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4908 struct device_domain_info *info;
4909 struct intel_iommu *iommu;
4910 unsigned long flags;
4917 if (!inv_info || !dmar_domain)
4920 if (!dev || !dev_is_pci(dev))
4923 iommu = device_to_iommu(dev, &bus, &devfn);
4927 if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
4930 spin_lock_irqsave(&device_domain_lock, flags);
4931 spin_lock(&iommu->lock);
4932 info = get_domain_info(dev);
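/*
 * The invalidation descriptors issued below are tagged with the domain ID
 * allocated for this IOMMU and a source-id built from the device's bus/devfn.
 */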
4937 did = dmar_domain->iommu_did[iommu->seq_id];
4938 sid = PCI_DEVID(bus, devfn);
4940 /* Size is only valid in address selective invalidation */
4941 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
4942 size = to_vtd_size(inv_info->granu.addr_info.granule_size,
4943 inv_info->granu.addr_info.nb_granules);
4945 for_each_set_bit(cache_type,
4946 (unsigned long *)&inv_info->cache,
4947 IOMMU_CACHE_INV_TYPE_NR) {
4952 granu = to_vtd_granularity(cache_type, inv_info->granularity);
4953 if (granu == -EINVAL) {
4954 pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
4955 cache_type, inv_info->granularity);
4960 * PASID is stored in different locations based on the granularity.
4963 if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
4964 (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
4965 pasid = inv_info->granu.pasid_info.pasid;
4966 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
4967 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
4968 pasid = inv_info->granu.addr_info.pasid;
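/*
 * cache_type is a bit position reported by for_each_set_bit() above;
 * convert it back to an IOMMU_CACHE_INV_TYPE_* mask for the switch below.
 */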
4970 switch (BIT(cache_type)) {
4971 case IOMMU_CACHE_INV_TYPE_IOTLB:
4972 /* HW will ignore LSB bits based on address mask */
4973 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
4975 (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
4976 pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
4977 inv_info->granu.addr_info.addr, size);
4981 * If granu is PASID-selective, address is ignored.
4982 * We use npages = -1 to indicate that.
4984 qi_flush_piotlb(iommu, did, pasid,
4985 mm_to_dma_pfn(inv_info->granu.addr_info.addr),
4986 (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
4987 inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
4989 if (!info->ats_enabled)
4992 * Always flush the device IOTLB if ATS is enabled. The vIOMMU
4993 * in the guest may assume the IOTLB flush is inclusive of the
4994 * device IOTLB flush, which is more efficient.
4997 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
4999 * PASID based device TLB invalidation does not support
5000 * IOMMU_INV_GRANU_PASID granularity but only supports
5001 * IOMMU_INV_GRANU_ADDR.
5002 * The equivalent is to set the size to cover the entire 64-bit
5003 * address range; the user only provides PASID info without
5004 * address info, so we set addr to 0.
5006 if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
5007 size = 64 - VTD_PAGE_SHIFT;
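/*
 * Illustrative arithmetic: with 4KiB pages VTD_PAGE_SHIFT is 12, so
 * size = 52 and 2^52 4KiB pages cover the whole 64-bit address space.
 */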
5009 } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
5010 addr = inv_info->granu.addr_info.addr;
5013 if (info->ats_enabled)
5014 qi_flush_dev_iotlb_pasid(iommu, sid,
5016 info->ats_qdep, addr,
5019 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5022 dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5028 spin_unlock(&iommu->lock);
5029 spin_unlock_irqrestore(&device_domain_lock, flags);
5035 static int intel_iommu_map(struct iommu_domain *domain,
5036 unsigned long iova, phys_addr_t hpa,
5037 size_t size, int iommu_prot, gfp_t gfp)
5039 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5043 if (iommu_prot & IOMMU_READ)
5044 prot |= DMA_PTE_READ;
5045 if (iommu_prot & IOMMU_WRITE)
5046 prot |= DMA_PTE_WRITE;
5047 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5048 prot |= DMA_PTE_SNP;
5050 max_addr = iova + size;
5051 if (dmar_domain->max_addr < max_addr) {
5054 /* check if minimum agaw is sufficient for mapped address */
5055 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5056 if (end < max_addr) {
5057 pr_err("%s: iommu width (%d) is not "
5058 "sufficient for the mapped address (%llx)\n",
5059 __func__, dmar_domain->gaw, max_addr);
5062 dmar_domain->max_addr = max_addr;
5064 /* Round up size to next multiple of PAGE_SIZE, if it and
5065 the low bits of hpa would take us onto the next page */
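/*
 * Illustrative example (hypothetical values, 4KiB pages assumed):
 * hpa = 0x1004 with size = 0x2000 straddles three pages, so
 * aligned_nrpages() returns 3 and three 4KiB pages get mapped.
 */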
5066 size = aligned_nrpages(hpa, size);
5067 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5068 hpa >> VTD_PAGE_SHIFT, size, prot);
5071 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5072 unsigned long iova, size_t size,
5073 struct iommu_iotlb_gather *gather)
5075 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5076 unsigned long start_pfn, last_pfn;
5079 /* Cope with horrid API which requires us to unmap more than the
5080 size argument if it happens to be a large-page mapping. */
5081 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5083 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5084 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
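/*
 * Illustrative example (assuming the usual level numbering, where a 2MiB
 * superpage sits at level 2): level_to_offset_bits(2) is 9, so an unmap
 * request landing inside such a mapping is rounded up to 4KiB << 9 = 2MiB.
 */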
5086 start_pfn = iova >> VTD_PAGE_SHIFT;
5087 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5089 gather->freelist = domain_unmap(dmar_domain, start_pfn,
5090 last_pfn, gather->freelist);
5092 if (dmar_domain->max_addr == iova + size)
5093 dmar_domain->max_addr = iova;
5095 iommu_iotlb_gather_add_page(domain, gather, iova, size);
5100 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
5101 struct iommu_iotlb_gather *gather)
5103 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5104 unsigned long iova_pfn = IOVA_PFN(gather->start);
5105 size_t size = gather->end - gather->start;
5106 unsigned long start_pfn;
5107 unsigned long nrpages;
5110 nrpages = aligned_nrpages(gather->start, size);
5111 start_pfn = mm_to_dma_pfn(iova_pfn);
5113 for_each_domain_iommu(iommu_id, dmar_domain)
5114 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5115 start_pfn, nrpages, !gather->freelist, 0);
5117 dma_free_pagelist(gather->freelist);
5120 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5123 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5124 struct dma_pte *pte;
5128 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5129 if (pte && dma_pte_present(pte))
5130 phys = dma_pte_addr(pte) +
5131 (iova & (BIT_MASK(level_to_offset_bits(level) +
5132 VTD_PAGE_SHIFT) - 1));
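/*
 * Illustrative example (same level-numbering assumption as above): for a
 * 2MiB superpage the mask keeps the low 21 bits of the IOVA, which are
 * added to dma_pte_addr(pte) as the offset within the superpage.
 */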
5137 static bool intel_iommu_capable(enum iommu_cap cap)
5139 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5140 return domain_update_iommu_snooping(NULL);
5141 if (cap == IOMMU_CAP_INTR_REMAP)
5142 return irq_remapping_enabled == 1;
5147 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5149 struct intel_iommu *iommu;
5151 iommu = device_to_iommu(dev, NULL, NULL);
5153 return ERR_PTR(-ENODEV);
5155 if (translation_pre_enabled(iommu))
5156 dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
5158 return &iommu->iommu;
5161 static void intel_iommu_release_device(struct device *dev)
5163 struct intel_iommu *iommu;
5165 iommu = device_to_iommu(dev, NULL, NULL);
5169 dmar_remove_one_dev_info(dev);
5171 set_dma_ops(dev, NULL);
5174 static void intel_iommu_probe_finalize(struct device *dev)
5176 struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
5178 if (domain && domain->type == IOMMU_DOMAIN_DMA)
5179 iommu_setup_dma_ops(dev, 0, U64_MAX);
5181 set_dma_ops(dev, NULL);
5184 static void intel_iommu_get_resv_regions(struct device *device,
5185 struct list_head *head)
5187 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5188 struct iommu_resv_region *reg;
5189 struct dmar_rmrr_unit *rmrr;
5190 struct device *i_dev;
5193 down_read(&dmar_global_lock);
5194 for_each_rmrr_units(rmrr) {
5195 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5197 struct iommu_resv_region *resv;
5198 enum iommu_resv_type type;
5201 if (i_dev != device &&
5202 !is_downstream_to_pci_bridge(device, i_dev))
5205 length = rmrr->end_address - rmrr->base_address + 1;
5207 type = device_rmrr_is_relaxable(device) ?
5208 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5210 resv = iommu_alloc_resv_region(rmrr->base_address,
5211 length, prot, type);
5215 list_add_tail(&resv->list, head);
5218 up_read(&dmar_global_lock);
5220 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5221 if (dev_is_pci(device)) {
5222 struct pci_dev *pdev = to_pci_dev(device);
5224 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5225 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5226 IOMMU_RESV_DIRECT_RELAXABLE);
5228 list_add_tail(&reg->list, head);
5231 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5233 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5234 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5238 list_add_tail(&reg->list, head);
5241 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5243 struct device_domain_info *info;
5244 struct context_entry *context;
5245 struct dmar_domain *domain;
5246 unsigned long flags;
5250 domain = find_domain(dev);
5254 spin_lock_irqsave(&device_domain_lock, flags);
5255 spin_lock(&iommu->lock);
5258 info = get_domain_info(dev);
5259 if (!info || !info->pasid_supported)
5262 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5263 if (WARN_ON(!context))
5266 ctx_lo = context[0].lo;
5268 if (!(ctx_lo & CONTEXT_PASIDE)) {
5269 ctx_lo |= CONTEXT_PASIDE;
5270 context[0].lo = ctx_lo;
5272 iommu->flush.flush_context(iommu,
5273 domain->iommu_did[iommu->seq_id],
5274 PCI_DEVID(info->bus, info->devfn),
5275 DMA_CCMD_MASK_NOBIT,
5276 DMA_CCMD_DEVICE_INVL);
5279 /* Enable PASID support in the device, if it wasn't already */
5280 if (!info->pasid_enabled)
5281 iommu_enable_dev_iotlb(info);
5286 spin_unlock(&iommu->lock);
5287 spin_unlock_irqrestore(&device_domain_lock, flags);
5292 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5294 if (dev_is_pci(dev))
5295 return pci_device_group(dev);
5296 return generic_device_group(dev);
5299 static int intel_iommu_enable_auxd(struct device *dev)
5301 struct device_domain_info *info;
5302 struct intel_iommu *iommu;
5303 unsigned long flags;
5306 iommu = device_to_iommu(dev, NULL, NULL);
5307 if (!iommu || dmar_disabled)
5310 if (!sm_supported(iommu) || !pasid_supported(iommu))
5313 ret = intel_iommu_enable_pasid(iommu, dev);
5317 spin_lock_irqsave(&device_domain_lock, flags);
5318 info = get_domain_info(dev);
5319 info->auxd_enabled = 1;
5320 spin_unlock_irqrestore(&device_domain_lock, flags);
5325 static int intel_iommu_disable_auxd(struct device *dev)
5327 struct device_domain_info *info;
5328 unsigned long flags;
5330 spin_lock_irqsave(&device_domain_lock, flags);
5331 info = get_domain_info(dev);
5332 if (!WARN_ON(!info))
5333 info->auxd_enabled = 0;
5334 spin_unlock_irqrestore(&device_domain_lock, flags);
5339 static int intel_iommu_enable_sva(struct device *dev)
5341 struct device_domain_info *info = get_domain_info(dev);
5342 struct intel_iommu *iommu;
5345 if (!info || dmar_disabled)
5348 iommu = info->iommu;
5352 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
5355 if (intel_iommu_enable_pasid(iommu, dev))
5358 if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
5361 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
5363 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
5368 static int intel_iommu_disable_sva(struct device *dev)
5370 struct device_domain_info *info = get_domain_info(dev);
5371 struct intel_iommu *iommu = info->iommu;
5374 ret = iommu_unregister_device_fault_handler(dev);
5376 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
5382 * A PCI Express designated vendor specific extended capability (DVSEC) is
5383 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5384 * spec for system software and tools to detect endpoint devices supporting
5385 * Intel Scalable I/O Virtualization without a host driver dependency.
5387 * Returns the address of the matching extended capability structure within
5388 * the device's PCI configuration space, or 0 if the device does not support it.
5391 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5396 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
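/*
 * 0x23 is the Designated Vendor-Specific Extended Capability (DVSEC) ID;
 * walk every DVSEC instance looking for Intel's SIOV one (DVSEC vendor
 * PCI_VENDOR_ID_INTEL, DVSEC ID 5, checked below).
 */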
5398 pci_read_config_word(pdev, pos + 4, &vendor);
5399 pci_read_config_word(pdev, pos + 8, &id);
5400 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5403 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5410 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5412 struct device_domain_info *info = get_domain_info(dev);
5414 if (feat == IOMMU_DEV_FEAT_AUX) {
5417 if (!dev_is_pci(dev) || dmar_disabled ||
5418 !scalable_mode_support() || !pasid_mode_support())
5421 ret = pci_pasid_features(to_pci_dev(dev));
5425 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5428 if (feat == IOMMU_DEV_FEAT_IOPF)
5429 return info && info->pri_supported;
5431 if (feat == IOMMU_DEV_FEAT_SVA)
5432 return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
5433 info->pasid_supported && info->pri_supported &&
5434 info->ats_supported;
5440 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5443 case IOMMU_DEV_FEAT_AUX:
5444 return intel_iommu_enable_auxd(dev);
5446 case IOMMU_DEV_FEAT_IOPF:
5447 return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
5449 case IOMMU_DEV_FEAT_SVA:
5450 return intel_iommu_enable_sva(dev);
5458 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5461 case IOMMU_DEV_FEAT_AUX:
5462 return intel_iommu_disable_auxd(dev);
5464 case IOMMU_DEV_FEAT_IOPF:
5467 case IOMMU_DEV_FEAT_SVA:
5468 return intel_iommu_disable_sva(dev);
5476 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5478 struct device_domain_info *info = get_domain_info(dev);
5480 if (feat == IOMMU_DEV_FEAT_AUX)
5481 return scalable_mode_support() && info && info->auxd_enabled;
5487 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5489 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5491 return dmar_domain->default_pasid > 0 ?
5492 dmar_domain->default_pasid : -EINVAL;
5495 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5498 return attach_deferred(dev);
5502 intel_iommu_enable_nesting(struct iommu_domain *domain)
5504 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5505 unsigned long flags;
5508 spin_lock_irqsave(&device_domain_lock, flags);
5509 if (list_empty(&dmar_domain->devices)) {
5510 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
5511 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
5514 spin_unlock_irqrestore(&device_domain_lock, flags);
5520 * Check that the device does not live on an external facing PCI port that is
5521 * marked as untrusted. Such devices should not be able to apply quirks and
5522 * thus not be able to bypass the IOMMU restrictions.
5524 static bool risky_device(struct pci_dev *pdev)
5526 if (pdev->untrusted) {
5528 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
5529 pdev->vendor, pdev->device);
5530 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
5536 static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
5537 unsigned long clf_pages)
5539 struct dma_pte *first_pte = NULL, *pte = NULL;
5540 unsigned long lvl_pages = 0;
5543 while (clf_pages > 0) {
5546 pte = pfn_to_dma_pte(domain, clf_pfn, &level);
5550 lvl_pages = lvl_to_nr_pages(level);
5553 if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
5556 clf_pages -= lvl_pages;
5557 clf_pfn += lvl_pages;
5560 if (!clf_pages || first_pte_in_page(pte) ||
5561 (level > 1 && clf_pages < lvl_pages)) {
5562 domain_flush_cache(domain, first_pte,
5563 (void *)pte - (void *)first_pte);
5569 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
5570 unsigned long iova, size_t size)
5572 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5573 unsigned long pages = aligned_nrpages(iova, size);
5574 unsigned long pfn = iova >> VTD_PAGE_SHIFT;
5575 struct intel_iommu *iommu;
5578 if (!dmar_domain->iommu_coherency)
5579 clflush_sync_map(dmar_domain, pfn, pages);
5581 for_each_domain_iommu(iommu_id, dmar_domain) {
5582 iommu = g_iommus[iommu_id];
5583 __mapping_notify_one(iommu, dmar_domain, pfn, pages);
5587 const struct iommu_ops intel_iommu_ops = {
5588 .capable = intel_iommu_capable,
5589 .domain_alloc = intel_iommu_domain_alloc,
5590 .domain_free = intel_iommu_domain_free,
5591 .enable_nesting = intel_iommu_enable_nesting,
5592 .attach_dev = intel_iommu_attach_device,
5593 .detach_dev = intel_iommu_detach_device,
5594 .aux_attach_dev = intel_iommu_aux_attach_device,
5595 .aux_detach_dev = intel_iommu_aux_detach_device,
5596 .aux_get_pasid = intel_iommu_aux_get_pasid,
5597 .map = intel_iommu_map,
5598 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
5599 .unmap = intel_iommu_unmap,
5600 .flush_iotlb_all = intel_flush_iotlb_all,
5601 .iotlb_sync = intel_iommu_tlb_sync,
5602 .iova_to_phys = intel_iommu_iova_to_phys,
5603 .probe_device = intel_iommu_probe_device,
5604 .probe_finalize = intel_iommu_probe_finalize,
5605 .release_device = intel_iommu_release_device,
5606 .get_resv_regions = intel_iommu_get_resv_regions,
5607 .put_resv_regions = generic_iommu_put_resv_regions,
5608 .device_group = intel_iommu_device_group,
5609 .dev_has_feat = intel_iommu_dev_has_feat,
5610 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5611 .dev_enable_feat = intel_iommu_dev_enable_feat,
5612 .dev_disable_feat = intel_iommu_dev_disable_feat,
5613 .is_attach_deferred = intel_iommu_is_attach_deferred,
5614 .def_domain_type = device_def_domain_type,
5615 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5616 #ifdef CONFIG_INTEL_IOMMU_SVM
5617 .cache_invalidate = intel_iommu_sva_invalidate,
5618 .sva_bind_gpasid = intel_svm_bind_gpasid,
5619 .sva_unbind_gpasid = intel_svm_unbind_gpasid,
5620 .sva_bind = intel_svm_bind,
5621 .sva_unbind = intel_svm_unbind,
5622 .sva_get_pasid = intel_svm_get_pasid,
5623 .page_response = intel_svm_page_response,
5627 static void quirk_iommu_igfx(struct pci_dev *dev)
5629 if (risky_device(dev))
5632 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5636 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5637 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5638 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5639 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5640 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5641 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5642 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5643 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5645 /* Broadwell igfx malfunctions with dmar */
5646 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5647 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5648 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5649 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5650 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5651 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
5652 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
5653 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
5654 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
5655 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
5656 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
5657 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
5658 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
5659 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
5660 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
5661 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
5662 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
5663 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
5664 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
5665 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
5666 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
5667 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
5668 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
5669 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
5671 static void quirk_iommu_rwbf(struct pci_dev *dev)
5673 if (risky_device(dev))
5677 * Mobile 4 Series Chipset neglects to set RWBF capability,
5678 * but needs it. Same seems to hold for the desktop versions.
5680 pci_info(dev, "Forcing write-buffer flush capability\n");
5684 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5685 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5686 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5687 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5688 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5689 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5690 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5693 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
5694 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5695 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
5696 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
5697 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5698 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5699 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5700 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
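/*
 * Note: GGC is the graphics control register in the integrated graphics
 * device's config space (defined earlier in this file); the bits above
 * describe how much stolen memory the BIOS set aside, and
 * quirk_calpella_no_shadow_gtt() below uses the VT_ENABLED bit to tell
 * whether any of it was reserved for the VT-d shadow GTT.
 */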
5702 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5706 if (risky_device(dev))
5709 if (pci_read_config_word(dev, GGC, &ggc))
5712 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5713 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5715 } else if (dmar_map_gfx) {
5716 /* we have to ensure the gfx device is idle before we flush */
5717 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5718 intel_iommu_strict = 1;
5721 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5722 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5723 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5724 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5726 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
5730 if (!IS_GFX_DEVICE(dev))
5733 ver = (dev->device >> 8) & 0xff;
5734 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
5735 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
5739 if (risky_device(dev))
5742 pci_info(dev, "Skip IOMMU disabling for graphics\n");
5743 iommu_skip_te_disable = 1;
5745 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
5747 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5748 ISOCH DMAR unit for the Azalia sound device, but not give it any
5749 TLB entries, which causes it to deadlock. Check for that. We do
5750 this in a function called from init_dmars(), instead of in a PCI
5751 quirk, because we don't want to print the obnoxious "BIOS broken"
5752 message if VT-d is actually disabled.
5754 static void __init check_tylersburg_isoch(void)
5756 struct pci_dev *pdev;
5757 uint32_t vtisochctrl;
5759 /* If there's no Azalia in the system anyway, forget it. */
5760 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5764 if (risky_device(pdev)) {
5771 /* System Management Registers. Might be hidden, in which case
5772 we can't do the sanity check. But that's OK, because the
5773 known-broken BIOSes _don't_ actually hide it, so far. */
5774 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5778 if (risky_device(pdev)) {
5783 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5790 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5791 if (vtisochctrl & 1)
5794 /* Drop all bits other than the number of TLB entries */
5795 vtisochctrl &= 0x1c;
5797 /* If we have the recommended number of TLB entries (16), fine. */
5798 if (vtisochctrl == 0x10)
5801 /* Zero TLB entries? You get to ride the short bus to school. */
5803 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5804 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5805 dmi_get_system_info(DMI_BIOS_VENDOR),
5806 dmi_get_system_info(DMI_BIOS_VERSION),
5807 dmi_get_system_info(DMI_PRODUCT_VERSION));
5808 iommu_identity_mapping |= IDENTMAP_AZALIA;
5812 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",