1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-map-ops.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/dma-iommu.h>
35 #include <linux/intel-iommu.h>
36 #include <linux/syscore_ops.h>
37 #include <linux/tboot.h>
38 #include <linux/dmi.h>
39 #include <linux/pci-ats.h>
40 #include <linux/memblock.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <asm/irq_remapping.h>
45 #include <asm/cacheflush.h>
46 #include <asm/iommu.h>
48 #include "../irq_remapping.h"
49 #include "../iommu-sva-lib.h"
51 #include "cap_audit.h"
53 #define ROOT_SIZE VTD_PAGE_SIZE
54 #define CONTEXT_SIZE VTD_PAGE_SIZE
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61 #define IOAPIC_RANGE_START (0xfee00000)
62 #define IOAPIC_RANGE_END (0xfeefffff)
63 #define IOVA_START_ADDR (0x1000)
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
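/*
 * Worked example (illustrative only, assuming the usual 4KiB VT-d page,
 * i.e. VTD_PAGE_SHIFT == 12): for a guest address width of 48 bits,
 * __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1, and DOMAIN_MAX_ADDR(48) is
 * that PFN shifted back up, i.e. the base of the last mappable page
 * just below 256TiB.  On 64-bit kernels the min_t() clamp in
 * DOMAIN_MAX_PFN() is a no-op; on 32-bit kernels it caps the value at
 * ULONG_MAX so PFNs always fit in an unsigned long, as the comment
 * above explains.
 */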
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
84 /* page table handling */
85 #define LEVEL_STRIDE (9)
86 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
88 static inline int agaw_to_level(int agaw)
93 static inline int agaw_to_width(int agaw)
95 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
98 static inline int width_to_agaw(int width)
100 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
103 static inline unsigned int level_to_offset_bits(int level)
105 return (level - 1) * LEVEL_STRIDE;
108 static inline int pfn_level_offset(u64 pfn, int level)
110 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
113 static inline u64 level_mask(int level)
115 return -1ULL << level_to_offset_bits(level);
118 static inline u64 level_size(int level)
120 return 1ULL << level_to_offset_bits(level);
123 static inline u64 align_to_level(u64 pfn, int level)
125 return (pfn + level_size(level) - 1) & level_mask(level);
128 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
130 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
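/*
 * Illustrative sketch (not driver logic): with LEVEL_STRIDE == 9 each
 * page-table level indexes 9 bits of the DMA PFN, so level_size(2) is
 * 512 pages (2MiB with 4KiB pages) and level_size(3) is 512 * 512
 * pages (1GiB).  The hypothetical values below show how
 * pfn_level_offset() picks the table index for each level:
 */
#if 0
u64 pfn = 0x12345;
int l1 = pfn_level_offset(pfn, 1); /* 0x145: PFN bits 8:0   */
int l2 = pfn_level_offset(pfn, 2); /* 0x091: PFN bits 17:9  */
int l3 = pfn_level_offset(pfn, 3); /* 0x000: PFN bits 26:18 */
#endif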
133 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
134 are never going to work. */
135 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
137 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
140 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
142 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
144 static inline unsigned long page_to_dma_pfn(struct page *pg)
146 return mm_to_dma_pfn(page_to_pfn(pg));
148 static inline unsigned long virt_to_dma_pfn(void *p)
150 return page_to_dma_pfn(virt_to_page(p));
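/*
 * Note (illustrative): on x86 both PAGE_SHIFT and VTD_PAGE_SHIFT are
 * 12, so mm_to_dma_pfn()/dma_to_mm_pfn() are identity conversions.
 * The shift only matters for a hypothetical configuration where MM
 * pages are larger than VT-d pages, e.g. a 64KiB MM page would span
 * 16 VT-d pages, which is why VT-d pages must never be larger than
 * MM pages (see the comment above).
 */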
153 /* global iommu list, set NULL for ignored DMAR units */
154 static struct intel_iommu **g_iommus;
156 static void __init check_tylersburg_isoch(void);
157 static int rwbf_quirk;
160 * Set to 1 to panic the kernel if VT-d can't be enabled successfully
161 * (used when the kernel is launched with TXT)
163 static int force_on = 0;
164 static int intel_iommu_tboot_noforce;
165 static int no_platform_optin;
167 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
170 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
173 static phys_addr_t root_entry_lctp(struct root_entry *re)
178 return re->lo & VTD_PAGE_MASK;
182 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
185 static phys_addr_t root_entry_uctp(struct root_entry *re)
190 return re->hi & VTD_PAGE_MASK;
193 static inline void context_clear_pasid_enable(struct context_entry *context)
195 context->lo &= ~(1ULL << 11);
198 static inline bool context_pasid_enabled(struct context_entry *context)
200 return !!(context->lo & (1ULL << 11));
203 static inline void context_set_copied(struct context_entry *context)
205 context->hi |= (1ull << 3);
208 static inline bool context_copied(struct context_entry *context)
210 return !!(context->hi & (1ULL << 3));
213 static inline bool __context_present(struct context_entry *context)
215 return (context->lo & 1);
218 bool context_present(struct context_entry *context)
220 return context_pasid_enabled(context) ?
221 __context_present(context) :
222 __context_present(context) && !context_copied(context);
225 static inline void context_set_present(struct context_entry *context)
230 static inline void context_set_fault_enable(struct context_entry *context)
232 context->lo &= (((u64)-1) << 2) | 1;
235 static inline void context_set_translation_type(struct context_entry *context,
238 context->lo &= (((u64)-1) << 4) | 3;
239 context->lo |= (value & 3) << 2;
242 static inline void context_set_address_root(struct context_entry *context,
245 context->lo &= ~VTD_PAGE_MASK;
246 context->lo |= value & VTD_PAGE_MASK;
249 static inline void context_set_address_width(struct context_entry *context,
252 context->hi |= value & 7;
255 static inline void context_set_domain_id(struct context_entry *context,
258 context->hi |= (value & ((1 << 16) - 1)) << 8;
261 static inline int context_domain_id(struct context_entry *c)
263 return((c->hi >> 8) & 0xffff);
266 static inline void context_clear_entry(struct context_entry *context)
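/*
 * A minimal sketch (not driver code) of how the helpers above combine
 * to build a legacy-mode context entry; ctx, did, agaw and pgd are
 * hypothetical locals, and the real sequence lives in
 * domain_context_mapping_one() further down:
 */
#if 0
context_clear_entry(ctx);
context_set_domain_id(ctx, did);                  /* hi[23:8]  */
context_set_address_root(ctx, virt_to_phys(pgd)); /* lo[63:12] */
context_set_address_width(ctx, agaw);             /* hi[2:0]   */
context_set_translation_type(ctx, CONTEXT_TT_MULTI_LEVEL);
context_set_fault_enable(ctx);
context_set_present(ctx);                         /* lo bit 0  */
#endif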
273 * This domain is a static identity mapping domain.
274 * 1. This domain creates a static 1:1 mapping of all usable memory.
275 * 2. It maps to each iommu if successful.
276 * 3. Each iommu maps to this domain if successful.
278 static struct dmar_domain *si_domain;
279 static int hw_pass_through = 1;
281 #define for_each_domain_iommu(idx, domain) \
282 for (idx = 0; idx < g_num_of_iommus; idx++) \
283 if (domain->iommu_refcnt[idx])
285 struct dmar_rmrr_unit {
286 struct list_head list; /* list of rmrr units */
287 struct acpi_dmar_header *hdr; /* ACPI header */
288 u64 base_address; /* reserved base address*/
289 u64 end_address; /* reserved end address */
290 struct dmar_dev_scope *devices; /* target devices */
291 int devices_cnt; /* target device count */
294 struct dmar_atsr_unit {
295 struct list_head list; /* list of ATSR units */
296 struct acpi_dmar_header *hdr; /* ACPI header */
297 struct dmar_dev_scope *devices; /* target devices */
298 int devices_cnt; /* target device count */
299 u8 include_all:1; /* include all ports */
302 struct dmar_satc_unit {
303 struct list_head list; /* list of SATC units */
304 struct acpi_dmar_header *hdr; /* ACPI header */
305 struct dmar_dev_scope *devices; /* target devices */
306 struct intel_iommu *iommu; /* the corresponding iommu */
307 int devices_cnt; /* target device count */
308 u8 atc_required:1; /* ATS is required */
311 static LIST_HEAD(dmar_atsr_units);
312 static LIST_HEAD(dmar_rmrr_units);
313 static LIST_HEAD(dmar_satc_units);
315 #define for_each_rmrr_units(rmrr) \
316 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
318 /* bitmap for indexing intel_iommus */
319 static int g_num_of_iommus;
321 static void domain_exit(struct dmar_domain *domain);
322 static void domain_remove_dev_info(struct dmar_domain *domain);
323 static void dmar_remove_one_dev_info(struct device *dev);
324 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
325 static int intel_iommu_attach_device(struct iommu_domain *domain,
327 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
330 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
331 int dmar_disabled = 0;
333 int dmar_disabled = 1;
334 #endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
336 #ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
337 int intel_iommu_sm = 1;
340 #endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
342 int intel_iommu_enabled = 0;
343 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
345 static int dmar_map_gfx = 1;
346 static int intel_iommu_superpage = 1;
347 static int iommu_identity_mapping;
348 static int iommu_skip_te_disable;
350 #define IDENTMAP_GFX 2
351 #define IDENTMAP_AZALIA 4
353 int intel_iommu_gfx_mapped;
354 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
356 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
357 struct device_domain_info *get_domain_info(struct device *dev)
359 struct device_domain_info *info;
364 info = dev_iommu_priv_get(dev);
365 if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
371 DEFINE_SPINLOCK(device_domain_lock);
372 static LIST_HEAD(device_domain_list);
375 * Iterate over elements in device_domain_list and call the specified
376 * callback @fn against each element.
378 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
379 void *data), void *data)
383 struct device_domain_info *info;
385 spin_lock_irqsave(&device_domain_lock, flags);
386 list_for_each_entry(info, &device_domain_list, global) {
387 ret = fn(info, data);
389 spin_unlock_irqrestore(&device_domain_lock, flags);
393 spin_unlock_irqrestore(&device_domain_lock, flags);
398 const struct iommu_ops intel_iommu_ops;
400 static bool translation_pre_enabled(struct intel_iommu *iommu)
402 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
405 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
407 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
410 static void init_translation_status(struct intel_iommu *iommu)
414 gsts = readl(iommu->reg + DMAR_GSTS_REG);
415 if (gsts & DMA_GSTS_TES)
416 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
419 static int __init intel_iommu_setup(char *str)
424 if (!strncmp(str, "on", 2)) {
426 pr_info("IOMMU enabled\n");
427 } else if (!strncmp(str, "off", 3)) {
429 no_platform_optin = 1;
430 pr_info("IOMMU disabled\n");
431 } else if (!strncmp(str, "igfx_off", 8)) {
433 pr_info("Disable GFX device mapping\n");
434 } else if (!strncmp(str, "forcedac", 8)) {
435 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
436 iommu_dma_forcedac = true;
437 } else if (!strncmp(str, "strict", 6)) {
438 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
439 iommu_set_dma_strict();
440 } else if (!strncmp(str, "sp_off", 6)) {
441 pr_info("Disable supported super page\n");
442 intel_iommu_superpage = 0;
443 } else if (!strncmp(str, "sm_on", 5)) {
444 pr_info("Intel-IOMMU: scalable mode supported\n");
446 } else if (!strncmp(str, "tboot_noforce", 13)) {
447 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
448 intel_iommu_tboot_noforce = 1;
451 str += strcspn(str, ",");
457 __setup("intel_iommu=", intel_iommu_setup);
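/*
 * Example command lines accepted by the parser above (options are
 * comma separated and may be combined):
 *
 *	intel_iommu=on
 *	intel_iommu=on,sm_on
 *	intel_iommu=off
 *	intel_iommu=igfx_off,sp_off
 *	intel_iommu=tboot_noforce
 */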
459 static struct kmem_cache *iommu_domain_cache;
460 static struct kmem_cache *iommu_devinfo_cache;
462 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
464 struct dmar_domain **domains;
467 domains = iommu->domains[idx];
471 return domains[did & 0xff];
474 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
475 struct dmar_domain *domain)
477 struct dmar_domain **domains;
480 if (!iommu->domains[idx]) {
481 size_t size = 256 * sizeof(struct dmar_domain *);
482 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
485 domains = iommu->domains[idx];
486 if (WARN_ON(!domains))
489 domains[did & 0xff] = domain;
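/*
 * Note (illustrative): iommu->domains is kept as a two-level array so
 * that second-level pages of 256 pointers are only allocated on
 * demand rather than reserving cap_ndoms() pointers up front.  A
 * 16-bit domain id is split into (did >> 8) to select the 256-entry
 * page and (did & 0xff) to select the slot, so e.g. domain id 0x1234
 * lives in iommu->domains[0x12][0x34].
 */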
492 void *alloc_pgtable_page(int node)
497 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
499 vaddr = page_address(page);
503 void free_pgtable_page(void *vaddr)
505 free_page((unsigned long)vaddr);
508 static inline void *alloc_domain_mem(void)
510 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
513 static void free_domain_mem(void *vaddr)
515 kmem_cache_free(iommu_domain_cache, vaddr);
518 static inline void * alloc_devinfo_mem(void)
520 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
523 static inline void free_devinfo_mem(void *vaddr)
525 kmem_cache_free(iommu_devinfo_cache, vaddr);
528 static inline int domain_type_is_si(struct dmar_domain *domain)
530 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
533 static inline bool domain_use_first_level(struct dmar_domain *domain)
535 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
538 static inline int domain_pfn_supported(struct dmar_domain *domain,
541 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
543 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
546 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
551 sagaw = cap_sagaw(iommu->cap);
552 for (agaw = width_to_agaw(max_gaw);
554 if (test_bit(agaw, &sagaw))
562 * Calculate max SAGAW for each iommu.
564 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
566 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
570 * Calculate agaw for each iommu.
571 * "SAGAW" may be different across iommus: use a default agaw, and
572 * fall back to a smaller supported agaw for iommus that don't support the default.
574 int iommu_calculate_agaw(struct intel_iommu *iommu)
576 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
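/*
 * Worked example (illustrative): DEFAULT_DOMAIN_ADDRESS_WIDTH is 57,
 * so width_to_agaw(57) == 3 (a 5-level table).  If cap_sagaw() only
 * advertises the 4-level (48-bit, agaw 2) mode, __iommu_calculate_agaw()
 * steps down from agaw 3 until test_bit() succeeds and settles on
 * agaw 2, i.e. an agaw_to_width() of 48 bits for that unit.
 */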
579 /* This function only returns a single iommu in a domain */
580 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
584 /* si_domain and vm domain should not get here. */
585 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
588 for_each_domain_iommu(iommu_id, domain)
591 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
594 return g_iommus[iommu_id];
597 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
599 return sm_supported(iommu) ?
600 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
603 static void domain_update_iommu_coherency(struct dmar_domain *domain)
605 struct dmar_drhd_unit *drhd;
606 struct intel_iommu *iommu;
610 domain->iommu_coherency = true;
612 for_each_domain_iommu(i, domain) {
614 if (!iommu_paging_structure_coherency(g_iommus[i])) {
615 domain->iommu_coherency = false;
622 /* No hardware attached; use lowest common denominator */
624 for_each_active_iommu(iommu, drhd) {
625 if (!iommu_paging_structure_coherency(iommu)) {
626 domain->iommu_coherency = false;
633 static bool domain_update_iommu_snooping(struct intel_iommu *skip)
635 struct dmar_drhd_unit *drhd;
636 struct intel_iommu *iommu;
640 for_each_active_iommu(iommu, drhd) {
643 * If the hardware is operating in scalable mode,
644 * snooping control is always supported since we
645 * always set the PASID-table-entry.PGSNP bit if the domain
646 * is managed outside (UNMANAGED).
648 if (!sm_supported(iommu) &&
649 !ecap_sc_support(iommu->ecap)) {
660 static int domain_update_iommu_superpage(struct dmar_domain *domain,
661 struct intel_iommu *skip)
663 struct dmar_drhd_unit *drhd;
664 struct intel_iommu *iommu;
667 if (!intel_iommu_superpage)
670 /* set iommu_superpage to the smallest common denominator */
672 for_each_active_iommu(iommu, drhd) {
674 if (domain && domain_use_first_level(domain)) {
675 if (!cap_fl1gp_support(iommu->cap))
678 mask &= cap_super_page_val(iommu->cap);
690 static int domain_update_device_node(struct dmar_domain *domain)
692 struct device_domain_info *info;
693 int nid = NUMA_NO_NODE;
695 assert_spin_locked(&device_domain_lock);
697 if (list_empty(&domain->devices))
700 list_for_each_entry(info, &domain->devices, link) {
705 * There could be multiple device NUMA nodes, as devices within
706 * the same domain may sit behind different IOMMUs. There is no
707 * perfect answer in such a situation, so we use a first-come,
708 * first-served policy.
710 nid = dev_to_node(info->dev);
711 if (nid != NUMA_NO_NODE)
718 static void domain_update_iotlb(struct dmar_domain *domain);
720 /* Return the super pagesize bitmap if supported. */
721 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
723 unsigned long bitmap = 0;
726 * 1-level super page supports page size of 2MiB, 2-level super page
727 * supports page size of both 2MiB and 1GiB.
729 if (domain->iommu_superpage == 1)
731 else if (domain->iommu_superpage == 2)
732 bitmap |= SZ_2M | SZ_1G;
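/*
 * Example (illustrative): an IOMMU whose cap_super_page_val() only
 * advertises 2MiB superpages gives iommu_superpage == 1 and a bitmap
 * of SZ_2M; one that also handles 1GiB gives level 2 and
 * SZ_2M | SZ_1G.  domain_update_iommu_cap() below ORs this into the
 * domain's pgsize_bitmap.
 */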
737 /* Some capabilities may be different across iommus */
738 static void domain_update_iommu_cap(struct dmar_domain *domain)
740 domain_update_iommu_coherency(domain);
741 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
742 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
745 * If RHSA is missing, we should default to the device numa domain
748 if (domain->nid == NUMA_NO_NODE)
749 domain->nid = domain_update_device_node(domain);
752 * First-level translation restricts the input-address to a
753 * canonical address (i.e., address bits 63:N have the same
754 * value as address bit [N-1], where N is 48-bits with 4-level
755 * paging and 57-bits with 5-level paging). Hence, skip bit [N-1].
758 if (domain_use_first_level(domain))
759 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
761 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
763 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
764 domain_update_iotlb(domain);
767 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
770 struct root_entry *root = &iommu->root_entry[bus];
771 struct context_entry *context;
775 if (sm_supported(iommu)) {
783 context = phys_to_virt(*entry & VTD_PAGE_MASK);
785 unsigned long phy_addr;
789 context = alloc_pgtable_page(iommu->node);
793 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
794 phy_addr = virt_to_phys((void *)context);
795 *entry = phy_addr | 1;
796 __iommu_flush_cache(iommu, entry, sizeof(*entry));
798 return &context[devfn];
801 static bool attach_deferred(struct device *dev)
803 return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
807 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
808 * sub-hierarchy of a candidate PCI-PCI bridge
809 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
810 * @bridge: the candidate PCI-PCI bridge
812 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
815 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
817 struct pci_dev *pdev, *pbridge;
819 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
822 pdev = to_pci_dev(dev);
823 pbridge = to_pci_dev(bridge);
825 if (pbridge->subordinate &&
826 pbridge->subordinate->number <= pdev->bus->number &&
827 pbridge->subordinate->busn_res.end >= pdev->bus->number)
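/*
 * Example (illustrative): a bridge whose subordinate bus range is
 * [0x04, 0x07] is an ancestor of any device on buses 0x04-0x07, so
 * is_downstream_to_pci_bridge() returns true for a device at
 * 0000:05:00.0 sitting behind it.
 */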
833 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
835 struct dmar_drhd_unit *drhd;
839 /* We know that this device on this chipset has its own IOMMU.
840 * If we find it under a different IOMMU, then the BIOS is lying
841 * to us. Hope that the IOMMU for this device is actually
842 * disabled, and it needs no translation...
844 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
847 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
852 /* we know that this iommu should be at offset 0xa000 from vtbar */
853 drhd = dmar_find_matched_drhd_unit(pdev);
854 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
855 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
856 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
863 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
865 if (!iommu || iommu->drhd->ignored)
868 if (dev_is_pci(dev)) {
869 struct pci_dev *pdev = to_pci_dev(dev);
871 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
872 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
873 quirk_ioat_snb_local_iommu(pdev))
880 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
882 struct dmar_drhd_unit *drhd = NULL;
883 struct pci_dev *pdev = NULL;
884 struct intel_iommu *iommu;
892 if (dev_is_pci(dev)) {
893 struct pci_dev *pf_pdev;
895 pdev = pci_real_dma_dev(to_pci_dev(dev));
897 /* VFs aren't listed in scope tables; we need to look up
898 * the PF instead to find the IOMMU. */
899 pf_pdev = pci_physfn(pdev);
901 segment = pci_domain_nr(pdev->bus);
902 } else if (has_acpi_companion(dev))
903 dev = &ACPI_COMPANION(dev)->dev;
906 for_each_iommu(iommu, drhd) {
907 if (pdev && segment != drhd->segment)
910 for_each_active_dev_scope(drhd->devices,
911 drhd->devices_cnt, i, tmp) {
913 /* For a VF use its original BDF# not that of the PF
914 * which we used for the IOMMU lookup. Strictly speaking
915 * we could do this for all PCI devices; we only need to
916 * get the BDF# from the scope table for ACPI matches. */
917 if (pdev && pdev->is_virtfn)
921 *bus = drhd->devices[i].bus;
922 *devfn = drhd->devices[i].devfn;
927 if (is_downstream_to_pci_bridge(dev, tmp))
931 if (pdev && drhd->include_all) {
934 *bus = pdev->bus->number;
935 *devfn = pdev->devfn;
942 if (iommu_is_dummy(iommu, dev))
950 static void domain_flush_cache(struct dmar_domain *domain,
951 void *addr, int size)
953 if (!domain->iommu_coherency)
954 clflush_cache_range(addr, size);
957 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
959 struct context_entry *context;
963 spin_lock_irqsave(&iommu->lock, flags);
964 context = iommu_context_addr(iommu, bus, devfn, 0);
966 ret = context_present(context);
967 spin_unlock_irqrestore(&iommu->lock, flags);
971 static void free_context_table(struct intel_iommu *iommu)
975 struct context_entry *context;
977 spin_lock_irqsave(&iommu->lock, flags);
978 if (!iommu->root_entry) {
981 for (i = 0; i < ROOT_ENTRY_NR; i++) {
982 context = iommu_context_addr(iommu, i, 0, 0);
984 free_pgtable_page(context);
986 if (!sm_supported(iommu))
989 context = iommu_context_addr(iommu, i, 0x80, 0);
991 free_pgtable_page(context);
994 free_pgtable_page(iommu->root_entry);
995 iommu->root_entry = NULL;
997 spin_unlock_irqrestore(&iommu->lock, flags);
1000 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1001 unsigned long pfn, int *target_level)
1003 struct dma_pte *parent, *pte;
1004 int level = agaw_to_level(domain->agaw);
1007 BUG_ON(!domain->pgd);
1009 if (!domain_pfn_supported(domain, pfn))
1010 /* Address beyond IOMMU's addressing capabilities. */
1013 parent = domain->pgd;
1018 offset = pfn_level_offset(pfn, level);
1019 pte = &parent[offset];
1020 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1022 if (level == *target_level)
1025 if (!dma_pte_present(pte)) {
1028 tmp_page = alloc_pgtable_page(domain->nid);
1033 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1034 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1035 if (domain_use_first_level(domain)) {
1036 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
1037 if (domain->domain.type == IOMMU_DOMAIN_DMA)
1038 pteval |= DMA_FL_PTE_ACCESS;
1040 if (cmpxchg64(&pte->val, 0ULL, pteval))
1041 /* Someone else set it while we were thinking; use theirs. */
1042 free_pgtable_page(tmp_page);
1044 domain_flush_cache(domain, pte, sizeof(*pte));
1049 parent = phys_to_virt(dma_pte_addr(pte));
1054 *target_level = level;
1059 /* return address's pte at specific level */
1060 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1062 int level, int *large_page)
1064 struct dma_pte *parent, *pte;
1065 int total = agaw_to_level(domain->agaw);
1068 parent = domain->pgd;
1069 while (level <= total) {
1070 offset = pfn_level_offset(pfn, total);
1071 pte = &parent[offset];
1075 if (!dma_pte_present(pte)) {
1076 *large_page = total;
1080 if (dma_pte_superpage(pte)) {
1081 *large_page = total;
1085 parent = phys_to_virt(dma_pte_addr(pte));
1091 /* clear last level pte; a tlb flush should follow */
1092 static void dma_pte_clear_range(struct dmar_domain *domain,
1093 unsigned long start_pfn,
1094 unsigned long last_pfn)
1096 unsigned int large_page;
1097 struct dma_pte *first_pte, *pte;
1099 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1100 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1101 BUG_ON(start_pfn > last_pfn);
1103 /* we don't need lock here; nobody else touches the iova range */
1106 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1108 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1113 start_pfn += lvl_to_nr_pages(large_page);
1115 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1117 domain_flush_cache(domain, first_pte,
1118 (void *)pte - (void *)first_pte);
1120 } while (start_pfn && start_pfn <= last_pfn);
1123 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1124 int retain_level, struct dma_pte *pte,
1125 unsigned long pfn, unsigned long start_pfn,
1126 unsigned long last_pfn)
1128 pfn = max(start_pfn, pfn);
1129 pte = &pte[pfn_level_offset(pfn, level)];
1132 unsigned long level_pfn;
1133 struct dma_pte *level_pte;
1135 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1138 level_pfn = pfn & level_mask(level);
1139 level_pte = phys_to_virt(dma_pte_addr(pte));
1142 dma_pte_free_level(domain, level - 1, retain_level,
1143 level_pte, level_pfn, start_pfn,
1148 * Free the page table if we're below the level we want to
1149 * retain and the range covers the entire table.
1151 if (level < retain_level && !(start_pfn > level_pfn ||
1152 last_pfn < level_pfn + level_size(level) - 1)) {
1154 domain_flush_cache(domain, pte, sizeof(*pte));
1155 free_pgtable_page(level_pte);
1158 pfn += level_size(level);
1159 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1163 * clear last level (leaf) ptes and free page table pages below the
1164 * level we wish to keep intact.
1166 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1167 unsigned long start_pfn,
1168 unsigned long last_pfn,
1171 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1172 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1173 BUG_ON(start_pfn > last_pfn);
1175 dma_pte_clear_range(domain, start_pfn, last_pfn);
1177 /* We don't need lock here; nobody else touches the iova range */
1178 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1179 domain->pgd, 0, start_pfn, last_pfn);
1182 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1183 free_pgtable_page(domain->pgd);
1188 /* When a page at a given level is being unlinked from its parent, we don't
1189 need to *modify* it at all. All we need to do is make a list of all the
1190 pages which can be freed just as soon as we've flushed the IOTLB and we
1191 know the hardware page-walk will no longer touch them.
1192 The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */
1194 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1195 int level, struct dma_pte *pte,
1196 struct page *freelist)
1200 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1201 pg->freelist = freelist;
1207 pte = page_address(pg);
1209 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1210 freelist = dma_pte_list_pagetables(domain, level - 1,
1213 } while (!first_pte_in_page(pte));
1218 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1219 struct dma_pte *pte, unsigned long pfn,
1220 unsigned long start_pfn,
1221 unsigned long last_pfn,
1222 struct page *freelist)
1224 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1226 pfn = max(start_pfn, pfn);
1227 pte = &pte[pfn_level_offset(pfn, level)];
1230 unsigned long level_pfn;
1232 if (!dma_pte_present(pte))
1235 level_pfn = pfn & level_mask(level);
1237 /* If range covers entire pagetable, free it */
1238 if (start_pfn <= level_pfn &&
1239 last_pfn >= level_pfn + level_size(level) - 1) {
1240 /* These subordinate page tables are going away entirely. Don't
1241 bother to clear them; we're just going to *free* them. */
1242 if (level > 1 && !dma_pte_superpage(pte))
1243 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1249 } else if (level > 1) {
1250 /* Recurse down into a level that isn't *entirely* obsolete */
1251 freelist = dma_pte_clear_level(domain, level - 1,
1252 phys_to_virt(dma_pte_addr(pte)),
1253 level_pfn, start_pfn, last_pfn,
1257 pfn += level_size(level);
1258 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1261 domain_flush_cache(domain, first_pte,
1262 (void *)++last_pte - (void *)first_pte);
1267 /* We can't just free the pages because the IOMMU may still be walking
1268 the page tables, and may have cached the intermediate levels. The
1269 pages can only be freed after the IOTLB flush has been done. */
1270 static struct page *domain_unmap(struct dmar_domain *domain,
1271 unsigned long start_pfn,
1272 unsigned long last_pfn,
1273 struct page *freelist)
1275 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1276 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1277 BUG_ON(start_pfn > last_pfn);
1279 /* we don't need lock here; nobody else touches the iova range */
1280 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1281 domain->pgd, 0, start_pfn, last_pfn,
1285 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1286 struct page *pgd_page = virt_to_page(domain->pgd);
1287 pgd_page->freelist = freelist;
1288 freelist = pgd_page;
1296 static void dma_free_pagelist(struct page *freelist)
1300 while ((pg = freelist)) {
1301 freelist = pg->freelist;
1302 free_pgtable_page(page_address(pg));
1306 /* iommu handling */
1307 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1309 struct root_entry *root;
1310 unsigned long flags;
1312 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1314 pr_err("Allocating root entry for %s failed\n",
1319 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1321 spin_lock_irqsave(&iommu->lock, flags);
1322 iommu->root_entry = root;
1323 spin_unlock_irqrestore(&iommu->lock, flags);
1328 static void iommu_set_root_entry(struct intel_iommu *iommu)
1334 addr = virt_to_phys(iommu->root_entry);
1335 if (sm_supported(iommu))
1336 addr |= DMA_RTADDR_SMT;
1338 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1339 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1341 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1343 /* Make sure hardware complete it */
1344 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1345 readl, (sts & DMA_GSTS_RTPS), sts);
1347 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1349 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1350 if (sm_supported(iommu))
1351 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1352 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1355 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1360 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1363 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1364 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1366 /* Make sure hardware complete it */
1367 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1368 readl, (!(val & DMA_GSTS_WBFS)), val);
1370 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1373 /* return value determines if we need a write buffer flush */
1374 static void __iommu_flush_context(struct intel_iommu *iommu,
1375 u16 did, u16 source_id, u8 function_mask,
1382 case DMA_CCMD_GLOBAL_INVL:
1383 val = DMA_CCMD_GLOBAL_INVL;
1385 case DMA_CCMD_DOMAIN_INVL:
1386 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1388 case DMA_CCMD_DEVICE_INVL:
1389 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1390 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1395 val |= DMA_CCMD_ICC;
1397 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1398 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1400 /* Make sure hardware complete it */
1401 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1402 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1404 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1407 /* return value determines if we need a write buffer flush */
1408 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1409 u64 addr, unsigned int size_order, u64 type)
1411 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1412 u64 val = 0, val_iva = 0;
1416 case DMA_TLB_GLOBAL_FLUSH:
1417 /* a global flush doesn't need to set IVA_REG */
1418 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1420 case DMA_TLB_DSI_FLUSH:
1421 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1423 case DMA_TLB_PSI_FLUSH:
1424 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1425 /* IH bit is passed in as part of address */
1426 val_iva = size_order | addr;
1431 /* Note: set drain read/write */
1434 * This is probably meant to be extra safe; it looks like we can
1435 * ignore it without any impact.
1437 if (cap_read_drain(iommu->cap))
1438 val |= DMA_TLB_READ_DRAIN;
1440 if (cap_write_drain(iommu->cap))
1441 val |= DMA_TLB_WRITE_DRAIN;
1443 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1444 /* Note: Only uses first TLB reg currently */
1446 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1447 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1449 /* Make sure hardware complete it */
1450 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1451 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1453 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1455 /* check IOTLB invalidation granularity */
1456 if (DMA_TLB_IAIG(val) == 0)
1457 pr_err("Flush IOTLB failed\n");
1458 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1459 pr_debug("TLB flush request %Lx, actual %Lx\n",
1460 (unsigned long long)DMA_TLB_IIRG(type),
1461 (unsigned long long)DMA_TLB_IAIG(val));
1464 static struct device_domain_info *
1465 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1468 struct device_domain_info *info;
1470 assert_spin_locked(&device_domain_lock);
1475 list_for_each_entry(info, &domain->devices, link)
1476 if (info->iommu == iommu && info->bus == bus &&
1477 info->devfn == devfn) {
1478 if (info->ats_supported && info->dev)
1486 static void domain_update_iotlb(struct dmar_domain *domain)
1488 struct device_domain_info *info;
1489 bool has_iotlb_device = false;
1491 assert_spin_locked(&device_domain_lock);
1493 list_for_each_entry(info, &domain->devices, link)
1494 if (info->ats_enabled) {
1495 has_iotlb_device = true;
1499 if (!has_iotlb_device) {
1500 struct subdev_domain_info *sinfo;
1502 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1503 info = get_domain_info(sinfo->pdev);
1504 if (info && info->ats_enabled) {
1505 has_iotlb_device = true;
1511 domain->has_iotlb_device = has_iotlb_device;
1514 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1516 struct pci_dev *pdev;
1518 assert_spin_locked(&device_domain_lock);
1520 if (!info || !dev_is_pci(info->dev))
1523 pdev = to_pci_dev(info->dev);
1524 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1525 * the PFSID to the invalidation descriptor of a VF so that the IOMMU HW
1526 * can gauge queue depth at the PF level. If DIT is not set, PFSID is
1527 * treated as reserved and should be set to 0.
1529 if (!ecap_dit(info->iommu->ecap))
1532 struct pci_dev *pf_pdev;
1534 /* pdev will be returned if device is not a vf */
1535 pf_pdev = pci_physfn(pdev);
1536 info->pfsid = pci_dev_id(pf_pdev);
1539 #ifdef CONFIG_INTEL_IOMMU_SVM
1540 /* The PCIe spec, in its wisdom, declares that the behaviour of
1541 the device is undefined if you enable PASID support after ATS
1542 support. So always enable PASID support on devices which have
1543 it, even if we can't yet know if we're ever going to use it. */
1545 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1546 info->pasid_enabled = 1;
1548 if (info->pri_supported &&
1549 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1550 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1551 info->pri_enabled = 1;
1553 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1554 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1555 info->ats_enabled = 1;
1556 domain_update_iotlb(info->domain);
1557 info->ats_qdep = pci_ats_queue_depth(pdev);
1561 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1563 struct pci_dev *pdev;
1565 assert_spin_locked(&device_domain_lock);
1567 if (!dev_is_pci(info->dev))
1570 pdev = to_pci_dev(info->dev);
1572 if (info->ats_enabled) {
1573 pci_disable_ats(pdev);
1574 info->ats_enabled = 0;
1575 domain_update_iotlb(info->domain);
1577 #ifdef CONFIG_INTEL_IOMMU_SVM
1578 if (info->pri_enabled) {
1579 pci_disable_pri(pdev);
1580 info->pri_enabled = 0;
1582 if (info->pasid_enabled) {
1583 pci_disable_pasid(pdev);
1584 info->pasid_enabled = 0;
1589 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1590 u64 addr, unsigned int mask)
1594 if (!info || !info->ats_enabled)
1597 sid = info->bus << 8 | info->devfn;
1598 qdep = info->ats_qdep;
1599 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1603 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1604 u64 addr, unsigned mask)
1606 unsigned long flags;
1607 struct device_domain_info *info;
1608 struct subdev_domain_info *sinfo;
1610 if (!domain->has_iotlb_device)
1613 spin_lock_irqsave(&device_domain_lock, flags);
1614 list_for_each_entry(info, &domain->devices, link)
1615 __iommu_flush_dev_iotlb(info, addr, mask);
1617 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
1618 info = get_domain_info(sinfo->pdev);
1619 __iommu_flush_dev_iotlb(info, addr, mask);
1621 spin_unlock_irqrestore(&device_domain_lock, flags);
1624 static void domain_flush_piotlb(struct intel_iommu *iommu,
1625 struct dmar_domain *domain,
1626 u64 addr, unsigned long npages, bool ih)
1628 u16 did = domain->iommu_did[iommu->seq_id];
1630 if (domain->default_pasid)
1631 qi_flush_piotlb(iommu, did, domain->default_pasid,
1634 if (!list_empty(&domain->devices))
1635 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1638 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1639 struct dmar_domain *domain,
1640 unsigned long pfn, unsigned int pages,
1643 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1644 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1645 u16 did = domain->iommu_did[iommu->seq_id];
1652 if (domain_use_first_level(domain)) {
1653 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1656 * Fallback to domain selective flush if no PSI support or
1657 * the size is too big. PSI requires page size to be 2 ^ x,
1658 * and the base address is naturally aligned to the size.
1660 if (!cap_pgsel_inv(iommu->cap) ||
1661 mask > cap_max_amask_val(iommu->cap))
1662 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1665 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1670 * In caching mode, changes of pages from non-present to present require
1671 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1673 if (!cap_caching_mode(iommu->cap) || !map)
1674 iommu_flush_dev_iotlb(domain, addr, mask);
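/*
 * Worked example (illustrative): a request to flush 5 pages is rounded
 * up to the next power of two, so mask == ilog2(8) == 3 and the
 * page-selective invalidation covers 8 VT-d pages starting at addr.
 * If the IOMMU lacks page-selective invalidation, or mask exceeds
 * cap_max_amask_val(), the domain-selective flush above is used
 * instead.
 */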
1677 /* Notification for newly created mappings */
1678 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1679 struct dmar_domain *domain,
1680 unsigned long pfn, unsigned int pages)
1683 * It's a non-present to present mapping. Only flush if caching mode and second level.
1686 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1687 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1689 iommu_flush_write_buffer(iommu);
1692 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1694 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1697 for_each_domain_iommu(idx, dmar_domain) {
1698 struct intel_iommu *iommu = g_iommus[idx];
1699 u16 did = dmar_domain->iommu_did[iommu->seq_id];
1701 if (domain_use_first_level(dmar_domain))
1702 domain_flush_piotlb(iommu, dmar_domain, 0, -1, 0);
1704 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1707 if (!cap_caching_mode(iommu->cap))
1708 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1709 0, MAX_AGAW_PFN_WIDTH);
1713 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1716 unsigned long flags;
1718 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1721 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1722 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1723 pmen &= ~DMA_PMEN_EPM;
1724 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1726 /* wait for the protected region status bit to clear */
1727 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1728 readl, !(pmen & DMA_PMEN_PRS), pmen);
1730 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1733 static void iommu_enable_translation(struct intel_iommu *iommu)
1736 unsigned long flags;
1738 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1739 iommu->gcmd |= DMA_GCMD_TE;
1740 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1742 /* Make sure hardware complete it */
1743 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1744 readl, (sts & DMA_GSTS_TES), sts);
1746 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1749 static void iommu_disable_translation(struct intel_iommu *iommu)
1754 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1755 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1758 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1759 iommu->gcmd &= ~DMA_GCMD_TE;
1760 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1762 /* Make sure hardware complete it */
1763 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1764 readl, (!(sts & DMA_GSTS_TES)), sts);
1766 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1769 static int iommu_init_domains(struct intel_iommu *iommu)
1771 u32 ndomains, nlongs;
1774 ndomains = cap_ndoms(iommu->cap);
1775 pr_debug("%s: Number of Domains supported <%d>\n",
1776 iommu->name, ndomains);
1777 nlongs = BITS_TO_LONGS(ndomains);
1779 spin_lock_init(&iommu->lock);
1781 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1782 if (!iommu->domain_ids) {
1783 pr_err("%s: Allocating domain id array failed\n",
1788 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1789 iommu->domains = kzalloc(size, GFP_KERNEL);
1791 if (iommu->domains) {
1792 size = 256 * sizeof(struct dmar_domain *);
1793 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1796 if (!iommu->domains || !iommu->domains[0]) {
1797 pr_err("%s: Allocating domain array failed\n",
1799 kfree(iommu->domain_ids);
1800 kfree(iommu->domains);
1801 iommu->domain_ids = NULL;
1802 iommu->domains = NULL;
1807 * If Caching mode is set, then invalid translations are tagged
1808 * with domain-id 0, hence we need to pre-allocate it. We also
1809 * use domain-id 0 as a marker for non-allocated domain-id, so
1810 * make sure it is not used for a real domain.
1812 set_bit(0, iommu->domain_ids);
1815 * VT-d spec rev 3.0 (section 6.2.3.1) requires that each pasid
1816 * entry for first-level or pass-through translation modes should
1817 * be programmed with a domain id different from those used for
1818 * second-level or nested translation. We reserve a domain id for this purpose.
1821 if (sm_supported(iommu))
1822 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1827 static void disable_dmar_iommu(struct intel_iommu *iommu)
1829 struct device_domain_info *info, *tmp;
1830 unsigned long flags;
1832 if (!iommu->domains || !iommu->domain_ids)
1835 spin_lock_irqsave(&device_domain_lock, flags);
1836 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1837 if (info->iommu != iommu)
1840 if (!info->dev || !info->domain)
1843 __dmar_remove_one_dev_info(info);
1845 spin_unlock_irqrestore(&device_domain_lock, flags);
1847 if (iommu->gcmd & DMA_GCMD_TE)
1848 iommu_disable_translation(iommu);
1851 static void free_dmar_iommu(struct intel_iommu *iommu)
1853 if ((iommu->domains) && (iommu->domain_ids)) {
1854 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1857 for (i = 0; i < elems; i++)
1858 kfree(iommu->domains[i]);
1859 kfree(iommu->domains);
1860 kfree(iommu->domain_ids);
1861 iommu->domains = NULL;
1862 iommu->domain_ids = NULL;
1865 g_iommus[iommu->seq_id] = NULL;
1867 /* free context mapping */
1868 free_context_table(iommu);
1870 #ifdef CONFIG_INTEL_IOMMU_SVM
1871 if (pasid_supported(iommu)) {
1872 if (ecap_prs(iommu->ecap))
1873 intel_svm_finish_prq(iommu);
1875 if (vccap_pasid(iommu->vccap))
1876 ioasid_unregister_allocator(&iommu->pasid_allocator);
1882 * Check and return whether first level is used by default for DMA translation.
1885 static bool first_level_by_default(void)
1887 return scalable_mode_support() && intel_cap_flts_sanity();
1890 static struct dmar_domain *alloc_domain(int flags)
1892 struct dmar_domain *domain;
1894 domain = alloc_domain_mem();
1898 memset(domain, 0, sizeof(*domain));
1899 domain->nid = NUMA_NO_NODE;
1900 domain->flags = flags;
1901 if (first_level_by_default())
1902 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1903 domain->has_iotlb_device = false;
1904 INIT_LIST_HEAD(&domain->devices);
1905 INIT_LIST_HEAD(&domain->subdevices);
1910 /* Must be called with iommu->lock */
1911 static int domain_attach_iommu(struct dmar_domain *domain,
1912 struct intel_iommu *iommu)
1914 unsigned long ndomains;
1917 assert_spin_locked(&device_domain_lock);
1918 assert_spin_locked(&iommu->lock);
1920 domain->iommu_refcnt[iommu->seq_id] += 1;
1921 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1922 ndomains = cap_ndoms(iommu->cap);
1923 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1925 if (num >= ndomains) {
1926 pr_err("%s: No free domain ids\n", iommu->name);
1927 domain->iommu_refcnt[iommu->seq_id] -= 1;
1931 set_bit(num, iommu->domain_ids);
1932 set_iommu_domain(iommu, num, domain);
1934 domain->iommu_did[iommu->seq_id] = num;
1935 domain->nid = iommu->node;
1937 domain_update_iommu_cap(domain);
1943 static void domain_detach_iommu(struct dmar_domain *domain,
1944 struct intel_iommu *iommu)
1948 assert_spin_locked(&device_domain_lock);
1949 assert_spin_locked(&iommu->lock);
1951 domain->iommu_refcnt[iommu->seq_id] -= 1;
1952 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1953 num = domain->iommu_did[iommu->seq_id];
1954 clear_bit(num, iommu->domain_ids);
1955 set_iommu_domain(iommu, num, NULL);
1957 domain_update_iommu_cap(domain);
1958 domain->iommu_did[iommu->seq_id] = 0;
1962 static inline int guestwidth_to_adjustwidth(int gaw)
1965 int r = (gaw - 12) % 9;
1976 static void domain_exit(struct dmar_domain *domain)
1979 /* Remove associated devices and clear attached or cached domains */
1980 domain_remove_dev_info(domain);
1983 if (domain->domain.type == IOMMU_DOMAIN_DMA)
1984 iommu_put_dma_cookie(&domain->domain);
1987 struct page *freelist;
1989 freelist = domain_unmap(domain, 0,
1990 DOMAIN_MAX_PFN(domain->gaw), NULL);
1991 dma_free_pagelist(freelist);
1994 free_domain_mem(domain);
1998 * Get the PASID directory size for a scalable mode context entry.
1999 * Value of X in the PDTS field of a scalable mode context entry
2000 * indicates a PASID directory with 2^(X + 7) entries.
2002 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2006 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2007 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
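/*
 * Worked example (illustrative, assuming PASID_PDE_SHIFT == 6 so each
 * directory entry covers 64 PASIDs, and that the elided tail of this
 * helper returns pds - 7): a 20-bit PASID space gives
 * max_pde == 1 << 14, so pds == 14 and PDTS is encoded as 7, i.e. a
 * directory of 2^(7 + 7) == 16384 entries.
 */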
2015 * Set the RID_PASID field of a scalable mode context entry. The
2016 * IOMMU hardware will use the PASID value set in this field for
2017 * DMA translations of DMA requests without PASID.
2020 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2022 context->hi |= pasid & ((1 << 20) - 1);
2026 * Set the DTE (Device-TLB Enable) field of a scalable mode context entry.
2029 static inline void context_set_sm_dte(struct context_entry *context)
2031 context->lo |= (1 << 2);
2035 * Set the PRE (Page Request Enable) field of a scalable mode context entry.
2038 static inline void context_set_sm_pre(struct context_entry *context)
2040 context->lo |= (1 << 4);
2043 /* Convert value to context PASID directory size field coding. */
2044 #define context_pdts(pds) (((pds) & 0x7) << 9)
2046 static int domain_context_mapping_one(struct dmar_domain *domain,
2047 struct intel_iommu *iommu,
2048 struct pasid_table *table,
2051 u16 did = domain->iommu_did[iommu->seq_id];
2052 int translation = CONTEXT_TT_MULTI_LEVEL;
2053 struct device_domain_info *info = NULL;
2054 struct context_entry *context;
2055 unsigned long flags;
2060 if (hw_pass_through && domain_type_is_si(domain))
2061 translation = CONTEXT_TT_PASS_THROUGH;
2063 pr_debug("Set context mapping for %02x:%02x.%d\n",
2064 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2066 BUG_ON(!domain->pgd);
2068 spin_lock_irqsave(&device_domain_lock, flags);
2069 spin_lock(&iommu->lock);
2072 context = iommu_context_addr(iommu, bus, devfn, 1);
2077 if (context_present(context))
2081 * For kdump cases, old valid entries may be cached due to the
2082 * in-flight DMA and copied pgtable, but there is no unmapping
2083 * behaviour for them, thus we need an explicit cache flush for
2084 * the newly-mapped device. For kdump, at this point, the device
2085 * is supposed to finish reset at its driver probe stage, so no
2086 * in-flight DMA will exist, and we don't need to worry anymore hereafter.
2089 if (context_copied(context)) {
2090 u16 did_old = context_domain_id(context);
2092 if (did_old < cap_ndoms(iommu->cap)) {
2093 iommu->flush.flush_context(iommu, did_old,
2094 (((u16)bus) << 8) | devfn,
2095 DMA_CCMD_MASK_NOBIT,
2096 DMA_CCMD_DEVICE_INVL);
2097 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2102 context_clear_entry(context);
2104 if (sm_supported(iommu)) {
2109 /* Setup the PASID DIR pointer: */
2110 pds = context_get_sm_pds(table);
2111 context->lo = (u64)virt_to_phys(table->table) |
2114 /* Setup the RID_PASID field: */
2115 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2118 * Setup the Device-TLB enable bit and Page request
2121 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2122 if (info && info->ats_supported)
2123 context_set_sm_dte(context);
2124 if (info && info->pri_supported)
2125 context_set_sm_pre(context);
2127 struct dma_pte *pgd = domain->pgd;
2130 context_set_domain_id(context, did);
2132 if (translation != CONTEXT_TT_PASS_THROUGH) {
2134 * Skip top levels of page tables for iommu which has
2135 * less agaw than default. Unnecessary for PT mode.
2137 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2139 pgd = phys_to_virt(dma_pte_addr(pgd));
2140 if (!dma_pte_present(pgd))
2144 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2145 if (info && info->ats_supported)
2146 translation = CONTEXT_TT_DEV_IOTLB;
2148 translation = CONTEXT_TT_MULTI_LEVEL;
2150 context_set_address_root(context, virt_to_phys(pgd));
2151 context_set_address_width(context, agaw);
2154 * In pass through mode, AW must be programmed to
2155 * indicate the largest AGAW value supported by
2156 * hardware. And ASR is ignored by hardware.
2158 context_set_address_width(context, iommu->msagaw);
2161 context_set_translation_type(context, translation);
2164 context_set_fault_enable(context);
2165 context_set_present(context);
2166 if (!ecap_coherent(iommu->ecap))
2167 clflush_cache_range(context, sizeof(*context));
2170 * It's a non-present to present mapping. If hardware doesn't cache
2171 * non-present entries we only need to flush the write-buffer. If it
2172 * _does_ cache non-present entries, then it does so in the special
2173 * domain #0, which we have to flush:
2175 if (cap_caching_mode(iommu->cap)) {
2176 iommu->flush.flush_context(iommu, 0,
2177 (((u16)bus) << 8) | devfn,
2178 DMA_CCMD_MASK_NOBIT,
2179 DMA_CCMD_DEVICE_INVL);
2180 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2182 iommu_flush_write_buffer(iommu);
2184 iommu_enable_dev_iotlb(info);
2189 spin_unlock(&iommu->lock);
2190 spin_unlock_irqrestore(&device_domain_lock, flags);
2195 struct domain_context_mapping_data {
2196 struct dmar_domain *domain;
2197 struct intel_iommu *iommu;
2198 struct pasid_table *table;
2201 static int domain_context_mapping_cb(struct pci_dev *pdev,
2202 u16 alias, void *opaque)
2204 struct domain_context_mapping_data *data = opaque;
2206 return domain_context_mapping_one(data->domain, data->iommu,
2207 data->table, PCI_BUS_NUM(alias),
2212 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2214 struct domain_context_mapping_data data;
2215 struct pasid_table *table;
2216 struct intel_iommu *iommu;
2219 iommu = device_to_iommu(dev, &bus, &devfn);
2223 table = intel_pasid_get_table(dev);
2225 if (!dev_is_pci(dev))
2226 return domain_context_mapping_one(domain, iommu, table,
2229 data.domain = domain;
2233 return pci_for_each_dma_alias(to_pci_dev(dev),
2234 &domain_context_mapping_cb, &data);
2237 static int domain_context_mapped_cb(struct pci_dev *pdev,
2238 u16 alias, void *opaque)
2240 struct intel_iommu *iommu = opaque;
2242 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2245 static int domain_context_mapped(struct device *dev)
2247 struct intel_iommu *iommu;
2250 iommu = device_to_iommu(dev, &bus, &devfn);
2254 if (!dev_is_pci(dev))
2255 return device_context_mapped(iommu, bus, devfn);
2257 return !pci_for_each_dma_alias(to_pci_dev(dev),
2258 domain_context_mapped_cb, iommu);
2261 /* Returns the number of VT-d pages, but aligned to MM page size */
2262 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2265 host_addr &= ~PAGE_MASK;
2266 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
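/*
 * Worked example (illustrative, 4KiB pages): host_addr == 0x1234 and
 * size == 0x2000 leaves an in-page offset of 0x234, PAGE_ALIGN(0x2234)
 * is 0x3000, so the mapping needs 3 VT-d pages even though the size
 * alone would fit in 2.
 */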
2269 /* Return largest possible superpage level for a given mapping */
2270 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2271 unsigned long iov_pfn,
2272 unsigned long phy_pfn,
2273 unsigned long pages)
2275 int support, level = 1;
2276 unsigned long pfnmerge;
2278 support = domain->iommu_superpage;
2280 /* To use a large page, the virtual *and* physical addresses
2281 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2282 of them will mean we have to use smaller pages. So just
2283 merge them and check both at once. */
2284 pfnmerge = iov_pfn | phy_pfn;
2286 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2287 pages >>= VTD_STRIDE_SHIFT;
2290 pfnmerge >>= VTD_STRIDE_SHIFT;
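/*
 * Worked example (illustrative): with domain->iommu_superpage == 2, an
 * IOVA PFN and a physical PFN that are both 512-page (2MiB) aligned,
 * and at least 512 pages to map, the merged PFN has no low stride bits
 * set, so the loop advances a level and a 2MiB superpage can be used.
 * A misaligned low bit in either PFN stops the walk at level 1
 * (4KiB pages).
 */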
2298 * Ensure that old small page tables are removed to make room for superpage(s).
2299 * We're going to add new large pages, so make sure we don't remove their parent
2300 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
2302 static void switch_to_super_page(struct dmar_domain *domain,
2303 unsigned long start_pfn,
2304 unsigned long end_pfn, int level)
2306 unsigned long lvl_pages = lvl_to_nr_pages(level);
2307 struct dma_pte *pte = NULL;
2310 while (start_pfn <= end_pfn) {
2312 pte = pfn_to_dma_pte(domain, start_pfn, &level);
2314 if (dma_pte_present(pte)) {
2315 dma_pte_free_pagetable(domain, start_pfn,
2316 start_pfn + lvl_pages - 1,
2319 for_each_domain_iommu(i, domain)
2320 iommu_flush_iotlb_psi(g_iommus[i], domain,
2321 start_pfn, lvl_pages,
2326 start_pfn += lvl_pages;
2327 if (first_pte_in_page(pte))
2333 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2334 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2336 unsigned int largepage_lvl = 0;
2337 unsigned long lvl_pages = 0;
2338 struct dma_pte *pte = NULL;
2342 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2344 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2347 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2348 attr |= DMA_FL_PTE_PRESENT;
2349 if (domain_use_first_level(domain)) {
2350 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
2352 if (domain->domain.type == IOMMU_DOMAIN_DMA) {
2353 attr |= DMA_FL_PTE_ACCESS;
2354 if (prot & DMA_PTE_WRITE)
2355 attr |= DMA_FL_PTE_DIRTY;
2359 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2361 while (nr_pages > 0) {
2365 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2366 phys_pfn, nr_pages);
2368 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2371 /* It is a large page */
2372 if (largepage_lvl > 1) {
2373 unsigned long end_pfn;
2375 pteval |= DMA_PTE_LARGE_PAGE;
2376 end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
2377 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2379 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2383 /* We don't need a lock here; nobody else
2384 * touches the iova range
2386 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2388 static int dumps = 5;
2389 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2390 iov_pfn, tmp, (unsigned long long)pteval);
2393 debug_dma_dump_mappings(NULL);
2398 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2400 BUG_ON(nr_pages < lvl_pages);
2402 nr_pages -= lvl_pages;
2403 iov_pfn += lvl_pages;
2404 phys_pfn += lvl_pages;
2405 pteval += lvl_pages * VTD_PAGE_SIZE;
2407 /* If the next PTE would be the first in a new page, then we
2408 * need to flush the cache on the entries we've just written.
2409 * And then we'll need to recalculate 'pte', so clear it and
2410 * let it get set again in the if (!pte) block above.
2412 * If we're done (!nr_pages) we need to flush the cache too.
2414 * Also if we've been setting superpages, we may need to
2415 * recalculate 'pte' and switch back to smaller pages for the
2416 * end of the mapping, if the trailing size is not enough to
2417 * use another superpage (i.e. nr_pages < lvl_pages).
2419 * We leave clflush for the leaf pte changes to iotlb_sync_map()
2423 if (!nr_pages || first_pte_in_page(pte) ||
2424 (largepage_lvl > 1 && nr_pages < lvl_pages))
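/*
 * Worked example (illustrative): mapping nr_pages = 0x200 (2MiB) with
 * iov_pfn and phys_pfn both 2MiB-aligned on hardware that reports 2MiB
 * superpage support ends up writing a single level-2 PTE carrying
 * DMA_PTE_LARGE_PAGE, after switch_to_super_page() has cleared any stale
 * 4KiB tables for that range, instead of 512 individual level-1 PTEs.
 */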
2431 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2433 struct intel_iommu *iommu = info->iommu;
2434 struct context_entry *context;
2435 unsigned long flags;
2441 spin_lock_irqsave(&iommu->lock, flags);
2442 context = iommu_context_addr(iommu, bus, devfn, 0);
2444 spin_unlock_irqrestore(&iommu->lock, flags);
2448 if (sm_supported(iommu)) {
2449 if (hw_pass_through && domain_type_is_si(info->domain))
2450 did_old = FLPT_DEFAULT_DID;
2452 did_old = info->domain->iommu_did[iommu->seq_id];
2454 did_old = context_domain_id(context);
2457 context_clear_entry(context);
2458 __iommu_flush_cache(iommu, context, sizeof(*context));
2459 spin_unlock_irqrestore(&iommu->lock, flags);
2460 iommu->flush.flush_context(iommu,
2462 (((u16)bus) << 8) | devfn,
2463 DMA_CCMD_MASK_NOBIT,
2464 DMA_CCMD_DEVICE_INVL);
2466 if (sm_supported(iommu))
2467 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2469 iommu->flush.flush_iotlb(iommu,
2475 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2478 static inline void unlink_domain_info(struct device_domain_info *info)
2480 assert_spin_locked(&device_domain_lock);
2481 list_del(&info->link);
2482 list_del(&info->global);
2484 dev_iommu_priv_set(info->dev, NULL);
2487 static void domain_remove_dev_info(struct dmar_domain *domain)
2489 struct device_domain_info *info, *tmp;
2490 unsigned long flags;
2492 spin_lock_irqsave(&device_domain_lock, flags);
2493 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2494 __dmar_remove_one_dev_info(info);
2495 spin_unlock_irqrestore(&device_domain_lock, flags);
2498 struct dmar_domain *find_domain(struct device *dev)
2500 struct device_domain_info *info;
2502 if (unlikely(!dev || !dev->iommu))
2505 if (unlikely(attach_deferred(dev)))
2508 /* No lock here, assumes no domain exit in normal case */
2509 info = get_domain_info(dev);
2511 return info->domain;
2516 static inline struct device_domain_info *
2517 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2519 struct device_domain_info *info;
2521 list_for_each_entry(info, &device_domain_list, global)
2522 if (info->segment == segment && info->bus == bus &&
2523 info->devfn == devfn)
2529 static int domain_setup_first_level(struct intel_iommu *iommu,
2530 struct dmar_domain *domain,
2534 struct dma_pte *pgd = domain->pgd;
2539 * Skip top levels of page tables for an iommu whose agaw
2540 * is less than the default. Unnecessary for PT mode.
2542 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2543 pgd = phys_to_virt(dma_pte_addr(pgd));
2544 if (!dma_pte_present(pgd))
2548 level = agaw_to_level(agaw);
2549 if (level != 4 && level != 5)
2552 if (pasid != PASID_RID2PASID)
2553 flags |= PASID_FLAG_SUPERVISOR_MODE;
2555 flags |= PASID_FLAG_FL5LP;
2557 if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
2558 flags |= PASID_FLAG_PAGE_SNOOP;
2560 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2561 domain->iommu_did[iommu->seq_id],
2565 static bool dev_is_real_dma_subdevice(struct device *dev)
2567 return dev && dev_is_pci(dev) &&
2568 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2571 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2574 struct dmar_domain *domain)
2576 struct dmar_domain *found = NULL;
2577 struct device_domain_info *info;
2578 unsigned long flags;
2581 info = alloc_devinfo_mem();
2585 if (!dev_is_real_dma_subdevice(dev)) {
2587 info->devfn = devfn;
2588 info->segment = iommu->segment;
2590 struct pci_dev *pdev = to_pci_dev(dev);
2592 info->bus = pdev->bus->number;
2593 info->devfn = pdev->devfn;
2594 info->segment = pci_domain_nr(pdev->bus);
2597 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2598 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2601 info->domain = domain;
2602 info->iommu = iommu;
2603 info->pasid_table = NULL;
2604 info->auxd_enabled = 0;
2605 INIT_LIST_HEAD(&info->subdevices);
2607 if (dev && dev_is_pci(dev)) {
2608 struct pci_dev *pdev = to_pci_dev(info->dev);
2610 if (ecap_dev_iotlb_support(iommu->ecap) &&
2611 pci_ats_supported(pdev) &&
2612 dmar_find_matched_atsr_unit(pdev))
2613 info->ats_supported = 1;
2615 if (sm_supported(iommu)) {
2616 if (pasid_supported(iommu)) {
2617 int features = pci_pasid_features(pdev);
2619 info->pasid_supported = features | 1;
2622 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2623 pci_pri_supported(pdev))
2624 info->pri_supported = 1;
2628 spin_lock_irqsave(&device_domain_lock, flags);
2630 found = find_domain(dev);
2633 struct device_domain_info *info2;
2634 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2637 found = info2->domain;
2643 spin_unlock_irqrestore(&device_domain_lock, flags);
2644 free_devinfo_mem(info);
2645 /* Caller must free the original domain */
2649 spin_lock(&iommu->lock);
2650 ret = domain_attach_iommu(domain, iommu);
2651 spin_unlock(&iommu->lock);
2654 spin_unlock_irqrestore(&device_domain_lock, flags);
2655 free_devinfo_mem(info);
2659 list_add(&info->link, &domain->devices);
2660 list_add(&info->global, &device_domain_list);
2662 dev_iommu_priv_set(dev, info);
2663 spin_unlock_irqrestore(&device_domain_lock, flags);
2665 /* PASID table is mandatory for a PCI device in scalable mode. */
2666 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2667 ret = intel_pasid_alloc_table(dev);
2669 dev_err(dev, "PASID table allocation failed\n");
2670 dmar_remove_one_dev_info(dev);
2674 /* Set up the PASID entry for requests without PASID: */
2675 spin_lock_irqsave(&iommu->lock, flags);
2676 if (hw_pass_through && domain_type_is_si(domain))
2677 ret = intel_pasid_setup_pass_through(iommu, domain,
2678 dev, PASID_RID2PASID);
2679 else if (domain_use_first_level(domain))
2680 ret = domain_setup_first_level(iommu, domain, dev,
2683 ret = intel_pasid_setup_second_level(iommu, domain,
2684 dev, PASID_RID2PASID);
2685 spin_unlock_irqrestore(&iommu->lock, flags);
2687 dev_err(dev, "Setup RID2PASID failed\n");
2688 dmar_remove_one_dev_info(dev);
2693 if (dev && domain_context_mapping(domain, dev)) {
2694 dev_err(dev, "Domain context map failed\n");
2695 dmar_remove_one_dev_info(dev);
2702 static int iommu_domain_identity_map(struct dmar_domain *domain,
2703 unsigned long first_vpfn,
2704 unsigned long last_vpfn)
2707 * The RMRR range might overlap with a physical memory range,
2710 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2712 return __domain_mapping(domain, first_vpfn,
2713 first_vpfn, last_vpfn - first_vpfn + 1,
2714 DMA_PTE_READ|DMA_PTE_WRITE);
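/*
 * Illustrative note: an identity map simply uses the same numbers for the
 * IOVA and physical PFNs, e.g. mapping DMA PFNs 0x10000..0x1ffff onto
 * physical PFNs 0x10000..0x1ffff (hypothetical range), so devices in the
 * si_domain can keep using bus addresses equal to physical addresses.
 */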
2717 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2719 static int __init si_domain_init(int hw)
2721 struct dmar_rmrr_unit *rmrr;
2725 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2729 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2730 domain_exit(si_domain);
2737 for_each_online_node(nid) {
2738 unsigned long start_pfn, end_pfn;
2741 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2742 ret = iommu_domain_identity_map(si_domain,
2743 mm_to_dma_pfn(start_pfn),
2744 mm_to_dma_pfn(end_pfn));
2751 * Identity map the RMRRs so that devices with RMRRs could also use
2754 for_each_rmrr_units(rmrr) {
2755 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2757 unsigned long long start = rmrr->base_address;
2758 unsigned long long end = rmrr->end_address;
2760 if (WARN_ON(end < start ||
2761 end >> agaw_to_width(si_domain->agaw)))
2764 ret = iommu_domain_identity_map(si_domain,
2765 mm_to_dma_pfn(start >> PAGE_SHIFT),
2766 mm_to_dma_pfn(end >> PAGE_SHIFT));
2775 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2777 struct dmar_domain *ndomain;
2778 struct intel_iommu *iommu;
2781 iommu = device_to_iommu(dev, &bus, &devfn);
2785 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2786 if (ndomain != domain)
2792 static bool device_has_rmrr(struct device *dev)
2794 struct dmar_rmrr_unit *rmrr;
2799 for_each_rmrr_units(rmrr) {
2801 * Return TRUE if this RMRR contains the device that
2804 for_each_active_dev_scope(rmrr->devices,
2805 rmrr->devices_cnt, i, tmp)
2807 is_downstream_to_pci_bridge(dev, tmp)) {
2817 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2818 * is relaxable (ie. is allowed to be not enforced under some conditions)
2819 * @dev: device handle
2821 * We assume that PCI USB devices with RMRRs have them largely
2822 * for historical reasons and that the RMRR space is not actively used post
2823 * boot. This exclusion may change if vendors begin to abuse it.
2825 * The same exception is made for graphics devices, with the requirement that
2826 * any use of the RMRR regions will be torn down before assigning the device
2829 * Return: true if the RMRR is relaxable, false otherwise
2831 static bool device_rmrr_is_relaxable(struct device *dev)
2833 struct pci_dev *pdev;
2835 if (!dev_is_pci(dev))
2838 pdev = to_pci_dev(dev);
2839 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2846 * There are a couple cases where we need to restrict the functionality of
2847 * devices associated with RMRRs. The first is when evaluating a device for
2848 * identity mapping because problems exist when devices are moved in and out
2849 * of domains and their respective RMRR information is lost. This means that
2850 * a device with associated RMRRs will never be in a "passthrough" domain.
2851 * The second is use of the device through the IOMMU API. This interface
2852 * expects to have full control of the IOVA space for the device. We cannot
2853 * satisfy both the requirement that RMRR access is maintained and have an
2854 * unencumbered IOVA space. We also have no ability to quiesce the device's
2855 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2856 * We therefore prevent devices associated with an RMRR from participating in
2857 * the IOMMU API, which eliminates them from device assignment.
2859 * In both cases, devices which have relaxable RMRRs are not concerned by this
2860 * restriction. See device_rmrr_is_relaxable comment.
2862 static bool device_is_rmrr_locked(struct device *dev)
2864 if (!device_has_rmrr(dev))
2867 if (device_rmrr_is_relaxable(dev))
2874 * Return the required default domain type for a specific device.
2876 * @dev: the device in question
2880 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2881 * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2882 * - 0: both identity and dynamic domains work for this device
2884 static int device_def_domain_type(struct device *dev)
2886 if (dev_is_pci(dev)) {
2887 struct pci_dev *pdev = to_pci_dev(dev);
2889 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2890 return IOMMU_DOMAIN_IDENTITY;
2892 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2893 return IOMMU_DOMAIN_IDENTITY;
2899 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2902 * Start from a sane iommu hardware state.
2903 * If queued invalidation was already initialized by us
2904 * (for example, while enabling interrupt remapping), then
2905 * things are already rolling from a sane state.
2909 * Clear any previous faults.
2911 dmar_fault(-1, iommu);
2913 * Disable queued invalidation if supported and already enabled
2914 * before OS handover.
2916 dmar_disable_qi(iommu);
2919 if (dmar_enable_qi(iommu)) {
2921 * Queued Invalidate not enabled, use Register Based Invalidate
2923 iommu->flush.flush_context = __iommu_flush_context;
2924 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2925 pr_info("%s: Using Register based invalidation\n",
2928 iommu->flush.flush_context = qi_flush_context;
2929 iommu->flush.flush_iotlb = qi_flush_iotlb;
2930 pr_info("%s: Using Queued invalidation\n", iommu->name);
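/*
 * Illustrative note: later flushes go through this indirection regardless
 * of which backend was chosen here, e.g. a global invalidation is issued
 * as iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH) (see
 * iommu_flush_all() below); a domain-selective flush would pass the domain
 * id and DMA_TLB_DSI_FLUSH instead.
 */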
2934 static int copy_context_table(struct intel_iommu *iommu,
2935 struct root_entry *old_re,
2936 struct context_entry **tbl,
2939 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2940 struct context_entry *new_ce = NULL, ce;
2941 struct context_entry *old_ce = NULL;
2942 struct root_entry re;
2943 phys_addr_t old_ce_phys;
2945 tbl_idx = ext ? bus * 2 : bus;
2946 memcpy(&re, old_re, sizeof(re));
2948 for (devfn = 0; devfn < 256; devfn++) {
2949 /* First calculate the correct index */
2950 idx = (ext ? devfn * 2 : devfn) % 256;
2953 /* First save what we may have and clean up */
2955 tbl[tbl_idx] = new_ce;
2956 __iommu_flush_cache(iommu, new_ce,
2966 old_ce_phys = root_entry_lctp(&re);
2968 old_ce_phys = root_entry_uctp(&re);
2971 if (ext && devfn == 0) {
2972 /* No LCTP, try UCTP */
2981 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2986 new_ce = alloc_pgtable_page(iommu->node);
2993 /* Now copy the context entry */
2994 memcpy(&ce, old_ce + idx, sizeof(ce));
2996 if (!__context_present(&ce))
2999 did = context_domain_id(&ce);
3000 if (did >= 0 && did < cap_ndoms(iommu->cap))
3001 set_bit(did, iommu->domain_ids);
3004 * We need a marker for copied context entries. This
3005 * marker needs to work for the old format as well as
3006 * for extended context entries.
3008 * Bit 67 of the context entry is used. In the old
3009 * format this bit is available to software, in the
3010 * extended format it is the PGE bit, but PGE is ignored
3011 * by HW if PASIDs are disabled (and thus still
3014 * So disable PASIDs first and then mark the entry
3015 * copied. This means that we don't copy PASID
3016 * translations from the old kernel, but this is fine as
3017 * faults there are not fatal.
3019 context_clear_pasid_enable(&ce);
3020 context_set_copied(&ce);
3025 tbl[tbl_idx + pos] = new_ce;
3027 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3036 static int copy_translation_tables(struct intel_iommu *iommu)
3038 struct context_entry **ctxt_tbls;
3039 struct root_entry *old_rt;
3040 phys_addr_t old_rt_phys;
3041 int ctxt_table_entries;
3042 unsigned long flags;
3047 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3048 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3049 new_ext = !!ecap_ecs(iommu->ecap);
3052 * The RTT bit can only be changed when translation is disabled,
3053 * but disabling translation means opening a window for data
3054 * corruption. So bail out and don't copy anything if we would
3055 * have to change the bit.
3060 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3064 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3068 /* This is too big for the stack - allocate it from slab */
3069 ctxt_table_entries = ext ? 512 : 256;
3071 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3075 for (bus = 0; bus < 256; bus++) {
3076 ret = copy_context_table(iommu, &old_rt[bus],
3077 ctxt_tbls, bus, ext);
3079 pr_err("%s: Failed to copy context table for bus %d\n",
3085 spin_lock_irqsave(&iommu->lock, flags);
3087 /* Context tables are copied, now write them to the root_entry table */
3088 for (bus = 0; bus < 256; bus++) {
3089 int idx = ext ? bus * 2 : bus;
3092 if (ctxt_tbls[idx]) {
3093 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3094 iommu->root_entry[bus].lo = val;
3097 if (!ext || !ctxt_tbls[idx + 1])
3100 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3101 iommu->root_entry[bus].hi = val;
3104 spin_unlock_irqrestore(&iommu->lock, flags);
3108 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
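/*
 * Illustrative note on the layout rebuilt above (hedged summary of the copy
 * logic): in the extended root-entry format each bus has two context tables,
 * the lower one (root_entry.lo) covering devfn 0-127 and the upper one
 * (root_entry.hi) covering devfn 128-255, which is why ctxt_tbls holds 512
 * entries and idx is bus * 2 when ext is set.
 */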
3118 #ifdef CONFIG_INTEL_IOMMU_SVM
3119 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3121 struct intel_iommu *iommu = data;
3125 return INVALID_IOASID;
3127 * The VT-d virtual command interface always uses the full 20-bit
3128 * PASID range. The host can partition the guest PASID range based on
3129 * policies, but that is out of the guest's control.
3131 if (min < PASID_MIN || max > intel_pasid_max_id)
3132 return INVALID_IOASID;
3134 if (vcmd_alloc_pasid(iommu, &ioasid))
3135 return INVALID_IOASID;
3140 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3142 struct intel_iommu *iommu = data;
3147 * Sanity checking of the ioasid owner is done at the upper layer, e.g. VFIO.
3148 * We can only free the PASID when all the devices are unbound.
3150 if (ioasid_find(NULL, ioasid, NULL)) {
3151 pr_alert("Cannot free active IOASID %d\n", ioasid);
3154 vcmd_free_pasid(iommu, ioasid);
3157 static void register_pasid_allocator(struct intel_iommu *iommu)
3160 * If we are running in the host, there is no need for a custom
3161 * allocator, since PASIDs are allocated from the host system-wide.
3163 if (!cap_caching_mode(iommu->cap))
3166 if (!sm_supported(iommu)) {
3167 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3172 * Register a custom PASID allocator if we are running in a guest;
3173 * guest PASIDs must be obtained via the virtual command interface.
3174 * There can be multiple vIOMMUs in each guest but only one allocator
3175 * is active. All vIOMMU allocators will eventually be calling the same
3178 if (!vccap_pasid(iommu->vccap))
3181 pr_info("Register custom PASID allocator\n");
3182 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3183 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3184 iommu->pasid_allocator.pdata = (void *)iommu;
3185 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3186 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3188 * Disable scalable mode on this IOMMU if there
3189 * is no custom allocator. Mixing SM-capable vIOMMUs
3190 * and non-SM vIOMMUs is not supported.
3197 static int __init init_dmars(void)
3199 struct dmar_drhd_unit *drhd;
3200 struct intel_iommu *iommu;
3206 * initialize and program root entry to not present
3209 for_each_drhd_unit(drhd) {
3211 * lock not needed as this is only incremented in the single-
3212 * threaded kernel __init code path; all other accesses are reads
3215 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3219 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3222 /* Preallocate enough resources for IOMMU hot-addition */
3223 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3224 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3226 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3229 pr_err("Allocating global iommu array failed\n");
3234 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
3238 for_each_iommu(iommu, drhd) {
3239 if (drhd->ignored) {
3240 iommu_disable_translation(iommu);
3245 * Find the max pasid size of all IOMMUs in the system.
3246 * We need to ensure the system pasid table is no bigger
3247 * than the smallest supported.
3249 if (pasid_supported(iommu)) {
3250 u32 temp = 2 << ecap_pss(iommu->ecap);
3252 intel_pasid_max_id = min_t(u32, temp,
3253 intel_pasid_max_id);
3256 g_iommus[iommu->seq_id] = iommu;
3258 intel_iommu_init_qi(iommu);
3260 ret = iommu_init_domains(iommu);
3264 init_translation_status(iommu);
3266 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3267 iommu_disable_translation(iommu);
3268 clear_translation_pre_enabled(iommu);
3269 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3275 * we could share the same root & context tables
3276 * among all IOMMUs. Needs to be split later.
3278 ret = iommu_alloc_root_entry(iommu);
3282 if (translation_pre_enabled(iommu)) {
3283 pr_info("Translation already enabled - trying to copy translation structures\n");
3285 ret = copy_translation_tables(iommu);
3288 * We found the IOMMU with translation
3289 * enabled - but failed to copy over the
3290 * old root-entry table. Try to proceed
3291 * by disabling translation now and
3292 * allocating a clean root-entry table.
3293 * This might cause DMAR faults, but
3294 * probably the dump will still succeed.
3296 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3298 iommu_disable_translation(iommu);
3299 clear_translation_pre_enabled(iommu);
3301 pr_info("Copied translation tables from previous kernel for %s\n",
3306 if (!ecap_pass_through(iommu->ecap))
3307 hw_pass_through = 0;
3308 intel_svm_check(iommu);
3312 * Now that qi is enabled on all iommus, set the root entry and flush
3313 * caches. This is required on some Intel X58 chipsets, otherwise the
3314 * flush_context function will loop forever and the boot hangs.
3316 for_each_active_iommu(iommu, drhd) {
3317 iommu_flush_write_buffer(iommu);
3318 #ifdef CONFIG_INTEL_IOMMU_SVM
3319 register_pasid_allocator(iommu);
3321 iommu_set_root_entry(iommu);
3324 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3329 iommu_identity_mapping |= IDENTMAP_GFX;
3331 check_tylersburg_isoch();
3333 ret = si_domain_init(hw_pass_through);
3340 * global invalidate context cache
3341 * global invalidate iotlb
3342 * enable translation
3344 for_each_iommu(iommu, drhd) {
3345 if (drhd->ignored) {
3347 * we always have to disable PMRs or DMA may fail on
3351 iommu_disable_protect_mem_regions(iommu);
3355 iommu_flush_write_buffer(iommu);
3357 #ifdef CONFIG_INTEL_IOMMU_SVM
3358 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3360 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3361 * could cause a lock race condition.
3363 up_write(&dmar_global_lock);
3364 ret = intel_svm_enable_prq(iommu);
3365 down_write(&dmar_global_lock);
3370 ret = dmar_set_interrupt(iommu);
3378 for_each_active_iommu(iommu, drhd) {
3379 disable_dmar_iommu(iommu);
3380 free_dmar_iommu(iommu);
3389 static inline int iommu_domain_cache_init(void)
3393 iommu_domain_cache = kmem_cache_create("iommu_domain",
3394 sizeof(struct dmar_domain),
3399 if (!iommu_domain_cache) {
3400 pr_err("Couldn't create iommu_domain cache\n");
3407 static inline int iommu_devinfo_cache_init(void)
3411 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3412 sizeof(struct device_domain_info),
3416 if (!iommu_devinfo_cache) {
3417 pr_err("Couldn't create devinfo cache\n");
3424 static int __init iommu_init_mempool(void)
3427 ret = iova_cache_get();
3431 ret = iommu_domain_cache_init();
3435 ret = iommu_devinfo_cache_init();
3439 kmem_cache_destroy(iommu_domain_cache);
3446 static void __init iommu_exit_mempool(void)
3448 kmem_cache_destroy(iommu_devinfo_cache);
3449 kmem_cache_destroy(iommu_domain_cache);
3453 static void __init init_no_remapping_devices(void)
3455 struct dmar_drhd_unit *drhd;
3459 for_each_drhd_unit(drhd) {
3460 if (!drhd->include_all) {
3461 for_each_active_dev_scope(drhd->devices,
3462 drhd->devices_cnt, i, dev)
3464 /* ignore DMAR unit if no devices exist */
3465 if (i == drhd->devices_cnt)
3470 for_each_active_drhd_unit(drhd) {
3471 if (drhd->include_all)
3474 for_each_active_dev_scope(drhd->devices,
3475 drhd->devices_cnt, i, dev)
3476 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3478 if (i < drhd->devices_cnt)
3481 /* This IOMMU has *only* gfx devices. Either bypass it or
3482 set the gfx_mapped flag, as appropriate */
3483 drhd->gfx_dedicated = 1;
3489 #ifdef CONFIG_SUSPEND
3490 static int init_iommu_hw(void)
3492 struct dmar_drhd_unit *drhd;
3493 struct intel_iommu *iommu = NULL;
3495 for_each_active_iommu(iommu, drhd)
3497 dmar_reenable_qi(iommu);
3499 for_each_iommu(iommu, drhd) {
3500 if (drhd->ignored) {
3502 * we always have to disable PMRs or DMA may fail on
3506 iommu_disable_protect_mem_regions(iommu);
3510 iommu_flush_write_buffer(iommu);
3511 iommu_set_root_entry(iommu);
3512 iommu_enable_translation(iommu);
3513 iommu_disable_protect_mem_regions(iommu);
3519 static void iommu_flush_all(void)
3521 struct dmar_drhd_unit *drhd;
3522 struct intel_iommu *iommu;
3524 for_each_active_iommu(iommu, drhd) {
3525 iommu->flush.flush_context(iommu, 0, 0, 0,
3526 DMA_CCMD_GLOBAL_INVL);
3527 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3528 DMA_TLB_GLOBAL_FLUSH);
3532 static int iommu_suspend(void)
3534 struct dmar_drhd_unit *drhd;
3535 struct intel_iommu *iommu = NULL;
3538 for_each_active_iommu(iommu, drhd) {
3539 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3541 if (!iommu->iommu_state)
3547 for_each_active_iommu(iommu, drhd) {
3548 iommu_disable_translation(iommu);
3550 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3552 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3553 readl(iommu->reg + DMAR_FECTL_REG);
3554 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3555 readl(iommu->reg + DMAR_FEDATA_REG);
3556 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3557 readl(iommu->reg + DMAR_FEADDR_REG);
3558 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3559 readl(iommu->reg + DMAR_FEUADDR_REG);
3561 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3566 for_each_active_iommu(iommu, drhd)
3567 kfree(iommu->iommu_state);
3572 static void iommu_resume(void)
3574 struct dmar_drhd_unit *drhd;
3575 struct intel_iommu *iommu = NULL;
3578 if (init_iommu_hw()) {
3580 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3582 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3586 for_each_active_iommu(iommu, drhd) {
3588 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3590 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3591 iommu->reg + DMAR_FECTL_REG);
3592 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3593 iommu->reg + DMAR_FEDATA_REG);
3594 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3595 iommu->reg + DMAR_FEADDR_REG);
3596 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3597 iommu->reg + DMAR_FEUADDR_REG);
3599 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3602 for_each_active_iommu(iommu, drhd)
3603 kfree(iommu->iommu_state);
3606 static struct syscore_ops iommu_syscore_ops = {
3607 .resume = iommu_resume,
3608 .suspend = iommu_suspend,
3611 static void __init init_iommu_pm_ops(void)
3613 register_syscore_ops(&iommu_syscore_ops);
3617 static inline void init_iommu_pm_ops(void) {}
3618 #endif /* CONFIG_PM */
3620 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3622 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3623 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3624 rmrr->end_address <= rmrr->base_address ||
3625 arch_rmrr_sanity_check(rmrr))
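/*
 * Worked example (illustrative, hypothetical addresses, assuming 4KiB
 * PAGE_SIZE): an RMRR of [0x000e0000, 0x000effff] passes because the base
 * is page aligned, end_address + 1 == 0x000f0000 is page aligned and the
 * end lies above the base; an RMRR of [0x1000, 0x17ff] is rejected because
 * end_address + 1 == 0x1800 is not page aligned.
 */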
3631 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3633 struct acpi_dmar_reserved_memory *rmrr;
3634 struct dmar_rmrr_unit *rmrru;
3636 rmrr = (struct acpi_dmar_reserved_memory *)header;
3637 if (rmrr_sanity_check(rmrr)) {
3639 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3640 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3641 rmrr->base_address, rmrr->end_address,
3642 dmi_get_system_info(DMI_BIOS_VENDOR),
3643 dmi_get_system_info(DMI_BIOS_VERSION),
3644 dmi_get_system_info(DMI_PRODUCT_VERSION));
3645 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3648 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3652 rmrru->hdr = header;
3654 rmrru->base_address = rmrr->base_address;
3655 rmrru->end_address = rmrr->end_address;
3657 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3658 ((void *)rmrr) + rmrr->header.length,
3659 &rmrru->devices_cnt);
3660 if (rmrru->devices_cnt && rmrru->devices == NULL)
3663 list_add(&rmrru->list, &dmar_rmrr_units);
3672 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3674 struct dmar_atsr_unit *atsru;
3675 struct acpi_dmar_atsr *tmp;
3677 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3679 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3680 if (atsr->segment != tmp->segment)
3682 if (atsr->header.length != tmp->header.length)
3684 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3691 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3693 struct acpi_dmar_atsr *atsr;
3694 struct dmar_atsr_unit *atsru;
3696 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3699 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3700 atsru = dmar_find_atsr(atsr);
3704 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3709 * If memory is allocated from slab by ACPI _DSM method, we need to
3710 * copy the memory content because the memory buffer will be freed
3713 atsru->hdr = (void *)(atsru + 1);
3714 memcpy(atsru->hdr, hdr, hdr->length);
3715 atsru->include_all = atsr->flags & 0x1;
3716 if (!atsru->include_all) {
3717 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3718 (void *)atsr + atsr->header.length,
3719 &atsru->devices_cnt);
3720 if (atsru->devices_cnt && atsru->devices == NULL) {
3726 list_add_rcu(&atsru->list, &dmar_atsr_units);
3731 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3733 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3737 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3739 struct acpi_dmar_atsr *atsr;
3740 struct dmar_atsr_unit *atsru;
3742 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3743 atsru = dmar_find_atsr(atsr);
3745 list_del_rcu(&atsru->list);
3747 intel_iommu_free_atsr(atsru);
3753 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3757 struct acpi_dmar_atsr *atsr;
3758 struct dmar_atsr_unit *atsru;
3760 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3761 atsru = dmar_find_atsr(atsr);
3765 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3766 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3774 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3776 struct dmar_satc_unit *satcu;
3777 struct acpi_dmar_satc *tmp;
3779 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3781 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3782 if (satc->segment != tmp->segment)
3784 if (satc->header.length != tmp->header.length)
3786 if (memcmp(satc, tmp, satc->header.length) == 0)
3793 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3795 struct acpi_dmar_satc *satc;
3796 struct dmar_satc_unit *satcu;
3798 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3801 satc = container_of(hdr, struct acpi_dmar_satc, header);
3802 satcu = dmar_find_satc(satc);
3806 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3810 satcu->hdr = (void *)(satcu + 1);
3811 memcpy(satcu->hdr, hdr, hdr->length);
3812 satcu->atc_required = satc->flags & 0x1;
3813 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3814 (void *)satc + satc->header.length,
3815 &satcu->devices_cnt);
3816 if (satcu->devices_cnt && !satcu->devices) {
3820 list_add_rcu(&satcu->list, &dmar_satc_units);
3825 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3828 struct intel_iommu *iommu = dmaru->iommu;
3830 if (g_iommus[iommu->seq_id])
3833 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3837 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3838 pr_warn("%s: Doesn't support hardware pass through.\n",
3842 if (!ecap_sc_support(iommu->ecap) &&
3843 domain_update_iommu_snooping(iommu)) {
3844 pr_warn("%s: Doesn't support snooping.\n",
3848 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3849 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3850 pr_warn("%s: Doesn't support large page.\n",
3856 * Disable translation if already enabled prior to OS handover.
3858 if (iommu->gcmd & DMA_GCMD_TE)
3859 iommu_disable_translation(iommu);
3861 g_iommus[iommu->seq_id] = iommu;
3862 ret = iommu_init_domains(iommu);
3864 ret = iommu_alloc_root_entry(iommu);
3868 intel_svm_check(iommu);
3870 if (dmaru->ignored) {
3872 * we always have to disable PMRs or DMA may fail on this device
3875 iommu_disable_protect_mem_regions(iommu);
3879 intel_iommu_init_qi(iommu);
3880 iommu_flush_write_buffer(iommu);
3882 #ifdef CONFIG_INTEL_IOMMU_SVM
3883 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3884 ret = intel_svm_enable_prq(iommu);
3889 ret = dmar_set_interrupt(iommu);
3893 iommu_set_root_entry(iommu);
3894 iommu_enable_translation(iommu);
3896 iommu_disable_protect_mem_regions(iommu);
3900 disable_dmar_iommu(iommu);
3902 free_dmar_iommu(iommu);
3906 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3909 struct intel_iommu *iommu = dmaru->iommu;
3911 if (!intel_iommu_enabled)
3917 ret = intel_iommu_add(dmaru);
3919 disable_dmar_iommu(iommu);
3920 free_dmar_iommu(iommu);
3926 static void intel_iommu_free_dmars(void)
3928 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3929 struct dmar_atsr_unit *atsru, *atsr_n;
3930 struct dmar_satc_unit *satcu, *satc_n;
3932 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3933 list_del(&rmrru->list);
3934 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3938 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3939 list_del(&atsru->list);
3940 intel_iommu_free_atsr(atsru);
3942 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3943 list_del(&satcu->list);
3944 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3949 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3952 struct pci_bus *bus;
3953 struct pci_dev *bridge = NULL;
3955 struct acpi_dmar_atsr *atsr;
3956 struct dmar_atsr_unit *atsru;
3958 dev = pci_physfn(dev);
3959 for (bus = dev->bus; bus; bus = bus->parent) {
3961 /* If it's an integrated device, allow ATS */
3964 /* Connected via non-PCIe: no ATS */
3965 if (!pci_is_pcie(bridge) ||
3966 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3968 /* If we found the root port, look it up in the ATSR */
3969 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3974 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3975 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3976 if (atsr->segment != pci_domain_nr(dev->bus))
3979 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3980 if (tmp == &bridge->dev)
3983 if (atsru->include_all)
3993 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3996 struct dmar_rmrr_unit *rmrru;
3997 struct dmar_atsr_unit *atsru;
3998 struct dmar_satc_unit *satcu;
3999 struct acpi_dmar_atsr *atsr;
4000 struct acpi_dmar_reserved_memory *rmrr;
4001 struct acpi_dmar_satc *satc;
4003 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4006 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4007 rmrr = container_of(rmrru->hdr,
4008 struct acpi_dmar_reserved_memory, header);
4009 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4010 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4011 ((void *)rmrr) + rmrr->header.length,
4012 rmrr->segment, rmrru->devices,
4013 rmrru->devices_cnt);
4016 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4017 dmar_remove_dev_scope(info, rmrr->segment,
4018 rmrru->devices, rmrru->devices_cnt);
4022 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4023 if (atsru->include_all)
4026 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4027 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4028 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4029 (void *)atsr + atsr->header.length,
4030 atsr->segment, atsru->devices,
4031 atsru->devices_cnt);
4036 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4037 if (dmar_remove_dev_scope(info, atsr->segment,
4038 atsru->devices, atsru->devices_cnt))
4042 list_for_each_entry(satcu, &dmar_satc_units, list) {
4043 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
4044 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4045 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
4046 (void *)satc + satc->header.length,
4047 satc->segment, satcu->devices,
4048 satcu->devices_cnt);
4053 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4054 if (dmar_remove_dev_scope(info, satc->segment,
4055 satcu->devices, satcu->devices_cnt))
4063 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4064 unsigned long val, void *v)
4066 struct memory_notify *mhp = v;
4067 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4068 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4072 case MEM_GOING_ONLINE:
4073 if (iommu_domain_identity_map(si_domain,
4074 start_vpfn, last_vpfn)) {
4075 pr_warn("Failed to build identity map for [%lx-%lx]\n",
4076 start_vpfn, last_vpfn);
4082 case MEM_CANCEL_ONLINE:
4084 struct dmar_drhd_unit *drhd;
4085 struct intel_iommu *iommu;
4086 struct page *freelist;
4088 freelist = domain_unmap(si_domain,
4089 start_vpfn, last_vpfn,
4093 for_each_active_iommu(iommu, drhd)
4094 iommu_flush_iotlb_psi(iommu, si_domain,
4095 start_vpfn, mhp->nr_pages,
4098 dma_free_pagelist(freelist);
4106 static struct notifier_block intel_iommu_memory_nb = {
4107 .notifier_call = intel_iommu_memory_notifier,
4111 static void intel_disable_iommus(void)
4113 struct intel_iommu *iommu = NULL;
4114 struct dmar_drhd_unit *drhd;
4116 for_each_iommu(iommu, drhd)
4117 iommu_disable_translation(iommu);
4120 void intel_iommu_shutdown(void)
4122 struct dmar_drhd_unit *drhd;
4123 struct intel_iommu *iommu = NULL;
4125 if (no_iommu || dmar_disabled)
4128 down_write(&dmar_global_lock);
4130 /* Disable PMRs explicitly here. */
4131 for_each_iommu(iommu, drhd)
4132 iommu_disable_protect_mem_regions(iommu);
4134 /* Make sure the IOMMUs are switched off */
4135 intel_disable_iommus();
4137 up_write(&dmar_global_lock);
4140 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4142 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4144 return container_of(iommu_dev, struct intel_iommu, iommu);
4147 static ssize_t version_show(struct device *dev,
4148 struct device_attribute *attr, char *buf)
4150 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4151 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4152 return sprintf(buf, "%d:%d\n",
4153 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4155 static DEVICE_ATTR_RO(version);
4157 static ssize_t address_show(struct device *dev,
4158 struct device_attribute *attr, char *buf)
4160 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4161 return sprintf(buf, "%llx\n", iommu->reg_phys);
4163 static DEVICE_ATTR_RO(address);
4165 static ssize_t cap_show(struct device *dev,
4166 struct device_attribute *attr, char *buf)
4168 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4169 return sprintf(buf, "%llx\n", iommu->cap);
4171 static DEVICE_ATTR_RO(cap);
4173 static ssize_t ecap_show(struct device *dev,
4174 struct device_attribute *attr, char *buf)
4176 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4177 return sprintf(buf, "%llx\n", iommu->ecap);
4179 static DEVICE_ATTR_RO(ecap);
4181 static ssize_t domains_supported_show(struct device *dev,
4182 struct device_attribute *attr, char *buf)
4184 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4185 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4187 static DEVICE_ATTR_RO(domains_supported);
4189 static ssize_t domains_used_show(struct device *dev,
4190 struct device_attribute *attr, char *buf)
4192 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4193 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4194 cap_ndoms(iommu->cap)));
4196 static DEVICE_ATTR_RO(domains_used);
4198 static struct attribute *intel_iommu_attrs[] = {
4199 &dev_attr_version.attr,
4200 &dev_attr_address.attr,
4202 &dev_attr_ecap.attr,
4203 &dev_attr_domains_supported.attr,
4204 &dev_attr_domains_used.attr,
4208 static struct attribute_group intel_iommu_group = {
4209 .name = "intel-iommu",
4210 .attrs = intel_iommu_attrs,
4213 const struct attribute_group *intel_iommu_groups[] = {
4218 static inline bool has_external_pci(void)
4220 struct pci_dev *pdev = NULL;
4222 for_each_pci_dev(pdev)
4223 if (pdev->external_facing)
4229 static int __init platform_optin_force_iommu(void)
4231 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
4234 if (no_iommu || dmar_disabled)
4235 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4238 * If Intel-IOMMU is disabled by default, we will apply identity
4239 * map for all devices except those marked as being untrusted.
4242 iommu_set_default_passthrough(false);
4250 static int __init probe_acpi_namespace_devices(void)
4252 struct dmar_drhd_unit *drhd;
4253 /* To avoid a -Wunused-but-set-variable warning. */
4254 struct intel_iommu *iommu __maybe_unused;
4258 for_each_active_iommu(iommu, drhd) {
4259 for_each_active_dev_scope(drhd->devices,
4260 drhd->devices_cnt, i, dev) {
4261 struct acpi_device_physical_node *pn;
4262 struct iommu_group *group;
4263 struct acpi_device *adev;
4265 if (dev->bus != &acpi_bus_type)
4268 adev = to_acpi_device(dev);
4269 mutex_lock(&adev->physical_node_lock);
4270 list_for_each_entry(pn,
4271 &adev->physical_node_list, node) {
4272 group = iommu_group_get(pn->dev);
4274 iommu_group_put(group);
4278 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4279 ret = iommu_probe_device(pn->dev);
4283 mutex_unlock(&adev->physical_node_lock);
4293 int __init intel_iommu_init(void)
4296 struct dmar_drhd_unit *drhd;
4297 struct intel_iommu *iommu;
4300 * Intel IOMMU is required for a TXT/tboot launch or platform
4301 * opt in, so enforce that.
4303 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4304 platform_optin_force_iommu();
4306 if (iommu_init_mempool()) {
4308 panic("tboot: Failed to initialize iommu memory\n");
4312 down_write(&dmar_global_lock);
4313 if (dmar_table_init()) {
4315 panic("tboot: Failed to initialize DMAR table\n");
4319 if (dmar_dev_scope_init() < 0) {
4321 panic("tboot: Failed to initialize DMAR device scope\n");
4325 up_write(&dmar_global_lock);
4328 * The bus notifier takes the dmar_global_lock, so lockdep will
4329 * complain later when we register it under the lock.
4331 dmar_register_bus_notifier();
4333 down_write(&dmar_global_lock);
4336 intel_iommu_debugfs_init();
4338 if (no_iommu || dmar_disabled) {
4340 * We exit the function here to ensure IOMMU's remapping and
4341 * mempool aren't set up, which means that the IOMMU's PMRs
4342 * won't be disabled via the call to init_dmars(). So disable
4343 * them explicitly here. The PMRs were set up by tboot prior to
4344 * calling SENTER, but the kernel is expected to reset/tear
4347 if (intel_iommu_tboot_noforce) {
4348 for_each_iommu(iommu, drhd)
4349 iommu_disable_protect_mem_regions(iommu);
4353 * Make sure the IOMMUs are switched off, even when we
4354 * boot into a kexec kernel and the previous kernel left
4357 intel_disable_iommus();
4361 if (list_empty(&dmar_rmrr_units))
4362 pr_info("No RMRR found\n");
4364 if (list_empty(&dmar_atsr_units))
4365 pr_info("No ATSR found\n");
4367 if (list_empty(&dmar_satc_units))
4368 pr_info("No SATC found\n");
4371 intel_iommu_gfx_mapped = 1;
4373 init_no_remapping_devices();
4378 panic("tboot: Failed to initialize DMARs\n");
4379 pr_err("Initialization failed\n");
4382 up_write(&dmar_global_lock);
4384 init_iommu_pm_ops();
4386 down_read(&dmar_global_lock);
4387 for_each_active_iommu(iommu, drhd) {
4389 * The flush queue implementation does not perform
4390 * page-selective invalidations that are required for efficient
4391 * TLB flushes in virtual environments. The benefit of batching
4392 * is likely to be much lower than the overhead of synchronizing
4393 * the virtual and physical IOMMU page-tables.
4395 if (cap_caching_mode(iommu->cap)) {
4396 pr_info_once("IOMMU batching disallowed due to virtualization\n");
4397 iommu_set_dma_strict();
4399 iommu_device_sysfs_add(&iommu->iommu, NULL,
4402 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4404 up_read(&dmar_global_lock);
4406 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4407 if (si_domain && !hw_pass_through)
4408 register_memory_notifier(&intel_iommu_memory_nb);
4410 down_read(&dmar_global_lock);
4411 if (probe_acpi_namespace_devices())
4412 pr_warn("ACPI name space devices didn't probe correctly\n");
4414 /* Finally, we enable the DMA remapping hardware. */
4415 for_each_iommu(iommu, drhd) {
4416 if (!drhd->ignored && !translation_pre_enabled(iommu))
4417 iommu_enable_translation(iommu);
4419 iommu_disable_protect_mem_regions(iommu);
4421 up_read(&dmar_global_lock);
4423 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4425 intel_iommu_enabled = 1;
4430 intel_iommu_free_dmars();
4431 up_write(&dmar_global_lock);
4432 iommu_exit_mempool();
4436 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4438 struct device_domain_info *info = opaque;
4440 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4445 * NB - intel-iommu lacks any sort of reference counting for the users of
4446 * dependent devices. If multiple endpoints have intersecting dependent
4447 * devices, unbinding the driver from any one of them will possibly leave
4448 * the others unable to operate.
4450 static void domain_context_clear(struct device_domain_info *info)
4452 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4455 pci_for_each_dma_alias(to_pci_dev(info->dev),
4456 &domain_context_clear_one_cb, info);
4459 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4461 struct dmar_domain *domain;
4462 struct intel_iommu *iommu;
4463 unsigned long flags;
4465 assert_spin_locked(&device_domain_lock);
4470 iommu = info->iommu;
4471 domain = info->domain;
4473 if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
4474 if (dev_is_pci(info->dev) && sm_supported(iommu))
4475 intel_pasid_tear_down_entry(iommu, info->dev,
4476 PASID_RID2PASID, false);
4478 iommu_disable_dev_iotlb(info);
4479 domain_context_clear(info);
4480 intel_pasid_free_table(info->dev);
4483 unlink_domain_info(info);
4485 spin_lock_irqsave(&iommu->lock, flags);
4486 domain_detach_iommu(domain, iommu);
4487 spin_unlock_irqrestore(&iommu->lock, flags);
4489 free_devinfo_mem(info);
4492 static void dmar_remove_one_dev_info(struct device *dev)
4494 struct device_domain_info *info;
4495 unsigned long flags;
4497 spin_lock_irqsave(&device_domain_lock, flags);
4498 info = get_domain_info(dev);
4500 __dmar_remove_one_dev_info(info);
4501 spin_unlock_irqrestore(&device_domain_lock, flags);
4504 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4508 /* calculate AGAW */
4509 domain->gaw = guest_width;
4510 adjust_width = guestwidth_to_adjustwidth(guest_width);
4511 domain->agaw = width_to_agaw(adjust_width);
4513 domain->iommu_coherency = false;
4514 domain->iommu_snooping = false;
4515 domain->iommu_superpage = 0;
4516 domain->max_addr = 0;
4518 /* always allocate the top pgd */
4519 domain->pgd = alloc_pgtable_page(domain->nid);
4522 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4526 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4528 struct dmar_domain *dmar_domain;
4529 struct iommu_domain *domain;
4532 case IOMMU_DOMAIN_DMA:
4533 case IOMMU_DOMAIN_UNMANAGED:
4534 dmar_domain = alloc_domain(0);
4536 pr_err("Can't allocate dmar_domain\n");
4539 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4540 pr_err("Domain initialization failed\n");
4541 domain_exit(dmar_domain);
4545 if (type == IOMMU_DOMAIN_DMA &&
4546 iommu_get_dma_cookie(&dmar_domain->domain))
4549 domain = &dmar_domain->domain;
4550 domain->geometry.aperture_start = 0;
4551 domain->geometry.aperture_end =
4552 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4553 domain->geometry.force_aperture = true;
4556 case IOMMU_DOMAIN_IDENTITY:
4557 return &si_domain->domain;
4565 static void intel_iommu_domain_free(struct iommu_domain *domain)
4567 if (domain != &si_domain->domain)
4568 domain_exit(to_dmar_domain(domain));
4572 * Check whether a @domain can be attached to @dev through the
4573 * aux-domain attach/detach APIs.
4576 is_aux_domain(struct device *dev, struct iommu_domain *domain)
4578 struct device_domain_info *info = get_domain_info(dev);
4580 return info && info->auxd_enabled &&
4581 domain->type == IOMMU_DOMAIN_UNMANAGED;
4584 static inline struct subdev_domain_info *
4585 lookup_subdev_info(struct dmar_domain *domain, struct device *dev)
4587 struct subdev_domain_info *sinfo;
4589 if (!list_empty(&domain->subdevices)) {
4590 list_for_each_entry(sinfo, &domain->subdevices, link_domain) {
4591 if (sinfo->pdev == dev)
4599 static int auxiliary_link_device(struct dmar_domain *domain,
4602 struct device_domain_info *info = get_domain_info(dev);
4603 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4605 assert_spin_locked(&device_domain_lock);
4610 sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
4613 sinfo->domain = domain;
4615 list_add(&sinfo->link_phys, &info->subdevices);
4616 list_add(&sinfo->link_domain, &domain->subdevices);
4619 return ++sinfo->users;
4622 static int auxiliary_unlink_device(struct dmar_domain *domain,
4625 struct device_domain_info *info = get_domain_info(dev);
4626 struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev);
4629 assert_spin_locked(&device_domain_lock);
4630 if (WARN_ON(!info || !sinfo || sinfo->users <= 0))
4633 ret = --sinfo->users;
4635 list_del(&sinfo->link_phys);
4636 list_del(&sinfo->link_domain);
4643 static int aux_domain_add_dev(struct dmar_domain *domain,
4647 unsigned long flags;
4648 struct intel_iommu *iommu;
4650 iommu = device_to_iommu(dev, NULL, NULL);
4654 if (domain->default_pasid <= 0) {
4657 /* No private data needed for the default pasid */
4658 pasid = ioasid_alloc(NULL, PASID_MIN,
4659 pci_max_pasids(to_pci_dev(dev)) - 1,
4661 if (pasid == INVALID_IOASID) {
4662 pr_err("Can't allocate default pasid\n");
4665 domain->default_pasid = pasid;
4668 spin_lock_irqsave(&device_domain_lock, flags);
4669 ret = auxiliary_link_device(domain, dev);
4674 * Subdevices from the same physical device can be attached to the
4675 * same domain. For such cases, only the first subdevice attachment
4676 * needs to go through the full steps in this function. So if ret >
4683 * iommu->lock must be held to attach the domain to the iommu and set up
4684 * the pasid entry for second level translation.
4686 spin_lock(&iommu->lock);
4687 ret = domain_attach_iommu(domain, iommu);
4691 /* Set up the PASID entry for mediated devices: */
4692 if (domain_use_first_level(domain))
4693 ret = domain_setup_first_level(iommu, domain, dev,
4694 domain->default_pasid);
4696 ret = intel_pasid_setup_second_level(iommu, domain, dev,
4697 domain->default_pasid);
4701 spin_unlock(&iommu->lock);
4703 spin_unlock_irqrestore(&device_domain_lock, flags);
4708 domain_detach_iommu(domain, iommu);
4710 spin_unlock(&iommu->lock);
4711 auxiliary_unlink_device(domain, dev);
4713 spin_unlock_irqrestore(&device_domain_lock, flags);
4714 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4715 ioasid_put(domain->default_pasid);
4720 static void aux_domain_remove_dev(struct dmar_domain *domain,
4723 struct device_domain_info *info;
4724 struct intel_iommu *iommu;
4725 unsigned long flags;
4727 if (!is_aux_domain(dev, &domain->domain))
4730 spin_lock_irqsave(&device_domain_lock, flags);
4731 info = get_domain_info(dev);
4732 iommu = info->iommu;
4734 if (!auxiliary_unlink_device(domain, dev)) {
4735 spin_lock(&iommu->lock);
4736 intel_pasid_tear_down_entry(iommu, dev,
4737 domain->default_pasid, false);
4738 domain_detach_iommu(domain, iommu);
4739 spin_unlock(&iommu->lock);
4742 spin_unlock_irqrestore(&device_domain_lock, flags);
4744 if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
4745 ioasid_put(domain->default_pasid);
4748 static int prepare_domain_attach_device(struct iommu_domain *domain,
4751 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4752 struct intel_iommu *iommu;
4755 iommu = device_to_iommu(dev, NULL, NULL);
4759 if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
4760 !ecap_nest(iommu->ecap)) {
4761 dev_err(dev, "%s: iommu does not support nested translation\n",
4766 /* check if this iommu agaw is sufficient for max mapped address */
4767 addr_width = agaw_to_width(iommu->agaw);
4768 if (addr_width > cap_mgaw(iommu->cap))
4769 addr_width = cap_mgaw(iommu->cap);
4771 if (dmar_domain->max_addr > (1LL << addr_width)) {
4772 dev_err(dev, "%s: iommu width (%d) is not "
4773 "sufficient for the mapped address (%llx)\n",
4774 __func__, addr_width, dmar_domain->max_addr);
4777 dmar_domain->gaw = addr_width;
4780 * Knock out extra levels of page tables if necessary
4782 while (iommu->agaw < dmar_domain->agaw) {
4783 struct dma_pte *pte;
4785 pte = dmar_domain->pgd;
4786 if (dma_pte_present(pte)) {
4787 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4788 free_pgtable_page(pte);
4790 dmar_domain->agaw--;
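/*
 * Worked example (illustrative): a domain created with agaw 2 (48-bit,
 * 4-level table) being attached to an iommu whose agaw is 1 (39-bit,
 * 3-level table) drops one top level here: the child table referenced by
 * the old top level's first (present) entry becomes the new pgd, the old
 * top page is freed, and the domain's agaw drops to 1.
 */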
4796 static int intel_iommu_attach_device(struct iommu_domain *domain,
4801 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4802 device_is_rmrr_locked(dev)) {
4803 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4807 if (is_aux_domain(dev, domain))
4810 /* normally dev is not mapped */
4811 if (unlikely(domain_context_mapped(dev))) {
4812 struct dmar_domain *old_domain;
4814 old_domain = find_domain(dev);
4816 dmar_remove_one_dev_info(dev);
4819 ret = prepare_domain_attach_device(domain, dev);
4823 return domain_add_dev_info(to_dmar_domain(domain), dev);
4826 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
4831 if (!is_aux_domain(dev, domain))
4834 ret = prepare_domain_attach_device(domain, dev);
4838 return aux_domain_add_dev(to_dmar_domain(domain), dev);
4841 static void intel_iommu_detach_device(struct iommu_domain *domain,
4844 dmar_remove_one_dev_info(dev);
4847 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
4850 aux_domain_remove_dev(to_dmar_domain(domain), dev);
4853 #ifdef CONFIG_INTEL_IOMMU_SVM
4855 * 2D array for converting and sanitizing IOMMU generic TLB granularity to
4856 * VT-d granularity. Invalidation is typically included in the unmap operation
4857 * as a result of DMA or VFIO unmap. However, for assigned devices the guest
4858 * owns the first-level page tables. Invalidations of translation caches in the
4859 * guest are trapped and passed down to the host.
4861 * The vIOMMU in the guest will only expose first-level page tables, therefore
4862 * we do not support IOTLB granularity for requests without PASID (second level).
4864 * For example, to find the VT-d granularity encoding for IOTLB
4865 * type and page selective granularity within PASID:
4866 * X: indexed by iommu cache type
4867 * Y: indexed by enum iommu_inv_granularity
4868 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
4872 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
4874 * PASID based IOTLB invalidation: PASID selective (per PASID),
4875 * page selective (address granularity)
4877 {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
4878 /* PASID based dev TLBs */
4879 {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
4881 {-EINVAL, -EINVAL, -EINVAL}
4884 static inline int to_vtd_granularity(int type, int granu)
4886 return inv_type_granu_table[type][granu];
4889 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
4891 u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
4893 /* VT-d size is encoded as 2^size of 4K pages: 0 for 4K, 9 for 2MB, etc.
4894 * The IOMMU cache invalidate API passes granu_size in bytes and the number
4895 * of granules of that size in contiguous memory.
4897 return order_base_2(nr_pages);
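/*
 * Worked example (illustrative): granu_size = 4096 and nr_granules = 512
 * cover 2MiB, i.e. nr_pages = 512, so the function returns
 * order_base_2(512) == 9, which is the VT-d encoding for a 2MB range.
 */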
4901 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
4902 struct iommu_cache_invalidate_info *inv_info)
4904 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4905 struct device_domain_info *info;
4906 struct intel_iommu *iommu;
4907 unsigned long flags;
4914 if (!inv_info || !dmar_domain)
4917 if (!dev || !dev_is_pci(dev))
4920 iommu = device_to_iommu(dev, &bus, &devfn);
4924 if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
4927 spin_lock_irqsave(&device_domain_lock, flags);
4928 spin_lock(&iommu->lock);
4929 info = get_domain_info(dev);
4934 did = dmar_domain->iommu_did[iommu->seq_id];
4935 sid = PCI_DEVID(bus, devfn);
4937 /* Size is only valid in address selective invalidation */
4938 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
4939 size = to_vtd_size(inv_info->granu.addr_info.granule_size,
4940 inv_info->granu.addr_info.nb_granules);
4942 for_each_set_bit(cache_type,
4943 (unsigned long *)&inv_info->cache,
4944 IOMMU_CACHE_INV_TYPE_NR) {
4949 granu = to_vtd_granularity(cache_type, inv_info->granularity);
4950 if (granu == -EINVAL) {
4951 pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
4952 cache_type, inv_info->granularity);
4957 * PASID is stored in different locations based on the granularity.
4960 if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
4961 (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
4962 pasid = inv_info->granu.pasid_info.pasid;
4963 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
4964 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
4965 pasid = inv_info->granu.addr_info.pasid;
4967 switch (BIT(cache_type)) {
4968 case IOMMU_CACHE_INV_TYPE_IOTLB:
4969 /* HW will ignore LSB bits based on address mask */
4970 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
4972 (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
4973 pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
4974 inv_info->granu.addr_info.addr, size);
4978 * If granu is PASID-selective, address is ignored.
4979 * We use npages = -1 to indicate that.
4981 qi_flush_piotlb(iommu, did, pasid,
4982 mm_to_dma_pfn(inv_info->granu.addr_info.addr),
4983 (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
4984 inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
4986 if (!info->ats_enabled)
4989 * Always flush device IOTLB if ATS is enabled. vIOMMU
4990 * in the guest may assume IOTLB flush is inclusive,
4991 * which is more efficient.
4994 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
4996 * PASID-based device TLB invalidation does not support
4997 * IOMMU_INV_GRANU_PASID granularity; it only supports
4998 * IOMMU_INV_GRANU_ADDR.
4999 * The equivalent here is to set the size to cover the
5000 * entire 64-bit address range. The user only provides PASID
5001 * info without address info, so we set addr to 0.
5003 if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
5004 size = 64 - VTD_PAGE_SHIFT;
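/* i.e. an order of 2^52 4K pages, which covers the full 64-bit address space */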
5006 } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
5007 addr = inv_info->granu.addr_info.addr;
5010 if (info->ats_enabled)
5011 qi_flush_dev_iotlb_pasid(iommu, sid,
5013 info->ats_qdep, addr,
5016 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5019 dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5025 spin_unlock(&iommu->lock);
5026 spin_unlock_irqrestore(&device_domain_lock, flags);
5032 static int intel_iommu_map(struct iommu_domain *domain,
5033 unsigned long iova, phys_addr_t hpa,
5034 size_t size, int iommu_prot, gfp_t gfp)
5036 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5040 if (iommu_prot & IOMMU_READ)
5041 prot |= DMA_PTE_READ;
5042 if (iommu_prot & IOMMU_WRITE)
5043 prot |= DMA_PTE_WRITE;
5044 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5045 prot |= DMA_PTE_SNP;
5047 max_addr = iova + size;
5048 if (dmar_domain->max_addr < max_addr) {
5051 /* check if minimum agaw is sufficient for mapped address */
5052 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5053 if (end < max_addr) {
5054 pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
5056 __func__, dmar_domain->gaw, max_addr);
5059 dmar_domain->max_addr = max_addr;
5061 /* Round up size to next multiple of PAGE_SIZE, if it and
5062 the low bits of hpa would take us onto the next page */
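/*
 * e.g. an hpa offset of 0x800 with size 0x1000 spans two 4K pages, so
 * aligned_nrpages() returns 2 below (illustrative values only).
 */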
5063 size = aligned_nrpages(hpa, size);
5064 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5065 hpa >> VTD_PAGE_SHIFT, size, prot);
5068 static int intel_iommu_map_pages(struct iommu_domain *domain,
5069 unsigned long iova, phys_addr_t paddr,
5070 size_t pgsize, size_t pgcount,
5071 int prot, gfp_t gfp, size_t *mapped)
5073 unsigned long pgshift = __ffs(pgsize);
5074 size_t size = pgcount << pgshift;
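/*
 * e.g. pgsize = SZ_2M gives pgshift = 21, so pgcount = 16 maps 32MB;
 * iova and paddr must then both be 2MB-aligned (illustrative values only).
 */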
5077 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
5080 if (!IS_ALIGNED(iova | paddr, pgsize))
5083 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
5090 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5091 unsigned long iova, size_t size,
5092 struct iommu_iotlb_gather *gather)
5094 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5095 unsigned long start_pfn, last_pfn;
5098 /* Cope with horrid API which requires us to unmap more than the
5099 size argument if it happens to be a large-page mapping. */
5100 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5102 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5103 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
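/*
 * e.g. a 4K unmap that lands inside a 2MB superpage (level 2,
 * level_to_offset_bits(2) == 9) is widened here to 4K << 9 = 2MB.
 */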
5105 start_pfn = iova >> VTD_PAGE_SHIFT;
5106 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5108 gather->freelist = domain_unmap(dmar_domain, start_pfn,
5109 last_pfn, gather->freelist);
5111 if (dmar_domain->max_addr == iova + size)
5112 dmar_domain->max_addr = iova;
5114 iommu_iotlb_gather_add_page(domain, gather, iova, size);
5119 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
5121 size_t pgsize, size_t pgcount,
5122 struct iommu_iotlb_gather *gather)
5124 unsigned long pgshift = __ffs(pgsize);
5125 size_t size = pgcount << pgshift;
5127 return intel_iommu_unmap(domain, iova, size, gather);
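/*
 * Flush the IOTLB on every IOMMU this domain is attached to for the range
 * accumulated in @gather, then free the page-table pages collected on
 * gather->freelist by the preceding unmap calls.
 */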
5130 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
5131 struct iommu_iotlb_gather *gather)
5133 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5134 unsigned long iova_pfn = IOVA_PFN(gather->start);
5135 size_t size = gather->end - gather->start;
5136 unsigned long start_pfn;
5137 unsigned long nrpages;
5140 nrpages = aligned_nrpages(gather->start, size);
5141 start_pfn = mm_to_dma_pfn(iova_pfn);
5143 for_each_domain_iommu(iommu_id, dmar_domain)
5144 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5145 start_pfn, nrpages, !gather->freelist, 0);
5147 dma_free_pagelist(gather->freelist);
5150 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5153 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5154 struct dma_pte *pte;
5158 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5159 if (pte && dma_pte_present(pte))
5160 phys = dma_pte_addr(pte) +
5161 (iova & (BIT_MASK(level_to_offset_bits(level) +
5162 VTD_PAGE_SHIFT) - 1));
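/*
 * For a 4K leaf (level 1) the mask keeps the low 12 bits of the IOVA; for
 * a 2MB superpage (level 2) it keeps the low 21 bits, so phys is the
 * superpage base plus the offset within it.
 */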
5167 static bool intel_iommu_capable(enum iommu_cap cap)
5169 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5170 return domain_update_iommu_snooping(NULL);
5171 if (cap == IOMMU_CAP_INTR_REMAP)
5172 return irq_remapping_enabled == 1;
5177 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5179 struct intel_iommu *iommu;
5181 iommu = device_to_iommu(dev, NULL, NULL);
5183 return ERR_PTR(-ENODEV);
5185 if (translation_pre_enabled(iommu))
5186 dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
5188 return &iommu->iommu;
5191 static void intel_iommu_release_device(struct device *dev)
5193 struct intel_iommu *iommu;
5195 iommu = device_to_iommu(dev, NULL, NULL);
5199 dmar_remove_one_dev_info(dev);
5201 set_dma_ops(dev, NULL);
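/*
 * Choose the DMA API backend once the device's default domain is known:
 * route it through the IOMMU DMA ops for DMA-API domains, otherwise clear
 * the DMA ops so the device falls back to direct mapping.
 */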
5204 static void intel_iommu_probe_finalize(struct device *dev)
5206 struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
5208 if (domain && domain->type == IOMMU_DOMAIN_DMA)
5209 iommu_setup_dma_ops(dev, 0, U64_MAX);
5211 set_dma_ops(dev, NULL);
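/*
 * Report regions the device must keep identity-mapped or must avoid for
 * IOVA allocation: RMRR ranges from the ACPI tables, the legacy ISA/floppy
 * window when CONFIG_INTEL_IOMMU_FLOPPY_WA is set, and the IOAPIC range.
 */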
5214 static void intel_iommu_get_resv_regions(struct device *device,
5215 struct list_head *head)
5217 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5218 struct iommu_resv_region *reg;
5219 struct dmar_rmrr_unit *rmrr;
5220 struct device *i_dev;
5223 down_read(&dmar_global_lock);
5224 for_each_rmrr_units(rmrr) {
5225 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5227 struct iommu_resv_region *resv;
5228 enum iommu_resv_type type;
5231 if (i_dev != device &&
5232 !is_downstream_to_pci_bridge(device, i_dev))
5235 length = rmrr->end_address - rmrr->base_address + 1;
5237 type = device_rmrr_is_relaxable(device) ?
5238 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5240 resv = iommu_alloc_resv_region(rmrr->base_address,
5241 length, prot, type);
5245 list_add_tail(&resv->list, head);
5248 up_read(&dmar_global_lock);
5250 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5251 if (dev_is_pci(device)) {
5252 struct pci_dev *pdev = to_pci_dev(device);
5254 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5255 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5256 IOMMU_RESV_DIRECT_RELAXABLE);
5258 list_add_tail(®->list, head);
5261 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5263 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5264 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5268 list_add_tail(®->list, head);
5271 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5273 struct device_domain_info *info;
5274 struct context_entry *context;
5275 struct dmar_domain *domain;
5276 unsigned long flags;
5280 domain = find_domain(dev);
5284 spin_lock_irqsave(&device_domain_lock, flags);
5285 spin_lock(&iommu->lock);
5288 info = get_domain_info(dev);
5289 if (!info || !info->pasid_supported)
5292 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5293 if (WARN_ON(!context))
5296 ctx_lo = context[0].lo;
5298 if (!(ctx_lo & CONTEXT_PASIDE)) {
5299 ctx_lo |= CONTEXT_PASIDE;
5300 context[0].lo = ctx_lo;
5302 iommu->flush.flush_context(iommu,
5303 domain->iommu_did[iommu->seq_id],
5304 PCI_DEVID(info->bus, info->devfn),
5305 DMA_CCMD_MASK_NOBIT,
5306 DMA_CCMD_DEVICE_INVL);
5309 /* Enable PASID support in the device, if it wasn't already */
5310 if (!info->pasid_enabled)
5311 iommu_enable_dev_iotlb(info);
5316 spin_unlock(&iommu->lock);
5317 spin_unlock_irqrestore(&device_domain_lock, flags);
5322 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5324 if (dev_is_pci(dev))
5325 return pci_device_group(dev);
5326 return generic_device_group(dev);
5329 static int intel_iommu_enable_auxd(struct device *dev)
5331 struct device_domain_info *info;
5332 struct intel_iommu *iommu;
5333 unsigned long flags;
5336 iommu = device_to_iommu(dev, NULL, NULL);
5337 if (!iommu || dmar_disabled)
5340 if (!sm_supported(iommu) || !pasid_supported(iommu))
5343 ret = intel_iommu_enable_pasid(iommu, dev);
5347 spin_lock_irqsave(&device_domain_lock, flags);
5348 info = get_domain_info(dev);
5349 info->auxd_enabled = 1;
5350 spin_unlock_irqrestore(&device_domain_lock, flags);
5355 static int intel_iommu_disable_auxd(struct device *dev)
5357 struct device_domain_info *info;
5358 unsigned long flags;
5360 spin_lock_irqsave(&device_domain_lock, flags);
5361 info = get_domain_info(dev);
5362 if (!WARN_ON(!info))
5363 info->auxd_enabled = 0;
5364 spin_unlock_irqrestore(&device_domain_lock, flags);
5369 static int intel_iommu_enable_sva(struct device *dev)
5371 struct device_domain_info *info = get_domain_info(dev);
5372 struct intel_iommu *iommu;
5375 if (!info || dmar_disabled)
5378 iommu = info->iommu;
5382 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
5385 if (intel_iommu_enable_pasid(iommu, dev))
5388 if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
5391 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
5393 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
5398 static int intel_iommu_disable_sva(struct device *dev)
5400 struct device_domain_info *info = get_domain_info(dev);
5401 struct intel_iommu *iommu = info->iommu;
5404 ret = iommu_unregister_device_fault_handler(dev);
5406 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
5412 * A PCI Express designated vendor-specific extended capability is defined
5413 * in section 3.7 of the Intel scalable I/O virtualization technical spec
5414 * for system software and tools to detect endpoint devices supporting the
5415 * Intel scalable I/O virtualization without host driver dependency.
5417 * Returns the address of the matching extended capability structure within
5418 * the device's PCI configuration space, or 0 if the device does not support it.
5421 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5426 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
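/*
 * 0x23 is the PCI Express DVSEC extended capability ID; the vendor ID is
 * read from DVSEC header 1 (offset +4) and the DVSEC ID from header 2
 * (offset +8). Intel SIOV advertises DVSEC ID 5.
 */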
5428 pci_read_config_word(pdev, pos + 4, &vendor);
5429 pci_read_config_word(pdev, pos + 8, &id);
5430 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5433 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5440 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5442 struct device_domain_info *info = get_domain_info(dev);
5444 if (feat == IOMMU_DEV_FEAT_AUX) {
5447 if (!dev_is_pci(dev) || dmar_disabled ||
5448 !scalable_mode_support() || !pasid_mode_support())
5451 ret = pci_pasid_features(to_pci_dev(dev));
5455 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5458 if (feat == IOMMU_DEV_FEAT_IOPF)
5459 return info && info->pri_supported;
5461 if (feat == IOMMU_DEV_FEAT_SVA)
5462 return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
5463 info->pasid_supported && info->pri_supported &&
5464 info->ats_supported;
5470 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5473 case IOMMU_DEV_FEAT_AUX:
5474 return intel_iommu_enable_auxd(dev);
5476 case IOMMU_DEV_FEAT_IOPF:
5477 return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
5479 case IOMMU_DEV_FEAT_SVA:
5480 return intel_iommu_enable_sva(dev);
5488 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5491 case IOMMU_DEV_FEAT_AUX:
5492 return intel_iommu_disable_auxd(dev);
5494 case IOMMU_DEV_FEAT_IOPF:
5497 case IOMMU_DEV_FEAT_SVA:
5498 return intel_iommu_disable_sva(dev);
5506 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5508 struct device_domain_info *info = get_domain_info(dev);
5510 if (feat == IOMMU_DEV_FEAT_AUX)
5511 return scalable_mode_support() && info && info->auxd_enabled;
5517 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5519 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5521 return dmar_domain->default_pasid > 0 ?
5522 dmar_domain->default_pasid : -EINVAL;
5525 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5528 return attach_deferred(dev);
5532 intel_iommu_enable_nesting(struct iommu_domain *domain)
5534 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5535 unsigned long flags;
5538 spin_lock_irqsave(&device_domain_lock, flags);
5539 if (list_empty(&dmar_domain->devices)) {
5540 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
5541 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
5544 spin_unlock_irqrestore(&device_domain_lock, flags);
5550 * Check that the device does not live on an external-facing PCI port that is
5551 * marked as untrusted. Such devices must not be allowed to apply quirks and
5552 * thus bypass the IOMMU restrictions.
5554 static bool risky_device(struct pci_dev *pdev)
5556 if (pdev->untrusted) {
5558 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
5559 pdev->vendor, pdev->device);
5560 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
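/*
 * Flush the CPU cache lines that back newly written PTEs so a non-coherent
 * IOMMU's page-table walk observes them; called from
 * intel_iommu_iotlb_sync_map() when the domain's IOMMUs cannot snoop the
 * CPU caches.
 */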
5566 static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
5567 unsigned long clf_pages)
5569 struct dma_pte *first_pte = NULL, *pte = NULL;
5570 unsigned long lvl_pages = 0;
5573 while (clf_pages > 0) {
5576 pte = pfn_to_dma_pte(domain, clf_pfn, &level);
5580 lvl_pages = lvl_to_nr_pages(level);
5583 if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
5586 clf_pages -= lvl_pages;
5587 clf_pfn += lvl_pages;
5590 if (!clf_pages || first_pte_in_page(pte) ||
5591 (level > 1 && clf_pages < lvl_pages)) {
5592 domain_flush_cache(domain, first_pte,
5593 (void *)pte - (void *)first_pte);
5599 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
5600 unsigned long iova, size_t size)
5602 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5603 unsigned long pages = aligned_nrpages(iova, size);
5604 unsigned long pfn = iova >> VTD_PAGE_SHIFT;
5605 struct intel_iommu *iommu;
5608 if (!dmar_domain->iommu_coherency)
5609 clflush_sync_map(dmar_domain, pfn, pages);
5611 for_each_domain_iommu(iommu_id, dmar_domain) {
5612 iommu = g_iommus[iommu_id];
5613 __mapping_notify_one(iommu, dmar_domain, pfn, pages);
5617 const struct iommu_ops intel_iommu_ops = {
5618 .capable = intel_iommu_capable,
5619 .domain_alloc = intel_iommu_domain_alloc,
5620 .domain_free = intel_iommu_domain_free,
5621 .enable_nesting = intel_iommu_enable_nesting,
5622 .attach_dev = intel_iommu_attach_device,
5623 .detach_dev = intel_iommu_detach_device,
5624 .aux_attach_dev = intel_iommu_aux_attach_device,
5625 .aux_detach_dev = intel_iommu_aux_detach_device,
5626 .aux_get_pasid = intel_iommu_aux_get_pasid,
5627 .map_pages = intel_iommu_map_pages,
5628 .unmap_pages = intel_iommu_unmap_pages,
5629 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
5630 .flush_iotlb_all = intel_flush_iotlb_all,
5631 .iotlb_sync = intel_iommu_tlb_sync,
5632 .iova_to_phys = intel_iommu_iova_to_phys,
5633 .probe_device = intel_iommu_probe_device,
5634 .probe_finalize = intel_iommu_probe_finalize,
5635 .release_device = intel_iommu_release_device,
5636 .get_resv_regions = intel_iommu_get_resv_regions,
5637 .put_resv_regions = generic_iommu_put_resv_regions,
5638 .device_group = intel_iommu_device_group,
5639 .dev_has_feat = intel_iommu_dev_has_feat,
5640 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5641 .dev_enable_feat = intel_iommu_dev_enable_feat,
5642 .dev_disable_feat = intel_iommu_dev_disable_feat,
5643 .is_attach_deferred = intel_iommu_is_attach_deferred,
5644 .def_domain_type = device_def_domain_type,
5645 .pgsize_bitmap = SZ_4K,
5646 #ifdef CONFIG_INTEL_IOMMU_SVM
5647 .cache_invalidate = intel_iommu_sva_invalidate,
5648 .sva_bind_gpasid = intel_svm_bind_gpasid,
5649 .sva_unbind_gpasid = intel_svm_unbind_gpasid,
5650 .sva_bind = intel_svm_bind,
5651 .sva_unbind = intel_svm_unbind,
5652 .sva_get_pasid = intel_svm_get_pasid,
5653 .page_response = intel_svm_page_response,
5657 static void quirk_iommu_igfx(struct pci_dev *dev)
5659 if (risky_device(dev))
5662 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5666 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5667 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5668 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5669 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5670 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5671 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5672 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5673 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5675 /* Broadwell igfx malfunctions with dmar */
5676 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5677 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5678 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5679 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5680 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5681 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
5682 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
5683 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
5684 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
5685 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
5686 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
5687 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
5688 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
5689 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
5690 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
5691 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
5692 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
5693 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
5694 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
5695 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
5696 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
5697 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
5698 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
5699 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
5701 static void quirk_iommu_rwbf(struct pci_dev *dev)
5703 if (risky_device(dev))
5707 * Mobile 4 Series Chipset neglects to set RWBF capability,
5708 * but needs it. Same seems to hold for the desktop versions.
5710 pci_info(dev, "Forcing write-buffer flush capability\n");
5714 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5715 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5716 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5717 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5718 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5719 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5720 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5723 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
5724 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5725 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
5726 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
5727 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5728 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5729 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5730 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5732 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5736 if (risky_device(dev))
5739 if (pci_read_config_word(dev, GGC, &ggc))
5742 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5743 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5745 } else if (dmar_map_gfx) {
5746 /* we have to ensure the gfx device is idle before we flush */
5747 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5748 iommu_set_dma_strict();
5751 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5752 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5753 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5754 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5756 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
5760 if (!IS_GFX_DEVICE(dev))
5763 ver = (dev->device >> 8) & 0xff;
5764 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
5765 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
5769 if (risky_device(dev))
5772 pci_info(dev, "Skip IOMMU disabling for graphics\n");
5773 iommu_skip_te_disable = 1;
5775 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
5777 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5778 ISOCH DMAR unit for the Azalia sound device, but not give it any
5779 TLB entries, which causes it to deadlock. Check for that. We do
5780 this in a function called from init_dmars(), instead of in a PCI
5781 quirk, because we don't want to print the obnoxious "BIOS broken"
5782 message if VT-d is actually disabled.
5784 static void __init check_tylersburg_isoch(void)
5786 struct pci_dev *pdev;
5787 uint32_t vtisochctrl;
5789 /* If there's no Azalia in the system anyway, forget it. */
5790 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5794 if (risky_device(pdev)) {
5801 /* System Management Registers. Might be hidden, in which case
5802 we can't do the sanity check. But that's OK, because the
5803 known-broken BIOSes _don't_ actually hide it, so far. */
5804 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5808 if (risky_device(pdev)) {
5813 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5820 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5821 if (vtisochctrl & 1)
5824 /* Drop all bits other than the number of TLB entries */
5825 vtisochctrl &= 0x1c;
5827 /* If we have the recommended number of TLB entries (16), fine. */
5828 if (vtisochctrl == 0x10)
5831 /* Zero TLB entries? You get to ride the short bus to school. */
5833 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5834 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5835 dmi_get_system_info(DMI_BIOS_VENDOR),
5836 dmi_get_system_info(DMI_BIOS_VERSION),
5837 dmi_get_system_info(DMI_PRODUCT_VERSION));
5838 iommu_identity_mapping |= IDENTMAP_AZALIA;
5842 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",