1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright © 2006-2014 Intel Corporation.
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-contiguous.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
50 #include "irq_remapping.h"
51 #include "intel-pasid.h"
53 #define ROOT_SIZE VTD_PAGE_SIZE
54 #define CONTEXT_SIZE VTD_PAGE_SIZE
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61 #define IOAPIC_RANGE_START (0xfee00000)
62 #define IOAPIC_RANGE_END (0xfeefffff)
63 #define IOVA_START_ADDR (0x1000)
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
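/*
 * For example, with gaw = 48: __DOMAIN_MAX_PFN(48) = 2^36 - 1 and
 * DOMAIN_MAX_ADDR(48) = (2^36 - 1) << 12, i.e. 4KiB below the 48-bit
 * boundary (the base address of the last addressable 4KiB page).
 */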
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
84 /* page table handling */
85 #define LEVEL_STRIDE (9)
86 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes that we support.
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is an order of a 4KiB page and that the
96 * mapping has natural alignment.
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are an order of 4KiB.
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
104 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
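/*
 * ~0xFFFUL has every bit from 12 upward set, so this advertises all
 * power-of-two page sizes of 4KiB and larger (4KiB, 8KiB, 16KiB, ...).
 */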
106 static inline int agaw_to_level(int agaw)
111 static inline int agaw_to_width(int agaw)
113 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
116 static inline int width_to_agaw(int width)
118 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
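/*
 * For example, a 48-bit address width gives agaw = DIV_ROUND_UP(48 - 30, 9) = 2
 * (a 4-level page table), and the default 57-bit width gives agaw = 3
 * (a 5-level page table).
 */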
121 static inline unsigned int level_to_offset_bits(int level)
123 return (level - 1) * LEVEL_STRIDE;
126 static inline int pfn_level_offset(unsigned long pfn, int level)
128 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131 static inline unsigned long level_mask(int level)
133 return -1UL << level_to_offset_bits(level);
136 static inline unsigned long level_size(int level)
138 return 1UL << level_to_offset_bits(level);
141 static inline unsigned long align_to_level(unsigned long pfn, int level)
143 return (pfn + level_size(level) - 1) & level_mask(level);
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
148 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
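/*
 * With LEVEL_STRIDE == 9, each table level resolves 9 bits of the PFN:
 * a level-1 PTE maps a single 4KiB page, a level-2 superpage maps
 * 512 pages (2MiB) and a level-3 superpage maps 512 * 512 pages (1GiB).
 */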
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152 are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
155 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
160 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
162 static inline unsigned long page_to_dma_pfn(struct page *pg)
164 return mm_to_dma_pfn(page_to_pfn(pg));
166 static inline unsigned long virt_to_dma_pfn(void *p)
168 return page_to_dma_pfn(virt_to_page(p));
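/*
 * When PAGE_SHIFT == VTD_PAGE_SHIFT (4KiB MM pages) the conversions above
 * are identities; with larger MM pages (e.g. 64KiB) each MM PFN maps to a
 * run of consecutive VT-d PFNs.
 */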
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu **g_iommus;
174 static void __init check_tylersburg_isoch(void);
175 static int rwbf_quirk;
178 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
179 * (used when the kernel is launched w/ TXT)
181 static int force_on = 0;
182 int intel_iommu_tboot_noforce;
183 static int no_platform_optin;
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
191 static phys_addr_t root_entry_lctp(struct root_entry *re)
196 return re->lo & VTD_PAGE_MASK;
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
203 static phys_addr_t root_entry_uctp(struct root_entry *re)
208 return re->hi & VTD_PAGE_MASK;
211 static inline void context_clear_pasid_enable(struct context_entry *context)
213 context->lo &= ~(1ULL << 11);
216 static inline bool context_pasid_enabled(struct context_entry *context)
218 return !!(context->lo & (1ULL << 11));
221 static inline void context_set_copied(struct context_entry *context)
223 context->hi |= (1ull << 3);
226 static inline bool context_copied(struct context_entry *context)
228 return !!(context->hi & (1ULL << 3));
231 static inline bool __context_present(struct context_entry *context)
233 return (context->lo & 1);
236 bool context_present(struct context_entry *context)
238 return context_pasid_enabled(context) ?
239 __context_present(context) :
240 __context_present(context) && !context_copied(context);
243 static inline void context_set_present(struct context_entry *context)
248 static inline void context_set_fault_enable(struct context_entry *context)
250 context->lo &= (((u64)-1) << 2) | 1;
253 static inline void context_set_translation_type(struct context_entry *context,
256 context->lo &= (((u64)-1) << 4) | 3;
257 context->lo |= (value & 3) << 2;
260 static inline void context_set_address_root(struct context_entry *context,
263 context->lo &= ~VTD_PAGE_MASK;
264 context->lo |= value & VTD_PAGE_MASK;
267 static inline void context_set_address_width(struct context_entry *context,
270 context->hi |= value & 7;
273 static inline void context_set_domain_id(struct context_entry *context,
276 context->hi |= (value & ((1 << 16) - 1)) << 8;
279 static inline int context_domain_id(struct context_entry *c)
281 return((c->hi >> 8) & 0xffff);
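/*
 * Bit layout used by the context-entry helpers above: lo bit 0 is Present,
 * lo bits 3:2 are the Translation Type, lo bits 63:12 hold the address
 * root; hi bits 2:0 are the Address Width and hi bits 23:8 the Domain ID.
 */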
284 static inline void context_clear_entry(struct context_entry *context)
291 * This domain is a static identity mapping domain.
292 * 1. This domain creates a static 1:1 mapping to all usable memory.
293 * 2. It maps to each iommu if successful.
294 * 3. Each iommu maps to this domain if successful.
296 static struct dmar_domain *si_domain;
297 static int hw_pass_through = 1;
299 /* si_domain contains multiple devices */
300 #define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
303 * This is a DMA domain allocated through the iommu domain allocation
304 * interface. But one or more devices belonging to this domain have
305 * been chosen to use a private domain. We should avoid using the
306 * map/unmap/iova_to_phys APIs on it.
308 #define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
311 * When VT-d works in the scalable mode, it allows DMA translation to
312 * happen through either first level or second level page table. This
313 * bit marks that the DMA translation for the domain goes through the
314 * first level page table, otherwise, it goes through the second level.
316 #define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
319 * Domain represents a virtual machine which demands iommu nested
320 * translation mode support.
322 #define DOMAIN_FLAG_NESTING_MODE BIT(3)
324 #define for_each_domain_iommu(idx, domain) \
325 for (idx = 0; idx < g_num_of_iommus; idx++) \
326 if (domain->iommu_refcnt[idx])
328 struct dmar_rmrr_unit {
329 struct list_head list; /* list of rmrr units */
330 struct acpi_dmar_header *hdr; /* ACPI header */
331 u64 base_address; /* reserved base address*/
332 u64 end_address; /* reserved end address */
333 struct dmar_dev_scope *devices; /* target devices */
334 int devices_cnt; /* target device count */
337 struct dmar_atsr_unit {
338 struct list_head list; /* list of ATSR units */
339 struct acpi_dmar_header *hdr; /* ACPI header */
340 struct dmar_dev_scope *devices; /* target devices */
341 int devices_cnt; /* target device count */
342 u8 include_all:1; /* include all ports */
345 static LIST_HEAD(dmar_atsr_units);
346 static LIST_HEAD(dmar_rmrr_units);
348 #define for_each_rmrr_units(rmrr) \
349 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
351 /* number of registered IOMMUs, used to size and index the g_iommus array */
352 static int g_num_of_iommus;
354 static void domain_exit(struct dmar_domain *domain);
355 static void domain_remove_dev_info(struct dmar_domain *domain);
356 static void dmar_remove_one_dev_info(struct device *dev);
357 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
358 static int intel_iommu_attach_device(struct iommu_domain *domain,
360 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
363 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
364 int dmar_disabled = 0;
366 int dmar_disabled = 1;
367 #endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
369 #ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
370 int intel_iommu_sm = 1;
373 #endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
375 int intel_iommu_enabled = 0;
376 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
378 static int dmar_map_gfx = 1;
379 static int dmar_forcedac;
380 static int intel_iommu_strict;
381 static int intel_iommu_superpage = 1;
382 static int iommu_identity_mapping;
383 static int intel_no_bounce;
385 #define IDENTMAP_GFX 2
386 #define IDENTMAP_AZALIA 4
388 int intel_iommu_gfx_mapped;
389 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
391 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
392 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
393 DEFINE_SPINLOCK(device_domain_lock);
394 static LIST_HEAD(device_domain_list);
396 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
397 to_pci_dev(d)->untrusted)
400 * Iterate over elements in device_domain_list and call the specified
401 * callback @fn against each element.
403 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
404 void *data), void *data)
408 struct device_domain_info *info;
410 spin_lock_irqsave(&device_domain_lock, flags);
411 list_for_each_entry(info, &device_domain_list, global) {
412 ret = fn(info, data);
414 spin_unlock_irqrestore(&device_domain_lock, flags);
418 spin_unlock_irqrestore(&device_domain_lock, flags);
423 const struct iommu_ops intel_iommu_ops;
425 static bool translation_pre_enabled(struct intel_iommu *iommu)
427 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
430 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
432 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
435 static void init_translation_status(struct intel_iommu *iommu)
439 gsts = readl(iommu->reg + DMAR_GSTS_REG);
440 if (gsts & DMA_GSTS_TES)
441 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
444 /* Convert generic 'struct iommu_domain' to private 'struct dmar_domain' */
445 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
447 return container_of(dom, struct dmar_domain, domain);
450 static int __init intel_iommu_setup(char *str)
455 if (!strncmp(str, "on", 2)) {
457 pr_info("IOMMU enabled\n");
458 } else if (!strncmp(str, "off", 3)) {
460 no_platform_optin = 1;
461 pr_info("IOMMU disabled\n");
462 } else if (!strncmp(str, "igfx_off", 8)) {
464 pr_info("Disable GFX device mapping\n");
465 } else if (!strncmp(str, "forcedac", 8)) {
466 pr_info("Forcing DAC for PCI devices\n");
468 } else if (!strncmp(str, "strict", 6)) {
469 pr_info("Disable batched IOTLB flush\n");
470 intel_iommu_strict = 1;
471 } else if (!strncmp(str, "sp_off", 6)) {
472 pr_info("Disable supported super page\n");
473 intel_iommu_superpage = 0;
474 } else if (!strncmp(str, "sm_on", 5)) {
475 pr_info("Intel-IOMMU: scalable mode supported\n");
477 } else if (!strncmp(str, "tboot_noforce", 13)) {
478 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
479 intel_iommu_tboot_noforce = 1;
480 } else if (!strncmp(str, "nobounce", 8)) {
481 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
485 str += strcspn(str, ",");
491 __setup("intel_iommu=", intel_iommu_setup);
493 static struct kmem_cache *iommu_domain_cache;
494 static struct kmem_cache *iommu_devinfo_cache;
496 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
498 struct dmar_domain **domains;
501 domains = iommu->domains[idx];
505 return domains[did & 0xff];
508 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
509 struct dmar_domain *domain)
511 struct dmar_domain **domains;
514 if (!iommu->domains[idx]) {
515 size_t size = 256 * sizeof(struct dmar_domain *);
516 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
519 domains = iommu->domains[idx];
520 if (WARN_ON(!domains))
523 domains[did & 0xff] = domain;
526 void *alloc_pgtable_page(int node)
531 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
533 vaddr = page_address(page);
537 void free_pgtable_page(void *vaddr)
539 free_page((unsigned long)vaddr);
542 static inline void *alloc_domain_mem(void)
544 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
547 static void free_domain_mem(void *vaddr)
549 kmem_cache_free(iommu_domain_cache, vaddr);
552 static inline void * alloc_devinfo_mem(void)
554 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
557 static inline void free_devinfo_mem(void *vaddr)
559 kmem_cache_free(iommu_devinfo_cache, vaddr);
562 static inline int domain_type_is_si(struct dmar_domain *domain)
564 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
567 static inline bool domain_use_first_level(struct dmar_domain *domain)
569 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
572 static inline int domain_pfn_supported(struct dmar_domain *domain,
575 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
577 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
580 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
585 sagaw = cap_sagaw(iommu->cap);
586 for (agaw = width_to_agaw(max_gaw);
588 if (test_bit(agaw, &sagaw))
596 * Calculate max SAGAW for each iommu.
598 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
600 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
604 * Calculate agaw for each iommu.
605 * "SAGAW" may be different across iommus; use a default agaw, and
606 * fall back to a smaller supported agaw for iommus that don't support the default.
608 int iommu_calculate_agaw(struct intel_iommu *iommu)
610 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
613 /* This function only returns a single iommu in a domain */
614 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
618 /* si_domain and vm domain should not get here. */
619 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
622 for_each_domain_iommu(iommu_id, domain)
625 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
628 return g_iommus[iommu_id];
631 static void domain_update_iommu_coherency(struct dmar_domain *domain)
633 struct dmar_drhd_unit *drhd;
634 struct intel_iommu *iommu;
638 domain->iommu_coherency = 1;
640 for_each_domain_iommu(i, domain) {
642 if (!ecap_coherent(g_iommus[i]->ecap)) {
643 domain->iommu_coherency = 0;
650 /* No hardware attached; use lowest common denominator */
652 for_each_active_iommu(iommu, drhd) {
653 if (!ecap_coherent(iommu->ecap)) {
654 domain->iommu_coherency = 0;
661 static int domain_update_iommu_snooping(struct intel_iommu *skip)
663 struct dmar_drhd_unit *drhd;
664 struct intel_iommu *iommu;
668 for_each_active_iommu(iommu, drhd) {
670 if (!ecap_sc_support(iommu->ecap)) {
681 static int domain_update_iommu_superpage(struct dmar_domain *domain,
682 struct intel_iommu *skip)
684 struct dmar_drhd_unit *drhd;
685 struct intel_iommu *iommu;
688 if (!intel_iommu_superpage) {
692 /* set iommu_superpage to the smallest common denominator */
694 for_each_active_iommu(iommu, drhd) {
696 if (domain && domain_use_first_level(domain)) {
697 if (!cap_fl1gp_support(iommu->cap))
700 mask &= cap_super_page_val(iommu->cap);
712 /* Some capabilities may be different across iommus */
713 static void domain_update_iommu_cap(struct dmar_domain *domain)
715 domain_update_iommu_coherency(domain);
716 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
717 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
720 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
723 struct root_entry *root = &iommu->root_entry[bus];
724 struct context_entry *context;
728 if (sm_supported(iommu)) {
736 context = phys_to_virt(*entry & VTD_PAGE_MASK);
738 unsigned long phy_addr;
742 context = alloc_pgtable_page(iommu->node);
746 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
747 phy_addr = virt_to_phys((void *)context);
748 *entry = phy_addr | 1;
749 __iommu_flush_cache(iommu, entry, sizeof(*entry));
751 return &context[devfn];
754 static int iommu_dummy(struct device *dev)
756 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
759 static bool attach_deferred(struct device *dev)
761 return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
765 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
766 * sub-hierarchy of a candidate PCI-PCI bridge
767 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
768 * @bridge: the candidate PCI-PCI bridge
770 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
773 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
775 struct pci_dev *pdev, *pbridge;
777 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
780 pdev = to_pci_dev(dev);
781 pbridge = to_pci_dev(bridge);
783 if (pbridge->subordinate &&
784 pbridge->subordinate->number <= pdev->bus->number &&
785 pbridge->subordinate->busn_res.end >= pdev->bus->number)
791 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
793 struct dmar_drhd_unit *drhd = NULL;
794 struct intel_iommu *iommu;
796 struct pci_dev *pdev = NULL;
800 if (iommu_dummy(dev))
803 if (dev_is_pci(dev)) {
804 struct pci_dev *pf_pdev;
806 pdev = pci_real_dma_dev(to_pci_dev(dev));
808 /* VFs aren't listed in scope tables; we need to look up
809 * the PF instead to find the IOMMU. */
810 pf_pdev = pci_physfn(pdev);
812 segment = pci_domain_nr(pdev->bus);
813 } else if (has_acpi_companion(dev))
814 dev = &ACPI_COMPANION(dev)->dev;
817 for_each_active_iommu(iommu, drhd) {
818 if (pdev && segment != drhd->segment)
821 for_each_active_dev_scope(drhd->devices,
822 drhd->devices_cnt, i, tmp) {
824 /* For a VF use its original BDF# not that of the PF
825 * which we used for the IOMMU lookup. Strictly speaking
826 * we could do this for all PCI devices; we only need to
827 * get the BDF# from the scope table for ACPI matches. */
828 if (pdev && pdev->is_virtfn)
831 *bus = drhd->devices[i].bus;
832 *devfn = drhd->devices[i].devfn;
836 if (is_downstream_to_pci_bridge(dev, tmp))
840 if (pdev && drhd->include_all) {
842 *bus = pdev->bus->number;
843 *devfn = pdev->devfn;
854 static void domain_flush_cache(struct dmar_domain *domain,
855 void *addr, int size)
857 if (!domain->iommu_coherency)
858 clflush_cache_range(addr, size);
861 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
863 struct context_entry *context;
867 spin_lock_irqsave(&iommu->lock, flags);
868 context = iommu_context_addr(iommu, bus, devfn, 0);
870 ret = context_present(context);
871 spin_unlock_irqrestore(&iommu->lock, flags);
875 static void free_context_table(struct intel_iommu *iommu)
879 struct context_entry *context;
881 spin_lock_irqsave(&iommu->lock, flags);
882 if (!iommu->root_entry) {
885 for (i = 0; i < ROOT_ENTRY_NR; i++) {
886 context = iommu_context_addr(iommu, i, 0, 0);
888 free_pgtable_page(context);
890 if (!sm_supported(iommu))
893 context = iommu_context_addr(iommu, i, 0x80, 0);
895 free_pgtable_page(context);
898 free_pgtable_page(iommu->root_entry);
899 iommu->root_entry = NULL;
901 spin_unlock_irqrestore(&iommu->lock, flags);
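/*
 * Walk, and if necessary build, the page table down to @pfn. A non-zero
 * *target_level asks for the PTE at exactly that level, allocating any
 * missing intermediate tables on the way down; *target_level == 0 is a
 * pure lookup that stops at the first non-present entry or superpage.
 * On return, *target_level holds the level at which the walk stopped.
 */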
904 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
905 unsigned long pfn, int *target_level)
907 struct dma_pte *parent, *pte;
908 int level = agaw_to_level(domain->agaw);
911 BUG_ON(!domain->pgd);
913 if (!domain_pfn_supported(domain, pfn))
914 /* Address beyond IOMMU's addressing capabilities. */
917 parent = domain->pgd;
922 offset = pfn_level_offset(pfn, level);
923 pte = &parent[offset];
924 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
926 if (level == *target_level)
929 if (!dma_pte_present(pte)) {
932 tmp_page = alloc_pgtable_page(domain->nid);
937 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
938 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
939 if (domain_use_first_level(domain))
940 pteval |= DMA_FL_PTE_XD;
941 if (cmpxchg64(&pte->val, 0ULL, pteval))
942 /* Someone else set it while we were thinking; use theirs. */
943 free_pgtable_page(tmp_page);
945 domain_flush_cache(domain, pte, sizeof(*pte));
950 parent = phys_to_virt(dma_pte_addr(pte));
955 *target_level = level;
960 /* return address's pte at specific level */
961 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
963 int level, int *large_page)
965 struct dma_pte *parent, *pte;
966 int total = agaw_to_level(domain->agaw);
969 parent = domain->pgd;
970 while (level <= total) {
971 offset = pfn_level_offset(pfn, total);
972 pte = &parent[offset];
976 if (!dma_pte_present(pte)) {
981 if (dma_pte_superpage(pte)) {
986 parent = phys_to_virt(dma_pte_addr(pte));
992 /* clear last level pte; a tlb flush should follow */
993 static void dma_pte_clear_range(struct dmar_domain *domain,
994 unsigned long start_pfn,
995 unsigned long last_pfn)
997 unsigned int large_page;
998 struct dma_pte *first_pte, *pte;
1000 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1001 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1002 BUG_ON(start_pfn > last_pfn);
1004 /* we don't need lock here; nobody else touches the iova range */
1007 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1009 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1014 start_pfn += lvl_to_nr_pages(large_page);
1016 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1018 domain_flush_cache(domain, first_pte,
1019 (void *)pte - (void *)first_pte);
1021 } while (start_pfn && start_pfn <= last_pfn);
1024 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1025 int retain_level, struct dma_pte *pte,
1026 unsigned long pfn, unsigned long start_pfn,
1027 unsigned long last_pfn)
1029 pfn = max(start_pfn, pfn);
1030 pte = &pte[pfn_level_offset(pfn, level)];
1033 unsigned long level_pfn;
1034 struct dma_pte *level_pte;
1036 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1039 level_pfn = pfn & level_mask(level);
1040 level_pte = phys_to_virt(dma_pte_addr(pte));
1043 dma_pte_free_level(domain, level - 1, retain_level,
1044 level_pte, level_pfn, start_pfn,
1049 * Free the page table if we're below the level we want to
1050 * retain and the range covers the entire table.
1052 if (level < retain_level && !(start_pfn > level_pfn ||
1053 last_pfn < level_pfn + level_size(level) - 1)) {
1055 domain_flush_cache(domain, pte, sizeof(*pte));
1056 free_pgtable_page(level_pte);
1059 pfn += level_size(level);
1060 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1064 * clear last level (leaf) ptes and free page table pages below the
1065 * level we wish to keep intact.
1067 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1068 unsigned long start_pfn,
1069 unsigned long last_pfn,
1072 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1073 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1074 BUG_ON(start_pfn > last_pfn);
1076 dma_pte_clear_range(domain, start_pfn, last_pfn);
1078 /* We don't need lock here; nobody else touches the iova range */
1079 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1080 domain->pgd, 0, start_pfn, last_pfn);
1083 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1084 free_pgtable_page(domain->pgd);
1089 /* When a page at a given level is being unlinked from its parent, we don't
1090 need to *modify* it at all. All we need to do is make a list of all the
1091 pages which can be freed just as soon as we've flushed the IOTLB and we
1092 know the hardware page-walk will no longer touch them.
1093 The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */
1095 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1096 int level, struct dma_pte *pte,
1097 struct page *freelist)
1101 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1102 pg->freelist = freelist;
1108 pte = page_address(pg);
1110 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1111 freelist = dma_pte_list_pagetables(domain, level - 1,
1114 } while (!first_pte_in_page(pte));
1119 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1120 struct dma_pte *pte, unsigned long pfn,
1121 unsigned long start_pfn,
1122 unsigned long last_pfn,
1123 struct page *freelist)
1125 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1127 pfn = max(start_pfn, pfn);
1128 pte = &pte[pfn_level_offset(pfn, level)];
1131 unsigned long level_pfn;
1133 if (!dma_pte_present(pte))
1136 level_pfn = pfn & level_mask(level);
1138 /* If range covers entire pagetable, free it */
1139 if (start_pfn <= level_pfn &&
1140 last_pfn >= level_pfn + level_size(level) - 1) {
1141 /* These subordinate page tables are going away entirely. Don't
1142 bother to clear them; we're just going to *free* them. */
1143 if (level > 1 && !dma_pte_superpage(pte))
1144 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1150 } else if (level > 1) {
1151 /* Recurse down into a level that isn't *entirely* obsolete */
1152 freelist = dma_pte_clear_level(domain, level - 1,
1153 phys_to_virt(dma_pte_addr(pte)),
1154 level_pfn, start_pfn, last_pfn,
1158 pfn += level_size(level);
1159 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1162 domain_flush_cache(domain, first_pte,
1163 (void *)++last_pte - (void *)first_pte);
1168 /* We can't just free the pages because the IOMMU may still be walking
1169 the page tables, and may have cached the intermediate levels. The
1170 pages can only be freed after the IOTLB flush has been done. */
1171 static struct page *domain_unmap(struct dmar_domain *domain,
1172 unsigned long start_pfn,
1173 unsigned long last_pfn)
1175 struct page *freelist;
1177 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1178 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1179 BUG_ON(start_pfn > last_pfn);
1181 /* we don't need lock here; nobody else touches the iova range */
1182 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1183 domain->pgd, 0, start_pfn, last_pfn, NULL);
1186 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1187 struct page *pgd_page = virt_to_page(domain->pgd);
1188 pgd_page->freelist = freelist;
1189 freelist = pgd_page;
1197 static void dma_free_pagelist(struct page *freelist)
1201 while ((pg = freelist)) {
1202 freelist = pg->freelist;
1203 free_pgtable_page(page_address(pg));
1207 static void iova_entry_free(unsigned long data)
1209 struct page *freelist = (struct page *)data;
1211 dma_free_pagelist(freelist);
1214 /* iommu handling */
1215 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1217 struct root_entry *root;
1218 unsigned long flags;
1220 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1222 pr_err("Allocating root entry for %s failed\n",
1227 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1229 spin_lock_irqsave(&iommu->lock, flags);
1230 iommu->root_entry = root;
1231 spin_unlock_irqrestore(&iommu->lock, flags);
1236 static void iommu_set_root_entry(struct intel_iommu *iommu)
1242 addr = virt_to_phys(iommu->root_entry);
1243 if (sm_supported(iommu))
1244 addr |= DMA_RTADDR_SMT;
1246 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1247 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1249 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1251 /* Make sure hardware completes it */
1252 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1253 readl, (sts & DMA_GSTS_RTPS), sts);
1255 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1258 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1263 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1266 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1267 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1269 /* Make sure hardware completes it */
1270 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1271 readl, (!(val & DMA_GSTS_WBFS)), val);
1273 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1276 /* return value determines whether we need a write buffer flush */
1277 static void __iommu_flush_context(struct intel_iommu *iommu,
1278 u16 did, u16 source_id, u8 function_mask,
1285 case DMA_CCMD_GLOBAL_INVL:
1286 val = DMA_CCMD_GLOBAL_INVL;
1288 case DMA_CCMD_DOMAIN_INVL:
1289 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1291 case DMA_CCMD_DEVICE_INVL:
1292 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1293 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1298 val |= DMA_CCMD_ICC;
1300 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1301 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1303 /* Make sure hardware completes it */
1304 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1305 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1307 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1310 /* return value determines whether we need a write buffer flush */
1311 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1312 u64 addr, unsigned int size_order, u64 type)
1314 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1315 u64 val = 0, val_iva = 0;
1319 case DMA_TLB_GLOBAL_FLUSH:
1320 /* global flush doesn't need to set IVA_REG */
1321 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1323 case DMA_TLB_DSI_FLUSH:
1324 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1326 case DMA_TLB_PSI_FLUSH:
1327 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1328 /* IH bit is passed in as part of address */
1329 val_iva = size_order | addr;
1334 /* Note: set drain read/write */
1337 * This is probably just to be extra safe. Looks like we can
1338 * ignore it without any impact.
1340 if (cap_read_drain(iommu->cap))
1341 val |= DMA_TLB_READ_DRAIN;
1343 if (cap_write_drain(iommu->cap))
1344 val |= DMA_TLB_WRITE_DRAIN;
1346 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1347 /* Note: Only uses first TLB reg currently */
1349 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1350 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1352 /* Make sure hardware completes it */
1353 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1354 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1356 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1358 /* check IOTLB invalidation granularity */
1359 if (DMA_TLB_IAIG(val) == 0)
1360 pr_err("Flush IOTLB failed\n");
1361 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1362 pr_debug("TLB flush request %Lx, actual %Lx\n",
1363 (unsigned long long)DMA_TLB_IIRG(type),
1364 (unsigned long long)DMA_TLB_IAIG(val));
1367 static struct device_domain_info *
1368 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1371 struct device_domain_info *info;
1373 assert_spin_locked(&device_domain_lock);
1378 list_for_each_entry(info, &domain->devices, link)
1379 if (info->iommu == iommu && info->bus == bus &&
1380 info->devfn == devfn) {
1381 if (info->ats_supported && info->dev)
1389 static void domain_update_iotlb(struct dmar_domain *domain)
1391 struct device_domain_info *info;
1392 bool has_iotlb_device = false;
1394 assert_spin_locked(&device_domain_lock);
1396 list_for_each_entry(info, &domain->devices, link) {
1397 struct pci_dev *pdev;
1399 if (!info->dev || !dev_is_pci(info->dev))
1402 pdev = to_pci_dev(info->dev);
1403 if (pdev->ats_enabled) {
1404 has_iotlb_device = true;
1409 domain->has_iotlb_device = has_iotlb_device;
1412 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1414 struct pci_dev *pdev;
1416 assert_spin_locked(&device_domain_lock);
1418 if (!info || !dev_is_pci(info->dev))
1421 pdev = to_pci_dev(info->dev);
1422 /* For IOMMU that supports device IOTLB throttling (DIT), we assign
1423 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1424 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1425 * reserved, which should be set to 0.
1427 if (!ecap_dit(info->iommu->ecap))
1430 struct pci_dev *pf_pdev;
1432 /* pdev will be returned if device is not a vf */
1433 pf_pdev = pci_physfn(pdev);
1434 info->pfsid = pci_dev_id(pf_pdev);
1437 #ifdef CONFIG_INTEL_IOMMU_SVM
1438 /* The PCIe spec, in its wisdom, declares that the behaviour of
1439 the device if you enable PASID support after ATS support is
1440 undefined. So always enable PASID support on devices which
1441 have it, even if we can't yet know if we're ever going to use it. */
1443 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1444 info->pasid_enabled = 1;
1446 if (info->pri_supported &&
1447 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1448 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1449 info->pri_enabled = 1;
1451 if (!pdev->untrusted && info->ats_supported &&
1452 pci_ats_page_aligned(pdev) &&
1453 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1454 info->ats_enabled = 1;
1455 domain_update_iotlb(info->domain);
1456 info->ats_qdep = pci_ats_queue_depth(pdev);
1460 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1462 struct pci_dev *pdev;
1464 assert_spin_locked(&device_domain_lock);
1466 if (!dev_is_pci(info->dev))
1469 pdev = to_pci_dev(info->dev);
1471 if (info->ats_enabled) {
1472 pci_disable_ats(pdev);
1473 info->ats_enabled = 0;
1474 domain_update_iotlb(info->domain);
1476 #ifdef CONFIG_INTEL_IOMMU_SVM
1477 if (info->pri_enabled) {
1478 pci_disable_pri(pdev);
1479 info->pri_enabled = 0;
1481 if (info->pasid_enabled) {
1482 pci_disable_pasid(pdev);
1483 info->pasid_enabled = 0;
1488 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1489 u64 addr, unsigned mask)
1492 unsigned long flags;
1493 struct device_domain_info *info;
1495 if (!domain->has_iotlb_device)
1498 spin_lock_irqsave(&device_domain_lock, flags);
1499 list_for_each_entry(info, &domain->devices, link) {
1500 if (!info->ats_enabled)
1503 sid = info->bus << 8 | info->devfn;
1504 qdep = info->ats_qdep;
1505 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1508 spin_unlock_irqrestore(&device_domain_lock, flags);
1511 static void domain_flush_piotlb(struct intel_iommu *iommu,
1512 struct dmar_domain *domain,
1513 u64 addr, unsigned long npages, bool ih)
1515 u16 did = domain->iommu_did[iommu->seq_id];
1517 if (domain->default_pasid)
1518 qi_flush_piotlb(iommu, did, domain->default_pasid,
1521 if (!list_empty(&domain->devices))
1522 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1525 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1526 struct dmar_domain *domain,
1527 unsigned long pfn, unsigned int pages,
1530 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
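/*
 * e.g. pages == 3 rounds up to 4, giving mask == 2: one invalidation
 * covering four 4KiB pages, naturally aligned to 16KiB.
 */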
1531 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1532 u16 did = domain->iommu_did[iommu->seq_id];
1539 if (domain_use_first_level(domain)) {
1540 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1543 * Fall back to domain-selective flush if there is no PSI support or
1544 * the size is too big. PSI requires page size to be 2 ^ x,
1545 * and the base address is naturally aligned to the size.
1547 if (!cap_pgsel_inv(iommu->cap) ||
1548 mask > cap_max_amask_val(iommu->cap))
1549 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1552 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1557 * In caching mode, changes of pages from non-present to present require
1558 * flush. However, device IOTLB doesn't need to be flushed in this case.
1560 if (!cap_caching_mode(iommu->cap) || !map)
1561 iommu_flush_dev_iotlb(domain, addr, mask);
1564 /* Notification for newly created mappings */
1565 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1566 struct dmar_domain *domain,
1567 unsigned long pfn, unsigned int pages)
1570 * It's a non-present to present mapping. Only flush if caching mode and second level.
1573 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1574 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1576 iommu_flush_write_buffer(iommu);
1579 static void iommu_flush_iova(struct iova_domain *iovad)
1581 struct dmar_domain *domain;
1584 domain = container_of(iovad, struct dmar_domain, iovad);
1586 for_each_domain_iommu(idx, domain) {
1587 struct intel_iommu *iommu = g_iommus[idx];
1588 u16 did = domain->iommu_did[iommu->seq_id];
1590 if (domain_use_first_level(domain))
1591 domain_flush_piotlb(iommu, domain, 0, -1, 0);
1593 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1596 if (!cap_caching_mode(iommu->cap))
1597 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1598 0, MAX_AGAW_PFN_WIDTH);
1602 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1605 unsigned long flags;
1607 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1610 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1611 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1612 pmen &= ~DMA_PMEN_EPM;
1613 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1615 /* wait for the protected region status bit to clear */
1616 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1617 readl, !(pmen & DMA_PMEN_PRS), pmen);
1619 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1622 static void iommu_enable_translation(struct intel_iommu *iommu)
1625 unsigned long flags;
1627 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1628 iommu->gcmd |= DMA_GCMD_TE;
1629 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1631 /* Make sure hardware completes it */
1632 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1633 readl, (sts & DMA_GSTS_TES), sts);
1635 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1638 static void iommu_disable_translation(struct intel_iommu *iommu)
1643 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1644 iommu->gcmd &= ~DMA_GCMD_TE;
1645 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1647 /* Make sure hardware completes it */
1648 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1649 readl, (!(sts & DMA_GSTS_TES)), sts);
1651 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1654 static int iommu_init_domains(struct intel_iommu *iommu)
1656 u32 ndomains, nlongs;
1659 ndomains = cap_ndoms(iommu->cap);
1660 pr_debug("%s: Number of Domains supported <%d>\n",
1661 iommu->name, ndomains);
1662 nlongs = BITS_TO_LONGS(ndomains);
1664 spin_lock_init(&iommu->lock);
1666 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1667 if (!iommu->domain_ids) {
1668 pr_err("%s: Allocating domain id array failed\n",
1673 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1674 iommu->domains = kzalloc(size, GFP_KERNEL);
1676 if (iommu->domains) {
1677 size = 256 * sizeof(struct dmar_domain *);
1678 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1681 if (!iommu->domains || !iommu->domains[0]) {
1682 pr_err("%s: Allocating domain array failed\n",
1684 kfree(iommu->domain_ids);
1685 kfree(iommu->domains);
1686 iommu->domain_ids = NULL;
1687 iommu->domains = NULL;
1692 * If Caching mode is set, then invalid translations are tagged
1693 * with domain-id 0, hence we need to pre-allocate it. We also
1694 * use domain-id 0 as a marker for non-allocated domain-id, so
1695 * make sure it is not used for a real domain.
1697 set_bit(0, iommu->domain_ids);
1700 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1701 * entry for first-level or pass-through translation modes should
1702 * be programmed with a domain id different from those used for
1703 * second-level or nested translation. We reserve a domain id for this purpose.
1706 if (sm_supported(iommu))
1707 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1712 static void disable_dmar_iommu(struct intel_iommu *iommu)
1714 struct device_domain_info *info, *tmp;
1715 unsigned long flags;
1717 if (!iommu->domains || !iommu->domain_ids)
1720 spin_lock_irqsave(&device_domain_lock, flags);
1721 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1722 if (info->iommu != iommu)
1725 if (!info->dev || !info->domain)
1728 __dmar_remove_one_dev_info(info);
1730 spin_unlock_irqrestore(&device_domain_lock, flags);
1732 if (iommu->gcmd & DMA_GCMD_TE)
1733 iommu_disable_translation(iommu);
1736 static void free_dmar_iommu(struct intel_iommu *iommu)
1738 if ((iommu->domains) && (iommu->domain_ids)) {
1739 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1742 for (i = 0; i < elems; i++)
1743 kfree(iommu->domains[i]);
1744 kfree(iommu->domains);
1745 kfree(iommu->domain_ids);
1746 iommu->domains = NULL;
1747 iommu->domain_ids = NULL;
1750 g_iommus[iommu->seq_id] = NULL;
1752 /* free context mapping */
1753 free_context_table(iommu);
1755 #ifdef CONFIG_INTEL_IOMMU_SVM
1756 if (pasid_supported(iommu)) {
1757 if (ecap_prs(iommu->ecap))
1758 intel_svm_finish_prq(iommu);
1764 * Check and return whether first level is used by default for DMA translation.
1767 static bool first_level_by_default(void)
1769 struct dmar_drhd_unit *drhd;
1770 struct intel_iommu *iommu;
1771 static int first_level_support = -1;
1773 if (likely(first_level_support != -1))
1774 return first_level_support;
1776 first_level_support = 1;
1779 for_each_active_iommu(iommu, drhd) {
1780 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
1781 first_level_support = 0;
1787 return first_level_support;
1790 static struct dmar_domain *alloc_domain(int flags)
1792 struct dmar_domain *domain;
1794 domain = alloc_domain_mem();
1798 memset(domain, 0, sizeof(*domain));
1799 domain->nid = NUMA_NO_NODE;
1800 domain->flags = flags;
1801 if (first_level_by_default())
1802 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1803 domain->has_iotlb_device = false;
1804 INIT_LIST_HEAD(&domain->devices);
1809 /* Must be called with iommu->lock */
1810 static int domain_attach_iommu(struct dmar_domain *domain,
1811 struct intel_iommu *iommu)
1813 unsigned long ndomains;
1816 assert_spin_locked(&device_domain_lock);
1817 assert_spin_locked(&iommu->lock);
1819 domain->iommu_refcnt[iommu->seq_id] += 1;
1820 domain->iommu_count += 1;
1821 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1822 ndomains = cap_ndoms(iommu->cap);
1823 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1825 if (num >= ndomains) {
1826 pr_err("%s: No free domain ids\n", iommu->name);
1827 domain->iommu_refcnt[iommu->seq_id] -= 1;
1828 domain->iommu_count -= 1;
1832 set_bit(num, iommu->domain_ids);
1833 set_iommu_domain(iommu, num, domain);
1835 domain->iommu_did[iommu->seq_id] = num;
1836 domain->nid = iommu->node;
1838 domain_update_iommu_cap(domain);
1844 static int domain_detach_iommu(struct dmar_domain *domain,
1845 struct intel_iommu *iommu)
1849 assert_spin_locked(&device_domain_lock);
1850 assert_spin_locked(&iommu->lock);
1852 domain->iommu_refcnt[iommu->seq_id] -= 1;
1853 count = --domain->iommu_count;
1854 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1855 num = domain->iommu_did[iommu->seq_id];
1856 clear_bit(num, iommu->domain_ids);
1857 set_iommu_domain(iommu, num, NULL);
1859 domain_update_iommu_cap(domain);
1860 domain->iommu_did[iommu->seq_id] = 0;
1866 static struct iova_domain reserved_iova_list;
1867 static struct lock_class_key reserved_rbtree_key;
1869 static int dmar_init_reserved_ranges(void)
1871 struct pci_dev *pdev = NULL;
1875 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1877 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1878 &reserved_rbtree_key);
1880 /* IOAPIC ranges shouldn't be accessed by DMA */
1881 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1882 IOVA_PFN(IOAPIC_RANGE_END));
1884 pr_err("Reserve IOAPIC range failed\n");
1888 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1889 for_each_pci_dev(pdev) {
1892 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1893 r = &pdev->resource[i];
1894 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1896 iova = reserve_iova(&reserved_iova_list,
1900 pci_err(pdev, "Reserve iova for %pR failed\n", r);
1908 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1910 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1913 static inline int guestwidth_to_adjustwidth(int gaw)
1916 int r = (gaw - 12) % 9;
1927 static void domain_exit(struct dmar_domain *domain)
1930 /* Remove associated devices and clear attached or cached domains */
1931 domain_remove_dev_info(domain);
1934 put_iova_domain(&domain->iovad);
1937 struct page *freelist;
1939 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1940 dma_free_pagelist(freelist);
1943 free_domain_mem(domain);
1947 * Get the PASID directory size for scalable mode context entry.
1948 * Value of X in the PDTS field of a scalable mode context entry
1949 * indicates PASID directory with 2^(X + 7) entries.
1951 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1955 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1956 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
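/*
 * e.g. a 20-bit PASID space with PASID_PDE_SHIFT == 6: max_pde = 1 << 14,
 * pds = 14, and the returned value (pds - 7 = 7) encodes a PASID directory
 * of 2^(7 + 7) = 2^14 entries, each covering 64 PASIDs.
 */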
1964 * Set the RID_PASID field of a scalable mode context entry. The
1965 * IOMMU hardware will use the PASID value set in this field for
1966 * DMA translations of DMA requests without PASID.
1969 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1971 context->hi |= pasid & ((1 << 20) - 1);
1972 context->hi |= (1 << 20);
1976 * Set the DTE(Device-TLB Enable) field of a scalable mode context
1979 static inline void context_set_sm_dte(struct context_entry *context)
1981 context->lo |= (1 << 2);
1985 * Set the PRE(Page Request Enable) field of a scalable mode context
1988 static inline void context_set_sm_pre(struct context_entry *context)
1990 context->lo |= (1 << 4);
1993 /* Convert value to context PASID directory size field coding. */
1994 #define context_pdts(pds) (((pds) & 0x7) << 9)
1996 static int domain_context_mapping_one(struct dmar_domain *domain,
1997 struct intel_iommu *iommu,
1998 struct pasid_table *table,
2001 u16 did = domain->iommu_did[iommu->seq_id];
2002 int translation = CONTEXT_TT_MULTI_LEVEL;
2003 struct device_domain_info *info = NULL;
2004 struct context_entry *context;
2005 unsigned long flags;
2010 if (hw_pass_through && domain_type_is_si(domain))
2011 translation = CONTEXT_TT_PASS_THROUGH;
2013 pr_debug("Set context mapping for %02x:%02x.%d\n",
2014 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2016 BUG_ON(!domain->pgd);
2018 spin_lock_irqsave(&device_domain_lock, flags);
2019 spin_lock(&iommu->lock);
2022 context = iommu_context_addr(iommu, bus, devfn, 1);
2027 if (context_present(context))
2031 * For kdump cases, old valid entries may be cached due to the
2032 * in-flight DMA and copied pgtable, but there is no unmapping
2033 * behaviour for them, thus we need an explicit cache flush for
2034 * the newly-mapped device. For kdump, at this point, the device
2035 * is supposed to finish reset at its driver probe stage, so no
2036 * in-flight DMA will exist, and we don't need to worry anymore hereafter.
2039 if (context_copied(context)) {
2040 u16 did_old = context_domain_id(context);
2042 if (did_old < cap_ndoms(iommu->cap)) {
2043 iommu->flush.flush_context(iommu, did_old,
2044 (((u16)bus) << 8) | devfn,
2045 DMA_CCMD_MASK_NOBIT,
2046 DMA_CCMD_DEVICE_INVL);
2047 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2052 context_clear_entry(context);
2054 if (sm_supported(iommu)) {
2059 /* Setup the PASID DIR pointer: */
2060 pds = context_get_sm_pds(table);
2061 context->lo = (u64)virt_to_phys(table->table) |
2064 /* Setup the RID_PASID field: */
2065 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2068 * Setup the Device-TLB enable bit and Page request enable bit:
2071 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072 if (info && info->ats_supported)
2073 context_set_sm_dte(context);
2074 if (info && info->pri_supported)
2075 context_set_sm_pre(context);
2077 struct dma_pte *pgd = domain->pgd;
2080 context_set_domain_id(context, did);
2082 if (translation != CONTEXT_TT_PASS_THROUGH) {
2084 * Skip top levels of page tables for iommu which has
2085 * less agaw than default. Unnecessary for PT mode.
2087 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2089 pgd = phys_to_virt(dma_pte_addr(pgd));
2090 if (!dma_pte_present(pgd))
2094 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2095 if (info && info->ats_supported)
2096 translation = CONTEXT_TT_DEV_IOTLB;
2098 translation = CONTEXT_TT_MULTI_LEVEL;
2100 context_set_address_root(context, virt_to_phys(pgd));
2101 context_set_address_width(context, agaw);
2104 * In pass through mode, AW must be programmed to
2105 * indicate the largest AGAW value supported by
2106 * hardware. And ASR is ignored by hardware.
2108 context_set_address_width(context, iommu->msagaw);
2111 context_set_translation_type(context, translation);
2114 context_set_fault_enable(context);
2115 context_set_present(context);
2116 domain_flush_cache(domain, context, sizeof(*context));
2119 * It's a non-present to present mapping. If hardware doesn't cache
2120 * non-present entries we only need to flush the write-buffer. If it
2121 * _does_ cache non-present entries, then it does so in the special
2122 * domain #0, which we have to flush:
2124 if (cap_caching_mode(iommu->cap)) {
2125 iommu->flush.flush_context(iommu, 0,
2126 (((u16)bus) << 8) | devfn,
2127 DMA_CCMD_MASK_NOBIT,
2128 DMA_CCMD_DEVICE_INVL);
2129 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2131 iommu_flush_write_buffer(iommu);
2133 iommu_enable_dev_iotlb(info);
2138 spin_unlock(&iommu->lock);
2139 spin_unlock_irqrestore(&device_domain_lock, flags);
2144 struct domain_context_mapping_data {
2145 struct dmar_domain *domain;
2146 struct intel_iommu *iommu;
2147 struct pasid_table *table;
2150 static int domain_context_mapping_cb(struct pci_dev *pdev,
2151 u16 alias, void *opaque)
2153 struct domain_context_mapping_data *data = opaque;
2155 return domain_context_mapping_one(data->domain, data->iommu,
2156 data->table, PCI_BUS_NUM(alias),
2161 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2163 struct domain_context_mapping_data data;
2164 struct pasid_table *table;
2165 struct intel_iommu *iommu;
2168 iommu = device_to_iommu(dev, &bus, &devfn);
2172 table = intel_pasid_get_table(dev);
2174 if (!dev_is_pci(dev))
2175 return domain_context_mapping_one(domain, iommu, table,
2178 data.domain = domain;
2182 return pci_for_each_dma_alias(to_pci_dev(dev),
2183 &domain_context_mapping_cb, &data);
2186 static int domain_context_mapped_cb(struct pci_dev *pdev,
2187 u16 alias, void *opaque)
2189 struct intel_iommu *iommu = opaque;
2191 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2194 static int domain_context_mapped(struct device *dev)
2196 struct intel_iommu *iommu;
2199 iommu = device_to_iommu(dev, &bus, &devfn);
2203 if (!dev_is_pci(dev))
2204 return device_context_mapped(iommu, bus, devfn);
2206 return !pci_for_each_dma_alias(to_pci_dev(dev),
2207 domain_context_mapped_cb, iommu);
2210 /* Returns a number of VTD pages, but aligned to MM page size */
2211 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2214 host_addr &= ~PAGE_MASK;
2215 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
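/*
 * e.g. with 4KiB MM pages, host_addr = 0x1234 and size = 0x2000: the page
 * offset is 0x234, PAGE_ALIGN(0x234 + 0x2000) = 0x3000, so 3 VT-d pages.
 */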
2218 /* Return largest possible superpage level for a given mapping */
2219 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2220 unsigned long iov_pfn,
2221 unsigned long phy_pfn,
2222 unsigned long pages)
2224 int support, level = 1;
2225 unsigned long pfnmerge;
2227 support = domain->iommu_superpage;
2229 /* To use a large page, the virtual *and* physical addresses
2230 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2231 of them will mean we have to use smaller pages. So just
2232 merge them and check both at once. */
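/*
 * e.g. iov_pfn and phy_pfn both 512-aligned (2MiB-aligned addresses) with
 * at least 512 pages to map allows level 2; 512 * 512 alignment and length
 * allows level 3 (1GiB), capped by domain->iommu_superpage.
 */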
2233 pfnmerge = iov_pfn | phy_pfn;
2235 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2236 pages >>= VTD_STRIDE_SHIFT;
2239 pfnmerge >>= VTD_STRIDE_SHIFT;
2246 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2247 struct scatterlist *sg, unsigned long phys_pfn,
2248 unsigned long nr_pages, int prot)
2250 struct dma_pte *first_pte = NULL, *pte = NULL;
2251 phys_addr_t uninitialized_var(pteval);
2252 unsigned long sg_res = 0;
2253 unsigned int largepage_lvl = 0;
2254 unsigned long lvl_pages = 0;
2257 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2259 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2262 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2263 if (domain_use_first_level(domain))
2264 attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
2268 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2271 while (nr_pages > 0) {
2275 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2277 sg_res = aligned_nrpages(sg->offset, sg->length);
2278 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2279 sg->dma_length = sg->length;
2280 pteval = (sg_phys(sg) - pgoff) | attr;
2281 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2285 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2287 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2290 /* It is a large page */
2291 if (largepage_lvl > 1) {
2292 unsigned long nr_superpages, end_pfn;
2294 pteval |= DMA_PTE_LARGE_PAGE;
2295 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2297 nr_superpages = sg_res / lvl_pages;
2298 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2301 * Ensure that old small page tables are
2302 * removed to make room for superpage(s).
2303 * We're adding new large pages, so make sure
2304 * we don't remove their parent tables.
2306 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2309 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2313 /* We don't need lock here, nobody else
2314 * touches the iova range
2316 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2318 static int dumps = 5;
2319 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2320 iov_pfn, tmp, (unsigned long long)pteval);
2323 debug_dma_dump_mappings(NULL);
2328 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2330 BUG_ON(nr_pages < lvl_pages);
2331 BUG_ON(sg_res < lvl_pages);
2333 nr_pages -= lvl_pages;
2334 iov_pfn += lvl_pages;
2335 phys_pfn += lvl_pages;
2336 pteval += lvl_pages * VTD_PAGE_SIZE;
2337 sg_res -= lvl_pages;
2339 /* If the next PTE would be the first in a new page, then we
2340 need to flush the cache on the entries we've just written.
2341 And then we'll need to recalculate 'pte', so clear it and
2342 let it get set again in the if (!pte) block above.
2344 If we're done (!nr_pages) we need to flush the cache too.
2346 Also if we've been setting superpages, we may need to
2347 recalculate 'pte' and switch back to smaller pages for the
2348 end of the mapping, if the trailing size is not enough to
2349 use another superpage (i.e. sg_res < lvl_pages). */
2351 if (!nr_pages || first_pte_in_page(pte) ||
2352 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2353 domain_flush_cache(domain, first_pte,
2354 (void *)pte - (void *)first_pte);
2358 if (!sg_res && nr_pages)
2364 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2365 struct scatterlist *sg, unsigned long phys_pfn,
2366 unsigned long nr_pages, int prot)
2369 struct intel_iommu *iommu;
2371 /* Do the real mapping first */
2372 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2376 for_each_domain_iommu(iommu_id, domain) {
2377 iommu = g_iommus[iommu_id];
2378 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2384 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2385 struct scatterlist *sg, unsigned long nr_pages,
2388 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2391 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2392 unsigned long phys_pfn, unsigned long nr_pages,
2395 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
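/*
 * Illustrative example (not taken from the original source): mapping a
 * single 4KiB page read/write at IOVA pfn 0x1000, backed by physical
 * pfn 0x2000, would be:
 *
 *	domain_pfn_mapping(domain, 0x1000, 0x2000, 1,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 */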
2398 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2400 unsigned long flags;
2401 struct context_entry *context;
2407 spin_lock_irqsave(&iommu->lock, flags);
2408 context = iommu_context_addr(iommu, bus, devfn, 0);
2410 spin_unlock_irqrestore(&iommu->lock, flags);
2413 did_old = context_domain_id(context);
2414 context_clear_entry(context);
2415 __iommu_flush_cache(iommu, context, sizeof(*context));
2416 spin_unlock_irqrestore(&iommu->lock, flags);
2417 iommu->flush.flush_context(iommu,
2419 (((u16)bus) << 8) | devfn,
2420 DMA_CCMD_MASK_NOBIT,
2421 DMA_CCMD_DEVICE_INVL);
2422 iommu->flush.flush_iotlb(iommu,
2429 static inline void unlink_domain_info(struct device_domain_info *info)
2431 assert_spin_locked(&device_domain_lock);
2432 list_del(&info->link);
2433 list_del(&info->global);
2435 info->dev->archdata.iommu = NULL;
2438 static void domain_remove_dev_info(struct dmar_domain *domain)
2440 struct device_domain_info *info, *tmp;
2441 unsigned long flags;
2443 spin_lock_irqsave(&device_domain_lock, flags);
2444 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2445 __dmar_remove_one_dev_info(info);
2446 spin_unlock_irqrestore(&device_domain_lock, flags);
2449 struct dmar_domain *find_domain(struct device *dev)
2451 struct device_domain_info *info;
2453 if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
2456 if (dev_is_pci(dev))
2457 dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
2459 /* No lock here, assumes no domain exit in normal case */
2460 info = dev->archdata.iommu;
2462 return info->domain;
2467 static void do_deferred_attach(struct device *dev)
2469 struct iommu_domain *domain;
2471 dev->archdata.iommu = NULL;
2472 domain = iommu_get_domain_for_dev(dev);
2474 intel_iommu_attach_device(domain, dev);
2477 static inline struct device_domain_info *
2478 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2480 struct device_domain_info *info;
2482 list_for_each_entry(info, &device_domain_list, global)
2483 if (info->iommu->segment == segment && info->bus == bus &&
2484 info->devfn == devfn)
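/*
 * domain_setup_first_level() programs a first-level (scalable-mode)
 * PASID entry for the device, skipping top levels of the domain's page
 * table when the IOMMU supports a smaller agaw than the domain's.
 */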
2490 static int domain_setup_first_level(struct intel_iommu *iommu,
2491 struct dmar_domain *domain,
2495 int flags = PASID_FLAG_SUPERVISOR_MODE;
2496 struct dma_pte *pgd = domain->pgd;
2500 * Skip top levels of page tables for IOMMUs which have
2501 * a smaller agaw than the default. Unnecessary for PT mode.
2503 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2504 pgd = phys_to_virt(dma_pte_addr(pgd));
2505 if (!dma_pte_present(pgd))
2509 level = agaw_to_level(agaw);
2510 if (level != 4 && level != 5)
2513 flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
2515 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2516 domain->iommu_did[iommu->seq_id],
2520 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2523 struct dmar_domain *domain)
2525 struct dmar_domain *found = NULL;
2526 struct device_domain_info *info;
2527 unsigned long flags;
2530 info = alloc_devinfo_mem();
2535 info->devfn = devfn;
2536 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2537 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2540 info->domain = domain;
2541 info->iommu = iommu;
2542 info->pasid_table = NULL;
2543 info->auxd_enabled = 0;
2544 INIT_LIST_HEAD(&info->auxiliary_domains);
2546 if (dev && dev_is_pci(dev)) {
2547 struct pci_dev *pdev = to_pci_dev(info->dev);
2549 if (!pdev->untrusted &&
2550 !pci_ats_disabled() &&
2551 ecap_dev_iotlb_support(iommu->ecap) &&
2552 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2553 dmar_find_matched_atsr_unit(pdev))
2554 info->ats_supported = 1;
2556 if (sm_supported(iommu)) {
2557 if (pasid_supported(iommu)) {
2558 int features = pci_pasid_features(pdev);
2560 info->pasid_supported = features | 1;
2563 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2564 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2565 info->pri_supported = 1;
2569 spin_lock_irqsave(&device_domain_lock, flags);
2571 found = find_domain(dev);
2574 struct device_domain_info *info2;
2575 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2577 found = info2->domain;
2583 spin_unlock_irqrestore(&device_domain_lock, flags);
2584 free_devinfo_mem(info);
2585 /* Caller must free the original domain */
2589 spin_lock(&iommu->lock);
2590 ret = domain_attach_iommu(domain, iommu);
2591 spin_unlock(&iommu->lock);
2594 spin_unlock_irqrestore(&device_domain_lock, flags);
2595 free_devinfo_mem(info);
2599 list_add(&info->link, &domain->devices);
2600 list_add(&info->global, &device_domain_list);
2602 dev->archdata.iommu = info;
2603 spin_unlock_irqrestore(&device_domain_lock, flags);
2605 /* PASID table is mandatory for a PCI device in scalable mode. */
2606 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2607 ret = intel_pasid_alloc_table(dev);
2609 dev_err(dev, "PASID table allocation failed\n");
2610 dmar_remove_one_dev_info(dev);
2614 /* Setup the PASID entry for requests without PASID: */
2615 spin_lock(&iommu->lock);
2616 if (hw_pass_through && domain_type_is_si(domain))
2617 ret = intel_pasid_setup_pass_through(iommu, domain,
2618 dev, PASID_RID2PASID);
2619 else if (domain_use_first_level(domain))
2620 ret = domain_setup_first_level(iommu, domain, dev,
2623 ret = intel_pasid_setup_second_level(iommu, domain,
2624 dev, PASID_RID2PASID);
2625 spin_unlock(&iommu->lock);
2627 dev_err(dev, "Setup RID2PASID failed\n");
2628 dmar_remove_one_dev_info(dev);
2633 if (dev && domain_context_mapping(domain, dev)) {
2634 dev_err(dev, "Domain context map failed\n");
2635 dmar_remove_one_dev_info(dev);
2642 static int iommu_domain_identity_map(struct dmar_domain *domain,
2643 unsigned long long start,
2644 unsigned long long end)
2646 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2647 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2649 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2650 dma_to_mm_pfn(last_vpfn))) {
2651 pr_err("Reserving iova failed\n");
2655 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2657 * The RMRR range might overlap with the physical memory range, so clear any existing mapping first.
2660 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2662 return __domain_mapping(domain, first_vpfn, NULL,
2663 first_vpfn, last_vpfn - first_vpfn + 1,
2664 DMA_PTE_READ|DMA_PTE_WRITE);
2667 static int md_domain_init(struct dmar_domain *domain, int guest_width);
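/*
 * si_domain_init() builds the static identity (si) domain: every usable
 * memory range of every online node is identity mapped, and the RMRR
 * ranges are mapped on top so that devices with RMRRs can use it too.
 */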
2669 static int __init si_domain_init(int hw)
2671 struct dmar_rmrr_unit *rmrr;
2675 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2679 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2680 domain_exit(si_domain);
2687 for_each_online_node(nid) {
2688 unsigned long start_pfn, end_pfn;
2691 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2692 ret = iommu_domain_identity_map(si_domain,
2693 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2700 * Identity map the RMRRs so that devices with RMRRs can also use the si_domain.
2703 for_each_rmrr_units(rmrr) {
2704 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2706 unsigned long long start = rmrr->base_address;
2707 unsigned long long end = rmrr->end_address;
2709 if (WARN_ON(end < start ||
2710 end >> agaw_to_width(si_domain->agaw)))
2713 ret = iommu_domain_identity_map(si_domain, start, end);
2722 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2724 struct dmar_domain *ndomain;
2725 struct intel_iommu *iommu;
2728 iommu = device_to_iommu(dev, &bus, &devfn);
2732 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2733 if (ndomain != domain)
2739 static bool device_has_rmrr(struct device *dev)
2741 struct dmar_rmrr_unit *rmrr;
2746 for_each_rmrr_units(rmrr) {
2748 * Return TRUE if this RMRR contains the device that was passed in.
2751 for_each_active_dev_scope(rmrr->devices,
2752 rmrr->devices_cnt, i, tmp)
2754 is_downstream_to_pci_bridge(dev, tmp)) {
2764 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2765 * is relaxable (ie. is allowed to be not enforced under some conditions)
2766 * @dev: device handle
2768 * We assume that PCI USB devices with RMRRs have them largely
2769 * for historical reasons and that the RMRR space is not actively used post
2770 * boot. This exclusion may change if vendors begin to abuse it.
2772 * The same exception is made for graphics devices, with the requirement that
2773 * any use of the RMRR regions will be torn down before assigning the device to a guest.
2776 * Return: true if the RMRR is relaxable, false otherwise
2778 static bool device_rmrr_is_relaxable(struct device *dev)
2780 struct pci_dev *pdev;
2782 if (!dev_is_pci(dev))
2785 pdev = to_pci_dev(dev);
2786 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2793 * There are a couple cases where we need to restrict the functionality of
2794 * devices associated with RMRRs. The first is when evaluating a device for
2795 * identity mapping because problems exist when devices are moved in and out
2796 * of domains and their respective RMRR information is lost. This means that
2797 * a device with associated RMRRs will never be in a "passthrough" domain.
2798 * The second is use of the device through the IOMMU API. This interface
2799 * expects to have full control of the IOVA space for the device. We cannot
2800 * satisfy both the requirement that RMRR access is maintained and have an
2801 * unencumbered IOVA space. We also have no ability to quiesce the device's
2802 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2803 * We therefore prevent devices associated with an RMRR from participating in
2804 * the IOMMU API, which eliminates them from device assignment.
2806 * In both cases, devices which have relaxable RMRRs are not concerned by this
2807 * restriction. See device_rmrr_is_relaxable comment.
2809 static bool device_is_rmrr_locked(struct device *dev)
2811 if (!device_has_rmrr(dev))
2814 if (device_rmrr_is_relaxable(dev))
2821 * Return the required default domain type for a specific device.
2823 * @dev: the device in query
2827 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2828 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2829 * - 0: both identity and dynamic domains work for this device
2831 static int device_def_domain_type(struct device *dev)
2833 if (dev_is_pci(dev)) {
2834 struct pci_dev *pdev = to_pci_dev(dev);
2837 * Prevent any device marked as untrusted from getting
2838 * placed into the statically identity mapping domain.
2840 if (pdev->untrusted)
2841 return IOMMU_DOMAIN_DMA;
2843 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2844 return IOMMU_DOMAIN_IDENTITY;
2846 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2847 return IOMMU_DOMAIN_IDENTITY;
2853 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2856 * Start from a sane iommu hardware state.
2857 * If queued invalidation has already been initialized by us
2858 * (for example, while enabling interrupt remapping), then
2859 * things are already rolling from a sane state.
2863 * Clear any previous faults.
2865 dmar_fault(-1, iommu);
2867 * Disable queued invalidation if supported and already enabled
2868 * before OS handover.
2870 dmar_disable_qi(iommu);
2873 if (dmar_enable_qi(iommu)) {
2875 * Queued Invalidate not enabled, use Register Based Invalidate
2877 iommu->flush.flush_context = __iommu_flush_context;
2878 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2879 pr_info("%s: Using Register based invalidation\n",
2882 iommu->flush.flush_context = qi_flush_context;
2883 iommu->flush.flush_iotlb = qi_flush_iotlb;
2884 pr_info("%s: Using Queued invalidation\n", iommu->name);
2888 static int copy_context_table(struct intel_iommu *iommu,
2889 struct root_entry *old_re,
2890 struct context_entry **tbl,
2893 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2894 struct context_entry *new_ce = NULL, ce;
2895 struct context_entry *old_ce = NULL;
2896 struct root_entry re;
2897 phys_addr_t old_ce_phys;
2899 tbl_idx = ext ? bus * 2 : bus;
2900 memcpy(&re, old_re, sizeof(re));
2902 for (devfn = 0; devfn < 256; devfn++) {
2903 /* First calculate the correct index */
2904 idx = (ext ? devfn * 2 : devfn) % 256;
2907 /* First save what we may have and clean up */
2909 tbl[tbl_idx] = new_ce;
2910 __iommu_flush_cache(iommu, new_ce,
2920 old_ce_phys = root_entry_lctp(&re);
2922 old_ce_phys = root_entry_uctp(&re);
2925 if (ext && devfn == 0) {
2926 /* No LCTP, try UCTP */
2935 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2940 new_ce = alloc_pgtable_page(iommu->node);
2947 /* Now copy the context entry */
2948 memcpy(&ce, old_ce + idx, sizeof(ce));
2950 if (!__context_present(&ce))
2953 did = context_domain_id(&ce);
2954 if (did >= 0 && did < cap_ndoms(iommu->cap))
2955 set_bit(did, iommu->domain_ids);
2958 * We need a marker for copied context entries. This
2959 * marker needs to work for the old format as well as
2960 * for extended context entries.
2962 * Bit 67 of the context entry is used. In the old
2963 * format this bit is available to software, in the
2964 * extended format it is the PGE bit, but PGE is ignored
2965 * by HW if PASIDs are disabled (and thus the bit is still available to software).
2968 * So disable PASIDs first and then mark the entry
2969 * copied. This means that we don't copy PASID
2970 * translations from the old kernel, but this is fine as
2971 * faults there are not fatal.
2973 context_clear_pasid_enable(&ce);
2974 context_set_copied(&ce);
2979 tbl[tbl_idx + pos] = new_ce;
2981 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
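/*
 * copy_translation_tables() supports the kdump case: it reads
 * DMAR_RTADDR_REG to locate the previous kernel's root table, memremaps
 * it, and copies the per-bus context tables into freshly allocated
 * pages before the new root table is installed.
 */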
2990 static int copy_translation_tables(struct intel_iommu *iommu)
2992 struct context_entry **ctxt_tbls;
2993 struct root_entry *old_rt;
2994 phys_addr_t old_rt_phys;
2995 int ctxt_table_entries;
2996 unsigned long flags;
3001 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3002 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3003 new_ext = !!ecap_ecs(iommu->ecap);
3006 * The RTT bit can only be changed when translation is disabled,
3007 * but disabling translation would open a window for data
3008 * corruption. So bail out and don't copy anything if we would
3009 * have to change the bit.
3014 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3018 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3022 /* This is too big for the stack - allocate it from slab */
3023 ctxt_table_entries = ext ? 512 : 256;
3025 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3029 for (bus = 0; bus < 256; bus++) {
3030 ret = copy_context_table(iommu, &old_rt[bus],
3031 ctxt_tbls, bus, ext);
3033 pr_err("%s: Failed to copy context table for bus %d\n",
3039 spin_lock_irqsave(&iommu->lock, flags);
3041 /* Context tables are copied, now write them to the root_entry table */
3042 for (bus = 0; bus < 256; bus++) {
3043 int idx = ext ? bus * 2 : bus;
3046 if (ctxt_tbls[idx]) {
3047 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3048 iommu->root_entry[bus].lo = val;
3051 if (!ext || !ctxt_tbls[idx + 1])
3054 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3055 iommu->root_entry[bus].hi = val;
3058 spin_unlock_irqrestore(&iommu->lock, flags);
3062 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
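/*
 * init_dmars() performs the boot-time bring-up: allocate the global
 * iommu array, set up domains, root entries and invalidation for every
 * DRHD, optionally copy translation tables from a previous kernel,
 * build the si_domain, and set up the fault-reporting interrupts.
 */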
3072 static int __init init_dmars(void)
3074 struct dmar_drhd_unit *drhd;
3075 struct intel_iommu *iommu;
3081 * initialize and program root entry to not present
3084 for_each_drhd_unit(drhd) {
3086 * No lock needed; this is only incremented in the single-threaded
3087 * kernel __init code path. All other accesses are read-only.
3090 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3094 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3097 /* Preallocate enough resources for IOMMU hot-addition */
3098 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3099 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3101 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3104 pr_err("Allocating global iommu array failed\n");
3109 for_each_iommu(iommu, drhd) {
3110 if (drhd->ignored) {
3111 iommu_disable_translation(iommu);
3116 * Find the max pasid size of all IOMMUs in the system.
3117 * We need to ensure the system pasid table is no bigger
3118 * than the smallest supported size.
3120 if (pasid_supported(iommu)) {
3121 u32 temp = 2 << ecap_pss(iommu->ecap);
3123 intel_pasid_max_id = min_t(u32, temp,
3124 intel_pasid_max_id);
3127 g_iommus[iommu->seq_id] = iommu;
3129 intel_iommu_init_qi(iommu);
3131 ret = iommu_init_domains(iommu);
3135 init_translation_status(iommu);
3137 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3138 iommu_disable_translation(iommu);
3139 clear_translation_pre_enabled(iommu);
3140 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3146 * We could share the same root & context tables
3147 * among all IOMMUs; this needs to be split out later.
3149 ret = iommu_alloc_root_entry(iommu);
3153 if (translation_pre_enabled(iommu)) {
3154 pr_info("Translation already enabled - trying to copy translation structures\n");
3156 ret = copy_translation_tables(iommu);
3159 * We found the IOMMU with translation
3160 * enabled - but failed to copy over the
3161 * old root-entry table. Try to proceed
3162 * by disabling translation now and
3163 * allocating a clean root-entry table.
3164 * This might cause DMAR faults, but
3165 * probably the dump will still succeed.
3167 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3169 iommu_disable_translation(iommu);
3170 clear_translation_pre_enabled(iommu);
3172 pr_info("Copied translation tables from previous kernel for %s\n",
3177 if (!ecap_pass_through(iommu->ecap))
3178 hw_pass_through = 0;
3179 intel_svm_check(iommu);
3183 * Now that qi is enabled on all iommus, set the root entry and flush
3184 * caches. This is required on some Intel X58 chipsets, otherwise the
3185 * flush_context function will loop forever and the boot hangs.
3187 for_each_active_iommu(iommu, drhd) {
3188 iommu_flush_write_buffer(iommu);
3189 iommu_set_root_entry(iommu);
3190 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3191 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3194 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3199 iommu_identity_mapping |= IDENTMAP_GFX;
3201 check_tylersburg_isoch();
3203 ret = si_domain_init(hw_pass_through);
3210 * global invalidate context cache
3211 * global invalidate iotlb
3212 * enable translation
3214 for_each_iommu(iommu, drhd) {
3215 if (drhd->ignored) {
3217 * we always have to disable PMRs or DMA may fail on this device
3221 iommu_disable_protect_mem_regions(iommu);
3225 iommu_flush_write_buffer(iommu);
3227 #ifdef CONFIG_INTEL_IOMMU_SVM
3228 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3230 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3231 * could cause a lock race condition.
3233 up_write(&dmar_global_lock);
3234 ret = intel_svm_enable_prq(iommu);
3235 down_write(&dmar_global_lock);
3240 ret = dmar_set_interrupt(iommu);
3248 for_each_active_iommu(iommu, drhd) {
3249 disable_dmar_iommu(iommu);
3250 free_dmar_iommu(iommu);
3259 /* This takes a number of _MM_ pages, not VTD pages */
3260 static unsigned long intel_alloc_iova(struct device *dev,
3261 struct dmar_domain *domain,
3262 unsigned long nrpages, uint64_t dma_mask)
3264 unsigned long iova_pfn;
3267 * Restrict dma_mask to the width that the iommu can handle.
3268 * First-level translation restricts the input-address to a
3269 * canonical address (i.e., address bits 63:N have the same
3270 * value as address bit [N-1], where N is 48-bits with 4-level
3271 * paging and 57-bits with 5-level paging). Hence, skip bit [N-1].
3274 if (domain_use_first_level(domain))
3275 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
3278 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
3281 /* Ensure we reserve the whole size-aligned region */
3282 nrpages = __roundup_pow_of_two(nrpages);
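/*
 * e.g. a 5-page request is rounded up to 8 pages: the IOVA allocator
 * aligns size-aligned allocations to a power-of-two size, so rounding
 * up keeps the whole region inside one size-aligned slot.
 */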
3284 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3286 * First try to allocate an io virtual address in
3287 * DMA_BIT_MASK(32) and if that fails then try allocating from the higher range.
3290 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3291 IOVA_PFN(DMA_BIT_MASK(32)), false);
3295 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3296 IOVA_PFN(dma_mask), true);
3297 if (unlikely(!iova_pfn)) {
3298 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3306 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3307 size_t size, int dir, u64 dma_mask)
3309 struct dmar_domain *domain;
3310 phys_addr_t start_paddr;
3311 unsigned long iova_pfn;
3314 struct intel_iommu *iommu;
3315 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3317 BUG_ON(dir == DMA_NONE);
3319 if (unlikely(attach_deferred(dev)))
3320 do_deferred_attach(dev);
3322 domain = find_domain(dev);
3324 return DMA_MAPPING_ERROR;
3326 iommu = domain_get_iommu(domain);
3327 size = aligned_nrpages(paddr, size);
3329 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3334 * Check if DMAR supports zero-length reads on write-only mappings.
3337 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3338 !cap_zlr(iommu->cap))
3339 prot |= DMA_PTE_READ;
3340 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3341 prot |= DMA_PTE_WRITE;
3343 * paddr - (paddr + size) might span a partial page, so we should map the whole
3344 * page. Note: if two parts of one page are mapped separately, we
3345 * might have two guest addresses mapping to the same host paddr, but this
3346 * is not a big problem.
3348 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3349 mm_to_dma_pfn(paddr_pfn), size, prot);
3353 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3354 start_paddr += paddr & ~PAGE_MASK;
3356 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3362 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3363 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3364 size, (unsigned long long)paddr, dir);
3365 return DMA_MAPPING_ERROR;
3368 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3369 unsigned long offset, size_t size,
3370 enum dma_data_direction dir,
3371 unsigned long attrs)
3373 return __intel_map_single(dev, page_to_phys(page) + offset,
3374 size, dir, *dev->dma_mask);
3377 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3378 size_t size, enum dma_data_direction dir,
3379 unsigned long attrs)
3381 return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
3384 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3386 struct dmar_domain *domain;
3387 unsigned long start_pfn, last_pfn;
3388 unsigned long nrpages;
3389 unsigned long iova_pfn;
3390 struct intel_iommu *iommu;
3391 struct page *freelist;
3392 struct pci_dev *pdev = NULL;
3394 domain = find_domain(dev);
3397 iommu = domain_get_iommu(domain);
3399 iova_pfn = IOVA_PFN(dev_addr);
3401 nrpages = aligned_nrpages(dev_addr, size);
3402 start_pfn = mm_to_dma_pfn(iova_pfn);
3403 last_pfn = start_pfn + nrpages - 1;
3405 if (dev_is_pci(dev))
3406 pdev = to_pci_dev(dev);
3408 freelist = domain_unmap(domain, start_pfn, last_pfn);
3409 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3410 !has_iova_flush_queue(&domain->iovad)) {
3411 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3412 nrpages, !freelist, 0);
3414 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3415 dma_free_pagelist(freelist);
3417 queue_iova(&domain->iovad, iova_pfn, nrpages,
3418 (unsigned long)freelist);
3420 * queue up the release of the unmap to save the 1/6th of the
3421 * cpu used up by the iotlb flush operation...
3425 trace_unmap_single(dev, dev_addr, size);
3428 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3429 size_t size, enum dma_data_direction dir,
3430 unsigned long attrs)
3432 intel_unmap(dev, dev_addr, size);
3435 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3436 size_t size, enum dma_data_direction dir, unsigned long attrs)
3438 intel_unmap(dev, dev_addr, size);
3441 static void *intel_alloc_coherent(struct device *dev, size_t size,
3442 dma_addr_t *dma_handle, gfp_t flags,
3443 unsigned long attrs)
3445 struct page *page = NULL;
3448 if (unlikely(attach_deferred(dev)))
3449 do_deferred_attach(dev);
3451 size = PAGE_ALIGN(size);
3452 order = get_order(size);
3454 if (gfpflags_allow_blocking(flags)) {
3455 unsigned int count = size >> PAGE_SHIFT;
3457 page = dma_alloc_from_contiguous(dev, count, order,
3458 flags & __GFP_NOWARN);
3462 page = alloc_pages(flags, order);
3465 memset(page_address(page), 0, size);
3467 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3469 dev->coherent_dma_mask);
3470 if (*dma_handle != DMA_MAPPING_ERROR)
3471 return page_address(page);
3472 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3473 __free_pages(page, order);
3478 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3479 dma_addr_t dma_handle, unsigned long attrs)
3482 struct page *page = virt_to_page(vaddr);
3484 size = PAGE_ALIGN(size);
3485 order = get_order(size);
3487 intel_unmap(dev, dma_handle, size);
3488 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3489 __free_pages(page, order);
3492 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3493 int nelems, enum dma_data_direction dir,
3494 unsigned long attrs)
3496 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3497 unsigned long nrpages = 0;
3498 struct scatterlist *sg;
3501 for_each_sg(sglist, sg, nelems, i) {
3502 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3505 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3507 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3510 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3511 enum dma_data_direction dir, unsigned long attrs)
3514 struct dmar_domain *domain;
3517 unsigned long iova_pfn;
3519 struct scatterlist *sg;
3520 unsigned long start_vpfn;
3521 struct intel_iommu *iommu;
3523 BUG_ON(dir == DMA_NONE);
3525 if (unlikely(attach_deferred(dev)))
3526 do_deferred_attach(dev);
3528 domain = find_domain(dev);
3532 iommu = domain_get_iommu(domain);
3534 for_each_sg(sglist, sg, nelems, i)
3535 size += aligned_nrpages(sg->offset, sg->length);
3537 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3540 sglist->dma_length = 0;
3545 * Check if DMAR supports zero-length reads on write-only mappings.
3548 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3549 !cap_zlr(iommu->cap))
3550 prot |= DMA_PTE_READ;
3551 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3552 prot |= DMA_PTE_WRITE;
3554 start_vpfn = mm_to_dma_pfn(iova_pfn);
3556 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3557 if (unlikely(ret)) {
3558 dma_pte_free_pagetable(domain, start_vpfn,
3559 start_vpfn + size - 1,
3560 agaw_to_level(domain->agaw) + 1);
3561 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3565 for_each_sg(sglist, sg, nelems, i)
3566 trace_map_sg(dev, i + 1, nelems, sg);
3571 static u64 intel_get_required_mask(struct device *dev)
3573 return DMA_BIT_MASK(32);
3576 static const struct dma_map_ops intel_dma_ops = {
3577 .alloc = intel_alloc_coherent,
3578 .free = intel_free_coherent,
3579 .map_sg = intel_map_sg,
3580 .unmap_sg = intel_unmap_sg,
3581 .map_page = intel_map_page,
3582 .unmap_page = intel_unmap_page,
3583 .map_resource = intel_map_resource,
3584 .unmap_resource = intel_unmap_resource,
3585 .dma_supported = dma_direct_supported,
3586 .mmap = dma_common_mmap,
3587 .get_sgtable = dma_common_get_sgtable,
3588 .get_required_mask = intel_get_required_mask,
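/*
 * Bounce-buffer DMA ops, used for untrusted devices: any buffer that is
 * not aligned to a whole VTD_PAGE is bounced through swiotlb so that
 * only full IOMMU pages are ever exposed to the device.
 */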
3592 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3593 enum dma_data_direction dir, enum dma_sync_target target)
3595 struct dmar_domain *domain;
3596 phys_addr_t tlb_addr;
3598 domain = find_domain(dev);
3599 if (WARN_ON(!domain))
3602 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3603 if (is_swiotlb_buffer(tlb_addr))
3604 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3608 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3609 enum dma_data_direction dir, unsigned long attrs,
3612 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3613 struct dmar_domain *domain;
3614 struct intel_iommu *iommu;
3615 unsigned long iova_pfn;
3616 unsigned long nrpages;
3617 phys_addr_t tlb_addr;
3621 if (unlikely(attach_deferred(dev)))
3622 do_deferred_attach(dev);
3624 domain = find_domain(dev);
3626 if (WARN_ON(dir == DMA_NONE || !domain))
3627 return DMA_MAPPING_ERROR;
3629 iommu = domain_get_iommu(domain);
3630 if (WARN_ON(!iommu))
3631 return DMA_MAPPING_ERROR;
3633 nrpages = aligned_nrpages(0, size);
3634 iova_pfn = intel_alloc_iova(dev, domain,
3635 dma_to_mm_pfn(nrpages), dma_mask);
3637 return DMA_MAPPING_ERROR;
3640 * Check if DMAR supports zero-length reads on write-only mappings.
3643 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3644 !cap_zlr(iommu->cap))
3645 prot |= DMA_PTE_READ;
3646 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3647 prot |= DMA_PTE_WRITE;
3650 * If both the physical buffer start address and size are
3651 * page aligned, we don't need to use a bounce page.
3653 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3654 tlb_addr = swiotlb_tbl_map_single(dev,
3655 __phys_to_dma(dev, io_tlb_start),
3656 paddr, size, aligned_size, dir, attrs);
3657 if (tlb_addr == DMA_MAPPING_ERROR) {
3660 /* Cleanup the padding area. */
3661 void *padding_start = phys_to_virt(tlb_addr);
3662 size_t padding_size = aligned_size;
3664 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3665 (dir == DMA_TO_DEVICE ||
3666 dir == DMA_BIDIRECTIONAL)) {
3667 padding_start += size;
3668 padding_size -= size;
3671 memset(padding_start, 0, padding_size);
3677 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3678 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3682 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3684 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3687 if (is_swiotlb_buffer(tlb_addr))
3688 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3689 aligned_size, dir, attrs);
3691 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3692 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3693 size, (unsigned long long)paddr, dir);
3695 return DMA_MAPPING_ERROR;
3699 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3700 enum dma_data_direction dir, unsigned long attrs)
3702 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3703 struct dmar_domain *domain;
3704 phys_addr_t tlb_addr;
3706 domain = find_domain(dev);
3707 if (WARN_ON(!domain))
3710 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3711 if (WARN_ON(!tlb_addr))
3714 intel_unmap(dev, dev_addr, size);
3715 if (is_swiotlb_buffer(tlb_addr))
3716 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3717 aligned_size, dir, attrs);
3719 trace_bounce_unmap_single(dev, dev_addr, size);
3723 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3724 size_t size, enum dma_data_direction dir, unsigned long attrs)
3726 return bounce_map_single(dev, page_to_phys(page) + offset,
3727 size, dir, attrs, *dev->dma_mask);
3731 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3732 enum dma_data_direction dir, unsigned long attrs)
3734 return bounce_map_single(dev, phys_addr, size,
3735 dir, attrs, *dev->dma_mask);
3739 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3740 enum dma_data_direction dir, unsigned long attrs)
3742 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3746 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3747 enum dma_data_direction dir, unsigned long attrs)
3749 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3753 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3754 enum dma_data_direction dir, unsigned long attrs)
3756 struct scatterlist *sg;
3759 for_each_sg(sglist, sg, nelems, i)
3760 bounce_unmap_page(dev, sg->dma_address,
3761 sg_dma_len(sg), dir, attrs);
3765 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3766 enum dma_data_direction dir, unsigned long attrs)
3769 struct scatterlist *sg;
3771 for_each_sg(sglist, sg, nelems, i) {
3772 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3773 sg->offset, sg->length,
3775 if (sg->dma_address == DMA_MAPPING_ERROR)
3777 sg_dma_len(sg) = sg->length;
3780 for_each_sg(sglist, sg, nelems, i)
3781 trace_bounce_map_sg(dev, i + 1, nelems, sg);
3786 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3791 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3792 size_t size, enum dma_data_direction dir)
3794 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3798 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
3799 size_t size, enum dma_data_direction dir)
3801 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
3805 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
3806 int nelems, enum dma_data_direction dir)
3808 struct scatterlist *sg;
3811 for_each_sg(sglist, sg, nelems, i)
3812 bounce_sync_single(dev, sg_dma_address(sg),
3813 sg_dma_len(sg), dir, SYNC_FOR_CPU);
3817 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
3818 int nelems, enum dma_data_direction dir)
3820 struct scatterlist *sg;
3823 for_each_sg(sglist, sg, nelems, i)
3824 bounce_sync_single(dev, sg_dma_address(sg),
3825 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
3828 static const struct dma_map_ops bounce_dma_ops = {
3829 .alloc = intel_alloc_coherent,
3830 .free = intel_free_coherent,
3831 .map_sg = bounce_map_sg,
3832 .unmap_sg = bounce_unmap_sg,
3833 .map_page = bounce_map_page,
3834 .unmap_page = bounce_unmap_page,
3835 .sync_single_for_cpu = bounce_sync_single_for_cpu,
3836 .sync_single_for_device = bounce_sync_single_for_device,
3837 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
3838 .sync_sg_for_device = bounce_sync_sg_for_device,
3839 .map_resource = bounce_map_resource,
3840 .unmap_resource = bounce_unmap_resource,
3841 .dma_supported = dma_direct_supported,
3844 static inline int iommu_domain_cache_init(void)
3848 iommu_domain_cache = kmem_cache_create("iommu_domain",
3849 sizeof(struct dmar_domain),
3854 if (!iommu_domain_cache) {
3855 pr_err("Couldn't create iommu_domain cache\n");
3862 static inline int iommu_devinfo_cache_init(void)
3866 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3867 sizeof(struct device_domain_info),
3871 if (!iommu_devinfo_cache) {
3872 pr_err("Couldn't create devinfo cache\n");
3879 static int __init iommu_init_mempool(void)
3882 ret = iova_cache_get();
3886 ret = iommu_domain_cache_init();
3890 ret = iommu_devinfo_cache_init();
3894 kmem_cache_destroy(iommu_domain_cache);
3901 static void __init iommu_exit_mempool(void)
3903 kmem_cache_destroy(iommu_devinfo_cache);
3904 kmem_cache_destroy(iommu_domain_cache);
3908 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3910 struct dmar_drhd_unit *drhd;
3914 /* We know that this device on this chipset has its own IOMMU.
3915 * If we find it under a different IOMMU, then the BIOS is lying
3916 * to us. Hope that the IOMMU for this device is actually
3917 * disabled, and it needs no translation...
3919 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3921 /* "can't" happen */
3922 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3925 vtbar &= 0xffff0000;
3927 /* we know that this iommu should be at offset 0xa000 from vtbar */
3928 drhd = dmar_find_matched_drhd_unit(pdev);
3929 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
3930 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
3931 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3932 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3935 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
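/*
 * init_no_remapping_devices() looks for DMAR units that cover no devices
 * at all, and (unless dmar_map_gfx is set) units that cover only graphics
 * devices; their devices get the dummy domain info and are left
 * untranslated.
 */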
3937 static void __init init_no_remapping_devices(void)
3939 struct dmar_drhd_unit *drhd;
3943 for_each_drhd_unit(drhd) {
3944 if (!drhd->include_all) {
3945 for_each_active_dev_scope(drhd->devices,
3946 drhd->devices_cnt, i, dev)
3948 /* ignore DMAR unit if no devices exist */
3949 if (i == drhd->devices_cnt)
3954 for_each_active_drhd_unit(drhd) {
3955 if (drhd->include_all)
3958 for_each_active_dev_scope(drhd->devices,
3959 drhd->devices_cnt, i, dev)
3960 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3962 if (i < drhd->devices_cnt)
3965 /* This IOMMU has *only* gfx devices. Either bypass it or
3966 set the gfx_mapped flag, as appropriate */
3967 if (!dmar_map_gfx) {
3969 for_each_active_dev_scope(drhd->devices,
3970 drhd->devices_cnt, i, dev)
3971 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3976 #ifdef CONFIG_SUSPEND
3977 static int init_iommu_hw(void)
3979 struct dmar_drhd_unit *drhd;
3980 struct intel_iommu *iommu = NULL;
3982 for_each_active_iommu(iommu, drhd)
3984 dmar_reenable_qi(iommu);
3986 for_each_iommu(iommu, drhd) {
3987 if (drhd->ignored) {
3989 * we always have to disable PMRs or DMA may fail on this device
3993 iommu_disable_protect_mem_regions(iommu);
3997 iommu_flush_write_buffer(iommu);
3999 iommu_set_root_entry(iommu);
4001 iommu->flush.flush_context(iommu, 0, 0, 0,
4002 DMA_CCMD_GLOBAL_INVL);
4003 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4004 iommu_enable_translation(iommu);
4005 iommu_disable_protect_mem_regions(iommu);
4011 static void iommu_flush_all(void)
4013 struct dmar_drhd_unit *drhd;
4014 struct intel_iommu *iommu;
4016 for_each_active_iommu(iommu, drhd) {
4017 iommu->flush.flush_context(iommu, 0, 0, 0,
4018 DMA_CCMD_GLOBAL_INVL);
4019 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4020 DMA_TLB_GLOBAL_FLUSH);
4024 static int iommu_suspend(void)
4026 struct dmar_drhd_unit *drhd;
4027 struct intel_iommu *iommu = NULL;
4030 for_each_active_iommu(iommu, drhd) {
4031 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4033 if (!iommu->iommu_state)
4039 for_each_active_iommu(iommu, drhd) {
4040 iommu_disable_translation(iommu);
4042 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4044 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4045 readl(iommu->reg + DMAR_FECTL_REG);
4046 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4047 readl(iommu->reg + DMAR_FEDATA_REG);
4048 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4049 readl(iommu->reg + DMAR_FEADDR_REG);
4050 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4051 readl(iommu->reg + DMAR_FEUADDR_REG);
4053 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4058 for_each_active_iommu(iommu, drhd)
4059 kfree(iommu->iommu_state);
4064 static void iommu_resume(void)
4066 struct dmar_drhd_unit *drhd;
4067 struct intel_iommu *iommu = NULL;
4070 if (init_iommu_hw()) {
4072 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4074 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4078 for_each_active_iommu(iommu, drhd) {
4080 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4082 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4083 iommu->reg + DMAR_FECTL_REG);
4084 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4085 iommu->reg + DMAR_FEDATA_REG);
4086 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4087 iommu->reg + DMAR_FEADDR_REG);
4088 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4089 iommu->reg + DMAR_FEUADDR_REG);
4091 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4094 for_each_active_iommu(iommu, drhd)
4095 kfree(iommu->iommu_state);
4098 static struct syscore_ops iommu_syscore_ops = {
4099 .resume = iommu_resume,
4100 .suspend = iommu_suspend,
4103 static void __init init_iommu_pm_ops(void)
4105 register_syscore_ops(&iommu_syscore_ops);
4109 static inline void init_iommu_pm_ops(void) {}
4110 #endif /* CONFIG_PM */
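/*
 * An RMRR reported by firmware must be page aligned, non-empty and pass
 * the architecture-specific check; anything else is treated as a
 * firmware bug below and tainted accordingly.
 */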
4112 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
4114 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
4115 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
4116 rmrr->end_address <= rmrr->base_address ||
4117 arch_rmrr_sanity_check(rmrr))
4123 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4125 struct acpi_dmar_reserved_memory *rmrr;
4126 struct dmar_rmrr_unit *rmrru;
4128 rmrr = (struct acpi_dmar_reserved_memory *)header;
4129 if (rmrr_sanity_check(rmrr)) {
4131 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4132 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4133 rmrr->base_address, rmrr->end_address,
4134 dmi_get_system_info(DMI_BIOS_VENDOR),
4135 dmi_get_system_info(DMI_BIOS_VERSION),
4136 dmi_get_system_info(DMI_PRODUCT_VERSION));
4137 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4140 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4144 rmrru->hdr = header;
4146 rmrru->base_address = rmrr->base_address;
4147 rmrru->end_address = rmrr->end_address;
4149 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4150 ((void *)rmrr) + rmrr->header.length,
4151 &rmrru->devices_cnt);
4152 if (rmrru->devices_cnt && rmrru->devices == NULL)
4155 list_add(&rmrru->list, &dmar_rmrr_units);
4164 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4166 struct dmar_atsr_unit *atsru;
4167 struct acpi_dmar_atsr *tmp;
4169 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4171 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4172 if (atsr->segment != tmp->segment)
4174 if (atsr->header.length != tmp->header.length)
4176 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4183 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4185 struct acpi_dmar_atsr *atsr;
4186 struct dmar_atsr_unit *atsru;
4188 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4191 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4192 atsru = dmar_find_atsr(atsr);
4196 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4201 * If memory is allocated from slab by ACPI _DSM method, we need to
4202 * copy the memory content because the memory buffer will be freed on exit.
4205 atsru->hdr = (void *)(atsru + 1);
4206 memcpy(atsru->hdr, hdr, hdr->length);
4207 atsru->include_all = atsr->flags & 0x1;
4208 if (!atsru->include_all) {
4209 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4210 (void *)atsr + atsr->header.length,
4211 &atsru->devices_cnt);
4212 if (atsru->devices_cnt && atsru->devices == NULL) {
4218 list_add_rcu(&atsru->list, &dmar_atsr_units);
4223 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4225 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4229 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4231 struct acpi_dmar_atsr *atsr;
4232 struct dmar_atsr_unit *atsru;
4234 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4235 atsru = dmar_find_atsr(atsr);
4237 list_del_rcu(&atsru->list);
4239 intel_iommu_free_atsr(atsru);
4245 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4249 struct acpi_dmar_atsr *atsr;
4250 struct dmar_atsr_unit *atsru;
4252 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4253 atsru = dmar_find_atsr(atsr);
4257 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4258 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4266 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4269 struct intel_iommu *iommu = dmaru->iommu;
4271 if (g_iommus[iommu->seq_id])
4274 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4275 pr_warn("%s: Doesn't support hardware pass through.\n",
4279 if (!ecap_sc_support(iommu->ecap) &&
4280 domain_update_iommu_snooping(iommu)) {
4281 pr_warn("%s: Doesn't support snooping.\n",
4285 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
4286 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4287 pr_warn("%s: Doesn't support large page.\n",
4293 * Disable translation if already enabled prior to OS handover.
4295 if (iommu->gcmd & DMA_GCMD_TE)
4296 iommu_disable_translation(iommu);
4298 g_iommus[iommu->seq_id] = iommu;
4299 ret = iommu_init_domains(iommu);
4301 ret = iommu_alloc_root_entry(iommu);
4305 intel_svm_check(iommu);
4307 if (dmaru->ignored) {
4309 * we always have to disable PMRs or DMA may fail on this device
4312 iommu_disable_protect_mem_regions(iommu);
4316 intel_iommu_init_qi(iommu);
4317 iommu_flush_write_buffer(iommu);
4319 #ifdef CONFIG_INTEL_IOMMU_SVM
4320 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4321 ret = intel_svm_enable_prq(iommu);
4326 ret = dmar_set_interrupt(iommu);
4330 iommu_set_root_entry(iommu);
4331 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4332 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4333 iommu_enable_translation(iommu);
4335 iommu_disable_protect_mem_regions(iommu);
4339 disable_dmar_iommu(iommu);
4341 free_dmar_iommu(iommu);
4345 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4348 struct intel_iommu *iommu = dmaru->iommu;
4350 if (!intel_iommu_enabled)
4356 ret = intel_iommu_add(dmaru);
4358 disable_dmar_iommu(iommu);
4359 free_dmar_iommu(iommu);
4365 static void intel_iommu_free_dmars(void)
4367 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4368 struct dmar_atsr_unit *atsru, *atsr_n;
4370 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4371 list_del(&rmrru->list);
4372 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4376 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4377 list_del(&atsru->list);
4378 intel_iommu_free_atsr(atsru);
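/*
 * dmar_find_matched_atsr_unit() walks from the (physical) device up to
 * its PCIe root port and reports whether an ATSR entry covers that root
 * port, i.e. whether ATS may be used for the device.
 */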
4382 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4385 struct pci_bus *bus;
4386 struct pci_dev *bridge = NULL;
4388 struct acpi_dmar_atsr *atsr;
4389 struct dmar_atsr_unit *atsru;
4391 dev = pci_physfn(dev);
4392 for (bus = dev->bus; bus; bus = bus->parent) {
4394 /* If it's an integrated device, allow ATS */
4397 /* Connected via non-PCIe: no ATS */
4398 if (!pci_is_pcie(bridge) ||
4399 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4401 /* If we found the root port, look it up in the ATSR */
4402 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4407 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4408 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4409 if (atsr->segment != pci_domain_nr(dev->bus))
4412 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4413 if (tmp == &bridge->dev)
4416 if (atsru->include_all)
4426 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4429 struct dmar_rmrr_unit *rmrru;
4430 struct dmar_atsr_unit *atsru;
4431 struct acpi_dmar_atsr *atsr;
4432 struct acpi_dmar_reserved_memory *rmrr;
4434 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4437 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4438 rmrr = container_of(rmrru->hdr,
4439 struct acpi_dmar_reserved_memory, header);
4440 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4441 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4442 ((void *)rmrr) + rmrr->header.length,
4443 rmrr->segment, rmrru->devices,
4444 rmrru->devices_cnt);
4447 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4448 dmar_remove_dev_scope(info, rmrr->segment,
4449 rmrru->devices, rmrru->devices_cnt);
4453 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4454 if (atsru->include_all)
4457 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4458 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4459 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4460 (void *)atsr + atsr->header.length,
4461 atsr->segment, atsru->devices,
4462 atsru->devices_cnt);
4467 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4468 if (dmar_remove_dev_scope(info, atsr->segment,
4469 atsru->devices, atsru->devices_cnt))
4477 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4478 unsigned long val, void *v)
4480 struct memory_notify *mhp = v;
4481 unsigned long long start, end;
4482 unsigned long start_vpfn, last_vpfn;
4485 case MEM_GOING_ONLINE:
4486 start = mhp->start_pfn << PAGE_SHIFT;
4487 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4488 if (iommu_domain_identity_map(si_domain, start, end)) {
4489 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4496 case MEM_CANCEL_ONLINE:
4497 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4498 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4499 while (start_vpfn <= last_vpfn) {
4501 struct dmar_drhd_unit *drhd;
4502 struct intel_iommu *iommu;
4503 struct page *freelist;
4505 iova = find_iova(&si_domain->iovad, start_vpfn);
4507 pr_debug("Failed get IOVA for PFN %lx\n",
4512 iova = split_and_remove_iova(&si_domain->iovad, iova,
4513 start_vpfn, last_vpfn);
4515 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4516 start_vpfn, last_vpfn);
4520 freelist = domain_unmap(si_domain, iova->pfn_lo,
4524 for_each_active_iommu(iommu, drhd)
4525 iommu_flush_iotlb_psi(iommu, si_domain,
4526 iova->pfn_lo, iova_size(iova),
4529 dma_free_pagelist(freelist);
4531 start_vpfn = iova->pfn_hi + 1;
4532 free_iova_mem(iova);
4540 static struct notifier_block intel_iommu_memory_nb = {
4541 .notifier_call = intel_iommu_memory_notifier,
4545 static void free_all_cpu_cached_iovas(unsigned int cpu)
4549 for (i = 0; i < g_num_of_iommus; i++) {
4550 struct intel_iommu *iommu = g_iommus[i];
4551 struct dmar_domain *domain;
4557 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4558 domain = get_iommu_domain(iommu, (u16)did);
4562 free_cpu_cached_iovas(cpu, &domain->iovad);
4567 static int intel_iommu_cpu_dead(unsigned int cpu)
4569 free_all_cpu_cached_iovas(cpu);
4573 static void intel_disable_iommus(void)
4575 struct intel_iommu *iommu = NULL;
4576 struct dmar_drhd_unit *drhd;
4578 for_each_iommu(iommu, drhd)
4579 iommu_disable_translation(iommu);
4582 void intel_iommu_shutdown(void)
4584 struct dmar_drhd_unit *drhd;
4585 struct intel_iommu *iommu = NULL;
4587 if (no_iommu || dmar_disabled)
4590 down_write(&dmar_global_lock);
4592 /* Disable PMRs explicitly here. */
4593 for_each_iommu(iommu, drhd)
4594 iommu_disable_protect_mem_regions(iommu);
4596 /* Make sure the IOMMUs are switched off */
4597 intel_disable_iommus();
4599 up_write(&dmar_global_lock);
4602 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4604 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4606 return container_of(iommu_dev, struct intel_iommu, iommu);
4609 static ssize_t intel_iommu_show_version(struct device *dev,
4610 struct device_attribute *attr,
4613 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4614 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4615 return sprintf(buf, "%d:%d\n",
4616 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4618 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4620 static ssize_t intel_iommu_show_address(struct device *dev,
4621 struct device_attribute *attr,
4624 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4625 return sprintf(buf, "%llx\n", iommu->reg_phys);
4627 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4629 static ssize_t intel_iommu_show_cap(struct device *dev,
4630 struct device_attribute *attr,
4633 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4634 return sprintf(buf, "%llx\n", iommu->cap);
4636 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4638 static ssize_t intel_iommu_show_ecap(struct device *dev,
4639 struct device_attribute *attr,
4642 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4643 return sprintf(buf, "%llx\n", iommu->ecap);
4645 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4647 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4648 struct device_attribute *attr,
4651 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4652 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4654 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4656 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4657 struct device_attribute *attr,
4660 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4661 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4662 cap_ndoms(iommu->cap)));
4664 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4666 static struct attribute *intel_iommu_attrs[] = {
4667 &dev_attr_version.attr,
4668 &dev_attr_address.attr,
4670 &dev_attr_ecap.attr,
4671 &dev_attr_domains_supported.attr,
4672 &dev_attr_domains_used.attr,
4676 static struct attribute_group intel_iommu_group = {
4677 .name = "intel-iommu",
4678 .attrs = intel_iommu_attrs,
4681 const struct attribute_group *intel_iommu_groups[] = {
4686 static inline bool has_untrusted_dev(void)
4688 struct pci_dev *pdev = NULL;
4690 for_each_pci_dev(pdev)
4691 if (pdev->untrusted)
4697 static int __init platform_optin_force_iommu(void)
4699 if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4702 if (no_iommu || dmar_disabled)
4703 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4706 * If Intel-IOMMU is disabled by default, we will apply identity
4707 * map for all devices except those marked as being untrusted.
4710 iommu_set_default_passthrough(false);
4718 static int __init probe_acpi_namespace_devices(void)
4720 struct dmar_drhd_unit *drhd;
4721 /* To avoid a -Wunused-but-set-variable warning. */
4722 struct intel_iommu *iommu __maybe_unused;
4726 for_each_active_iommu(iommu, drhd) {
4727 for_each_active_dev_scope(drhd->devices,
4728 drhd->devices_cnt, i, dev) {
4729 struct acpi_device_physical_node *pn;
4730 struct iommu_group *group;
4731 struct acpi_device *adev;
4733 if (dev->bus != &acpi_bus_type)
4736 adev = to_acpi_device(dev);
4737 mutex_lock(&adev->physical_node_lock);
4738 list_for_each_entry(pn,
4739 &adev->physical_node_list, node) {
4740 group = iommu_group_get(pn->dev);
4742 iommu_group_put(group);
4746 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4747 ret = iommu_probe_device(pn->dev);
4751 mutex_unlock(&adev->physical_node_lock);
4761 int __init intel_iommu_init(void)
4764 struct dmar_drhd_unit *drhd;
4765 struct intel_iommu *iommu;
4768 * Intel IOMMU is required for a TXT/tboot launch or platform
4769 * opt in, so enforce that.
4771 force_on = tboot_force_iommu() || platform_optin_force_iommu();
4773 if (iommu_init_mempool()) {
4775 panic("tboot: Failed to initialize iommu memory\n");
4779 down_write(&dmar_global_lock);
4780 if (dmar_table_init()) {
4782 panic("tboot: Failed to initialize DMAR table\n");
4786 if (dmar_dev_scope_init() < 0) {
4788 panic("tboot: Failed to initialize DMAR device scope\n");
4792 up_write(&dmar_global_lock);
4795 * The bus notifier takes the dmar_global_lock, so lockdep will
4796 * complain later when we register it under the lock.
4798 dmar_register_bus_notifier();
4800 down_write(&dmar_global_lock);
4803 intel_iommu_debugfs_init();
4805 if (no_iommu || dmar_disabled) {
4807 * We exit the function here to ensure the IOMMU's remapping and
4808 * mempool aren't set up, which means that the IOMMU's PMRs
4809 * won't be disabled via the call to init_dmars(). So disable
4810 * them explicitly here. The PMRs were set up by tboot prior to
4811 * calling SENTER, but the kernel is expected to reset/tear them down.
4814 if (intel_iommu_tboot_noforce) {
4815 for_each_iommu(iommu, drhd)
4816 iommu_disable_protect_mem_regions(iommu);
4820 * Make sure the IOMMUs are switched off, even when we
4821 * boot into a kexec kernel and the previous kernel left them enabled.
4824 intel_disable_iommus();
4828 if (list_empty(&dmar_rmrr_units))
4829 pr_info("No RMRR found\n");
4831 if (list_empty(&dmar_atsr_units))
4832 pr_info("No ATSR found\n");
4834 if (dmar_init_reserved_ranges()) {
4836 panic("tboot: Failed to reserve iommu ranges\n");
4837 goto out_free_reserved_range;
4841 intel_iommu_gfx_mapped = 1;
4843 init_no_remapping_devices();
4848 panic("tboot: Failed to initialize DMARs\n");
4849 pr_err("Initialization failed\n");
4850 goto out_free_reserved_range;
4852 up_write(&dmar_global_lock);
4854 init_iommu_pm_ops();
4856 down_read(&dmar_global_lock);
4857 for_each_active_iommu(iommu, drhd) {
4858 iommu_device_sysfs_add(&iommu->iommu, NULL,
4861 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
4862 iommu_device_register(&iommu->iommu);
4864 up_read(&dmar_global_lock);
4866 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4867 if (si_domain && !hw_pass_through)
4868 register_memory_notifier(&intel_iommu_memory_nb);
4869 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
4870 intel_iommu_cpu_dead);
4872 down_read(&dmar_global_lock);
4873 if (probe_acpi_namespace_devices())
4874 pr_warn("ACPI name space devices didn't probe correctly\n");
4876 /* Finally, we enable the DMA remapping hardware. */
4877 for_each_iommu(iommu, drhd) {
4878 if (!drhd->ignored && !translation_pre_enabled(iommu))
4879 iommu_enable_translation(iommu);
4881 iommu_disable_protect_mem_regions(iommu);
4883 up_read(&dmar_global_lock);
4885 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4887 intel_iommu_enabled = 1;
4891 out_free_reserved_range:
4892 put_iova_domain(&reserved_iova_list);
4894 intel_iommu_free_dmars();
4895 up_write(&dmar_global_lock);
4896 iommu_exit_mempool();
4900 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4902 struct intel_iommu *iommu = opaque;
4904 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4909 * NB - intel-iommu lacks any sort of reference counting for the users of
4910 * dependent devices. If multiple endpoints have intersecting dependent
4911 * devices, unbinding the driver from any one of them will possibly leave
4912 * the others unable to operate.
4914 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4916 if (!iommu || !dev || !dev_is_pci(dev))
4919 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4922 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4924 struct dmar_domain *domain;
4925 struct intel_iommu *iommu;
4926 unsigned long flags;
4928 assert_spin_locked(&device_domain_lock);
4933 iommu = info->iommu;
4934 domain = info->domain;
4937 if (dev_is_pci(info->dev) && sm_supported(iommu))
4938 intel_pasid_tear_down_entry(iommu, info->dev,
4941 iommu_disable_dev_iotlb(info);
4942 domain_context_clear(iommu, info->dev);
4943 intel_pasid_free_table(info->dev);
4946 unlink_domain_info(info);
4948 spin_lock_irqsave(&iommu->lock, flags);
4949 domain_detach_iommu(domain, iommu);
4950 spin_unlock_irqrestore(&iommu->lock, flags);
4952 free_devinfo_mem(info);
4955 static void dmar_remove_one_dev_info(struct device *dev)
4957 struct device_domain_info *info;
4958 unsigned long flags;
4960 spin_lock_irqsave(&device_domain_lock, flags);
4961 info = dev->archdata.iommu;
4962 if (info && info != DEFER_DEVICE_DOMAIN_INFO
4963 && info != DUMMY_DEVICE_DOMAIN_INFO)
4964 __dmar_remove_one_dev_info(info);
4965 spin_unlock_irqrestore(&device_domain_lock, flags);
4968 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4972 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
4973 domain_reserve_special_ranges(domain);
4975 /* calculate AGAW */
4976 domain->gaw = guest_width;
4977 adjust_width = guestwidth_to_adjustwidth(guest_width);
4978 domain->agaw = width_to_agaw(adjust_width);
4980 domain->iommu_coherency = 0;
4981 domain->iommu_snooping = 0;
4982 domain->iommu_superpage = 0;
4983 domain->max_addr = 0;
4985 /* always allocate the top pgd */
4986 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4989 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4993 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4995 struct dmar_domain *dmar_domain;
4996 struct iommu_domain *domain;
5000 case IOMMU_DOMAIN_DMA:
5002 case IOMMU_DOMAIN_UNMANAGED:
5003 dmar_domain = alloc_domain(0);
5005 pr_err("Can't allocate dmar_domain\n");
5008 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5009 pr_err("Domain initialization failed\n");
5010 domain_exit(dmar_domain);
5014 if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
5015 ret = init_iova_flush_queue(&dmar_domain->iovad,
5019 pr_info("iova flush queue initialization failed\n");
5022 domain_update_iommu_cap(dmar_domain);
5024 domain = &dmar_domain->domain;
5025 domain->geometry.aperture_start = 0;
5026 domain->geometry.aperture_end =
5027 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5028 domain->geometry.force_aperture = true;
5031 case IOMMU_DOMAIN_IDENTITY:
5032 return &si_domain->domain;
5040 static void intel_iommu_domain_free(struct iommu_domain *domain)
5042 if (domain != &si_domain->domain)
5043 domain_exit(to_dmar_domain(domain));
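/*
 * Illustrative sketch only (not part of this driver): how an external kernel
 * driver would typically obtain and release an unmanaged domain through the
 * generic IOMMU API; those calls land in intel_iommu_domain_alloc() and
 * intel_iommu_domain_free() above. example_use_unmanaged_domain() is a
 * hypothetical name.
 */
#if 0
static int example_use_unmanaged_domain(struct device *dev)
{
	struct iommu_domain *domain;
	int ret;

	/* Allocates an IOMMU_DOMAIN_UNMANAGED domain via the driver's ops. */
	domain = iommu_domain_alloc(dev->bus);
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);	/* intel_iommu_attach_device() */
	if (ret) {
		iommu_domain_free(domain);
		return ret;
	}

	/* ... iommu_map()/iommu_unmap() on the domain ... */

	iommu_detach_device(domain, dev);
	iommu_domain_free(domain);
	return 0;
}
#endif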
5047 * Check whether a @domain could be attached to the @dev through the
5048 * aux-domain attach/detach APIs.
5051 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5053 struct device_domain_info *info = dev->archdata.iommu;
5055 return info && info->auxd_enabled &&
5056 domain->type == IOMMU_DOMAIN_UNMANAGED;
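/*
 * Illustrative sketch only (not part of this driver): the aux-domain flow as
 * a mediated-device style caller might drive it through the generic IOMMU
 * API; the calls end up in intel_iommu_enable_auxd(), aux_domain_add_dev()
 * and intel_iommu_aux_get_pasid() in this file. example_aux_attach() is a
 * hypothetical name.
 */
#if 0
static int example_aux_attach(struct iommu_domain *domain, struct device *dev)
{
	int ret, pasid;

	/* Enables scalable-mode PASID support on the physical device. */
	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX);
	if (ret)
		return ret;

	/* Attaches the unmanaged domain as an aux domain of the device. */
	ret = iommu_aux_attach_device(domain, dev);
	if (ret) {
		iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_AUX);
		return ret;
	}

	/* Default PASID backing this aux domain, programmed by the caller. */
	pasid = iommu_aux_get_pasid(domain, dev);

	return pasid < 0 ? pasid : 0;
}
#endif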
5059 static void auxiliary_link_device(struct dmar_domain *domain,
5062 struct device_domain_info *info = dev->archdata.iommu;
5064 assert_spin_locked(&device_domain_lock);
5068 domain->auxd_refcnt++;
5069 list_add(&domain->auxd, &info->auxiliary_domains);
5072 static void auxiliary_unlink_device(struct dmar_domain *domain,
5075 struct device_domain_info *info = dev->archdata.iommu;
5077 assert_spin_locked(&device_domain_lock);
5081 list_del(&domain->auxd);
5082 domain->auxd_refcnt--;
5084 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5085 ioasid_free(domain->default_pasid);
5088 static int aux_domain_add_dev(struct dmar_domain *domain,
5093 unsigned long flags;
5094 struct intel_iommu *iommu;
5096 iommu = device_to_iommu(dev, &bus, &devfn);
5100 if (domain->default_pasid <= 0) {
5103 /* No private data needed for the default pasid */
5104 pasid = ioasid_alloc(NULL, PASID_MIN,
5105 pci_max_pasids(to_pci_dev(dev)) - 1,
5107 if (pasid == INVALID_IOASID) {
5108 pr_err("Can't allocate default pasid\n");
5111 domain->default_pasid = pasid;
5114 spin_lock_irqsave(&device_domain_lock, flags);
5116 * iommu->lock must be held to attach domain to iommu and setup the
5117 * pasid entry for second level translation.
5119 spin_lock(&iommu->lock);
5120 ret = domain_attach_iommu(domain, iommu);
5124 /* Setup the PASID entry for mediated devices: */
5125 if (domain_use_first_level(domain))
5126 ret = domain_setup_first_level(iommu, domain, dev,
5127 domain->default_pasid);
5129 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5130 domain->default_pasid);
5133 spin_unlock(&iommu->lock);
5135 auxiliary_link_device(domain, dev);
5137 spin_unlock_irqrestore(&device_domain_lock, flags);
5142 domain_detach_iommu(domain, iommu);
5144 spin_unlock(&iommu->lock);
5145 spin_unlock_irqrestore(&device_domain_lock, flags);
5146 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5147 ioasid_free(domain->default_pasid);
5152 static void aux_domain_remove_dev(struct dmar_domain *domain,
5155 struct device_domain_info *info;
5156 struct intel_iommu *iommu;
5157 unsigned long flags;
5159 if (!is_aux_domain(dev, &domain->domain))
5162 spin_lock_irqsave(&device_domain_lock, flags);
5163 info = dev->archdata.iommu;
5164 iommu = info->iommu;
5166 auxiliary_unlink_device(domain, dev);
5168 spin_lock(&iommu->lock);
5169 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5170 domain_detach_iommu(domain, iommu);
5171 spin_unlock(&iommu->lock);
5173 spin_unlock_irqrestore(&device_domain_lock, flags);
5176 static int prepare_domain_attach_device(struct iommu_domain *domain,
5179 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5180 struct intel_iommu *iommu;
5184 iommu = device_to_iommu(dev, &bus, &devfn);
5188 /* check if this iommu agaw is sufficient for max mapped address */
5189 addr_width = agaw_to_width(iommu->agaw);
5190 if (addr_width > cap_mgaw(iommu->cap))
5191 addr_width = cap_mgaw(iommu->cap);
5193 if (dmar_domain->max_addr > (1LL << addr_width)) {
5194 dev_err(dev, "%s: iommu width (%d) is not "
5195 "sufficient for the mapped address (%llx)\n",
5196 __func__, addr_width, dmar_domain->max_addr);
5199 dmar_domain->gaw = addr_width;
5202 * Knock out extra levels of page tables if necessary
5204 while (iommu->agaw < dmar_domain->agaw) {
5205 struct dma_pte *pte;
5207 pte = dmar_domain->pgd;
5208 if (dma_pte_present(pte)) {
5209 dmar_domain->pgd = (struct dma_pte *)
5210 phys_to_virt(dma_pte_addr(pte));
5211 free_pgtable_page(pte);
5213 dmar_domain->agaw--;
5219 static int intel_iommu_attach_device(struct iommu_domain *domain,
5224 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5225 device_is_rmrr_locked(dev)) {
5226 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5230 if (is_aux_domain(dev, domain))
5233 /* normally dev is not mapped */
5234 if (unlikely(domain_context_mapped(dev))) {
5235 struct dmar_domain *old_domain;
5237 old_domain = find_domain(dev);
5239 dmar_remove_one_dev_info(dev);
5242 ret = prepare_domain_attach_device(domain, dev);
5246 return domain_add_dev_info(to_dmar_domain(domain), dev);
5249 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5254 if (!is_aux_domain(dev, domain))
5257 ret = prepare_domain_attach_device(domain, dev);
5261 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5264 static void intel_iommu_detach_device(struct iommu_domain *domain,
5267 dmar_remove_one_dev_info(dev);
5270 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5273 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5276 static int intel_iommu_map(struct iommu_domain *domain,
5277 unsigned long iova, phys_addr_t hpa,
5278 size_t size, int iommu_prot, gfp_t gfp)
5280 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5285 if (iommu_prot & IOMMU_READ)
5286 prot |= DMA_PTE_READ;
5287 if (iommu_prot & IOMMU_WRITE)
5288 prot |= DMA_PTE_WRITE;
5289 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5290 prot |= DMA_PTE_SNP;
5292 max_addr = iova + size;
5293 if (dmar_domain->max_addr < max_addr) {
5296 /* check if minimum agaw is sufficient for mapped address */
5297 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5298 if (end < max_addr) {
5299 pr_err("%s: iommu width (%d) is not "
5300 "sufficient for the mapped address (%llx)\n",
5301 __func__, dmar_domain->gaw, max_addr);
5304 dmar_domain->max_addr = max_addr;
5306 /* Round up size to next multiple of PAGE_SIZE, if it and
5307 the low bits of hpa would take us onto the next page */
5308 size = aligned_nrpages(hpa, size);
5309 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5310 hpa >> VTD_PAGE_SHIFT, size, prot);
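/*
 * Illustrative sketch only: how a caller of the generic iommu_map() interface
 * typically supplies the prot flags translated above; note that IOMMU_CACHE
 * only becomes DMA_PTE_SNP when the domain reports iommu_snooping.
 * example_map_one_page() is a hypothetical name.
 */
#if 0
static int example_map_one_page(struct iommu_domain *domain,
				unsigned long iova, phys_addr_t paddr)
{
	int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE;

	/* Reaches intel_iommu_map() through the intel_iommu_ops table. */
	return iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
}
#endif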
5314 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5315 unsigned long iova, size_t size,
5316 struct iommu_iotlb_gather *gather)
5318 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5319 struct page *freelist = NULL;
5320 unsigned long start_pfn, last_pfn;
5321 unsigned int npages;
5322 int iommu_id, level = 0;
5324 /* Cope with horrid API which requires us to unmap more than the
5325 size argument if it happens to be a large-page mapping. */
5326 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5328 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5329 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5331 start_pfn = iova >> VTD_PAGE_SHIFT;
5332 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5334 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5336 npages = last_pfn - start_pfn + 1;
5338 for_each_domain_iommu(iommu_id, dmar_domain)
5339 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5340 start_pfn, npages, !freelist, 0);
5342 dma_free_pagelist(freelist);
5344 if (dmar_domain->max_addr == iova + size)
5345 dmar_domain->max_addr = iova;
5350 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5353 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5354 struct dma_pte *pte;
5358 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5359 if (pte && dma_pte_present(pte))
5360 phys = dma_pte_addr(pte) +
5361 (iova & (BIT_MASK(level_to_offset_bits(level) +
5362 VTD_PAGE_SHIFT) - 1));
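/*
 * Illustrative sketch only: verifying and tearing down a mapping through the
 * generic API; these calls land in intel_iommu_iova_to_phys() and
 * intel_iommu_unmap() above. example_check_and_unmap() is a hypothetical name.
 */
#if 0
static void example_check_and_unmap(struct iommu_domain *domain,
				    unsigned long iova)
{
	phys_addr_t phys = iommu_iova_to_phys(domain, iova);

	if (phys)
		pr_debug("iova 0x%lx -> phys %pa\n", iova, &phys);

	/* May unmap more than PAGE_SIZE if a superpage backs this iova. */
	iommu_unmap(domain, iova, PAGE_SIZE);
}
#endif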
5367 static inline bool scalable_mode_support(void)
5369 struct dmar_drhd_unit *drhd;
5370 struct intel_iommu *iommu;
5374 for_each_active_iommu(iommu, drhd) {
5375 if (!sm_supported(iommu)) {
5385 static inline bool iommu_pasid_support(void)
5387 struct dmar_drhd_unit *drhd;
5388 struct intel_iommu *iommu;
5392 for_each_active_iommu(iommu, drhd) {
5393 if (!pasid_supported(iommu)) {
5403 static inline bool nested_mode_support(void)
5405 struct dmar_drhd_unit *drhd;
5406 struct intel_iommu *iommu;
5410 for_each_active_iommu(iommu, drhd) {
5411 if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
5421 static bool intel_iommu_capable(enum iommu_cap cap)
5423 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5424 return domain_update_iommu_snooping(NULL) == 1;
5425 if (cap == IOMMU_CAP_INTR_REMAP)
5426 return irq_remapping_enabled == 1;
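/*
 * Illustrative sketch only: a caller probing cache coherency before deciding
 * whether to request IOMMU_CACHE; this routes to intel_iommu_capable() above.
 * example_prot_for_pci() is a hypothetical name.
 */
#if 0
static int example_prot_for_pci(void)
{
	int prot = IOMMU_READ | IOMMU_WRITE;

	if (iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY))
		prot |= IOMMU_CACHE;

	return prot;
}
#endif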
5431 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5433 struct intel_iommu *iommu;
5436 iommu = device_to_iommu(dev, &bus, &devfn);
5438 return ERR_PTR(-ENODEV);
5440 if (translation_pre_enabled(iommu))
5441 dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5443 return &iommu->iommu;
5446 static void intel_iommu_release_device(struct device *dev)
5448 struct intel_iommu *iommu;
5451 iommu = device_to_iommu(dev, &bus, &devfn);
5455 dmar_remove_one_dev_info(dev);
5457 set_dma_ops(dev, NULL);
5460 static void intel_iommu_probe_finalize(struct device *dev)
5462 struct iommu_domain *domain;
5464 domain = iommu_get_domain_for_dev(dev);
5465 if (device_needs_bounce(dev))
5466 set_dma_ops(dev, &bounce_dma_ops);
5467 else if (domain && domain->type == IOMMU_DOMAIN_DMA)
5468 set_dma_ops(dev, &intel_dma_ops);
5470 set_dma_ops(dev, NULL);
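/*
 * Note on the three outcomes above: untrusted devices get the bounce-buffer
 * DMA ops so their DMA is always remapped through swiotlb bounce pages,
 * devices in a DMA-type default domain get the Intel IOMMU DMA ops, and
 * everything else (e.g. identity-mapped devices) is left with NULL ops and
 * therefore falls back to the generic direct-mapping path.
 */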
5473 static void intel_iommu_get_resv_regions(struct device *device,
5474 struct list_head *head)
5476 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5477 struct iommu_resv_region *reg;
5478 struct dmar_rmrr_unit *rmrr;
5479 struct device *i_dev;
5482 down_read(&dmar_global_lock);
5483 for_each_rmrr_units(rmrr) {
5484 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5486 struct iommu_resv_region *resv;
5487 enum iommu_resv_type type;
5490 if (i_dev != device &&
5491 !is_downstream_to_pci_bridge(device, i_dev))
5494 length = rmrr->end_address - rmrr->base_address + 1;
5496 type = device_rmrr_is_relaxable(device) ?
5497 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5499 resv = iommu_alloc_resv_region(rmrr->base_address,
5500 length, prot, type);
5504 list_add_tail(&resv->list, head);
5507 up_read(&dmar_global_lock);
5509 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5510 if (dev_is_pci(device)) {
5511 struct pci_dev *pdev = to_pci_dev(device);
5513 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5514 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5515 IOMMU_RESV_DIRECT_RELAXABLE);
5517 list_add_tail(&reg->list, head);
5520 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5522 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5523 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5527 list_add_tail(&reg->list, head);
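/*
 * Illustrative sketch only: how a consumer enumerates the RMRR/ISA/IOAPIC
 * reserved ranges reported above via the generic helpers.
 * example_show_resv() is a hypothetical name.
 */
#if 0
static void example_show_resv(struct device *dev)
{
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);

	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list)
		dev_info(dev, "reserved: %pa + 0x%zx, type %d\n",
			 &region->start, region->length, region->type);
	iommu_put_resv_regions(dev, &resv_regions);
}
#endif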
5530 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5532 struct device_domain_info *info;
5533 struct context_entry *context;
5534 struct dmar_domain *domain;
5535 unsigned long flags;
5539 domain = find_domain(dev);
5543 spin_lock_irqsave(&device_domain_lock, flags);
5544 spin_lock(&iommu->lock);
5547 info = dev->archdata.iommu;
5548 if (!info || !info->pasid_supported)
5551 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5552 if (WARN_ON(!context))
5555 ctx_lo = context[0].lo;
5557 if (!(ctx_lo & CONTEXT_PASIDE)) {
5558 ctx_lo |= CONTEXT_PASIDE;
5559 context[0].lo = ctx_lo;
5561 iommu->flush.flush_context(iommu,
5562 domain->iommu_did[iommu->seq_id],
5563 PCI_DEVID(info->bus, info->devfn),
5564 DMA_CCMD_MASK_NOBIT,
5565 DMA_CCMD_DEVICE_INVL);
5568 /* Enable PASID support in the device, if it wasn't already */
5569 if (!info->pasid_enabled)
5570 iommu_enable_dev_iotlb(info);
5575 spin_unlock(&iommu->lock);
5576 spin_unlock_irqrestore(&device_domain_lock, flags);
5581 static void intel_iommu_apply_resv_region(struct device *dev,
5582 struct iommu_domain *domain,
5583 struct iommu_resv_region *region)
5585 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5586 unsigned long start, end;
5588 start = IOVA_PFN(region->start);
5589 end = IOVA_PFN(region->start + region->length - 1);
5591 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5594 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5596 if (dev_is_pci(dev))
5597 return pci_device_group(dev);
5598 return generic_device_group(dev);
5601 #ifdef CONFIG_INTEL_IOMMU_SVM
5602 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5604 struct intel_iommu *iommu;
5607 if (iommu_dummy(dev)) {
5609 "No IOMMU translation for device; cannot enable SVM\n");
5613 iommu = device_to_iommu(dev, &bus, &devfn);
5615 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5621 #endif /* CONFIG_INTEL_IOMMU_SVM */
5623 static int intel_iommu_enable_auxd(struct device *dev)
5625 struct device_domain_info *info;
5626 struct intel_iommu *iommu;
5627 unsigned long flags;
5631 iommu = device_to_iommu(dev, &bus, &devfn);
5632 if (!iommu || dmar_disabled)
5635 if (!sm_supported(iommu) || !pasid_supported(iommu))
5638 ret = intel_iommu_enable_pasid(iommu, dev);
5642 spin_lock_irqsave(&device_domain_lock, flags);
5643 info = dev->archdata.iommu;
5644 info->auxd_enabled = 1;
5645 spin_unlock_irqrestore(&device_domain_lock, flags);
5650 static int intel_iommu_disable_auxd(struct device *dev)
5652 struct device_domain_info *info;
5653 unsigned long flags;
5655 spin_lock_irqsave(&device_domain_lock, flags);
5656 info = dev->archdata.iommu;
5657 if (!WARN_ON(!info))
5658 info->auxd_enabled = 0;
5659 spin_unlock_irqrestore(&device_domain_lock, flags);
5665 * A PCI Express Designated Vendor-Specific Extended Capability is defined
5666 * in section 3.7 of the Intel Scalable I/O Virtualization technical spec,
5667 * allowing system software and tools to detect endpoint devices that support
5668 * Intel Scalable I/O Virtualization without a host driver dependency.
5670 * Returns the address of the matching extended capability structure within
5671 * the device's PCI configuration space, or 0 if the device does not support it.
5674 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5679 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5681 pci_read_config_word(pdev, pos + 4, &vendor);
5682 pci_read_config_word(pdev, pos + 8, &id);
5683 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5686 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
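/*
 * Layout note: extended capability ID 0x23 is the PCIe Designated
 * Vendor-Specific Extended Capability (DVSEC); the word at offset 0x4 of the
 * structure holds the DVSEC vendor ID and the word at offset 0x8 the DVSEC
 * ID, which is why the loop above matches PCI_VENDOR_ID_INTEL together with
 * DVSEC ID 5 (SIOV) before reporting the capability position.
 */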
5693 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5695 if (feat == IOMMU_DEV_FEAT_AUX) {
5698 if (!dev_is_pci(dev) || dmar_disabled ||
5699 !scalable_mode_support() || !iommu_pasid_support())
5702 ret = pci_pasid_features(to_pci_dev(dev));
5706 return !!siov_find_pci_dvsec(to_pci_dev(dev));
5713 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5715 if (feat == IOMMU_DEV_FEAT_AUX)
5716 return intel_iommu_enable_auxd(dev);
5722 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5724 if (feat == IOMMU_DEV_FEAT_AUX)
5725 return intel_iommu_disable_auxd(dev);
5731 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5733 struct device_domain_info *info = dev->archdata.iommu;
5735 if (feat == IOMMU_DEV_FEAT_AUX)
5736 return scalable_mode_support() && info && info->auxd_enabled;
5742 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5744 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5746 return dmar_domain->default_pasid > 0 ?
5747 dmar_domain->default_pasid : -EINVAL;
5750 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5753 return attach_deferred(dev);
5757 intel_iommu_domain_set_attr(struct iommu_domain *domain,
5758 enum iommu_attr attr, void *data)
5760 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5761 unsigned long flags;
5764 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
5768 case DOMAIN_ATTR_NESTING:
5769 spin_lock_irqsave(&device_domain_lock, flags);
5770 if (nested_mode_support() &&
5771 list_empty(&dmar_domain->devices)) {
5772 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
5773 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
5777 spin_unlock_irqrestore(&device_domain_lock, flags);
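/*
 * Illustrative sketch only: requesting nested translation on a still-empty
 * unmanaged domain, which is serviced by intel_iommu_domain_set_attr() above.
 * example_enable_nesting() is a hypothetical name.
 */
#if 0
static int example_enable_nesting(struct iommu_domain *domain)
{
	int nesting = 1;

	/* Must be done before any device is attached to the domain. */
	return iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
}
#endif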
5787 const struct iommu_ops intel_iommu_ops = {
5788 .capable = intel_iommu_capable,
5789 .domain_alloc = intel_iommu_domain_alloc,
5790 .domain_free = intel_iommu_domain_free,
5791 .domain_set_attr = intel_iommu_domain_set_attr,
5792 .attach_dev = intel_iommu_attach_device,
5793 .detach_dev = intel_iommu_detach_device,
5794 .aux_attach_dev = intel_iommu_aux_attach_device,
5795 .aux_detach_dev = intel_iommu_aux_detach_device,
5796 .aux_get_pasid = intel_iommu_aux_get_pasid,
5797 .map = intel_iommu_map,
5798 .unmap = intel_iommu_unmap,
5799 .iova_to_phys = intel_iommu_iova_to_phys,
5800 .probe_device = intel_iommu_probe_device,
5801 .probe_finalize = intel_iommu_probe_finalize,
5802 .release_device = intel_iommu_release_device,
5803 .get_resv_regions = intel_iommu_get_resv_regions,
5804 .put_resv_regions = generic_iommu_put_resv_regions,
5805 .apply_resv_region = intel_iommu_apply_resv_region,
5806 .device_group = intel_iommu_device_group,
5807 .dev_has_feat = intel_iommu_dev_has_feat,
5808 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
5809 .dev_enable_feat = intel_iommu_dev_enable_feat,
5810 .dev_disable_feat = intel_iommu_dev_disable_feat,
5811 .is_attach_deferred = intel_iommu_is_attach_deferred,
5812 .def_domain_type = device_def_domain_type,
5813 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5816 static void quirk_iommu_igfx(struct pci_dev *dev)
5818 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5822 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5823 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5824 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5825 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5826 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5827 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5828 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5829 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5831 /* Broadwell igfx malfunctions with dmar */
5832 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
5833 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
5834 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
5835 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
5836 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
5837 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
5838 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
5839 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
5840 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
5841 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
5842 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
5843 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
5844 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
5845 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
5846 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
5847 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
5848 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
5849 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
5850 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
5851 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
5852 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
5853 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
5854 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
5855 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
5857 static void quirk_iommu_rwbf(struct pci_dev *dev)
5860 * Mobile 4 Series Chipset neglects to set RWBF capability,
5861 * but needs it. Same seems to hold for the desktop versions.
5863 pci_info(dev, "Forcing write-buffer flush capability\n");
5867 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5868 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5869 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5870 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5871 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5872 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5873 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5876 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
5877 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5878 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
5879 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
5880 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5881 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5882 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5883 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5885 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5889 if (pci_read_config_word(dev, GGC, &ggc))
5892 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5893 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5895 } else if (dmar_map_gfx) {
5896 /* we have to ensure the gfx device is idle before we flush */
5897 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
5898 intel_iommu_strict = 1;
5901 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5902 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5903 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5904 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5906 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5907 ISOCH DMAR unit for the Azalia sound device, but not give it any
5908 TLB entries, which causes it to deadlock. Check for that. We do
5909 this in a function called from init_dmars(), instead of in a PCI
5910 quirk, because we don't want to print the obnoxious "BIOS broken"
5911 message if VT-d is actually disabled.
5913 static void __init check_tylersburg_isoch(void)
5915 struct pci_dev *pdev;
5916 uint32_t vtisochctrl;
5918 /* If there's no Azalia in the system anyway, forget it. */
5919 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5924 /* System Management Registers. Might be hidden, in which case
5925 we can't do the sanity check. But that's OK, because the
5926 known-broken BIOSes _don't_ actually hide it, so far. */
5927 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5931 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5938 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5939 if (vtisochctrl & 1)
5942 /* Drop all bits other than the number of TLB entries */
5943 vtisochctrl &= 0x1c;
5945 /* If we have the recommended number of TLB entries (16), fine. */
5946 if (vtisochctrl == 0x10)
5949 /* Zero TLB entries? You get to ride the short bus to school. */
5951 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5952 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5953 dmi_get_system_info(DMI_BIOS_VENDOR),
5954 dmi_get_system_info(DMI_BIOS_VERSION),
5955 dmi_get_system_info(DMI_PRODUCT_VERSION));
5956 iommu_identity_mapping |= IDENTMAP_AZALIA;
5960 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",