intel-iommu: Export a flag indicating that the IOMMU is used for iGFX.
drivers/iommu/intel-iommu.c [linux-2.6-microblaze.git]
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <linux/syscore_ops.h>
40 #include <linux/tboot.h>
41 #include <linux/dmi.h>
42 #include <linux/pci-ats.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45
46 #define ROOT_SIZE               VTD_PAGE_SIZE
47 #define CONTEXT_SIZE            VTD_PAGE_SIZE
48
49 #define IS_BRIDGE_HOST_DEVICE(pdev) \
50                             ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
51 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
52 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
53 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
54
55 #define IOAPIC_RANGE_START      (0xfee00000)
56 #define IOAPIC_RANGE_END        (0xfeefffff)
57 #define IOVA_START_ADDR         (0x1000)
58
59 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
60
61 #define MAX_AGAW_WIDTH 64
62
63 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
64 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
65
66 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
67    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
68 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
69                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
70 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
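/*
 * For example, with the default 48-bit guest address width,
 * __DOMAIN_MAX_PFN(48) is 2^36 - 1 (one pfn per 4KiB VT-d page); on a
 * 32-bit kernel DOMAIN_MAX_PFN() clamps that to ULONG_MAX so pfn
 * arithmetic always fits in an 'unsigned long'.
 */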
71
72 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
73 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
74 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
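/*
 * IOVA_PFN() works in CPU page units; with 4KiB pages DMA_32BIT_PFN is
 * 0xfffff, the last page frame below 4GiB, and is used below as the
 * limit pfn when iova domains are initialised.
 */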
75
76 /* page table handling */
77 #define LEVEL_STRIDE            (9)
78 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
79
80 static inline int agaw_to_level(int agaw)
81 {
82         return agaw + 2;
83 }
84
85 static inline int agaw_to_width(int agaw)
86 {
87         return 30 + agaw * LEVEL_STRIDE;
88 }
89
90 static inline int width_to_agaw(int width)
91 {
92         return (width - 30) / LEVEL_STRIDE;
93 }
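/*
 * The helpers above encode one relationship: an adjusted guest address
 * width (agaw) of 0 means a 30-bit, 2-level page table, agaw 1 means
 * 39-bit/3-level, and the DEFAULT_DOMAIN_ADDRESS_WIDTH of 48 bits maps
 * to agaw 2, i.e. a 4-level table.
 */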
94
95 static inline unsigned int level_to_offset_bits(int level)
96 {
97         return (level - 1) * LEVEL_STRIDE;
98 }
99
100 static inline int pfn_level_offset(unsigned long pfn, int level)
101 {
102         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
103 }
104
105 static inline unsigned long level_mask(int level)
106 {
107         return -1UL << level_to_offset_bits(level);
108 }
109
110 static inline unsigned long level_size(int level)
111 {
112         return 1UL << level_to_offset_bits(level);
113 }
114
115 static inline unsigned long align_to_level(unsigned long pfn, int level)
116 {
117         return (pfn + level_size(level) - 1) & level_mask(level);
118 }
119
120 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
121 {
122         return  1 << ((lvl - 1) * LEVEL_STRIDE);
123 }
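/*
 * Each level covers 9 more address bits, so lvl_to_nr_pages(1) == 1
 * (a 4KiB page), lvl_to_nr_pages(2) == 512 (a 2MiB superpage) and
 * lvl_to_nr_pages(3) == 512 * 512 (1GiB).
 */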
124
125 /* VT-d pages must never be _larger_ than MM pages. Otherwise things
126    are never going to work. */
127 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
128 {
129         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
130 }
131
132 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
133 {
134         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
135 }
136 static inline unsigned long page_to_dma_pfn(struct page *pg)
137 {
138         return mm_to_dma_pfn(page_to_pfn(pg));
139 }
140 static inline unsigned long virt_to_dma_pfn(void *p)
141 {
142         return page_to_dma_pfn(virt_to_page(p));
143 }
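/*
 * On x86, PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so the two
 * conversions above are identities; they only shift when the CPU page
 * size is larger than the 4KiB VT-d page size.
 */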
144
145 /* global iommu list, set NULL for ignored DMAR units */
146 static struct intel_iommu **g_iommus;
147
148 static void __init check_tylersburg_isoch(void);
149 static int rwbf_quirk;
150
151 /*
152  * Set to 1 to panic the kernel if VT-d can't be enabled successfully
153  * (used when the kernel is launched w/ TXT).
154  */
155 static int force_on = 0;
156
157 /*
158  * 0: Present
159  * 1-11: Reserved
160  * 12-63: Context Ptr (12 - (haw-1))
161  * 64-127: Reserved
162  */
163 struct root_entry {
164         u64     val;
165         u64     rsvd1;
166 };
167 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
168 static inline bool root_present(struct root_entry *root)
169 {
170         return (root->val & 1);
171 }
172 static inline void set_root_present(struct root_entry *root)
173 {
174         root->val |= 1;
175 }
176 static inline void set_root_value(struct root_entry *root, unsigned long value)
177 {
178         root->val |= value & VTD_PAGE_MASK;
179 }
180
181 static inline struct context_entry *
182 get_context_addr_from_root(struct root_entry *root)
183 {
184         return (struct context_entry *)
185                 (root_present(root)?phys_to_virt(
186                 root->val & VTD_PAGE_MASK) :
187                 NULL);
188 }
189
190 /*
191  * low 64 bits:
192  * 0: present
193  * 1: fault processing disable
194  * 2-3: translation type
195  * 12-63: address space root
196  * high 64 bits:
197  * 0-2: address width
198  * 3-6: aval
199  * 8-23: domain id
200  */
201 struct context_entry {
202         u64 lo;
203         u64 hi;
204 };
205
206 static inline bool context_present(struct context_entry *context)
207 {
208         return (context->lo & 1);
209 }
210 static inline void context_set_present(struct context_entry *context)
211 {
212         context->lo |= 1;
213 }
214
215 static inline void context_set_fault_enable(struct context_entry *context)
216 {
217         context->lo &= (((u64)-1) << 2) | 1;
218 }
219
220 static inline void context_set_translation_type(struct context_entry *context,
221                                                 unsigned long value)
222 {
223         context->lo &= (((u64)-1) << 4) | 3;
224         context->lo |= (value & 3) << 2;
225 }
226
227 static inline void context_set_address_root(struct context_entry *context,
228                                             unsigned long value)
229 {
230         context->lo |= value & VTD_PAGE_MASK;
231 }
232
233 static inline void context_set_address_width(struct context_entry *context,
234                                              unsigned long value)
235 {
236         context->hi |= value & 7;
237 }
238
239 static inline void context_set_domain_id(struct context_entry *context,
240                                          unsigned long value)
241 {
242         context->hi |= (value & ((1 << 16) - 1)) << 8;
243 }
244
245 static inline void context_clear_entry(struct context_entry *context)
246 {
247         context->lo = 0;
248         context->hi = 0;
249 }
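/*
 * A context entry is assembled from these helpers in
 * domain_context_mapping_one() below: the domain id and address width
 * go into the high word, and the page-table root, translation type and
 * present bit go into the low word.
 */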
250
251 /*
252  * 0: readable
253  * 1: writable
254  * 2-6: reserved
255  * 7: super page
256  * 8-10: available
257  * 11: snoop behavior
258  * 12-63: Host physical address
259  */
260 struct dma_pte {
261         u64 val;
262 };
263
264 static inline void dma_clear_pte(struct dma_pte *pte)
265 {
266         pte->val = 0;
267 }
268
269 static inline void dma_set_pte_readable(struct dma_pte *pte)
270 {
271         pte->val |= DMA_PTE_READ;
272 }
273
274 static inline void dma_set_pte_writable(struct dma_pte *pte)
275 {
276         pte->val |= DMA_PTE_WRITE;
277 }
278
279 static inline void dma_set_pte_snp(struct dma_pte *pte)
280 {
281         pte->val |= DMA_PTE_SNP;
282 }
283
284 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
285 {
286         pte->val = (pte->val & ~3) | (prot & 3);
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
300 {
301         pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306         return (pte->val & 3) != 0;
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
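/*
 * Putting the helpers together, a leaf pte for an ordinary writable
 * 4KiB mapping ends up as (host_pfn << VTD_PAGE_SHIFT) | DMA_PTE_READ |
 * DMA_PTE_WRITE, with DMA_PTE_SNP added when snooping is enforced for
 * the domain.
 */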
313
314 /*
315  * This domain is a statically identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* devices under the same p2p bridge are owned in one domain */
324 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
325
326 /* domain represents a virtual machine; more than one device
327  * across iommus may be owned in one domain, e.g. a kvm guest.
328  */
329 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
330
331 /* si_domain contains multiple devices */
332 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
333
334 struct dmar_domain {
335         int     id;                     /* domain id */
336         int     nid;                    /* node id */
337         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses */
338
339         struct list_head devices;       /* all devices' list */
340         struct iova_domain iovad;       /* iova's that belong to this domain */
341
342         struct dma_pte  *pgd;           /* virtual address */
343         int             gaw;            /* max guest address width */
344
345         /* adjusted guest address width, 0 is level 2 30-bit */
346         int             agaw;
347
348         int             flags;          /* flags to find out type of domain */
349
350         int             iommu_coherency;/* indicate coherency of iommu access */
351         int             iommu_snooping; /* indicate snooping control feature*/
352         int             iommu_count;    /* reference count of iommu */
353         int             iommu_superpage;/* Level of superpages supported:
354                                            0 == 4KiB (no superpages), 1 == 2MiB,
355                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
356         spinlock_t      iommu_lock;     /* protect iommu set in domain */
357         u64             max_addr;       /* maximum mapped address */
358 };
359
360 /* PCI domain-device relationship */
361 struct device_domain_info {
362         struct list_head link;  /* link to domain siblings */
363         struct list_head global; /* link to global list */
364         int segment;            /* PCI domain */
365         u8 bus;                 /* PCI bus number */
366         u8 devfn;               /* PCI devfn number */
367         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
368         struct intel_iommu *iommu; /* IOMMU used by this device */
369         struct dmar_domain *domain; /* pointer to domain */
370 };
371
372 static void flush_unmaps_timeout(unsigned long data);
373
374 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
375
376 #define HIGH_WATER_MARK 250
377 struct deferred_flush_tables {
378         int next;
379         struct iova *iova[HIGH_WATER_MARK];
380         struct dmar_domain *domain[HIGH_WATER_MARK];
381 };
382
383 static struct deferred_flush_tables *deferred_flush;
384
385 /* number of iommus; used to size g_iommus[] and bound per-domain iommu bitmaps */
386 static int g_num_of_iommus;
387
388 static DEFINE_SPINLOCK(async_umap_flush_lock);
389 static LIST_HEAD(unmaps_to_do);
390
391 static int timer_on;
392 static long list_size;
393
394 static void domain_remove_dev_info(struct dmar_domain *domain);
395
396 #ifdef CONFIG_DMAR_DEFAULT_ON
397 int dmar_disabled = 0;
398 #else
399 int dmar_disabled = 1;
400 #endif /*CONFIG_DMAR_DEFAULT_ON*/
401
402 static int dmar_map_gfx = 1;
403 static int dmar_forcedac;
404 static int intel_iommu_strict;
405 static int intel_iommu_superpage = 1;
406
407 int intel_iommu_gfx_mapped;
408 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
409
410 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
411 static DEFINE_SPINLOCK(device_domain_lock);
412 static LIST_HEAD(device_domain_list);
413
414 static struct iommu_ops intel_iommu_ops;
415
416 static int __init intel_iommu_setup(char *str)
417 {
418         if (!str)
419                 return -EINVAL;
420         while (*str) {
421                 if (!strncmp(str, "on", 2)) {
422                         dmar_disabled = 0;
423                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
424                 } else if (!strncmp(str, "off", 3)) {
425                         dmar_disabled = 1;
426                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
427                 } else if (!strncmp(str, "igfx_off", 8)) {
428                         dmar_map_gfx = 0;
429                         printk(KERN_INFO
430                                 "Intel-IOMMU: disable GFX device mapping\n");
431                 } else if (!strncmp(str, "forcedac", 8)) {
432                         printk(KERN_INFO
433                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
434                         dmar_forcedac = 1;
435                 } else if (!strncmp(str, "strict", 6)) {
436                         printk(KERN_INFO
437                                 "Intel-IOMMU: disable batched IOTLB flush\n");
438                         intel_iommu_strict = 1;
439                 } else if (!strncmp(str, "sp_off", 6)) {
440                         printk(KERN_INFO
441                                 "Intel-IOMMU: disable supported super page\n");
442                         intel_iommu_superpage = 0;
443                 }
444
445                 str += strcspn(str, ",");
446                 while (*str == ',')
447                         str++;
448         }
449         return 0;
450 }
451 __setup("intel_iommu=", intel_iommu_setup);
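/*
 * The options above combine on the kernel command line, separated by
 * commas: e.g. "intel_iommu=on,strict,sp_off" enables the IOMMU,
 * disables batched IOTLB flushing and turns off superpage support,
 * while "intel_iommu=igfx_off" disables DMA remapping for the
 * integrated graphics device.
 */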
452
453 static struct kmem_cache *iommu_domain_cache;
454 static struct kmem_cache *iommu_devinfo_cache;
455 static struct kmem_cache *iommu_iova_cache;
456
457 static inline void *alloc_pgtable_page(int node)
458 {
459         struct page *page;
460         void *vaddr = NULL;
461
462         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
463         if (page)
464                 vaddr = page_address(page);
465         return vaddr;
466 }
467
468 static inline void free_pgtable_page(void *vaddr)
469 {
470         free_page((unsigned long)vaddr);
471 }
472
473 static inline void *alloc_domain_mem(void)
474 {
475         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
476 }
477
478 static void free_domain_mem(void *vaddr)
479 {
480         kmem_cache_free(iommu_domain_cache, vaddr);
481 }
482
483 static inline void * alloc_devinfo_mem(void)
484 {
485         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
486 }
487
488 static inline void free_devinfo_mem(void *vaddr)
489 {
490         kmem_cache_free(iommu_devinfo_cache, vaddr);
491 }
492
493 struct iova *alloc_iova_mem(void)
494 {
495         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
496 }
497
498 void free_iova_mem(struct iova *iova)
499 {
500         kmem_cache_free(iommu_iova_cache, iova);
501 }
502
503
504 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
505 {
506         unsigned long sagaw;
507         int agaw = -1;
508
509         sagaw = cap_sagaw(iommu->cap);
510         for (agaw = width_to_agaw(max_gaw);
511              agaw >= 0; agaw--) {
512                 if (test_bit(agaw, &sagaw))
513                         break;
514         }
515
516         return agaw;
517 }
518
519 /*
520  * Calculate max SAGAW for each iommu.
521  */
522 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
523 {
524         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
525 }
526
527 /*
528  * Calculate the agaw for each iommu.
529  * "SAGAW" may be different across iommus: use a default agaw, and
530  * fall back to a smaller supported agaw for iommus that don't support the default.
531  */
532 int iommu_calculate_agaw(struct intel_iommu *iommu)
533 {
534         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
535 }
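/*
 * cap_sagaw() is a bitmap of supported page-table depths. For example,
 * a SAGAW value with only bit 2 set means the hardware supports only
 * 4-level (48-bit) tables, so __iommu_calculate_agaw() settles on
 * agaw 2 for the default 48-bit domain width.
 */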
536
537 /* This function only returns a single iommu in a domain */
538 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
539 {
540         int iommu_id;
541
542         /* si_domain and vm domain should not get here. */
543         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
544         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
545
546         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
547         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
548                 return NULL;
549
550         return g_iommus[iommu_id];
551 }
552
553 static void domain_update_iommu_coherency(struct dmar_domain *domain)
554 {
555         int i;
556
557         domain->iommu_coherency = 1;
558
559         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
560                 if (!ecap_coherent(g_iommus[i]->ecap)) {
561                         domain->iommu_coherency = 0;
562                         break;
563                 }
564         }
565 }
566
567 static void domain_update_iommu_snooping(struct dmar_domain *domain)
568 {
569         int i;
570
571         domain->iommu_snooping = 1;
572
573         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
574                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
575                         domain->iommu_snooping = 0;
576                         break;
577                 }
578         }
579 }
580
581 static void domain_update_iommu_superpage(struct dmar_domain *domain)
582 {
583         int i, mask = 0xf;
584
585         if (!intel_iommu_superpage) {
586                 domain->iommu_superpage = 0;
587                 return;
588         }
589
590         domain->iommu_superpage = 4; /* 1TiB */
591
592         for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
593                 mask |= cap_super_page_val(g_iommus[i]->cap);
594                 if (!mask) {
595                         break;
596                 }
597         }
598         domain->iommu_superpage = fls(mask);
599 }
600
601 /* Some capabilities may be different across iommus */
602 static void domain_update_iommu_cap(struct dmar_domain *domain)
603 {
604         domain_update_iommu_coherency(domain);
605         domain_update_iommu_snooping(domain);
606         domain_update_iommu_superpage(domain);
607 }
608
609 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
610 {
611         struct dmar_drhd_unit *drhd = NULL;
612         int i;
613
614         for_each_drhd_unit(drhd) {
615                 if (drhd->ignored)
616                         continue;
617                 if (segment != drhd->segment)
618                         continue;
619
620                 for (i = 0; i < drhd->devices_cnt; i++) {
621                         if (drhd->devices[i] &&
622                             drhd->devices[i]->bus->number == bus &&
623                             drhd->devices[i]->devfn == devfn)
624                                 return drhd->iommu;
625                         if (drhd->devices[i] &&
626                             drhd->devices[i]->subordinate &&
627                             drhd->devices[i]->subordinate->number <= bus &&
628                             drhd->devices[i]->subordinate->subordinate >= bus)
629                                 return drhd->iommu;
630                 }
631
632                 if (drhd->include_all)
633                         return drhd->iommu;
634         }
635
636         return NULL;
637 }
638
639 static void domain_flush_cache(struct dmar_domain *domain,
640                                void *addr, int size)
641 {
642         if (!domain->iommu_coherency)
643                 clflush_cache_range(addr, size);
644 }
645
646 /* Gets context entry for a given bus and devfn */
647 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
648                 u8 bus, u8 devfn)
649 {
650         struct root_entry *root;
651         struct context_entry *context;
652         unsigned long phy_addr;
653         unsigned long flags;
654
655         spin_lock_irqsave(&iommu->lock, flags);
656         root = &iommu->root_entry[bus];
657         context = get_context_addr_from_root(root);
658         if (!context) {
659                 context = (struct context_entry *)
660                                 alloc_pgtable_page(iommu->node);
661                 if (!context) {
662                         spin_unlock_irqrestore(&iommu->lock, flags);
663                         return NULL;
664                 }
665                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
666                 phy_addr = virt_to_phys((void *)context);
667                 set_root_value(root, phy_addr);
668                 set_root_present(root);
669                 __iommu_flush_cache(iommu, root, sizeof(*root));
670         }
671         spin_unlock_irqrestore(&iommu->lock, flags);
672         return &context[devfn];
673 }
674
675 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
676 {
677         struct root_entry *root;
678         struct context_entry *context;
679         int ret;
680         unsigned long flags;
681
682         spin_lock_irqsave(&iommu->lock, flags);
683         root = &iommu->root_entry[bus];
684         context = get_context_addr_from_root(root);
685         if (!context) {
686                 ret = 0;
687                 goto out;
688         }
689         ret = context_present(&context[devfn]);
690 out:
691         spin_unlock_irqrestore(&iommu->lock, flags);
692         return ret;
693 }
694
695 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
696 {
697         struct root_entry *root;
698         struct context_entry *context;
699         unsigned long flags;
700
701         spin_lock_irqsave(&iommu->lock, flags);
702         root = &iommu->root_entry[bus];
703         context = get_context_addr_from_root(root);
704         if (context) {
705                 context_clear_entry(&context[devfn]);
706                 __iommu_flush_cache(iommu, &context[devfn], \
707                         sizeof(*context));
708         }
709         spin_unlock_irqrestore(&iommu->lock, flags);
710 }
711
712 static void free_context_table(struct intel_iommu *iommu)
713 {
714         struct root_entry *root;
715         int i;
716         unsigned long flags;
717         struct context_entry *context;
718
719         spin_lock_irqsave(&iommu->lock, flags);
720         if (!iommu->root_entry) {
721                 goto out;
722         }
723         for (i = 0; i < ROOT_ENTRY_NR; i++) {
724                 root = &iommu->root_entry[i];
725                 context = get_context_addr_from_root(root);
726                 if (context)
727                         free_pgtable_page(context);
728         }
729         free_pgtable_page(iommu->root_entry);
730         iommu->root_entry = NULL;
731 out:
732         spin_unlock_irqrestore(&iommu->lock, flags);
733 }
734
735 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
736                                       unsigned long pfn, int large_level)
737 {
738         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
739         struct dma_pte *parent, *pte = NULL;
740         int level = agaw_to_level(domain->agaw);
741         int offset, target_level;
742
743         BUG_ON(!domain->pgd);
744         BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
745         parent = domain->pgd;
746
747         /* Search pte */
748         if (!large_level)
749                 target_level = 1;
750         else
751                 target_level = large_level;
752
753         while (level > 0) {
754                 void *tmp_page;
755
756                 offset = pfn_level_offset(pfn, level);
757                 pte = &parent[offset];
758                 if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
759                         break;
760                 if (level == target_level)
761                         break;
762
763                 if (!dma_pte_present(pte)) {
764                         uint64_t pteval;
765
766                         tmp_page = alloc_pgtable_page(domain->nid);
767
768                         if (!tmp_page)
769                                 return NULL;
770
771                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
772                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
773                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
774                                 /* Someone else set it while we were thinking; use theirs. */
775                                 free_pgtable_page(tmp_page);
776                         } else {
777                                 dma_pte_addr(pte);
778                                 domain_flush_cache(domain, pte, sizeof(*pte));
779                         }
780                 }
781                 parent = phys_to_virt(dma_pte_addr(pte));
782                 level--;
783         }
784
785         return pte;
786 }
787
788
789 /* return address's pte at specific level */
790 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
791                                          unsigned long pfn,
792                                          int level, int *large_page)
793 {
794         struct dma_pte *parent, *pte = NULL;
795         int total = agaw_to_level(domain->agaw);
796         int offset;
797
798         parent = domain->pgd;
799         while (level <= total) {
800                 offset = pfn_level_offset(pfn, total);
801                 pte = &parent[offset];
802                 if (level == total)
803                         return pte;
804
805                 if (!dma_pte_present(pte)) {
806                         *large_page = total;
807                         break;
808                 }
809
810                 if (pte->val & DMA_PTE_LARGE_PAGE) {
811                         *large_page = total;
812                         return pte;
813                 }
814
815                 parent = phys_to_virt(dma_pte_addr(pte));
816                 total--;
817         }
818         return NULL;
819 }
820
821 /* clear last level pte; a tlb flush should follow */
822 static void dma_pte_clear_range(struct dmar_domain *domain,
823                                 unsigned long start_pfn,
824                                 unsigned long last_pfn)
825 {
826         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
827         unsigned int large_page = 1;
828         struct dma_pte *first_pte, *pte;
829
830         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
831         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
832         BUG_ON(start_pfn > last_pfn);
833
834         /* we don't need lock here; nobody else touches the iova range */
835         do {
836                 large_page = 1;
837                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
838                 if (!pte) {
839                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
840                         continue;
841                 }
842                 do {
843                         dma_clear_pte(pte);
844                         start_pfn += lvl_to_nr_pages(large_page);
845                         pte++;
846                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
847
848                 domain_flush_cache(domain, first_pte,
849                                    (void *)pte - (void *)first_pte);
850
851         } while (start_pfn && start_pfn <= last_pfn);
852 }
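/*
 * The inner loop above clears ptes until it runs off the end of the
 * current page-table page (first_pte_in_page), so each cache flush
 * covers at most one 4KiB table page worth of ptes.
 */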
853
854 /* free page table pages. last level pte should already be cleared */
855 static void dma_pte_free_pagetable(struct dmar_domain *domain,
856                                    unsigned long start_pfn,
857                                    unsigned long last_pfn)
858 {
859         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
860         struct dma_pte *first_pte, *pte;
861         int total = agaw_to_level(domain->agaw);
862         int level;
863         unsigned long tmp;
864         int large_page = 2;
865
866         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
867         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
868         BUG_ON(start_pfn > last_pfn);
869
870         /* We don't need lock here; nobody else touches the iova range */
871         level = 2;
872         while (level <= total) {
873                 tmp = align_to_level(start_pfn, level);
874
875                 /* If we can't even clear one PTE at this level, we're done */
876                 if (tmp + level_size(level) - 1 > last_pfn)
877                         return;
878
879                 do {
880                         large_page = level;
881                         first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
882                         if (large_page > level)
883                                 level = large_page + 1;
884                         if (!pte) {
885                                 tmp = align_to_level(tmp + 1, level + 1);
886                                 continue;
887                         }
888                         do {
889                                 if (dma_pte_present(pte)) {
890                                         free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
891                                         dma_clear_pte(pte);
892                                 }
893                                 pte++;
894                                 tmp += level_size(level);
895                         } while (!first_pte_in_page(pte) &&
896                                  tmp + level_size(level) - 1 <= last_pfn);
897
898                         domain_flush_cache(domain, first_pte,
899                                            (void *)pte - (void *)first_pte);
900                         
901                 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
902                 level++;
903         }
904         /* free pgd */
905         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
906                 free_pgtable_page(domain->pgd);
907                 domain->pgd = NULL;
908         }
909 }
910
911 /* iommu handling */
912 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
913 {
914         struct root_entry *root;
915         unsigned long flags;
916
917         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
918         if (!root)
919                 return -ENOMEM;
920
921         __iommu_flush_cache(iommu, root, ROOT_SIZE);
922
923         spin_lock_irqsave(&iommu->lock, flags);
924         iommu->root_entry = root;
925         spin_unlock_irqrestore(&iommu->lock, flags);
926
927         return 0;
928 }
929
930 static void iommu_set_root_entry(struct intel_iommu *iommu)
931 {
932         void *addr;
933         u32 sts;
934         unsigned long flag;
935
936         addr = iommu->root_entry;
937
938         spin_lock_irqsave(&iommu->register_lock, flag);
939         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
940
941         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
942
943         /* Make sure the hardware completes it */
944         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
945                       readl, (sts & DMA_GSTS_RTPS), sts);
946
947         spin_unlock_irqrestore(&iommu->register_lock, flag);
948 }
949
950 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
951 {
952         u32 val;
953         unsigned long flag;
954
955         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
956                 return;
957
958         spin_lock_irqsave(&iommu->register_lock, flag);
959         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
960
961         /* Make sure the hardware completes it */
962         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
963                       readl, (!(val & DMA_GSTS_WBFS)), val);
964
965         spin_unlock_irqrestore(&iommu->register_lock, flag);
966 }
967
968 /* return value determines if we need a write buffer flush */
969 static void __iommu_flush_context(struct intel_iommu *iommu,
970                                   u16 did, u16 source_id, u8 function_mask,
971                                   u64 type)
972 {
973         u64 val = 0;
974         unsigned long flag;
975
976         switch (type) {
977         case DMA_CCMD_GLOBAL_INVL:
978                 val = DMA_CCMD_GLOBAL_INVL;
979                 break;
980         case DMA_CCMD_DOMAIN_INVL:
981                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
982                 break;
983         case DMA_CCMD_DEVICE_INVL:
984                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
985                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
986                 break;
987         default:
988                 BUG();
989         }
990         val |= DMA_CCMD_ICC;
991
992         spin_lock_irqsave(&iommu->register_lock, flag);
993         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
994
995         /* Make sure the hardware completes it */
996         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
997                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
998
999         spin_unlock_irqrestore(&iommu->register_lock, flag);
1000 }
1001
1002 /* return value determines if we need a write buffer flush */
1003 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1004                                 u64 addr, unsigned int size_order, u64 type)
1005 {
1006         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1007         u64 val = 0, val_iva = 0;
1008         unsigned long flag;
1009
1010         switch (type) {
1011         case DMA_TLB_GLOBAL_FLUSH:
1012                 /* global flush doesn't need to set IVA_REG */
1013                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1014                 break;
1015         case DMA_TLB_DSI_FLUSH:
1016                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1017                 break;
1018         case DMA_TLB_PSI_FLUSH:
1019                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1020                 /* Note: always flush non-leaf currently */
1021                 val_iva = size_order | addr;
1022                 break;
1023         default:
1024                 BUG();
1025         }
1026         /* Note: set drain read/write */
1027 #if 0
1028         /*
1029          * This is probably only there to be extra safe. It looks like we
1030          * can ignore it without any impact.
1031          */
1032         if (cap_read_drain(iommu->cap))
1033                 val |= DMA_TLB_READ_DRAIN;
1034 #endif
1035         if (cap_write_drain(iommu->cap))
1036                 val |= DMA_TLB_WRITE_DRAIN;
1037
1038         spin_lock_irqsave(&iommu->register_lock, flag);
1039         /* Note: Only uses first TLB reg currently */
1040         if (val_iva)
1041                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1042         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1043
1044         /* Make sure the hardware completes it */
1045         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1046                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1047
1048         spin_unlock_irqrestore(&iommu->register_lock, flag);
1049
1050         /* check IOTLB invalidation granularity */
1051         if (DMA_TLB_IAIG(val) == 0)
1052                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1053         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1054                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1055                         (unsigned long long)DMA_TLB_IIRG(type),
1056                         (unsigned long long)DMA_TLB_IAIG(val));
1057 }
1058
1059 static struct device_domain_info *iommu_support_dev_iotlb(
1060         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1061 {
1062         int found = 0;
1063         unsigned long flags;
1064         struct device_domain_info *info;
1065         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1066
1067         if (!ecap_dev_iotlb_support(iommu->ecap))
1068                 return NULL;
1069
1070         if (!iommu->qi)
1071                 return NULL;
1072
1073         spin_lock_irqsave(&device_domain_lock, flags);
1074         list_for_each_entry(info, &domain->devices, link)
1075                 if (info->bus == bus && info->devfn == devfn) {
1076                         found = 1;
1077                         break;
1078                 }
1079         spin_unlock_irqrestore(&device_domain_lock, flags);
1080
1081         if (!found || !info->dev)
1082                 return NULL;
1083
1084         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1085                 return NULL;
1086
1087         if (!dmar_find_matched_atsr_unit(info->dev))
1088                 return NULL;
1089
1090         info->iommu = iommu;
1091
1092         return info;
1093 }
1094
1095 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1096 {
1097         if (!info)
1098                 return;
1099
1100         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1101 }
1102
1103 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1104 {
1105         if (!info->dev || !pci_ats_enabled(info->dev))
1106                 return;
1107
1108         pci_disable_ats(info->dev);
1109 }
1110
1111 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1112                                   u64 addr, unsigned mask)
1113 {
1114         u16 sid, qdep;
1115         unsigned long flags;
1116         struct device_domain_info *info;
1117
1118         spin_lock_irqsave(&device_domain_lock, flags);
1119         list_for_each_entry(info, &domain->devices, link) {
1120                 if (!info->dev || !pci_ats_enabled(info->dev))
1121                         continue;
1122
1123                 sid = info->bus << 8 | info->devfn;
1124                 qdep = pci_ats_queue_depth(info->dev);
1125                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1126         }
1127         spin_unlock_irqrestore(&device_domain_lock, flags);
1128 }
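/*
 * The source-id is the device's PCI requester id: for a device at
 * bus 0, slot 2, function 0 (devfn 0x10), sid works out to 0x0010.
 */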
1129
1130 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1131                                   unsigned long pfn, unsigned int pages, int map)
1132 {
1133         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1134         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1135
1136         BUG_ON(pages == 0);
1137
1138         /*
1139          * Fall back to a domain-selective flush if there is no PSI support
1140          * or the size is too big.
1141          * PSI requires the page size to be 2^x, and the base address to be
1142          * naturally aligned to the size.
1143          */
1144         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1145                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1146                                                 DMA_TLB_DSI_FLUSH);
1147         else
1148                 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1149                                                 DMA_TLB_PSI_FLUSH);
1150
1151         /*
1152          * In caching mode, changes of pages from non-present to present require
1153          * flush. However, device IOTLB doesn't need to be flushed in this case.
1154          */
1155         if (!cap_caching_mode(iommu->cap) || !map)
1156                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1157 }
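/*
 * For example, a 9-page request is rounded up to 16 pages, so mask
 * becomes 4 and the hardware invalidates a naturally aligned 64KiB
 * region; requests wider than cap_max_amask_val() fall back to a
 * domain-selective flush instead.
 */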
1158
1159 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1160 {
1161         u32 pmen;
1162         unsigned long flags;
1163
1164         spin_lock_irqsave(&iommu->register_lock, flags);
1165         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1166         pmen &= ~DMA_PMEN_EPM;
1167         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1168
1169         /* wait for the protected region status bit to clear */
1170         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1171                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1172
1173         spin_unlock_irqrestore(&iommu->register_lock, flags);
1174 }
1175
1176 static int iommu_enable_translation(struct intel_iommu *iommu)
1177 {
1178         u32 sts;
1179         unsigned long flags;
1180
1181         spin_lock_irqsave(&iommu->register_lock, flags);
1182         iommu->gcmd |= DMA_GCMD_TE;
1183         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1184
1185         /* Make sure the hardware completes it */
1186         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1187                       readl, (sts & DMA_GSTS_TES), sts);
1188
1189         spin_unlock_irqrestore(&iommu->register_lock, flags);
1190         return 0;
1191 }
1192
1193 static int iommu_disable_translation(struct intel_iommu *iommu)
1194 {
1195         u32 sts;
1196         unsigned long flag;
1197
1198         spin_lock_irqsave(&iommu->register_lock, flag);
1199         iommu->gcmd &= ~DMA_GCMD_TE;
1200         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1201
1202         /* Make sure the hardware completes it */
1203         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1204                       readl, (!(sts & DMA_GSTS_TES)), sts);
1205
1206         spin_unlock_irqrestore(&iommu->register_lock, flag);
1207         return 0;
1208 }
1209
1210
1211 static int iommu_init_domains(struct intel_iommu *iommu)
1212 {
1213         unsigned long ndomains;
1214         unsigned long nlongs;
1215
1216         ndomains = cap_ndoms(iommu->cap);
1217         pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1218                         ndomains);
1219         nlongs = BITS_TO_LONGS(ndomains);
1220
1221         spin_lock_init(&iommu->lock);
1222
1223         /* TBD: there might be 64K domains,
1224          * consider a different allocation scheme for future chips
1225          */
1226         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1227         if (!iommu->domain_ids) {
1228                 printk(KERN_ERR "Allocating domain id array failed\n");
1229                 return -ENOMEM;
1230         }
1231         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1232                         GFP_KERNEL);
1233         if (!iommu->domains) {
1234                 printk(KERN_ERR "Allocating domain array failed\n");
1235                 return -ENOMEM;
1236         }
1237
1238         /*
1239          * If Caching Mode is set, then invalid translations are tagged
1240          * with domain id 0, hence we need to pre-allocate it.
1241          */
1242         if (cap_caching_mode(iommu->cap))
1243                 set_bit(0, iommu->domain_ids);
1244         return 0;
1245 }
1246
1247
1248 static void domain_exit(struct dmar_domain *domain);
1249 static void vm_domain_exit(struct dmar_domain *domain);
1250
1251 void free_dmar_iommu(struct intel_iommu *iommu)
1252 {
1253         struct dmar_domain *domain;
1254         int i;
1255         unsigned long flags;
1256
1257         if ((iommu->domains) && (iommu->domain_ids)) {
1258                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1259                         domain = iommu->domains[i];
1260                         clear_bit(i, iommu->domain_ids);
1261
1262                         spin_lock_irqsave(&domain->iommu_lock, flags);
1263                         if (--domain->iommu_count == 0) {
1264                                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1265                                         vm_domain_exit(domain);
1266                                 else
1267                                         domain_exit(domain);
1268                         }
1269                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1270                 }
1271         }
1272
1273         if (iommu->gcmd & DMA_GCMD_TE)
1274                 iommu_disable_translation(iommu);
1275
1276         if (iommu->irq) {
1277                 irq_set_handler_data(iommu->irq, NULL);
1278                 /* This will mask the irq */
1279                 free_irq(iommu->irq, iommu);
1280                 destroy_irq(iommu->irq);
1281         }
1282
1283         kfree(iommu->domains);
1284         kfree(iommu->domain_ids);
1285
1286         g_iommus[iommu->seq_id] = NULL;
1287
1288         /* if all iommus are freed, free g_iommus */
1289         for (i = 0; i < g_num_of_iommus; i++) {
1290                 if (g_iommus[i])
1291                         break;
1292         }
1293
1294         if (i == g_num_of_iommus)
1295                 kfree(g_iommus);
1296
1297         /* free context mapping */
1298         free_context_table(iommu);
1299 }
1300
1301 static struct dmar_domain *alloc_domain(void)
1302 {
1303         struct dmar_domain *domain;
1304
1305         domain = alloc_domain_mem();
1306         if (!domain)
1307                 return NULL;
1308
1309         domain->nid = -1;
1310         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1311         domain->flags = 0;
1312
1313         return domain;
1314 }
1315
1316 static int iommu_attach_domain(struct dmar_domain *domain,
1317                                struct intel_iommu *iommu)
1318 {
1319         int num;
1320         unsigned long ndomains;
1321         unsigned long flags;
1322
1323         ndomains = cap_ndoms(iommu->cap);
1324
1325         spin_lock_irqsave(&iommu->lock, flags);
1326
1327         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1328         if (num >= ndomains) {
1329                 spin_unlock_irqrestore(&iommu->lock, flags);
1330                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1331                 return -ENOMEM;
1332         }
1333
1334         domain->id = num;
1335         set_bit(num, iommu->domain_ids);
1336         set_bit(iommu->seq_id, &domain->iommu_bmp);
1337         iommu->domains[num] = domain;
1338         spin_unlock_irqrestore(&iommu->lock, flags);
1339
1340         return 0;
1341 }
1342
1343 static void iommu_detach_domain(struct dmar_domain *domain,
1344                                 struct intel_iommu *iommu)
1345 {
1346         unsigned long flags;
1347         int num, ndomains;
1348         int found = 0;
1349
1350         spin_lock_irqsave(&iommu->lock, flags);
1351         ndomains = cap_ndoms(iommu->cap);
1352         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1353                 if (iommu->domains[num] == domain) {
1354                         found = 1;
1355                         break;
1356                 }
1357         }
1358
1359         if (found) {
1360                 clear_bit(num, iommu->domain_ids);
1361                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1362                 iommu->domains[num] = NULL;
1363         }
1364         spin_unlock_irqrestore(&iommu->lock, flags);
1365 }
1366
1367 static struct iova_domain reserved_iova_list;
1368 static struct lock_class_key reserved_rbtree_key;
1369
1370 static int dmar_init_reserved_ranges(void)
1371 {
1372         struct pci_dev *pdev = NULL;
1373         struct iova *iova;
1374         int i;
1375
1376         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1377
1378         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1379                 &reserved_rbtree_key);
1380
1381         /* IOAPIC ranges shouldn't be accessed by DMA */
1382         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1383                 IOVA_PFN(IOAPIC_RANGE_END));
1384         if (!iova) {
1385                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1386                 return -ENODEV;
1387         }
1388
1389         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1390         for_each_pci_dev(pdev) {
1391                 struct resource *r;
1392
1393                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1394                         r = &pdev->resource[i];
1395                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1396                                 continue;
1397                         iova = reserve_iova(&reserved_iova_list,
1398                                             IOVA_PFN(r->start),
1399                                             IOVA_PFN(r->end));
1400                         if (!iova) {
1401                                 printk(KERN_ERR "Reserve iova failed\n");
1402                                 return -ENODEV;
1403                         }
1404                 }
1405         }
1406         return 0;
1407 }
1408
1409 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1410 {
1411         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1412 }
1413
1414 static inline int guestwidth_to_adjustwidth(int gaw)
1415 {
1416         int agaw;
1417         int r = (gaw - 12) % 9;
1418
1419         if (r == 0)
1420                 agaw = gaw;
1421         else
1422                 agaw = gaw + 9 - r;
1423         if (agaw > 64)
1424                 agaw = 64;
1425         return agaw;
1426 }
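/*
 * guestwidth_to_adjustwidth() rounds a guest address width up to the
 * next width the page tables can express (12 offset bits plus a
 * multiple of 9): 48 stays 48, a 36-bit guest width becomes 39, and
 * anything above 64 is capped at 64.
 */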
1427
1428 static int domain_init(struct dmar_domain *domain, int guest_width)
1429 {
1430         struct intel_iommu *iommu;
1431         int adjust_width, agaw;
1432         unsigned long sagaw;
1433
1434         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1435         spin_lock_init(&domain->iommu_lock);
1436
1437         domain_reserve_special_ranges(domain);
1438
1439         /* calculate AGAW */
1440         iommu = domain_get_iommu(domain);
1441         if (guest_width > cap_mgaw(iommu->cap))
1442                 guest_width = cap_mgaw(iommu->cap);
1443         domain->gaw = guest_width;
1444         adjust_width = guestwidth_to_adjustwidth(guest_width);
1445         agaw = width_to_agaw(adjust_width);
1446         sagaw = cap_sagaw(iommu->cap);
1447         if (!test_bit(agaw, &sagaw)) {
1448                 /* hardware doesn't support it, choose a bigger one */
1449                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1450                 agaw = find_next_bit(&sagaw, 5, agaw);
1451                 if (agaw >= 5)
1452                         return -ENODEV;
1453         }
1454         domain->agaw = agaw;
1455         INIT_LIST_HEAD(&domain->devices);
1456
1457         if (ecap_coherent(iommu->ecap))
1458                 domain->iommu_coherency = 1;
1459         else
1460                 domain->iommu_coherency = 0;
1461
1462         if (ecap_sc_support(iommu->ecap))
1463                 domain->iommu_snooping = 1;
1464         else
1465                 domain->iommu_snooping = 0;
1466
1467         domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1468         domain->iommu_count = 1;
1469         domain->nid = iommu->node;
1470
1471         /* always allocate the top pgd */
1472         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1473         if (!domain->pgd)
1474                 return -ENOMEM;
1475         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1476         return 0;
1477 }
1478
1479 static void domain_exit(struct dmar_domain *domain)
1480 {
1481         struct dmar_drhd_unit *drhd;
1482         struct intel_iommu *iommu;
1483
1484         /* Domain 0 is reserved, so don't process it */
1485         if (!domain)
1486                 return;
1487
1488         /* Flush any lazy unmaps that may reference this domain */
1489         if (!intel_iommu_strict)
1490                 flush_unmaps_timeout(0);
1491
1492         domain_remove_dev_info(domain);
1493         /* destroy iovas */
1494         put_iova_domain(&domain->iovad);
1495
1496         /* clear ptes */
1497         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1498
1499         /* free page tables */
1500         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1501
1502         for_each_active_iommu(iommu, drhd)
1503                 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1504                         iommu_detach_domain(domain, iommu);
1505
1506         free_domain_mem(domain);
1507 }
1508
1509 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1510                                  u8 bus, u8 devfn, int translation)
1511 {
1512         struct context_entry *context;
1513         unsigned long flags;
1514         struct intel_iommu *iommu;
1515         struct dma_pte *pgd;
1516         unsigned long num;
1517         unsigned long ndomains;
1518         int id;
1519         int agaw;
1520         struct device_domain_info *info = NULL;
1521
1522         pr_debug("Set context mapping for %02x:%02x.%d\n",
1523                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1524
1525         BUG_ON(!domain->pgd);
1526         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1527                translation != CONTEXT_TT_MULTI_LEVEL);
1528
1529         iommu = device_to_iommu(segment, bus, devfn);
1530         if (!iommu)
1531                 return -ENODEV;
1532
1533         context = device_to_context_entry(iommu, bus, devfn);
1534         if (!context)
1535                 return -ENOMEM;
1536         spin_lock_irqsave(&iommu->lock, flags);
1537         if (context_present(context)) {
1538                 spin_unlock_irqrestore(&iommu->lock, flags);
1539                 return 0;
1540         }
1541
1542         id = domain->id;
1543         pgd = domain->pgd;
1544
1545         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1546             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1547                 int found = 0;
1548
1549                 /* find an available domain id for this device in iommu */
1550                 ndomains = cap_ndoms(iommu->cap);
1551                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1552                         if (iommu->domains[num] == domain) {
1553                                 id = num;
1554                                 found = 1;
1555                                 break;
1556                         }
1557                 }
1558
1559                 if (found == 0) {
1560                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1561                         if (num >= ndomains) {
1562                                 spin_unlock_irqrestore(&iommu->lock, flags);
1563                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1564                                 return -EFAULT;
1565                         }
1566
1567                         set_bit(num, iommu->domain_ids);
1568                         iommu->domains[num] = domain;
1569                         id = num;
1570                 }
1571
1572                 /* Skip top levels of page tables for
1573                  * iommus which have a smaller agaw than the default.
1574                  * Unnecessary for PT mode.
1575                  */
1576                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1577                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1578                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1579                                 if (!dma_pte_present(pgd)) {
1580                                         spin_unlock_irqrestore(&iommu->lock, flags);
1581                                         return -ENOMEM;
1582                                 }
1583                         }
1584                 }
1585         }
1586
1587         context_set_domain_id(context, id);
1588
1589         if (translation != CONTEXT_TT_PASS_THROUGH) {
1590                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1591                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1592                                      CONTEXT_TT_MULTI_LEVEL;
1593         }
1594         /*
1595          * In pass through mode, AW must be programmed to indicate the largest
1596          * AGAW value supported by hardware. And ASR is ignored by hardware.
1597          */
1598         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1599                 context_set_address_width(context, iommu->msagaw);
1600         else {
1601                 context_set_address_root(context, virt_to_phys(pgd));
1602                 context_set_address_width(context, iommu->agaw);
1603         }
1604
1605         context_set_translation_type(context, translation);
1606         context_set_fault_enable(context);
1607         context_set_present(context);
1608         domain_flush_cache(domain, context, sizeof(*context));
1609
1610         /*
1611          * It's a non-present to present mapping. If hardware doesn't cache
1612          * non-present entries we only need to flush the write-buffer. If it
1613          * _does_ cache non-present entries, then it does so in the special
1614          * domain #0, which we have to flush:
1615          */
1616         if (cap_caching_mode(iommu->cap)) {
1617                 iommu->flush.flush_context(iommu, 0,
1618                                            (((u16)bus) << 8) | devfn,
1619                                            DMA_CCMD_MASK_NOBIT,
1620                                            DMA_CCMD_DEVICE_INVL);
1621                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1622         } else {
1623                 iommu_flush_write_buffer(iommu);
1624         }
1625         iommu_enable_dev_iotlb(info);
1626         spin_unlock_irqrestore(&iommu->lock, flags);
1627
1628         spin_lock_irqsave(&domain->iommu_lock, flags);
1629         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1630                 domain->iommu_count++;
1631                 if (domain->iommu_count == 1)
1632                         domain->nid = iommu->node;
1633                 domain_update_iommu_cap(domain);
1634         }
1635         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1636         return 0;
1637 }
1638
1639 static int
1640 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1641                         int translation)
1642 {
1643         int ret;
1644         struct pci_dev *tmp, *parent;
1645
1646         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1647                                          pdev->bus->number, pdev->devfn,
1648                                          translation);
1649         if (ret)
1650                 return ret;
1651
1652         /* dependent device mapping */
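        /*
         * DMA from a device behind a PCIe-to-PCI/PCI-X bridge reaches the
         * IOMMU with the bridge's source-id, so the context entries for the
         * bridges on the path upstream of the device are programmed as well.
         */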
1653         tmp = pci_find_upstream_pcie_bridge(pdev);
1654         if (!tmp)
1655                 return 0;
1656         /* Secondary interface's bus number and devfn 0 */
1657         parent = pdev->bus->self;
1658         while (parent != tmp) {
1659                 ret = domain_context_mapping_one(domain,
1660                                                  pci_domain_nr(parent->bus),
1661                                                  parent->bus->number,
1662                                                  parent->devfn, translation);
1663                 if (ret)
1664                         return ret;
1665                 parent = parent->bus->self;
1666         }
1667         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1668                 return domain_context_mapping_one(domain,
1669                                         pci_domain_nr(tmp->subordinate),
1670                                         tmp->subordinate->number, 0,
1671                                         translation);
1672         else /* this is a legacy PCI bridge */
1673                 return domain_context_mapping_one(domain,
1674                                                   pci_domain_nr(tmp->bus),
1675                                                   tmp->bus->number,
1676                                                   tmp->devfn,
1677                                                   translation);
1678 }
1679
1680 static int domain_context_mapped(struct pci_dev *pdev)
1681 {
1682         int ret;
1683         struct pci_dev *tmp, *parent;
1684         struct intel_iommu *iommu;
1685
1686         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1687                                 pdev->devfn);
1688         if (!iommu)
1689                 return -ENODEV;
1690
1691         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1692         if (!ret)
1693                 return ret;
1694         /* dependent device mapping */
1695         tmp = pci_find_upstream_pcie_bridge(pdev);
1696         if (!tmp)
1697                 return ret;
1698         /* Secondary interface's bus number and devfn 0 */
1699         parent = pdev->bus->self;
1700         while (parent != tmp) {
1701                 ret = device_context_mapped(iommu, parent->bus->number,
1702                                             parent->devfn);
1703                 if (!ret)
1704                         return ret;
1705                 parent = parent->bus->self;
1706         }
1707         if (pci_is_pcie(tmp))
1708                 return device_context_mapped(iommu, tmp->subordinate->number,
1709                                              0);
1710         else
1711                 return device_context_mapped(iommu, tmp->bus->number,
1712                                              tmp->devfn);
1713 }
1714
1715 /* Returns a number of VTD pages, but aligned to MM page size */
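/*
 * For example (assuming 4KiB MM and VT-d pages): host_addr = 0x1234 and
 * size = 0x2000 give an in-page offset of 0x234; 0x234 + 0x2000 = 0x2234
 * rounds up to 0x3000, so 3 VT-d pages are returned.
 */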
1716 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1717                                             size_t size)
1718 {
1719         host_addr &= ~PAGE_MASK;
1720         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1721 }
1722
1723 /* Return largest possible superpage level for a given mapping */
1724 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1725                                           unsigned long iov_pfn,
1726                                           unsigned long phy_pfn,
1727                                           unsigned long pages)
1728 {
1729         int support, level = 1;
1730         unsigned long pfnmerge;
1731
1732         support = domain->iommu_superpage;
1733
1734         /* To use a large page, the virtual *and* physical addresses
1735            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1736            of them will mean we have to use smaller pages. So just
1737            merge them and check both at once. */
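        /* For example (with 4KiB base pages): iov_pfn = 0x200 and
           phy_pfn = 0x800 are both 512-page (2MiB) aligned, so if the
           IOMMU supports superpages and pages >= 512, level 2 is returned. */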
1738         pfnmerge = iov_pfn | phy_pfn;
1739
1740         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1741                 pages >>= VTD_STRIDE_SHIFT;
1742                 if (!pages)
1743                         break;
1744                 pfnmerge >>= VTD_STRIDE_SHIFT;
1745                 level++;
1746                 support--;
1747         }
1748         return level;
1749 }
1750
1751 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1752                             struct scatterlist *sg, unsigned long phys_pfn,
1753                             unsigned long nr_pages, int prot)
1754 {
1755         struct dma_pte *first_pte = NULL, *pte = NULL;
1756         phys_addr_t uninitialized_var(pteval);
1757         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1758         unsigned long sg_res;
1759         unsigned int largepage_lvl = 0;
1760         unsigned long lvl_pages = 0;
1761
1762         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1763
1764         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1765                 return -EINVAL;
1766
1767         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1768
1769         if (sg)
1770                 sg_res = 0;
1771         else {
1772                 sg_res = nr_pages + 1;
1773                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1774         }
1775
1776         while (nr_pages > 0) {
1777                 uint64_t tmp;
1778
1779                 if (!sg_res) {
1780                         sg_res = aligned_nrpages(sg->offset, sg->length);
1781                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1782                         sg->dma_length = sg->length;
1783                         pteval = page_to_phys(sg_page(sg)) | prot;
1784                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1785                 }
1786
1787                 if (!pte) {
1788                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1789
1790                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1791                         if (!pte)
1792                                 return -ENOMEM;
1793                         /* It is a large page */
1794                         if (largepage_lvl > 1)
1795                                 pteval |= DMA_PTE_LARGE_PAGE;
1796                         else
1797                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1798
1799                 }
1800                 /* We don't need a lock here; nobody else
1801                  * touches the iova range
1802                  */
1803                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1804                 if (tmp) {
1805                         static int dumps = 5;
1806                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1807                                iov_pfn, tmp, (unsigned long long)pteval);
1808                         if (dumps) {
1809                                 dumps--;
1810                                 debug_dma_dump_mappings(NULL);
1811                         }
1812                         WARN_ON(1);
1813                 }
1814
1815                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1816
1817                 BUG_ON(nr_pages < lvl_pages);
1818                 BUG_ON(sg_res < lvl_pages);
1819
1820                 nr_pages -= lvl_pages;
1821                 iov_pfn += lvl_pages;
1822                 phys_pfn += lvl_pages;
1823                 pteval += lvl_pages * VTD_PAGE_SIZE;
1824                 sg_res -= lvl_pages;
1825
1826                 /* If the next PTE would be the first in a new page, then we
1827                    need to flush the cache on the entries we've just written.
1828                    And then we'll need to recalculate 'pte', so clear it and
1829                    let it get set again in the if (!pte) block above.
1830
1831                    If we're done (!nr_pages) we need to flush the cache too.
1832
1833                    Also if we've been setting superpages, we may need to
1834                    recalculate 'pte' and switch back to smaller pages for the
1835                    end of the mapping, if the trailing size is not enough to
1836                    use another superpage (i.e. sg_res < lvl_pages). */
1837                 pte++;
1838                 if (!nr_pages || first_pte_in_page(pte) ||
1839                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
1840                         domain_flush_cache(domain, first_pte,
1841                                            (void *)pte - (void *)first_pte);
1842                         pte = NULL;
1843                 }
1844
1845                 if (!sg_res && nr_pages)
1846                         sg = sg_next(sg);
1847         }
1848         return 0;
1849 }
1850
1851 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1852                                     struct scatterlist *sg, unsigned long nr_pages,
1853                                     int prot)
1854 {
1855         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1856 }
1857
1858 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1859                                      unsigned long phys_pfn, unsigned long nr_pages,
1860                                      int prot)
1861 {
1862         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1863 }
1864
1865 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1866 {
1867         if (!iommu)
1868                 return;
1869
1870         clear_context_table(iommu, bus, devfn);
1871         iommu->flush.flush_context(iommu, 0, 0, 0,
1872                                            DMA_CCMD_GLOBAL_INVL);
1873         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1874 }
1875
1876 static void domain_remove_dev_info(struct dmar_domain *domain)
1877 {
1878         struct device_domain_info *info;
1879         unsigned long flags;
1880         struct intel_iommu *iommu;
1881
1882         spin_lock_irqsave(&device_domain_lock, flags);
1883         while (!list_empty(&domain->devices)) {
1884                 info = list_entry(domain->devices.next,
1885                         struct device_domain_info, link);
1886                 list_del(&info->link);
1887                 list_del(&info->global);
1888                 if (info->dev)
1889                         info->dev->dev.archdata.iommu = NULL;
1890                 spin_unlock_irqrestore(&device_domain_lock, flags);
1891
1892                 iommu_disable_dev_iotlb(info);
1893                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1894                 iommu_detach_dev(iommu, info->bus, info->devfn);
1895                 free_devinfo_mem(info);
1896
1897                 spin_lock_irqsave(&device_domain_lock, flags);
1898         }
1899         spin_unlock_irqrestore(&device_domain_lock, flags);
1900 }
1901
1902 /*
1903  * find_domain
1904  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1905  */
1906 static struct dmar_domain *
1907 find_domain(struct pci_dev *pdev)
1908 {
1909         struct device_domain_info *info;
1910
1911         /* No lock here, assumes no domain exit in normal case */
1912         info = pdev->dev.archdata.iommu;
1913         if (info)
1914                 return info->domain;
1915         return NULL;
1916 }
1917
1918 /* domain is initialized */
1919 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1920 {
1921         struct dmar_domain *domain, *found = NULL;
1922         struct intel_iommu *iommu;
1923         struct dmar_drhd_unit *drhd;
1924         struct device_domain_info *info, *tmp;
1925         struct pci_dev *dev_tmp;
1926         unsigned long flags;
1927         int bus = 0, devfn = 0;
1928         int segment;
1929         int ret;
1930
1931         domain = find_domain(pdev);
1932         if (domain)
1933                 return domain;
1934
1935         segment = pci_domain_nr(pdev->bus);
1936
1937         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1938         if (dev_tmp) {
1939                 if (pci_is_pcie(dev_tmp)) {
1940                         bus = dev_tmp->subordinate->number;
1941                         devfn = 0;
1942                 } else {
1943                         bus = dev_tmp->bus->number;
1944                         devfn = dev_tmp->devfn;
1945                 }
1946                 spin_lock_irqsave(&device_domain_lock, flags);
1947                 list_for_each_entry(info, &device_domain_list, global) {
1948                         if (info->segment == segment &&
1949                             info->bus == bus && info->devfn == devfn) {
1950                                 found = info->domain;
1951                                 break;
1952                         }
1953                 }
1954                 spin_unlock_irqrestore(&device_domain_lock, flags);
1955                 /* pcie-pci bridge already has a domain, use it */
1956                 if (found) {
1957                         domain = found;
1958                         goto found_domain;
1959                 }
1960         }
1961
1962         domain = alloc_domain();
1963         if (!domain)
1964                 goto error;
1965
1966         /* Allocate new domain for the device */
1967         drhd = dmar_find_matched_drhd_unit(pdev);
1968         if (!drhd) {
1969                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1970                         pci_name(pdev));
                free_domain_mem(domain);
1971                 return NULL;
1972         }
1973         iommu = drhd->iommu;
1974
1975         ret = iommu_attach_domain(domain, iommu);
1976         if (ret) {
1977                 free_domain_mem(domain);
1978                 goto error;
1979         }
1980
1981         if (domain_init(domain, gaw)) {
1982                 domain_exit(domain);
1983                 goto error;
1984         }
1985
1986         /* register pcie-to-pci device */
1987         if (dev_tmp) {
1988                 info = alloc_devinfo_mem();
1989                 if (!info) {
1990                         domain_exit(domain);
1991                         goto error;
1992                 }
1993                 info->segment = segment;
1994                 info->bus = bus;
1995                 info->devfn = devfn;
1996                 info->dev = NULL;
1997                 info->domain = domain;
1998                 /* This domain is shared by devices under p2p bridge */
1999                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2000
2001                 /* pcie-to-pci bridge already has a domain, use it */
2002                 found = NULL;
2003                 spin_lock_irqsave(&device_domain_lock, flags);
2004                 list_for_each_entry(tmp, &device_domain_list, global) {
2005                         if (tmp->segment == segment &&
2006                             tmp->bus == bus && tmp->devfn == devfn) {
2007                                 found = tmp->domain;
2008                                 break;
2009                         }
2010                 }
2011                 if (found) {
2012                         spin_unlock_irqrestore(&device_domain_lock, flags);
2013                         free_devinfo_mem(info);
2014                         domain_exit(domain);
2015                         domain = found;
2016                 } else {
2017                         list_add(&info->link, &domain->devices);
2018                         list_add(&info->global, &device_domain_list);
2019                         spin_unlock_irqrestore(&device_domain_lock, flags);
2020                 }
2021         }
2022
2023 found_domain:
2024         info = alloc_devinfo_mem();
2025         if (!info)
2026                 goto error;
2027         info->segment = segment;
2028         info->bus = pdev->bus->number;
2029         info->devfn = pdev->devfn;
2030         info->dev = pdev;
2031         info->domain = domain;
2032         spin_lock_irqsave(&device_domain_lock, flags);
2033         /* somebody else beat us to it */
2034         found = find_domain(pdev);
2035         if (found != NULL) {
2036                 spin_unlock_irqrestore(&device_domain_lock, flags);
2037                 if (found != domain) {
2038                         domain_exit(domain);
2039                         domain = found;
2040                 }
2041                 free_devinfo_mem(info);
2042                 return domain;
2043         }
2044         list_add(&info->link, &domain->devices);
2045         list_add(&info->global, &device_domain_list);
2046         pdev->dev.archdata.iommu = info;
2047         spin_unlock_irqrestore(&device_domain_lock, flags);
2048         return domain;
2049 error:
2050         /* recheck it here, maybe others set it */
2051         return find_domain(pdev);
2052 }
2053
2054 static int iommu_identity_mapping;
2055 #define IDENTMAP_ALL            1
2056 #define IDENTMAP_GFX            2
2057 #define IDENTMAP_AZALIA         4
2058
2059 static int iommu_domain_identity_map(struct dmar_domain *domain,
2060                                      unsigned long long start,
2061                                      unsigned long long end)
2062 {
2063         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2064         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2065
2066         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2067                           dma_to_mm_pfn(last_vpfn))) {
2068                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2069                 return -ENOMEM;
2070         }
2071
2072         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2073                  start, end, domain->id);
2074         /*
2075          * The RMRR range might overlap a physical memory range that is
2076          * already mapped; clear it first.
2077          */
2078         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2079
2080         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2081                                   last_vpfn - first_vpfn + 1,
2082                                   DMA_PTE_READ|DMA_PTE_WRITE);
2083 }
2084
2085 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2086                                       unsigned long long start,
2087                                       unsigned long long end)
2088 {
2089         struct dmar_domain *domain;
2090         int ret;
2091
2092         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2093         if (!domain)
2094                 return -ENOMEM;
2095
2096         /* For _hardware_ passthrough, don't bother. But for software
2097            passthrough, we do it anyway -- it may indicate a memory
2098            range which is reserved in E820 and so didn't get set
2099            up in si_domain to start with. */
2100         if (domain == si_domain && hw_pass_through) {
2101                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2102                        pci_name(pdev), start, end);
2103                 return 0;
2104         }
2105
2106         printk(KERN_INFO
2107                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2108                pci_name(pdev), start, end);
2109         
2110         if (end < start) {
2111                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2112                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2113                         dmi_get_system_info(DMI_BIOS_VENDOR),
2114                         dmi_get_system_info(DMI_BIOS_VERSION),
2115                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2116                 ret = -EIO;
2117                 goto error;
2118         }
2119
2120         if (end >> agaw_to_width(domain->agaw)) {
2121                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2122                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2123                      agaw_to_width(domain->agaw),
2124                      dmi_get_system_info(DMI_BIOS_VENDOR),
2125                      dmi_get_system_info(DMI_BIOS_VERSION),
2126                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2127                 ret = -EIO;
2128                 goto error;
2129         }
2130
2131         ret = iommu_domain_identity_map(domain, start, end);
2132         if (ret)
2133                 goto error;
2134
2135         /* context entry init */
2136         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2137         if (ret)
2138                 goto error;
2139
2140         return 0;
2141
2142  error:
2143         domain_exit(domain);
2144         return ret;
2145 }
2146
2147 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2148         struct pci_dev *pdev)
2149 {
2150         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2151                 return 0;
2152         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2153                 rmrr->end_address);
2154 }
2155
2156 #ifdef CONFIG_DMAR_FLOPPY_WA
2157 static inline void iommu_prepare_isa(void)
2158 {
2159         struct pci_dev *pdev;
2160         int ret;
2161
2162         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2163         if (!pdev)
2164                 return;
2165
2166         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2167         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2168
2169         if (ret)
2170                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2171                        "floppy might not work\n");
2172
2173 }
2174 #else
2175 static inline void iommu_prepare_isa(void)
2176 {
2177         return;
2178 }
2179 #endif /* CONFIG_DMAR_FLOPPY_WA */
2180
2181 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2182
2183 static int __init si_domain_work_fn(unsigned long start_pfn,
2184                                     unsigned long end_pfn, void *datax)
2185 {
2186         int *ret = datax;
2187
2188         *ret = iommu_domain_identity_map(si_domain,
2189                                          (uint64_t)start_pfn << PAGE_SHIFT,
2190                                          (uint64_t)end_pfn << PAGE_SHIFT);
2191         return *ret;
2192
2193 }
2194
2195 static int __init si_domain_init(int hw)
2196 {
2197         struct dmar_drhd_unit *drhd;
2198         struct intel_iommu *iommu;
2199         int nid, ret = 0;
2200
2201         si_domain = alloc_domain();
2202         if (!si_domain)
2203                 return -EFAULT;
2204
2205         pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2206
2207         for_each_active_iommu(iommu, drhd) {
2208                 ret = iommu_attach_domain(si_domain, iommu);
2209                 if (ret) {
2210                         domain_exit(si_domain);
2211                         return -EFAULT;
2212                 }
2213         }
2214
2215         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2216                 domain_exit(si_domain);
2217                 return -EFAULT;
2218         }
2219
2220         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2221
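        /*
         * With hardware pass-through the si_domain page tables are never
         * consulted by the IOMMU, so there is no need to populate them with
         * the 1:1 mappings below.
         */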
2222         if (hw)
2223                 return 0;
2224
2225         for_each_online_node(nid) {
2226                 work_with_active_regions(nid, si_domain_work_fn, &ret);
2227                 if (ret)
2228                         return ret;
2229         }
2230
2231         return 0;
2232 }
2233
2234 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2235                                           struct pci_dev *pdev);
2236 static int identity_mapping(struct pci_dev *pdev)
2237 {
2238         struct device_domain_info *info;
2239
2240         if (likely(!iommu_identity_mapping))
2241                 return 0;
2242
2243         info = pdev->dev.archdata.iommu;
2244         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2245                 return (info->domain == si_domain);
2246
2247         return 0;
2248 }
2249
2250 static int domain_add_dev_info(struct dmar_domain *domain,
2251                                struct pci_dev *pdev,
2252                                int translation)
2253 {
2254         struct device_domain_info *info;
2255         unsigned long flags;
2256         int ret;
2257
2258         info = alloc_devinfo_mem();
2259         if (!info)
2260                 return -ENOMEM;
2261
2262         ret = domain_context_mapping(domain, pdev, translation);
2263         if (ret) {
2264                 free_devinfo_mem(info);
2265                 return ret;
2266         }
2267
2268         info->segment = pci_domain_nr(pdev->bus);
2269         info->bus = pdev->bus->number;
2270         info->devfn = pdev->devfn;
2271         info->dev = pdev;
2272         info->domain = domain;
2273
2274         spin_lock_irqsave(&device_domain_lock, flags);
2275         list_add(&info->link, &domain->devices);
2276         list_add(&info->global, &device_domain_list);
2277         pdev->dev.archdata.iommu = info;
2278         spin_unlock_irqrestore(&device_domain_lock, flags);
2279
2280         return 0;
2281 }
2282
2283 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2284 {
2285         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2286                 return 1;
2287
2288         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2289                 return 1;
2290
2291         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2292                 return 0;
2293
2294         /*
2295          * We want to start off with all devices in the 1:1 domain, and
2296          * take them out later if we find they can't access all of memory.
2297          *
2298          * However, we can't do this for PCI devices behind bridges,
2299          * because all PCI devices behind the same bridge will end up
2300          * with the same source-id on their transactions.
2301          *
2302          * Practically speaking, we can't change things around for these
2303          * devices at run-time, because we can't be sure there'll be no
2304          * DMA transactions in flight for any of their siblings.
2305          * 
2306          * So PCI devices (unless they're on the root bus) as well as
2307          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2308          * the 1:1 domain, just in _case_ one of their siblings turns out
2309          * not to be able to map all of memory.
2310          */
2311         if (!pci_is_pcie(pdev)) {
2312                 if (!pci_is_root_bus(pdev->bus))
2313                         return 0;
2314                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2315                         return 0;
2316         } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2317                 return 0;
2318
2319         /* 
2320          * At boot time, we don't yet know if devices will be 64-bit capable.
2321          * Assume that they will -- if they turn out not to be, then we can 
2322          * take them out of the 1:1 domain later.
2323          */
2324         if (!startup) {
2325                 /*
2326                  * If the device's dma_mask is less than the system's memory
2327                  * size then this is not a candidate for identity mapping.
2328                  */
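                /*
                 * For example, a device limited to 32-bit DMA on a machine
                 * with more than 4GiB of RAM has a required mask wider than
                 * its dma_mask, so it is kept out of the 1:1 domain.
                 */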
2329                 u64 dma_mask = pdev->dma_mask;
2330
2331                 if (pdev->dev.coherent_dma_mask &&
2332                     pdev->dev.coherent_dma_mask < dma_mask)
2333                         dma_mask = pdev->dev.coherent_dma_mask;
2334
2335                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2336         }
2337
2338         return 1;
2339 }
2340
2341 static int __init iommu_prepare_static_identity_mapping(int hw)
2342 {
2343         struct pci_dev *pdev = NULL;
2344         int ret;
2345
2346         ret = si_domain_init(hw);
2347         if (ret)
2348                 return -EFAULT;
2349
2350         for_each_pci_dev(pdev) {
2351                 /* Skip PCI host bridge devices */
2352                 if (IS_BRIDGE_HOST_DEVICE(pdev))
2353                         continue;
2354                 if (iommu_should_identity_map(pdev, 1)) {
2355                         printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2356                                hw ? "hardware" : "software", pci_name(pdev));
2357
2358                         ret = domain_add_dev_info(si_domain, pdev,
2359                                                      hw ? CONTEXT_TT_PASS_THROUGH :
2360                                                      CONTEXT_TT_MULTI_LEVEL);
2361                         if (ret)
2362                                 return ret;
2363                 }
2364         }
2365
2366         return 0;
2367 }
2368
2369 static int __init init_dmars(void)
2370 {
2371         struct dmar_drhd_unit *drhd;
2372         struct dmar_rmrr_unit *rmrr;
2373         struct pci_dev *pdev;
2374         struct intel_iommu *iommu;
2375         int i, ret;
2376
2377         /*
2378          * for each drhd
2379          *    allocate root
2380          *    initialize and program root entry to not present
2381          * endfor
2382          */
2383         for_each_drhd_unit(drhd) {
2384                 g_num_of_iommus++;
2385                 /*
2386                  * lock not needed as this is only incremented in the
2387                  * single-threaded kernel __init code path; all other
2388                  * accesses are read only
2389                  */
2390         }
2391
2392         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2393                         GFP_KERNEL);
2394         if (!g_iommus) {
2395                 printk(KERN_ERR "Allocating global iommu array failed\n");
2396                 ret = -ENOMEM;
2397                 goto error;
2398         }
2399
2400         deferred_flush = kzalloc(g_num_of_iommus *
2401                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2402         if (!deferred_flush) {
2403                 ret = -ENOMEM;
2404                 goto error;
2405         }
2406
2407         for_each_drhd_unit(drhd) {
2408                 if (drhd->ignored)
2409                         continue;
2410
2411                 iommu = drhd->iommu;
2412                 g_iommus[iommu->seq_id] = iommu;
2413
2414                 ret = iommu_init_domains(iommu);
2415                 if (ret)
2416                         goto error;
2417
2418                 /*
2419                  * TBD:
2420                  * we could share the same root & context tables
2421                  * among all IOMMUs. Need to split it later.
2422                  */
2423                 ret = iommu_alloc_root_entry(iommu);
2424                 if (ret) {
2425                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2426                         goto error;
2427                 }
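                /* Hardware pass-through is only usable if every IOMMU supports it. */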
2428                 if (!ecap_pass_through(iommu->ecap))
2429                         hw_pass_through = 0;
2430         }
2431
2432         /*
2433          * Start from a sane IOMMU hardware state.
2434          */
2435         for_each_drhd_unit(drhd) {
2436                 if (drhd->ignored)
2437                         continue;
2438
2439                 iommu = drhd->iommu;
2440
2441                 /*
2442                  * If the queued invalidation is already initialized by us
2443                  * (for example, while enabling interrupt-remapping) then
2444          * we already have things rolling from a sane state.
2445                  */
2446                 if (iommu->qi)
2447                         continue;
2448
2449                 /*
2450                  * Clear any previous faults.
2451                  */
2452                 dmar_fault(-1, iommu);
2453                 /*
2454                  * Disable queued invalidation if supported and already enabled
2455                  * before OS handover.
2456                  */
2457                 dmar_disable_qi(iommu);
2458         }
2459
2460         for_each_drhd_unit(drhd) {
2461                 if (drhd->ignored)
2462                         continue;
2463
2464                 iommu = drhd->iommu;
2465
2466                 if (dmar_enable_qi(iommu)) {
2467                         /*
2468                          * Queued Invalidate not enabled, use Register Based
2469                          * Invalidate
2470                          */
2471                         iommu->flush.flush_context = __iommu_flush_context;
2472                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2473                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2474                                "invalidation\n",
2475                                 iommu->seq_id,
2476                                (unsigned long long)drhd->reg_base_addr);
2477                 } else {
2478                         iommu->flush.flush_context = qi_flush_context;
2479                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2480                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2481                                "invalidation\n",
2482                                 iommu->seq_id,
2483                                (unsigned long long)drhd->reg_base_addr);
2484                 }
2485         }
2486
2487         if (iommu_pass_through)
2488                 iommu_identity_mapping |= IDENTMAP_ALL;
2489
2490 #ifdef CONFIG_DMAR_BROKEN_GFX_WA
2491         iommu_identity_mapping |= IDENTMAP_GFX;
2492 #endif
2493
2494         check_tylersburg_isoch();
2495
2496         /*
2497          * If any identity mapping was requested (pass-through, or the
2498          * gfx/azalia workarounds), set up the static identity (si) domain
2499          * and the context entries for the qualifying devices now.
2500          */
2501         if (iommu_identity_mapping) {
2502                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2503                 if (ret) {
2504                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2505                         goto error;
2506                 }
2507         }
2508         /*
2509          * For each rmrr
2510          *   for each dev attached to rmrr
2511          *   do
2512          *     locate drhd for dev, alloc domain for dev
2513          *     allocate free domain
2514          *     allocate page table entries for rmrr
2515          *     if context not allocated for bus
2516          *           allocate and init context
2517          *           set present in root table for this bus
2518          *     init context with domain, translation etc
2519          *    endfor
2520          * endfor
2521          */
2522         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2523         for_each_rmrr_units(rmrr) {
2524                 for (i = 0; i < rmrr->devices_cnt; i++) {
2525                         pdev = rmrr->devices[i];
2526                         /*
2527                          * some BIOSes list non-existent devices in the
2528                          * DMAR table.
2529                          */
2530                         if (!pdev)
2531                                 continue;
2532                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2533                         if (ret)
2534                                 printk(KERN_ERR
2535                                        "IOMMU: mapping reserved region failed\n");
2536                 }
2537         }
2538
2539         iommu_prepare_isa();
2540
2541         /*
2542          * for each drhd
2543          *   enable fault log
2544          *   global invalidate context cache
2545          *   global invalidate iotlb
2546          *   enable translation
2547          */
2548         for_each_drhd_unit(drhd) {
2549                 if (drhd->ignored) {
2550                         /*
2551                          * we always have to disable PMRs or DMA may fail on
2552                          * this device
2553                          */
2554                         if (force_on)
2555                                 iommu_disable_protect_mem_regions(drhd->iommu);
2556                         continue;
2557                 }
2558                 iommu = drhd->iommu;
2559
2560                 iommu_flush_write_buffer(iommu);
2561
2562                 ret = dmar_set_interrupt(iommu);
2563                 if (ret)
2564                         goto error;
2565
2566                 iommu_set_root_entry(iommu);
2567
2568                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2569                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2570
2571                 ret = iommu_enable_translation(iommu);
2572                 if (ret)
2573                         goto error;
2574
2575                 iommu_disable_protect_mem_regions(iommu);
2576         }
2577
2578         return 0;
2579 error:
2580         for_each_drhd_unit(drhd) {
2581                 if (drhd->ignored)
2582                         continue;
2583                 iommu = drhd->iommu;
2584                 free_iommu(iommu);
2585         }
2586         kfree(g_iommus);
2587         return ret;
2588 }
2589
2590 /* This takes a number of _MM_ pages, not VTD pages */
2591 static struct iova *intel_alloc_iova(struct device *dev,
2592                                      struct dmar_domain *domain,
2593                                      unsigned long nrpages, uint64_t dma_mask)
2594 {
2595         struct pci_dev *pdev = to_pci_dev(dev);
2596         struct iova *iova = NULL;
2597
2598         /* Restrict dma_mask to the width that the iommu can handle */
2599         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2600
2601         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2602                 /*
2603                  * First try to allocate an io virtual address in
2604                  * DMA_BIT_MASK(32) and if that fails then try allocating
2605                  * from higher range
2606                  */
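                /*
                 * Preferring IOVAs below 4GiB avoids dual address cycle (DAC)
                 * addressing, which some devices and bridges handle badly;
                 * booting with "intel_iommu=forcedac" skips this preference.
                 */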
2607                 iova = alloc_iova(&domain->iovad, nrpages,
2608                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2609                 if (iova)
2610                         return iova;
2611         }
2612         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2613         if (unlikely(!iova)) {
2614                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2615                        nrpages, pci_name(pdev));
2616                 return NULL;
2617         }
2618
2619         return iova;
2620 }
2621
2622 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2623 {
2624         struct dmar_domain *domain;
2625         int ret;
2626
2627         domain = get_domain_for_dev(pdev,
2628                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2629         if (!domain) {
2630                 printk(KERN_ERR
2631                         "Allocating domain for %s failed\n", pci_name(pdev));
2632                 return NULL;
2633         }
2634
2635         /* make sure context mapping is ok */
2636         if (unlikely(!domain_context_mapped(pdev))) {
2637                 ret = domain_context_mapping(domain, pdev,
2638                                              CONTEXT_TT_MULTI_LEVEL);
2639                 if (ret) {
2640                         printk(KERN_ERR
2641                                 "Domain context map for %s failed\n",
2642                                 pci_name(pdev));
2643                         return NULL;
2644                 }
2645         }
2646
2647         return domain;
2648 }
2649
2650 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2651 {
2652         struct device_domain_info *info;
2653
2654         /* No lock here, assumes no domain exit in normal case */
2655         info = dev->dev.archdata.iommu;
2656         if (likely(info))
2657                 return info->domain;
2658
2659         return __get_valid_domain_for_dev(dev);
2660 }
2661
2662 static int iommu_dummy(struct pci_dev *pdev)
2663 {
2664         return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2665 }
2666
2667 /* Check if the pdev needs to go through the non-identity map/unmap process. */
2668 static int iommu_no_mapping(struct device *dev)
2669 {
2670         struct pci_dev *pdev;
2671         int found;
2672
2673         if (unlikely(dev->bus != &pci_bus_type))
2674                 return 1;
2675
2676         pdev = to_pci_dev(dev);
2677         if (iommu_dummy(pdev))
2678                 return 1;
2679
2680         if (!iommu_identity_mapping)
2681                 return 0;
2682
2683         found = identity_mapping(pdev);
2684         if (found) {
2685                 if (iommu_should_identity_map(pdev, 0))
2686                         return 1;
2687                 else {
2688                         /*
2689                          * The device can only do 32-bit DMA: remove it from
2690                          * si_domain and fall back to non-identity mapping.
2691                          */
2692                         domain_remove_one_dev_info(si_domain, pdev);
2693                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2694                                pci_name(pdev));
2695                         return 0;
2696                 }
2697         } else {
2698                 /*
2699                  * In case a 64-bit DMA capable device was detached from a VM,
2700                  * the device is put back into si_domain for identity mapping.
2701                  */
2702                 if (iommu_should_identity_map(pdev, 0)) {
2703                         int ret;
2704                         ret = domain_add_dev_info(si_domain, pdev,
2705                                                   hw_pass_through ?
2706                                                   CONTEXT_TT_PASS_THROUGH :
2707                                                   CONTEXT_TT_MULTI_LEVEL);
2708                         if (!ret) {
2709                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2710                                        pci_name(pdev));
2711                                 return 1;
2712                         }
2713                 }
2714         }
2715
2716         return 0;
2717 }
2718
2719 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2720                                      size_t size, int dir, u64 dma_mask)
2721 {
2722         struct pci_dev *pdev = to_pci_dev(hwdev);
2723         struct dmar_domain *domain;
2724         phys_addr_t start_paddr;
2725         struct iova *iova;
2726         int prot = 0;
2727         int ret;
2728         struct intel_iommu *iommu;
2729         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2730
2731         BUG_ON(dir == DMA_NONE);
2732
2733         if (iommu_no_mapping(hwdev))
2734                 return paddr;
2735
2736         domain = get_valid_domain_for_dev(pdev);
2737         if (!domain)
2738                 return 0;
2739
2740         iommu = domain_get_iommu(domain);
2741         size = aligned_nrpages(paddr, size);
2742
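        /*
         * aligned_nrpages() counts VT-d pages; intel_alloc_iova() takes MM
         * pages, hence the dma_to_mm_pfn() conversion.
         */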
2743         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2744         if (!iova)
2745                 goto error;
2746
2747         /*
2748          * Check if DMAR supports zero-length reads on write only
2749          * mappings..
2750          */
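        /*
         * That is, if the IOMMU cannot handle zero-length reads (cap_zlr
         * clear), read permission is granted even for DMA_FROM_DEVICE
         * mappings so that zero-length read transactions do not fault.
         */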
2751         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2752                         !cap_zlr(iommu->cap))
2753                 prot |= DMA_PTE_READ;
2754         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2755                 prot |= DMA_PTE_WRITE;
2756         /*
2757          * The range paddr .. paddr + size might cover only part of a page;
2758          * we map the whole page.  Note: if two parts of one page are mapped
2759          * separately, we might end up with two guest addresses mapping to
2760          * the same host paddr, but this is not a big problem.
2761          */
2762         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2763                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2764         if (ret)
2765                 goto error;
2766
2767         /* it's a non-present to present mapping. Only flush if caching mode */
2768         if (cap_caching_mode(iommu->cap))
2769                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2770         else
2771                 iommu_flush_write_buffer(iommu);
2772
2773         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2774         start_paddr += paddr & ~PAGE_MASK;
2775         return start_paddr;
2776
2777 error:
2778         if (iova)
2779                 __free_iova(&domain->iovad, iova);
2780         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2781                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2782         return 0;
2783 }
2784
2785 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2786                                  unsigned long offset, size_t size,
2787                                  enum dma_data_direction dir,
2788                                  struct dma_attrs *attrs)
2789 {
2790         return __intel_map_single(dev, page_to_phys(page) + offset, size,
2791                                   dir, to_pci_dev(dev)->dma_mask);
2792 }
2793
2794 static void flush_unmaps(void)
2795 {
2796         int i, j;
2797
2798         timer_on = 0;
2799
2800         /* just flush them all */
2801         for (i = 0; i < g_num_of_iommus; i++) {
2802                 struct intel_iommu *iommu = g_iommus[i];
2803                 if (!iommu)
2804                         continue;
2805
2806                 if (!deferred_flush[i].next)
2807                         continue;
2808
2809                 /* In caching mode, global flushes make emulation expensive */
2810                 if (!cap_caching_mode(iommu->cap))
2811                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2812                                          DMA_TLB_GLOBAL_FLUSH);
2813                 for (j = 0; j < deferred_flush[i].next; j++) {
2814                         unsigned long mask;
2815                         struct iova *iova = deferred_flush[i].iova[j];
2816                         struct dmar_domain *domain = deferred_flush[i].domain[j];
2817
2818                         /* On real hardware multiple invalidations are expensive */
2819                         if (cap_caching_mode(iommu->cap))
2820                                 iommu_flush_iotlb_psi(iommu, domain->id,
2821                                 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2822                         else {
2823                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2824                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2825                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2826                         }
2827                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2828                 }
2829                 deferred_flush[i].next = 0;
2830         }
2831
2832         list_size = 0;
2833 }
2834
2835 static void flush_unmaps_timeout(unsigned long data)
2836 {
2837         unsigned long flags;
2838
2839         spin_lock_irqsave(&async_umap_flush_lock, flags);
2840         flush_unmaps();
2841         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2842 }
2843
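/*
 * Defer the IOTLB invalidation and the freeing of the IOVA: unmaps are
 * queued in the per-IOMMU deferred_flush tables and released in bulk from
 * flush_unmaps(), either via the 10ms unmap_timer or once HIGH_WATER_MARK
 * entries have accumulated, trading a little unmap latency for far fewer
 * IOTLB flushes.
 */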
2844 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2845 {
2846         unsigned long flags;
2847         int next, iommu_id;
2848         struct intel_iommu *iommu;
2849
2850         spin_lock_irqsave(&async_umap_flush_lock, flags);
2851         if (list_size == HIGH_WATER_MARK)
2852                 flush_unmaps();
2853
2854         iommu = domain_get_iommu(dom);
2855         iommu_id = iommu->seq_id;
2856
2857         next = deferred_flush[iommu_id].next;
2858         deferred_flush[iommu_id].domain[next] = dom;
2859         deferred_flush[iommu_id].iova[next] = iova;
2860         deferred_flush[iommu_id].next++;
2861
2862         if (!timer_on) {
2863                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2864                 timer_on = 1;
2865         }
2866         list_size++;
2867         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2868 }
2869
2870 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2871                              size_t size, enum dma_data_direction dir,
2872                              struct dma_attrs *attrs)
2873 {
2874         struct pci_dev *pdev = to_pci_dev(dev);
2875         struct dmar_domain *domain;
2876         unsigned long start_pfn, last_pfn;
2877         struct iova *iova;
2878         struct intel_iommu *iommu;
2879
2880         if (iommu_no_mapping(dev))
2881                 return;
2882
2883         domain = find_domain(pdev);
2884         BUG_ON(!domain);
2885
2886         iommu = domain_get_iommu(domain);
2887
2888         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2889         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2890                       (unsigned long long)dev_addr))
2891                 return;
2892
2893         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2894         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2895
2896         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2897                  pci_name(pdev), start_pfn, last_pfn);
2898
2899         /*  clear the whole page */
2900         dma_pte_clear_range(domain, start_pfn, last_pfn);
2901
2902         /* free page tables */
2903         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2904
2905         if (intel_iommu_strict) {
2906                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2907                                       last_pfn - start_pfn + 1, 0);
2908                 /* free iova */
2909                 __free_iova(&domain->iovad, iova);
2910         } else {
2911                 add_unmap(domain, iova);
2912                 /*
2913                  * queue up the release of the unmap to save the roughly 1/6th
2914                  * of the cpu time used up by the iotlb flush operation...
2915                  */
2916         }
2917 }
2918
2919 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2920                                   dma_addr_t *dma_handle, gfp_t flags)
2921 {
2922         void *vaddr;
2923         int order;
2924
2925         size = PAGE_ALIGN(size);
2926         order = get_order(size);
2927
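        /*
         * Devices translated by the IOMMU can be given memory from anywhere
         * and have it remapped; only identity-mapped devices with a small
         * coherent_dma_mask need GFP_DMA/GFP_DMA32 memory.
         */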
2928         if (!iommu_no_mapping(hwdev))
2929                 flags &= ~(GFP_DMA | GFP_DMA32);
2930         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2931                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2932                         flags |= GFP_DMA;
2933                 else
2934                         flags |= GFP_DMA32;
2935         }
2936
2937         vaddr = (void *)__get_free_pages(flags, order);
2938         if (!vaddr)
2939                 return NULL;
2940         memset(vaddr, 0, size);
2941
2942         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2943                                          DMA_BIDIRECTIONAL,
2944                                          hwdev->coherent_dma_mask);
2945         if (*dma_handle)
2946                 return vaddr;
2947         free_pages((unsigned long)vaddr, order);
2948         return NULL;
2949 }
2950
2951 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2952                                 dma_addr_t dma_handle)
2953 {
2954         int order;
2955
2956         size = PAGE_ALIGN(size);
2957         order = get_order(size);
2958
2959         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2960         free_pages((unsigned long)vaddr, order);
2961 }
2962
2963 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2964                            int nelems, enum dma_data_direction dir,
2965                            struct dma_attrs *attrs)
2966 {
2967         struct pci_dev *pdev = to_pci_dev(hwdev);
2968         struct dmar_domain *domain;
2969         unsigned long start_pfn, last_pfn;
2970         struct iova *iova;
2971         struct intel_iommu *iommu;
2972
2973         if (iommu_no_mapping(hwdev))
2974                 return;
2975
2976         domain = find_domain(pdev);
2977         BUG_ON(!domain);
2978
2979         iommu = domain_get_iommu(domain);
2980
2981         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2982         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2983                       (unsigned long long)sglist[0].dma_address))
2984                 return;
2985
2986         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2987         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2988
2989         /* clear the PTEs for the whole range */
2990         dma_pte_clear_range(domain, start_pfn, last_pfn);
2991
2992         /* free page tables */
2993         dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2994
2995         if (intel_iommu_strict) {
2996                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2997                                       last_pfn - start_pfn + 1, 0);
2998                 /* free iova */
2999                 __free_iova(&domain->iovad, iova);
3000         } else {
3001                 add_unmap(domain, iova);
3002                 /*
3003                  * Queue up the release of this unmap; batching saves roughly
3004                  * 1/6th of the CPU time consumed by the iotlb flush operation.
3005                  */
3006         }
3007 }
3008
3009 static int intel_nontranslate_map_sg(struct device *hwdev,
3010         struct scatterlist *sglist, int nelems, int dir)
3011 {
3012         int i;
3013         struct scatterlist *sg;
3014
3015         for_each_sg(sglist, sg, nelems, i) {
3016                 BUG_ON(!sg_page(sg));
3017                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3018                 sg->dma_length = sg->length;
3019         }
3020         return nelems;
3021 }
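
/*
 * Note: intel_nontranslate_map_sg() above handles the no-translation case:
 * each segment's dma_address is simply its physical address, so no IOVA
 * allocation or page-table setup is required.
 */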
3022
3023 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3024                         enum dma_data_direction dir, struct dma_attrs *attrs)
3025 {
3026         int i;
3027         struct pci_dev *pdev = to_pci_dev(hwdev);
3028         struct dmar_domain *domain;
3029         size_t size = 0;
3030         int prot = 0;
3031         struct iova *iova = NULL;
3032         int ret;
3033         struct scatterlist *sg;
3034         unsigned long start_vpfn;
3035         struct intel_iommu *iommu;
3036
3037         BUG_ON(dir == DMA_NONE);
3038         if (iommu_no_mapping(hwdev))
3039                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3040
3041         domain = get_valid_domain_for_dev(pdev);
3042         if (!domain)
3043                 return 0;
3044
3045         iommu = domain_get_iommu(domain);
3046
3047         for_each_sg(sglist, sg, nelems, i)
3048                 size += aligned_nrpages(sg->offset, sg->length);
3049
3050         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3051                                 pdev->dma_mask);
3052         if (!iova) {
3053                 sglist->dma_length = 0;
3054                 return 0;
3055         }
3056
3057         /*
3058          * Check if DMAR supports zero-length reads on write-only
3059          * mappings.
3060          */
3061         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3062                         !cap_zlr(iommu->cap))
3063                 prot |= DMA_PTE_READ;
3064         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3065                 prot |= DMA_PTE_WRITE;
3066
3067         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3068
3069         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3070         if (unlikely(ret)) {
3071                 /* clear any PTEs that were set up */
3072                 dma_pte_clear_range(domain, start_vpfn,
3073                                     start_vpfn + size - 1);
3074                 /* free page tables */
3075                 dma_pte_free_pagetable(domain, start_vpfn,
3076                                        start_vpfn + size - 1);
3077                 /* free iova */
3078                 __free_iova(&domain->iovad, iova);
3079                 return 0;
3080         }
3081
3082         /* it's a non-present to present mapping. Only flush if caching mode */
3083         if (cap_caching_mode(iommu->cap))
3084                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3085         else
3086                 iommu_flush_write_buffer(iommu);
3087
3088         return nelems;
3089 }
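
/*
 * Illustrative usage (not part of this driver): intel_map_sg() is reached
 * through the generic scatterlist DMA API, roughly:
 *
 *         int nents = dma_map_sg(&pdev->dev, sglist, nelems, DMA_TO_DEVICE);
 *
 *         if (!nents)
 *                 return -EIO;
 *         ...program the device using sg_dma_address()/sg_dma_len()...
 *         dma_unmap_sg(&pdev->dev, sglist, nelems, DMA_TO_DEVICE);
 *
 * A return value of 0 from the map above signals failure (for example,
 * IOVA allocation or page-table setup failed).
 */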
3090
3091 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3092 {
3093         return !dma_addr;
3094 }
3095
3096 struct dma_map_ops intel_dma_ops = {
3097         .alloc_coherent = intel_alloc_coherent,
3098         .free_coherent = intel_free_coherent,
3099         .map_sg = intel_map_sg,
3100         .unmap_sg = intel_unmap_sg,
3101         .map_page = intel_map_page,
3102         .unmap_page = intel_unmap_page,
3103         .mapping_error = intel_mapping_error,
3104 };
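
/*
 * Note: intel_dma_ops is installed as the global dma_ops in
 * intel_iommu_init() below, so ordinary dma_map_*()/dma_alloc_coherent()
 * calls from drivers are routed through the IOMMU transparently once DMA
 * remapping is enabled.
 */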
3105
3106 static inline int iommu_domain_cache_init(void)
3107 {
3108         int ret = 0;
3109
3110         iommu_domain_cache = kmem_cache_create("iommu_domain",
3111                                          sizeof(struct dmar_domain),
3112                                          0,
3113                                          SLAB_HWCACHE_ALIGN,
3115                                          NULL);
3116         if (!iommu_domain_cache) {
3117                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3118                 ret = -ENOMEM;
3119         }
3120
3121         return ret;
3122 }
3123
3124 static inline int iommu_devinfo_cache_init(void)
3125 {
3126         int ret = 0;
3127
3128         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3129                                          sizeof(struct device_domain_info),
3130                                          0,
3131                                          SLAB_HWCACHE_ALIGN,
3132                                          NULL);
3133         if (!iommu_devinfo_cache) {
3134                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3135                 ret = -ENOMEM;
3136         }
3137
3138         return ret;
3139 }
3140
3141 static inline int iommu_iova_cache_init(void)
3142 {
3143         int ret = 0;
3144
3145         iommu_iova_cache = kmem_cache_create("iommu_iova",
3146                                          sizeof(struct iova),
3147                                          0,
3148                                          SLAB_HWCACHE_ALIGN,
3149                                          NULL);
3150         if (!iommu_iova_cache) {
3151                 printk(KERN_ERR "Couldn't create iova cache\n");
3152                 ret = -ENOMEM;
3153         }
3154
3155         return ret;
3156 }
3157
3158 static int __init iommu_init_mempool(void)
3159 {
3160         int ret;
3161         ret = iommu_iova_cache_init();
3162         if (ret)
3163                 return ret;
3164
3165         ret = iommu_domain_cache_init();
3166         if (ret)
3167                 goto domain_error;
3168
3169         ret = iommu_devinfo_cache_init();
3170         if (!ret)
3171                 return ret;
3172
3173         kmem_cache_destroy(iommu_domain_cache);
3174 domain_error:
3175         kmem_cache_destroy(iommu_iova_cache);
3176
3177         return -ENOMEM;
3178 }
3179
3180 static void __init iommu_exit_mempool(void)
3181 {
3182         kmem_cache_destroy(iommu_devinfo_cache);
3183         kmem_cache_destroy(iommu_domain_cache);
3184         kmem_cache_destroy(iommu_iova_cache);
3185
3186 }
3187
3188 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3189 {
3190         struct dmar_drhd_unit *drhd;
3191         u32 vtbar;
3192         int rc;
3193
3194         /* We know that this device on this chipset has its own IOMMU.
3195          * If we find it under a different IOMMU, then the BIOS is lying
3196          * to us. Hope that the IOMMU for this device is actually
3197          * disabled, and it needs no translation...
3198          */
3199         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3200         if (rc) {
3201                 /* "can't" happen */
3202                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3203                 return;
3204         }
3205         vtbar &= 0xffff0000;
3206
3207         /* we know that this iommu should be at offset 0xa000 from vtbar */
3208         drhd = dmar_find_matched_drhd_unit(pdev);
3209         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3210                             TAINT_FIRMWARE_WORKAROUND,
3211                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3212                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3213 }
3214 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3215
3216 static void __init init_no_remapping_devices(void)
3217 {
3218         struct dmar_drhd_unit *drhd;
3219
3220         for_each_drhd_unit(drhd) {
3221                 if (!drhd->include_all) {
3222                         int i;
3223                         for (i = 0; i < drhd->devices_cnt; i++)
3224                                 if (drhd->devices[i] != NULL)
3225                                         break;
3226                         /* ignore DMAR unit if no pci devices exist */
3227                         if (i == drhd->devices_cnt)
3228                                 drhd->ignored = 1;
3229                 }
3230         }
3231
3232         for_each_drhd_unit(drhd) {
3233                 int i;
3234                 if (drhd->ignored || drhd->include_all)
3235                         continue;
3236
3237                 for (i = 0; i < drhd->devices_cnt; i++)
3238                         if (drhd->devices[i] &&
3239                             !IS_GFX_DEVICE(drhd->devices[i]))
3240                                 break;
3241
3242                 if (i < drhd->devices_cnt)
3243                         continue;
3244
3245                 /* This IOMMU has *only* gfx devices. Either bypass it or
3246                    set the gfx_mapped flag, as appropriate */
3247                 if (dmar_map_gfx) {
3248                         intel_iommu_gfx_mapped = 1;
3249                 } else {
3250                         drhd->ignored = 1;
3251                         for (i = 0; i < drhd->devices_cnt; i++) {
3252                                 if (!drhd->devices[i])
3253                                         continue;
3254                                 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3255                         }
3256                 }
3257         }
3258 }
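
/*
 * Illustrative consumer of the intel_iommu_gfx_mapped flag set above
 * (hypothetical, not part of this file): a graphics driver that needs to
 * know whether the integrated GPU sits behind an enabled IOMMU might do
 * roughly:
 *
 *         extern int intel_iommu_gfx_mapped;
 *
 *         if (intel_iommu_gfx_mapped)
 *                 enable_vtd_workarounds();
 *
 * The declaration and any config guard a real consumer uses may differ;
 * this only sketches the intended use of the exported flag.
 */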
3259
3260 #ifdef CONFIG_SUSPEND
3261 static int init_iommu_hw(void)
3262 {
3263         struct dmar_drhd_unit *drhd;
3264         struct intel_iommu *iommu = NULL;
3265
3266         for_each_active_iommu(iommu, drhd)
3267                 if (iommu->qi)
3268                         dmar_reenable_qi(iommu);
3269
3270         for_each_iommu(iommu, drhd) {
3271                 if (drhd->ignored) {
3272                         /*
3273                          * we always have to disable PMRs or DMA may fail on
3274                          * this device
3275                          */
3276                         if (force_on)
3277                                 iommu_disable_protect_mem_regions(iommu);
3278                         continue;
3279                 }
3280
3281                 iommu_flush_write_buffer(iommu);
3282
3283                 iommu_set_root_entry(iommu);
3284
3285                 iommu->flush.flush_context(iommu, 0, 0, 0,
3286                                            DMA_CCMD_GLOBAL_INVL);
3287                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3288                                          DMA_TLB_GLOBAL_FLUSH);
3289                 if (iommu_enable_translation(iommu))
3290                         return 1;
3291                 iommu_disable_protect_mem_regions(iommu);
3292         }
3293
3294         return 0;
3295 }
3296
3297 static void iommu_flush_all(void)
3298 {
3299         struct dmar_drhd_unit *drhd;
3300         struct intel_iommu *iommu;
3301
3302         for_each_active_iommu(iommu, drhd) {
3303                 iommu->flush.flush_context(iommu, 0, 0, 0,
3304                                            DMA_CCMD_GLOBAL_INVL);
3305                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3306                                          DMA_TLB_GLOBAL_FLUSH);
3307         }
3308 }
3309
3310 static int iommu_suspend(void)
3311 {
3312         struct dmar_drhd_unit *drhd;
3313         struct intel_iommu *iommu = NULL;
3314         unsigned long flag;
3315
3316         for_each_active_iommu(iommu, drhd) {
3317                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3318                                                  GFP_ATOMIC);
3319                 if (!iommu->iommu_state)
3320                         goto nomem;
3321         }
3322
3323         iommu_flush_all();
3324
3325         for_each_active_iommu(iommu, drhd) {
3326                 iommu_disable_translation(iommu);
3327
3328                 spin_lock_irqsave(&iommu->register_lock, flag);
3329
3330                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3331                         readl(iommu->reg + DMAR_FECTL_REG);
3332                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3333                         readl(iommu->reg + DMAR_FEDATA_REG);
3334                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3335                         readl(iommu->reg + DMAR_FEADDR_REG);
3336                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3337                         readl(iommu->reg + DMAR_FEUADDR_REG);
3338
3339                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3340         }
3341         return 0;
3342
3343 nomem:
3344         for_each_active_iommu(iommu, drhd)
3345                 kfree(iommu->iommu_state);
3346
3347         return -ENOMEM;
3348 }
3349
3350 static void iommu_resume(void)
3351 {
3352         struct dmar_drhd_unit *drhd;
3353         struct intel_iommu *iommu = NULL;
3354         unsigned long flag;
3355
3356         if (init_iommu_hw()) {
3357                 if (force_on)
3358                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3359                 else
3360                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3361                 return;
3362         }
3363
3364         for_each_active_iommu(iommu, drhd) {
3365
3366                 spin_lock_irqsave(&iommu->register_lock, flag);
3367
3368                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3369                         iommu->reg + DMAR_FECTL_REG);
3370                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3371                         iommu->reg + DMAR_FEDATA_REG);
3372                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3373                         iommu->reg + DMAR_FEADDR_REG);
3374                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3375                         iommu->reg + DMAR_FEUADDR_REG);
3376
3377                 spin_unlock_irqrestore(&iommu->register_lock, flag);
3378         }
3379
3380         for_each_active_iommu(iommu, drhd)
3381                 kfree(iommu->iommu_state);
3382 }
3383
3384 static struct syscore_ops iommu_syscore_ops = {
3385         .resume         = iommu_resume,
3386         .suspend        = iommu_suspend,
3387 };
3388
3389 static void __init init_iommu_pm_ops(void)
3390 {
3391         register_syscore_ops(&iommu_syscore_ops);
3392 }
3393
3394 #else
3395 static inline void init_iommu_pm_ops(void) {}
3396 #endif  /* CONFIG_SUSPEND */
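
/*
 * Note on the suspend/resume path above: iommu_suspend() does a global
 * context-cache and IOTLB flush, disables translation and saves the fault
 * event registers; iommu_resume() re-runs init_iommu_hw() (re-enabling QI,
 * the root entries and translation) before restoring those registers. Both
 * hooks are driven through the syscore ops registered in init_iommu_pm_ops().
 */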
3397
3398 /*
3399  * Here we only respond to the driver-unbind action for a device.
3400  *
3401  * A newly added device is not attached to its DMAR domain here yet; that
3402  * happens when the device is first mapped to an iova.
3403  */
3404 static int device_notifier(struct notifier_block *nb,
3405                                   unsigned long action, void *data)
3406 {
3407         struct device *dev = data;
3408         struct pci_dev *pdev = to_pci_dev(dev);
3409         struct dmar_domain *domain;
3410
3411         if (iommu_no_mapping(dev))
3412                 return 0;
3413
3414         domain = find_domain(pdev);
3415         if (!domain)
3416                 return 0;
3417
3418         if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3419                 domain_remove_one_dev_info(domain, pdev);
3420
3421                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3422                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3423                     list_empty(&domain->devices))
3424                         domain_exit(domain);
3425         }
3426
3427         return 0;
3428 }
3429
3430 static struct notifier_block device_nb = {
3431         .notifier_call = device_notifier,
3432 };
3433
3434 int __init intel_iommu_init(void)
3435 {
3436         int ret = 0;
3437
3438         /* VT-d is required for a TXT/tboot launch, so enforce that */
3439         force_on = tboot_force_iommu();
3440
3441         if (dmar_table_init()) {
3442                 if (force_on)
3443                         panic("tboot: Failed to initialize DMAR table\n");
3444                 return  -ENODEV;
3445         }
3446
3447         if (dmar_dev_scope_init()) {
3448                 if (force_on)
3449                         panic("tboot: Failed to initialize DMAR device scope\n");
3450                 return  -ENODEV;
3451         }
3452
3453         /*
3454          * Check whether DMA-remapping initialization is needed now.
3455          * The initialization above is also used by interrupt remapping.
3456          */
3457         if (no_iommu || dmar_disabled)
3458                 return -ENODEV;
3459
3460         if (iommu_init_mempool()) {
3461                 if (force_on)
3462                         panic("tboot: Failed to initialize iommu memory\n");
3463                 return  -ENODEV;
3464         }
3465
3466         if (dmar_init_reserved_ranges()) {
3467                 if (force_on)
3468                         panic("tboot: Failed to reserve iommu ranges\n");
3469                 return  -ENODEV;
3470         }
3471
3472         init_no_remapping_devices();
3473
3474         ret = init_dmars();
3475         if (ret) {
3476                 if (force_on)
3477                         panic("tboot: Failed to initialize DMARs\n");
3478                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3479                 put_iova_domain(&reserved_iova_list);
3480                 iommu_exit_mempool();
3481                 return ret;
3482         }
3483         printk(KERN_INFO
3484         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3485
3486         init_timer(&unmap_timer);
3487 #ifdef CONFIG_SWIOTLB
3488         swiotlb = 0;
3489 #endif
3490         dma_ops = &intel_dma_ops;
3491
3492         init_iommu_pm_ops();
3493
3494         register_iommu(&intel_iommu_ops);
3495
3496         bus_register_notifier(&pci_bus_type, &device_nb);
3497
3498         return 0;
3499 }
3500
3501 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3502                                            struct pci_dev *pdev)
3503 {
3504         struct pci_dev *tmp, *parent;
3505
3506         if (!iommu || !pdev)
3507                 return;
3508
3509         /* dependent device detach */
3510         tmp = pci_find_upstream_pcie_bridge(pdev);
3511         /* Secondary interface's bus number and devfn 0 */
3512         if (tmp) {
3513                 parent = pdev->bus->self;
3514                 while (parent != tmp) {
3515                         iommu_detach_dev(iommu, parent->bus->number,
3516                                          parent->devfn);
3517                         parent = parent->bus->self;
3518                 }
3519                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3520                         iommu_detach_dev(iommu,
3521                                 tmp->subordinate->number, 0);
3522                 else /* this is a legacy PCI bridge */
3523                         iommu_detach_dev(iommu, tmp->bus->number,
3524                                          tmp->devfn);
3525         }
3526 }
3527
3528 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3529                                           struct pci_dev *pdev)
3530 {
3531         struct device_domain_info *info;
3532         struct intel_iommu *iommu;
3533         unsigned long flags;
3534         int found = 0;
3535         struct list_head *entry, *tmp;
3536
3537         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3538                                 pdev->devfn);
3539         if (!iommu)
3540                 return;
3541
3542         spin_lock_irqsave(&device_domain_lock, flags);
3543         list_for_each_safe(entry, tmp, &domain->devices) {
3544                 info = list_entry(entry, struct device_domain_info, link);
3545                 if (info->segment == pci_domain_nr(pdev->bus) &&
3546                     info->bus == pdev->bus->number &&
3547                     info->devfn == pdev->devfn) {
3548                         list_del(&info->link);
3549                         list_del(&info->global);
3550                         if (info->dev)
3551                                 info->dev->dev.archdata.iommu = NULL;
3552                         spin_unlock_irqrestore(&device_domain_lock, flags);
3553
3554                         iommu_disable_dev_iotlb(info);
3555                         iommu_detach_dev(iommu, info->bus, info->devfn);
3556                         iommu_detach_dependent_devices(iommu, pdev);
3557                         free_devinfo_mem(info);
3558
3559                         spin_lock_irqsave(&device_domain_lock, flags);
3560
3561                         if (found)
3562                                 break;
3563                         else
3564                                 continue;
3565                 }
3566
3567                 /* if there are no other devices under the same iommu
3568                  * owned by this domain, clear this iommu in iommu_bmp,
3569                  * and update the iommu count and coherency
3570                  */
3571                 if (iommu == device_to_iommu(info->segment, info->bus,
3572                                             info->devfn))
3573                         found = 1;
3574         }
3575
3576         if (found == 0) {
3577                 unsigned long tmp_flags;
3578                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3579                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3580                 domain->iommu_count--;
3581                 domain_update_iommu_cap(domain);
3582                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3583
3584                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3585                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3586                         spin_lock_irqsave(&iommu->lock, tmp_flags);
3587                         clear_bit(domain->id, iommu->domain_ids);
3588                         iommu->domains[domain->id] = NULL;
3589                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3590                 }
3591         }
3592
3593         spin_unlock_irqrestore(&device_domain_lock, flags);
3594 }
3595
3596 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3597 {
3598         struct device_domain_info *info;
3599         struct intel_iommu *iommu;
3600         unsigned long flags1, flags2;
3601
3602         spin_lock_irqsave(&device_domain_lock, flags1);
3603         while (!list_empty(&domain->devices)) {
3604                 info = list_entry(domain->devices.next,
3605                         struct device_domain_info, link);
3606                 list_del(&info->link);
3607                 list_del(&info->global);
3608                 if (info->dev)
3609                         info->dev->dev.archdata.iommu = NULL;
3610
3611                 spin_unlock_irqrestore(&device_domain_lock, flags1);
3612
3613                 iommu_disable_dev_iotlb(info);
3614                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3615                 iommu_detach_dev(iommu, info->bus, info->devfn);
3616                 iommu_detach_dependent_devices(iommu, info->dev);
3617
3618                 /* clear this iommu in iommu_bmp, update iommu count
3619                  * and capabilities
3620                  */
3621                 spin_lock_irqsave(&domain->iommu_lock, flags2);
3622                 if (test_and_clear_bit(iommu->seq_id,
3623                                        &domain->iommu_bmp)) {
3624                         domain->iommu_count--;
3625                         domain_update_iommu_cap(domain);
3626                 }
3627                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3628
3629                 free_devinfo_mem(info);
3630                 spin_lock_irqsave(&device_domain_lock, flags1);
3631         }
3632         spin_unlock_irqrestore(&device_domain_lock, flags1);
3633 }
3634
3635 /* domain id for virtual machine, it won't be set in context */
3636 static unsigned long vm_domid;
3637
3638 static struct dmar_domain *iommu_alloc_vm_domain(void)
3639 {
3640         struct dmar_domain *domain;
3641
3642         domain = alloc_domain_mem();
3643         if (!domain)
3644                 return NULL;
3645
3646         domain->id = vm_domid++;
3647         domain->nid = -1;
3648         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3649         domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3650
3651         return domain;
3652 }
3653
3654 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3655 {
3656         int adjust_width;
3657
3658         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3659         spin_lock_init(&domain->iommu_lock);
3660
3661         domain_reserve_special_ranges(domain);
3662
3663         /* calculate AGAW */
3664         domain->gaw = guest_width;
3665         adjust_width = guestwidth_to_adjustwidth(guest_width);
3666         domain->agaw = width_to_agaw(adjust_width);
3667
3668         INIT_LIST_HEAD(&domain->devices);
3669
3670         domain->iommu_count = 0;
3671         domain->iommu_coherency = 0;
3672         domain->iommu_snooping = 0;
3673         domain->iommu_superpage = 0;
3674         domain->max_addr = 0;
3675         domain->nid = -1;
3676
3677         /* always allocate the top pgd */
3678         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3679         if (!domain->pgd)
3680                 return -ENOMEM;
3681         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3682         return 0;
3683 }
3684
3685 static void iommu_free_vm_domain(struct dmar_domain *domain)
3686 {
3687         unsigned long flags;
3688         struct dmar_drhd_unit *drhd;
3689         struct intel_iommu *iommu;
3690         unsigned long i;
3691         unsigned long ndomains;
3692
3693         for_each_drhd_unit(drhd) {
3694                 if (drhd->ignored)
3695                         continue;
3696                 iommu = drhd->iommu;
3697
3698                 ndomains = cap_ndoms(iommu->cap);
3699                 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3700                         if (iommu->domains[i] == domain) {
3701                                 spin_lock_irqsave(&iommu->lock, flags);
3702                                 clear_bit(i, iommu->domain_ids);
3703                                 iommu->domains[i] = NULL;
3704                                 spin_unlock_irqrestore(&iommu->lock, flags);
3705                                 break;
3706                         }
3707                 }
3708         }
3709 }
3710
3711 static void vm_domain_exit(struct dmar_domain *domain)
3712 {
3713         /* Domain 0 is reserved, so don't process it */
3714         if (!domain)
3715                 return;
3716
3717         vm_domain_remove_all_dev_info(domain);
3718         /* destroy iovas */
3719         put_iova_domain(&domain->iovad);
3720
3721         /* clear ptes */
3722         dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3723
3724         /* free page tables */
3725         dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3726
3727         iommu_free_vm_domain(domain);
3728         free_domain_mem(domain);
3729 }
3730
3731 static int intel_iommu_domain_init(struct iommu_domain *domain)
3732 {
3733         struct dmar_domain *dmar_domain;
3734
3735         dmar_domain = iommu_alloc_vm_domain();
3736         if (!dmar_domain) {
3737                 printk(KERN_ERR
3738                         "intel_iommu_domain_init: dmar_domain == NULL\n");
3739                 return -ENOMEM;
3740         }
3741         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3742                 printk(KERN_ERR
3743                         "intel_iommu_domain_init() failed\n");
3744                 vm_domain_exit(dmar_domain);
3745                 return -ENOMEM;
3746         }
3747         domain->priv = dmar_domain;
3748
3749         return 0;
3750 }
3751
3752 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3753 {
3754         struct dmar_domain *dmar_domain = domain->priv;
3755
3756         domain->priv = NULL;
3757         vm_domain_exit(dmar_domain);
3758 }
3759
3760 static int intel_iommu_attach_device(struct iommu_domain *domain,
3761                                      struct device *dev)
3762 {
3763         struct dmar_domain *dmar_domain = domain->priv;
3764         struct pci_dev *pdev = to_pci_dev(dev);
3765         struct intel_iommu *iommu;
3766         int addr_width;
3767
3768         /* normally pdev is not mapped */
3769         if (unlikely(domain_context_mapped(pdev))) {
3770                 struct dmar_domain *old_domain;
3771
3772                 old_domain = find_domain(pdev);
3773                 if (old_domain) {
3774                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3775                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3776                                 domain_remove_one_dev_info(old_domain, pdev);
3777                         else
3778                                 domain_remove_dev_info(old_domain);
3779                 }
3780         }
3781
3782         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3783                                 pdev->devfn);
3784         if (!iommu)
3785                 return -ENODEV;
3786
3787         /* check if this iommu agaw is sufficient for max mapped address */
3788         addr_width = agaw_to_width(iommu->agaw);
3789         if (addr_width > cap_mgaw(iommu->cap))
3790                 addr_width = cap_mgaw(iommu->cap);
3791
3792         if (dmar_domain->max_addr > (1LL << addr_width)) {
3793                 printk(KERN_ERR "%s: iommu width (%d) is not "
3794                        "sufficient for the mapped address (%llx)\n",
3795                        __func__, addr_width, dmar_domain->max_addr);
3796                 return -EFAULT;
3797         }
3798         dmar_domain->gaw = addr_width;
3799
3800         /*
3801          * Knock out extra levels of page tables if necessary
3802          */
3803         while (iommu->agaw < dmar_domain->agaw) {
3804                 struct dma_pte *pte;
3805
3806                 pte = dmar_domain->pgd;
3807                 if (dma_pte_present(pte)) {
3808                         dmar_domain->pgd = (struct dma_pte *)
3809                                 phys_to_virt(dma_pte_addr(pte));
3810                         free_pgtable_page(pte);
3811                 }
3812                 dmar_domain->agaw--;
3813         }
3814
3815         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3816 }
3817
3818 static void intel_iommu_detach_device(struct iommu_domain *domain,
3819                                       struct device *dev)
3820 {
3821         struct dmar_domain *dmar_domain = domain->priv;
3822         struct pci_dev *pdev = to_pci_dev(dev);
3823
3824         domain_remove_one_dev_info(dmar_domain, pdev);
3825 }
3826
3827 static int intel_iommu_map(struct iommu_domain *domain,
3828                            unsigned long iova, phys_addr_t hpa,
3829                            int gfp_order, int iommu_prot)
3830 {
3831         struct dmar_domain *dmar_domain = domain->priv;
3832         u64 max_addr;
3833         int prot = 0;
3834         size_t size;
3835         int ret;
3836
3837         if (iommu_prot & IOMMU_READ)
3838                 prot |= DMA_PTE_READ;
3839         if (iommu_prot & IOMMU_WRITE)
3840                 prot |= DMA_PTE_WRITE;
3841         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3842                 prot |= DMA_PTE_SNP;
3843
3844         size     = PAGE_SIZE << gfp_order;
3845         max_addr = iova + size;
3846         if (dmar_domain->max_addr < max_addr) {
3847                 u64 end;
3848
3849                 /* check if minimum agaw is sufficient for mapped address */
3850                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
3851                 if (end < max_addr) {
3852                         printk(KERN_ERR "%s: iommu width (%d) is not "
3853                                "sufficient for the mapped address (%llx)\n",
3854                                __func__, dmar_domain->gaw, max_addr);
3855                         return -EFAULT;
3856                 }
3857                 dmar_domain->max_addr = max_addr;
3858         }
3859         /* Round size up to the next multiple of PAGE_SIZE if size plus
3860            the low bits of hpa would spill onto the next page */
3861         size = aligned_nrpages(hpa, size);
3862         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3863                                  hpa >> VTD_PAGE_SHIFT, size, prot);
3864         return ret;
3865 }
3866
3867 static int intel_iommu_unmap(struct iommu_domain *domain,
3868                              unsigned long iova, int gfp_order)
3869 {
3870         struct dmar_domain *dmar_domain = domain->priv;
3871         size_t size = PAGE_SIZE << gfp_order;
3872
3873         dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3874                             (iova + size - 1) >> VTD_PAGE_SHIFT);
3875
3876         if (dmar_domain->max_addr == iova + size)
3877                 dmar_domain->max_addr = iova;
3878
3879         return gfp_order;
3880 }
3881
3882 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3883                                             unsigned long iova)
3884 {
3885         struct dmar_domain *dmar_domain = domain->priv;
3886         struct dma_pte *pte;
3887         u64 phys = 0;
3888
3889         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
3890         if (pte)
3891                 phys = dma_pte_addr(pte);
3892
3893         return phys;
3894 }
3895
3896 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3897                                       unsigned long cap)
3898 {
3899         struct dmar_domain *dmar_domain = domain->priv;
3900
3901         if (cap == IOMMU_CAP_CACHE_COHERENCY)
3902                 return dmar_domain->iommu_snooping;
3903         if (cap == IOMMU_CAP_INTR_REMAP)
3904                 return intr_remapping_enabled;
3905
3906         return 0;
3907 }
3908
3909 static struct iommu_ops intel_iommu_ops = {
3910         .domain_init    = intel_iommu_domain_init,
3911         .domain_destroy = intel_iommu_domain_destroy,
3912         .attach_dev     = intel_iommu_attach_device,
3913         .detach_dev     = intel_iommu_detach_device,
3914         .map            = intel_iommu_map,
3915         .unmap          = intel_iommu_unmap,
3916         .iova_to_phys   = intel_iommu_iova_to_phys,
3917         .domain_has_cap = intel_iommu_domain_has_cap,
3918 };
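
/*
 * Illustrative usage (not part of this driver): once these ops are
 * registered via register_iommu() in intel_iommu_init(), a user such as
 * KVM device assignment goes through the generic IOMMU API of this kernel
 * generation, roughly:
 *
 *         struct iommu_domain *dom = iommu_domain_alloc();
 *
 *         iommu_attach_device(dom, &pdev->dev);
 *         iommu_map(dom, iova, paddr, 0, IOMMU_READ | IOMMU_WRITE);
 *         ...
 *         iommu_unmap(dom, iova, 0);
 *         iommu_detach_device(dom, &pdev->dev);
 *         iommu_domain_free(dom);
 *
 * Each call dispatches to the matching intel_iommu_* callback above; error
 * handling is omitted and a gfp_order of 0 means a single page. (Sketch
 * only; the exact wrapper signatures may differ across kernel versions.)
 */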
3919
3920 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3921 {
3922         /*
3923          * Mobile 4 Series Chipset neglects to set RWBF capability,
3924          * but needs it:
3925          */
3926         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3927         rwbf_quirk = 1;
3928
3929         /* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
3930         if (dev->revision == 0x07) {
3931                 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
3932                 dmar_map_gfx = 0;
3933         }
3934 }
3935
3936 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3937
3938 #define GGC 0x52
3939 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
3940 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
3941 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
3942 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
3943 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
3944 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
3945 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
3946 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
3947
3948 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
3949 {
3950         unsigned short ggc;
3951
3952         if (pci_read_config_word(dev, GGC, &ggc))
3953                 return;
3954
3955         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
3956                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
3957                 dmar_map_gfx = 0;
3958         } else if (dmar_map_gfx) {
3959                 /* we have to ensure the gfx device is idle before we flush */
3960                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
3961                 intel_iommu_strict = 1;
3962         }
3963 }
3964 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
3965 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
3966 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
3967 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
3968
3969 /* On Tylersburg chipsets, some BIOSes have been known to enable the
3970    ISOCH DMAR unit for the Azalia sound device, but not give it any
3971    TLB entries, which causes it to deadlock. Check for that.  We do
3972    this in a function called from init_dmars(), instead of in a PCI
3973    quirk, because we don't want to print the obnoxious "BIOS broken"
3974    message if VT-d is actually disabled.
3975 */
3976 static void __init check_tylersburg_isoch(void)
3977 {
3978         struct pci_dev *pdev;
3979         uint32_t vtisochctrl;
3980
3981         /* If there's no Azalia in the system anyway, forget it. */
3982         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3983         if (!pdev)
3984                 return;
3985         pci_dev_put(pdev);
3986
3987         /* System Management Registers. Might be hidden, in which case
3988            we can't do the sanity check. But that's OK, because the
3989            known-broken BIOSes _don't_ actually hide it, so far. */
3990         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3991         if (!pdev)
3992                 return;
3993
3994         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3995                 pci_dev_put(pdev);
3996                 return;
3997         }
3998
3999         pci_dev_put(pdev);
4000
4001         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4002         if (vtisochctrl & 1)
4003                 return;
4004
4005         /* Drop all bits other than the number of TLB entries */
4006         vtisochctrl &= 0x1c;
4007
4008         /* If we have the recommended number of TLB entries (16), fine. */
4009         if (vtisochctrl == 0x10)
4010                 return;
4011
4012         /* Zero TLB entries is hopelessly broken; identity-map Azalia instead. */
4013         if (!vtisochctrl) {
4014                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4015                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4016                      dmi_get_system_info(DMI_BIOS_VENDOR),
4017                      dmi_get_system_info(DMI_BIOS_VERSION),
4018                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4019                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4020                 return;
4021         }
4022
4023         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4024                vtisochctrl);
4025 }