iommu/vt-d: Make iommu_dummy() take struct device instead of struct pci_dev
drivers/iommu/intel-iommu.c (linux-2.6-microblaze.git)
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <asm/irq_remapping.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45
46 #include "irq_remapping.h"
47 #include "pci.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is an order of a 4KiB page and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are an order of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
99
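/*
 * Editorial sketch, not part of the original driver: the IOMMU core treats
 * each set bit k in this bitmap as "a page size of 2^k bytes is supported".
 * With INTEL_IOMMU_PGSIZES == ~0xFFFUL every power of two from 4KiB upwards
 * is advertised, so a hypothetical check like the one below would accept any
 * power-of-two size of at least 4KiB (is_power_of_2() is from <linux/log2.h>).
 */
static inline bool example_pgsize_advertised(unsigned long size)
{
	return is_power_of_2(size) && (size & INTEL_IOMMU_PGSIZES);
}
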
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
144
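/*
 * Worked example (editorial): for the default 48-bit address width,
 * width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2 and agaw_to_level(2) = 4,
 * i.e. a four-level page table.  level_to_offset_bits() then gives the bits
 * indexed at each level: level 1 covers DMA address bits 12-20, level 2
 * bits 21-29, level 3 bits 30-38 and level 4 bits 39-47.
 */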
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
164
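/*
 * Worked example (editorial): with 4KiB MM pages, PAGE_SHIFT equals
 * VTD_PAGE_SHIFT and the two conversions above are identities.  On a
 * hypothetical 64KiB-page kernel they would shift by 4, since one MM page
 * would span sixteen 4KiB VT-d pages.
 */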
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic kernel if can't successfully enable VT-d
173  * (used when kernel is launched w/ TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root)?phys_to_virt(
206                 root->val & VTD_PAGE_MASK) :
207                 NULL);
208 }
209
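/*
 * Editorial note: with 16-byte entries, ROOT_ENTRY_NR works out to
 * 4096 / 16 = 256 root entries, one per PCI bus number.  Each present root
 * entry points to a context table of 256 context entries indexed by devfn,
 * so a (bus, devfn) pair selects exactly one context entry;
 * device_to_context_entry() below performs this two-step lookup.
 */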
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: avail
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
270
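/*
 * Editorial sketch of how the helpers above combine; the real mapping path
 * later in this file (domain_context_mapping_one()) does this with the iommu
 * lock held and with the required cache and IOTLB flushes.
 * example_fill_context() and its arguments are hypothetical.
 */
static void example_fill_context(struct context_entry *ce,
				 unsigned long pgd_phys, int agaw, u16 did)
{
	context_clear_entry(ce);
	context_set_domain_id(ce, did);
	context_set_address_width(ce, agaw);
	context_set_address_root(ce, pgd_phys);
	/* 0 == CONTEXT_TT_MULTI_LEVEL: walk the multi-level page table */
	context_set_translation_type(ce, 0);
	context_set_fault_enable(ce);
	context_set_present(ce);
}
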
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & (1 << 7));
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
313
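/*
 * Editorial sketch: a leaf PTE is simply the host physical address of the
 * target 4KiB frame with the permission bits ORed into the low bits, which
 * is the encoding the mapping code further down builds.  example_make_pte()
 * is hypothetical; host_pfn is in 4KiB units.
 */
static inline u64 example_make_pte(unsigned long host_pfn, u64 prot)
{
	return ((u64)host_pfn << VTD_PAGE_SHIFT) |
	       (prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP));
}
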
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It is attached to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* devices under the same p2p bridge are owned in one domain */
324 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
325
326 /* domain represents a virtual machine; more than one device
327  * across iommus may be owned by one domain, e.g. a kvm guest.
328  */
329 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
330
331 /* si_domain contains multiple devices */
332 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
333
334 /* define the limit of IOMMUs supported in each domain */
335 #ifdef  CONFIG_X86
336 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
337 #else
338 # define        IOMMU_UNITS_SUPPORTED   64
339 #endif
340
341 struct dmar_domain {
342         int     id;                     /* domain id */
343         int     nid;                    /* node id */
344         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
345                                         /* bitmap of iommus this domain uses*/
346
347         struct list_head devices;       /* all devices' list */
348         struct iova_domain iovad;       /* iova's that belong to this domain */
349
350         struct dma_pte  *pgd;           /* virtual address */
351         int             gaw;            /* max guest address width */
352
353         /* adjusted guest address width, 0 is level 2 30-bit */
354         int             agaw;
355
356         int             flags;          /* flags to find out type of domain */
357
358         int             iommu_coherency;/* indicate coherency of iommu access */
359         int             iommu_snooping; /* indicate snooping control feature*/
360         int             iommu_count;    /* reference count of iommu */
361         int             iommu_superpage;/* Level of superpages supported:
362                                            0 == 4KiB (no superpages), 1 == 2MiB,
363                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
364         spinlock_t      iommu_lock;     /* protect iommu set in domain */
365         u64             max_addr;       /* maximum mapped address */
366 };
367
368 /* PCI domain-device relationship */
369 struct device_domain_info {
370         struct list_head link;  /* link to domain siblings */
371         struct list_head global; /* link to global list */
372         int segment;            /* PCI domain */
373         u8 bus;                 /* PCI bus number */
374         u8 devfn;               /* PCI devfn number */
375         struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
376         struct intel_iommu *iommu; /* IOMMU used by this device */
377         struct dmar_domain *domain; /* pointer to domain */
378 };
379
380 struct dmar_rmrr_unit {
381         struct list_head list;          /* list of rmrr units   */
382         struct acpi_dmar_header *hdr;   /* ACPI header          */
383         u64     base_address;           /* reserved base address*/
384         u64     end_address;            /* reserved end address */
385         struct dmar_dev_scope *devices; /* target devices */
386         int     devices_cnt;            /* target device count */
387 };
388
389 struct dmar_atsr_unit {
390         struct list_head list;          /* list of ATSR units */
391         struct acpi_dmar_header *hdr;   /* ACPI header */
392         struct dmar_dev_scope *devices; /* target devices */
393         int devices_cnt;                /* target device count */
394         u8 include_all:1;               /* include all ports */
395 };
396
397 static LIST_HEAD(dmar_atsr_units);
398 static LIST_HEAD(dmar_rmrr_units);
399
400 #define for_each_rmrr_units(rmrr) \
401         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
402
403 static void flush_unmaps_timeout(unsigned long data);
404
405 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
406
407 #define HIGH_WATER_MARK 250
408 struct deferred_flush_tables {
409         int next;
410         struct iova *iova[HIGH_WATER_MARK];
411         struct dmar_domain *domain[HIGH_WATER_MARK];
412         struct page *freelist[HIGH_WATER_MARK];
413 };
414
415 static struct deferred_flush_tables *deferred_flush;
416
417 /* number of IOMMUs; also bounds the per-domain iommu bitmaps */
418 static int g_num_of_iommus;
419
420 static DEFINE_SPINLOCK(async_umap_flush_lock);
421 static LIST_HEAD(unmaps_to_do);
422
423 static int timer_on;
424 static long list_size;
425
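/*
 * Editorial note: the structures above implement deferred IOTLB flushing.
 * Unmapped IOVAs are queued per IOMMU in deferred_flush[] and released in
 * batches after a single IOTLB flush, triggered either by unmap_timer or
 * once roughly HIGH_WATER_MARK entries accumulate.  Booting with
 * intel_iommu=strict (see intel_iommu_setup() below) bypasses the batching
 * and flushes on every unmap.
 */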
426 static void domain_exit(struct dmar_domain *domain);
427 static void domain_remove_dev_info(struct dmar_domain *domain);
428 static void domain_remove_one_dev_info(struct dmar_domain *domain,
429                                        struct pci_dev *pdev);
430 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
431                                            struct pci_dev *pdev);
432
433 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
434 int dmar_disabled = 0;
435 #else
436 int dmar_disabled = 1;
437 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
438
439 int intel_iommu_enabled = 0;
440 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
441
442 static int dmar_map_gfx = 1;
443 static int dmar_forcedac;
444 static int intel_iommu_strict;
445 static int intel_iommu_superpage = 1;
446
447 int intel_iommu_gfx_mapped;
448 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
449
450 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
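/*
 * Editorial note: devices the driver should ignore (for example when their
 * DMAR unit is ignored, or graphics when intel_iommu=igfx_off is used) get
 * DUMMY_DEVICE_DOMAIN_INFO stored in dev->archdata.iommu; the iommu_dummy()
 * helper later in this file (which, per the commit above, now takes a
 * struct device) tests for this marker.
 */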
451 static DEFINE_SPINLOCK(device_domain_lock);
452 static LIST_HEAD(device_domain_list);
453
454 static struct iommu_ops intel_iommu_ops;
455
456 static int __init intel_iommu_setup(char *str)
457 {
458         if (!str)
459                 return -EINVAL;
460         while (*str) {
461                 if (!strncmp(str, "on", 2)) {
462                         dmar_disabled = 0;
463                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
464                 } else if (!strncmp(str, "off", 3)) {
465                         dmar_disabled = 1;
466                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
467                 } else if (!strncmp(str, "igfx_off", 8)) {
468                         dmar_map_gfx = 0;
469                         printk(KERN_INFO
470                                 "Intel-IOMMU: disable GFX device mapping\n");
471                 } else if (!strncmp(str, "forcedac", 8)) {
472                         printk(KERN_INFO
473                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
474                         dmar_forcedac = 1;
475                 } else if (!strncmp(str, "strict", 6)) {
476                         printk(KERN_INFO
477                                 "Intel-IOMMU: disable batched IOTLB flush\n");
478                         intel_iommu_strict = 1;
479                 } else if (!strncmp(str, "sp_off", 6)) {
480                         printk(KERN_INFO
481                                 "Intel-IOMMU: disable supported super page\n");
482                         intel_iommu_superpage = 0;
483                 }
484
485                 str += strcspn(str, ",");
486                 while (*str == ',')
487                         str++;
488         }
489         return 0;
490 }
491 __setup("intel_iommu=", intel_iommu_setup);
492
493 static struct kmem_cache *iommu_domain_cache;
494 static struct kmem_cache *iommu_devinfo_cache;
495 static struct kmem_cache *iommu_iova_cache;
496
497 static inline void *alloc_pgtable_page(int node)
498 {
499         struct page *page;
500         void *vaddr = NULL;
501
502         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
503         if (page)
504                 vaddr = page_address(page);
505         return vaddr;
506 }
507
508 static inline void free_pgtable_page(void *vaddr)
509 {
510         free_page((unsigned long)vaddr);
511 }
512
513 static inline void *alloc_domain_mem(void)
514 {
515         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
516 }
517
518 static void free_domain_mem(void *vaddr)
519 {
520         kmem_cache_free(iommu_domain_cache, vaddr);
521 }
522
523 static inline void * alloc_devinfo_mem(void)
524 {
525         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
526 }
527
528 static inline void free_devinfo_mem(void *vaddr)
529 {
530         kmem_cache_free(iommu_devinfo_cache, vaddr);
531 }
532
533 struct iova *alloc_iova_mem(void)
534 {
535         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
536 }
537
538 void free_iova_mem(struct iova *iova)
539 {
540         kmem_cache_free(iommu_iova_cache, iova);
541 }
542
543
544 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
545 {
546         unsigned long sagaw;
547         int agaw = -1;
548
549         sagaw = cap_sagaw(iommu->cap);
550         for (agaw = width_to_agaw(max_gaw);
551              agaw >= 0; agaw--) {
552                 if (test_bit(agaw, &sagaw))
553                         break;
554         }
555
556         return agaw;
557 }
558
559 /*
560  * Calculate max SAGAW for each iommu.
561  */
562 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
563 {
564         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
565 }
566
567 /*
568  * Calculate agaw for each iommu.
569  * "SAGAW" may differ across iommus, so use a default agaw and fall back
570  * to a smaller supported agaw for iommus that don't support the default.
571  */
572 int iommu_calculate_agaw(struct intel_iommu *iommu)
573 {
574         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
575 }
576
577 /* This function only returns a single iommu in a domain */
578 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
579 {
580         int iommu_id;
581
582         /* si_domain and vm domain should not get here. */
583         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
584         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
585
586         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
587         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
588                 return NULL;
589
590         return g_iommus[iommu_id];
591 }
592
593 static void domain_update_iommu_coherency(struct dmar_domain *domain)
594 {
595         struct dmar_drhd_unit *drhd;
596         struct intel_iommu *iommu;
597         int i, found = 0;
598
599         domain->iommu_coherency = 1;
600
601         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
602                 found = 1;
603                 if (!ecap_coherent(g_iommus[i]->ecap)) {
604                         domain->iommu_coherency = 0;
605                         break;
606                 }
607         }
608         if (found)
609                 return;
610
611         /* No hardware attached; use lowest common denominator */
612         rcu_read_lock();
613         for_each_active_iommu(iommu, drhd) {
614                 if (!ecap_coherent(iommu->ecap)) {
615                         domain->iommu_coherency = 0;
616                         break;
617                 }
618         }
619         rcu_read_unlock();
620 }
621
622 static void domain_update_iommu_snooping(struct dmar_domain *domain)
623 {
624         int i;
625
626         domain->iommu_snooping = 1;
627
628         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
629                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
630                         domain->iommu_snooping = 0;
631                         break;
632                 }
633         }
634 }
635
636 static void domain_update_iommu_superpage(struct dmar_domain *domain)
637 {
638         struct dmar_drhd_unit *drhd;
639         struct intel_iommu *iommu = NULL;
640         int mask = 0xf;
641
642         if (!intel_iommu_superpage) {
643                 domain->iommu_superpage = 0;
644                 return;
645         }
646
647         /* set iommu_superpage to the smallest common denominator */
648         rcu_read_lock();
649         for_each_active_iommu(iommu, drhd) {
650                 mask &= cap_super_page_val(iommu->cap);
651                 if (!mask) {
652                         break;
653                 }
654         }
655         rcu_read_unlock();
656
657         domain->iommu_superpage = fls(mask);
658 }
659
660 /* Some capabilities may be different across iommus */
661 static void domain_update_iommu_cap(struct dmar_domain *domain)
662 {
663         domain_update_iommu_coherency(domain);
664         domain_update_iommu_snooping(domain);
665         domain_update_iommu_superpage(domain);
666 }
667
668 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
669 {
670         struct dmar_drhd_unit *drhd = NULL;
671         struct intel_iommu *iommu;
672         struct device *dev;
673         struct pci_dev *pdev;
674         int i;
675
676         rcu_read_lock();
677         for_each_active_iommu(iommu, drhd) {
678                 if (segment != drhd->segment)
679                         continue;
680
681                 for_each_active_dev_scope(drhd->devices,
682                                           drhd->devices_cnt, i, dev) {
683                         if (!dev_is_pci(dev))
684                                 continue;
685                         pdev = to_pci_dev(dev);
686                         if (pdev->bus->number == bus && pdev->devfn == devfn)
687                                 goto out;
688                         if (pdev->subordinate &&
689                             pdev->subordinate->number <= bus &&
690                             pdev->subordinate->busn_res.end >= bus)
691                                 goto out;
692                 }
693
694                 if (drhd->include_all)
695                         goto out;
696         }
697         iommu = NULL;
698 out:
699         rcu_read_unlock();
700
701         return iommu;
702 }
703
704 static void domain_flush_cache(struct dmar_domain *domain,
705                                void *addr, int size)
706 {
707         if (!domain->iommu_coherency)
708                 clflush_cache_range(addr, size);
709 }
710
711 /* Gets context entry for a given bus and devfn */
712 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
713                 u8 bus, u8 devfn)
714 {
715         struct root_entry *root;
716         struct context_entry *context;
717         unsigned long phy_addr;
718         unsigned long flags;
719
720         spin_lock_irqsave(&iommu->lock, flags);
721         root = &iommu->root_entry[bus];
722         context = get_context_addr_from_root(root);
723         if (!context) {
724                 context = (struct context_entry *)
725                                 alloc_pgtable_page(iommu->node);
726                 if (!context) {
727                         spin_unlock_irqrestore(&iommu->lock, flags);
728                         return NULL;
729                 }
730                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
731                 phy_addr = virt_to_phys((void *)context);
732                 set_root_value(root, phy_addr);
733                 set_root_present(root);
734                 __iommu_flush_cache(iommu, root, sizeof(*root));
735         }
736         spin_unlock_irqrestore(&iommu->lock, flags);
737         return &context[devfn];
738 }
739
740 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
741 {
742         struct root_entry *root;
743         struct context_entry *context;
744         int ret;
745         unsigned long flags;
746
747         spin_lock_irqsave(&iommu->lock, flags);
748         root = &iommu->root_entry[bus];
749         context = get_context_addr_from_root(root);
750         if (!context) {
751                 ret = 0;
752                 goto out;
753         }
754         ret = context_present(&context[devfn]);
755 out:
756         spin_unlock_irqrestore(&iommu->lock, flags);
757         return ret;
758 }
759
760 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
761 {
762         struct root_entry *root;
763         struct context_entry *context;
764         unsigned long flags;
765
766         spin_lock_irqsave(&iommu->lock, flags);
767         root = &iommu->root_entry[bus];
768         context = get_context_addr_from_root(root);
769         if (context) {
770                 context_clear_entry(&context[devfn]);
771                 __iommu_flush_cache(iommu, &context[devfn], \
772                         sizeof(*context));
773         }
774         spin_unlock_irqrestore(&iommu->lock, flags);
775 }
776
777 static void free_context_table(struct intel_iommu *iommu)
778 {
779         struct root_entry *root;
780         int i;
781         unsigned long flags;
782         struct context_entry *context;
783
784         spin_lock_irqsave(&iommu->lock, flags);
785         if (!iommu->root_entry) {
786                 goto out;
787         }
788         for (i = 0; i < ROOT_ENTRY_NR; i++) {
789                 root = &iommu->root_entry[i];
790                 context = get_context_addr_from_root(root);
791                 if (context)
792                         free_pgtable_page(context);
793         }
794         free_pgtable_page(iommu->root_entry);
795         iommu->root_entry = NULL;
796 out:
797         spin_unlock_irqrestore(&iommu->lock, flags);
798 }
799
800 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
801                                       unsigned long pfn, int *target_level)
802 {
803         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
804         struct dma_pte *parent, *pte = NULL;
805         int level = agaw_to_level(domain->agaw);
806         int offset;
807
808         BUG_ON(!domain->pgd);
809
810         if (addr_width < BITS_PER_LONG && pfn >> addr_width)
811                 /* Address beyond IOMMU's addressing capabilities. */
812                 return NULL;
813
814         parent = domain->pgd;
815
816         while (1) {
817                 void *tmp_page;
818
819                 offset = pfn_level_offset(pfn, level);
820                 pte = &parent[offset];
821                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
822                         break;
823                 if (level == *target_level)
824                         break;
825
826                 if (!dma_pte_present(pte)) {
827                         uint64_t pteval;
828
829                         tmp_page = alloc_pgtable_page(domain->nid);
830
831                         if (!tmp_page)
832                                 return NULL;
833
834                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
835                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
836                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
837                                 /* Someone else set it while we were thinking; use theirs. */
838                                 free_pgtable_page(tmp_page);
839                         } else {
840                                 dma_pte_addr(pte);
841                                 domain_flush_cache(domain, pte, sizeof(*pte));
842                         }
843                 }
844                 if (level == 1)
845                         break;
846
847                 parent = phys_to_virt(dma_pte_addr(pte));
848                 level--;
849         }
850
851         if (!*target_level)
852                 *target_level = level;
853
854         return pte;
855 }
856
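/*
 * Usage sketch (editorial): callers that want the 4KiB leaf PTE pass a
 * target level of 1, allocating intermediate tables as needed:
 *
 *	int level = 1;
 *	struct dma_pte *pte = pfn_to_dma_pte(domain, iov_pfn, &level);
 *
 * Passing 0 instead walks down only as far as the existing structure goes
 * (stopping at a superpage or a non-present entry) and reports the level
 * that was reached back through *target_level.
 */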
857
858 /* return address's pte at specific level */
859 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
860                                          unsigned long pfn,
861                                          int level, int *large_page)
862 {
863         struct dma_pte *parent, *pte = NULL;
864         int total = agaw_to_level(domain->agaw);
865         int offset;
866
867         parent = domain->pgd;
868         while (level <= total) {
869                 offset = pfn_level_offset(pfn, total);
870                 pte = &parent[offset];
871                 if (level == total)
872                         return pte;
873
874                 if (!dma_pte_present(pte)) {
875                         *large_page = total;
876                         break;
877                 }
878
879                 if (pte->val & DMA_PTE_LARGE_PAGE) {
880                         *large_page = total;
881                         return pte;
882                 }
883
884                 parent = phys_to_virt(dma_pte_addr(pte));
885                 total--;
886         }
887         return NULL;
888 }
889
890 /* clear last level pte; a tlb flush should follow */
891 static void dma_pte_clear_range(struct dmar_domain *domain,
892                                 unsigned long start_pfn,
893                                 unsigned long last_pfn)
894 {
895         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
896         unsigned int large_page = 1;
897         struct dma_pte *first_pte, *pte;
898
899         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
900         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
901         BUG_ON(start_pfn > last_pfn);
902
903         /* we don't need lock here; nobody else touches the iova range */
904         do {
905                 large_page = 1;
906                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
907                 if (!pte) {
908                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
909                         continue;
910                 }
911                 do {
912                         dma_clear_pte(pte);
913                         start_pfn += lvl_to_nr_pages(large_page);
914                         pte++;
915                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
916
917                 domain_flush_cache(domain, first_pte,
918                                    (void *)pte - (void *)first_pte);
919
920         } while (start_pfn && start_pfn <= last_pfn);
921 }
922
923 static void dma_pte_free_level(struct dmar_domain *domain, int level,
924                                struct dma_pte *pte, unsigned long pfn,
925                                unsigned long start_pfn, unsigned long last_pfn)
926 {
927         pfn = max(start_pfn, pfn);
928         pte = &pte[pfn_level_offset(pfn, level)];
929
930         do {
931                 unsigned long level_pfn;
932                 struct dma_pte *level_pte;
933
934                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
935                         goto next;
936
937                 level_pfn = pfn & level_mask(level - 1);
938                 level_pte = phys_to_virt(dma_pte_addr(pte));
939
940                 if (level > 2)
941                         dma_pte_free_level(domain, level - 1, level_pte,
942                                            level_pfn, start_pfn, last_pfn);
943
944                 /* If range covers entire pagetable, free it */
945                 if (!(start_pfn > level_pfn ||
946                       last_pfn < level_pfn + level_size(level) - 1)) {
947                         dma_clear_pte(pte);
948                         domain_flush_cache(domain, pte, sizeof(*pte));
949                         free_pgtable_page(level_pte);
950                 }
951 next:
952                 pfn += level_size(level);
953         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
954 }
955
956 /* free page table pages. last level pte should already be cleared */
957 static void dma_pte_free_pagetable(struct dmar_domain *domain,
958                                    unsigned long start_pfn,
959                                    unsigned long last_pfn)
960 {
961         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
962
963         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
964         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
965         BUG_ON(start_pfn > last_pfn);
966
967         /* We don't need lock here; nobody else touches the iova range */
968         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
969                            domain->pgd, 0, start_pfn, last_pfn);
970
971         /* free pgd */
972         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
973                 free_pgtable_page(domain->pgd);
974                 domain->pgd = NULL;
975         }
976 }
977
978 /* When a page at a given level is being unlinked from its parent, we don't
979    need to *modify* it at all. All we need to do is make a list of all the
980    pages which can be freed just as soon as we've flushed the IOTLB and we
981    know the hardware page-walk will no longer touch them.
982    The 'pte' argument is the *parent* PTE, pointing to the page that is to
983    be freed. */
984 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
985                                             int level, struct dma_pte *pte,
986                                             struct page *freelist)
987 {
988         struct page *pg;
989
990         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
991         pg->freelist = freelist;
992         freelist = pg;
993
994         if (level == 1)
995                 return freelist;
996
997         for (pte = page_address(pg); !first_pte_in_page(pte); pte++) {
998                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
999                         freelist = dma_pte_list_pagetables(domain, level - 1,
1000                                                            pte, freelist);
1001         }
1002
1003         return freelist;
1004 }
1005
1006 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1007                                         struct dma_pte *pte, unsigned long pfn,
1008                                         unsigned long start_pfn,
1009                                         unsigned long last_pfn,
1010                                         struct page *freelist)
1011 {
1012         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1013
1014         pfn = max(start_pfn, pfn);
1015         pte = &pte[pfn_level_offset(pfn, level)];
1016
1017         do {
1018                 unsigned long level_pfn;
1019
1020                 if (!dma_pte_present(pte))
1021                         goto next;
1022
1023                 level_pfn = pfn & level_mask(level);
1024
1025                 /* If range covers entire pagetable, free it */
1026                 if (start_pfn <= level_pfn &&
1027                     last_pfn >= level_pfn + level_size(level) - 1) {
1028                         /* These subordinate page tables are going away entirely. Don't
1029                            bother to clear them; we're just going to *free* them. */
1030                         if (level > 1 && !dma_pte_superpage(pte))
1031                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1032
1033                         dma_clear_pte(pte);
1034                         if (!first_pte)
1035                                 first_pte = pte;
1036                         last_pte = pte;
1037                 } else if (level > 1) {
1038                         /* Recurse down into a level that isn't *entirely* obsolete */
1039                         freelist = dma_pte_clear_level(domain, level - 1,
1040                                                        phys_to_virt(dma_pte_addr(pte)),
1041                                                        level_pfn, start_pfn, last_pfn,
1042                                                        freelist);
1043                 }
1044 next:
1045                 pfn += level_size(level);
1046         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1047
1048         if (first_pte)
1049                 domain_flush_cache(domain, first_pte,
1050                                    (void *)++last_pte - (void *)first_pte);
1051
1052         return freelist;
1053 }
1054
1055 /* We can't just free the pages because the IOMMU may still be walking
1056    the page tables, and may have cached the intermediate levels. The
1057    pages can only be freed after the IOTLB flush has been done. */
1058 struct page *domain_unmap(struct dmar_domain *domain,
1059                           unsigned long start_pfn,
1060                           unsigned long last_pfn)
1061 {
1062         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1063         struct page *freelist = NULL;
1064
1065         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1066         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1067         BUG_ON(start_pfn > last_pfn);
1068
1069         /* we don't need lock here; nobody else touches the iova range */
1070         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1071                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1072
1073         /* free pgd */
1074         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1075                 struct page *pgd_page = virt_to_page(domain->pgd);
1076                 pgd_page->freelist = freelist;
1077                 freelist = pgd_page;
1078
1079                 domain->pgd = NULL;
1080         }
1081
1082         return freelist;
1083 }
1084
1085 void dma_free_pagelist(struct page *freelist)
1086 {
1087         struct page *pg;
1088
1089         while ((pg = freelist)) {
1090                 freelist = pg->freelist;
1091                 free_pgtable_page(page_address(pg));
1092         }
1093 }
1094
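/*
 * Usage sketch (editorial): the expected unmap flow is roughly the
 * hypothetical sequence below - gather the page-table pages, flush the
 * IOTLB so the hardware can no longer walk them, then free the pages:
 *
 *	struct page *freelist;
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
 *			      last_pfn - start_pfn + 1, 0, 0);
 *	dma_free_pagelist(freelist);
 */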
1095 /* iommu handling */
1096 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1097 {
1098         struct root_entry *root;
1099         unsigned long flags;
1100
1101         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1102         if (!root)
1103                 return -ENOMEM;
1104
1105         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1106
1107         spin_lock_irqsave(&iommu->lock, flags);
1108         iommu->root_entry = root;
1109         spin_unlock_irqrestore(&iommu->lock, flags);
1110
1111         return 0;
1112 }
1113
1114 static void iommu_set_root_entry(struct intel_iommu *iommu)
1115 {
1116         void *addr;
1117         u32 sts;
1118         unsigned long flag;
1119
1120         addr = iommu->root_entry;
1121
1122         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1123         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1124
1125         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1126
1127         /* Make sure hardware completes it */
1128         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1129                       readl, (sts & DMA_GSTS_RTPS), sts);
1130
1131         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1132 }
1133
1134 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1135 {
1136         u32 val;
1137         unsigned long flag;
1138
1139         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1140                 return;
1141
1142         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1143         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1144
1145         /* Make sure hardware completes it */
1146         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1147                       readl, (!(val & DMA_GSTS_WBFS)), val);
1148
1149         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1150 }
1151
1152 /* return value determines if we need a write buffer flush */
1153 static void __iommu_flush_context(struct intel_iommu *iommu,
1154                                   u16 did, u16 source_id, u8 function_mask,
1155                                   u64 type)
1156 {
1157         u64 val = 0;
1158         unsigned long flag;
1159
1160         switch (type) {
1161         case DMA_CCMD_GLOBAL_INVL:
1162                 val = DMA_CCMD_GLOBAL_INVL;
1163                 break;
1164         case DMA_CCMD_DOMAIN_INVL:
1165                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1166                 break;
1167         case DMA_CCMD_DEVICE_INVL:
1168                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1169                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1170                 break;
1171         default:
1172                 BUG();
1173         }
1174         val |= DMA_CCMD_ICC;
1175
1176         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1177         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1178
1179         /* Make sure hardware completes it */
1180         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1181                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1182
1183         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1184 }
1185
1186 /* return value determines if we need a write buffer flush */
1187 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1188                                 u64 addr, unsigned int size_order, u64 type)
1189 {
1190         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1191         u64 val = 0, val_iva = 0;
1192         unsigned long flag;
1193
1194         switch (type) {
1195         case DMA_TLB_GLOBAL_FLUSH:
1196                 /* global flush doesn't need to set IVA_REG */
1197                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1198                 break;
1199         case DMA_TLB_DSI_FLUSH:
1200                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1201                 break;
1202         case DMA_TLB_PSI_FLUSH:
1203                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1204                 /* IH bit is passed in as part of address */
1205                 val_iva = size_order | addr;
1206                 break;
1207         default:
1208                 BUG();
1209         }
1210         /* Note: set drain read/write */
1211 #if 0
1212         /*
1213          * This is probably just to be extra safe; it looks like we can
1214          * ignore it without any impact.
1215          */
1216         if (cap_read_drain(iommu->cap))
1217                 val |= DMA_TLB_READ_DRAIN;
1218 #endif
1219         if (cap_write_drain(iommu->cap))
1220                 val |= DMA_TLB_WRITE_DRAIN;
1221
1222         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1223         /* Note: Only uses first TLB reg currently */
1224         if (val_iva)
1225                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1226         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1227
1228         /* Make sure hardware completes it */
1229         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1230                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1231
1232         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1233
1234         /* check IOTLB invalidation granularity */
1235         if (DMA_TLB_IAIG(val) == 0)
1236                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1237         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1238                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1239                         (unsigned long long)DMA_TLB_IIRG(type),
1240                         (unsigned long long)DMA_TLB_IAIG(val));
1241 }
1242
1243 static struct device_domain_info *iommu_support_dev_iotlb(
1244         struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1245 {
1246         int found = 0;
1247         unsigned long flags;
1248         struct device_domain_info *info;
1249         struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1250
1251         if (!ecap_dev_iotlb_support(iommu->ecap))
1252                 return NULL;
1253
1254         if (!iommu->qi)
1255                 return NULL;
1256
1257         spin_lock_irqsave(&device_domain_lock, flags);
1258         list_for_each_entry(info, &domain->devices, link)
1259                 if (info->bus == bus && info->devfn == devfn) {
1260                         found = 1;
1261                         break;
1262                 }
1263         spin_unlock_irqrestore(&device_domain_lock, flags);
1264
1265         if (!found || !info->dev)
1266                 return NULL;
1267
1268         if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1269                 return NULL;
1270
1271         if (!dmar_find_matched_atsr_unit(info->dev))
1272                 return NULL;
1273
1274         info->iommu = iommu;
1275
1276         return info;
1277 }
1278
1279 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1280 {
1281         if (!info)
1282                 return;
1283
1284         pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1285 }
1286
1287 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1288 {
1289         if (!info->dev || !pci_ats_enabled(info->dev))
1290                 return;
1291
1292         pci_disable_ats(info->dev);
1293 }
1294
1295 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1296                                   u64 addr, unsigned mask)
1297 {
1298         u16 sid, qdep;
1299         unsigned long flags;
1300         struct device_domain_info *info;
1301
1302         spin_lock_irqsave(&device_domain_lock, flags);
1303         list_for_each_entry(info, &domain->devices, link) {
1304                 if (!info->dev || !pci_ats_enabled(info->dev))
1305                         continue;
1306
1307                 sid = info->bus << 8 | info->devfn;
1308                 qdep = pci_ats_queue_depth(info->dev);
1309                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1310         }
1311         spin_unlock_irqrestore(&device_domain_lock, flags);
1312 }
1313
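/*
 * Editorial note: sid above is the PCI requester ID, bus number in bits 15:8
 * and devfn in bits 7:0; e.g. device 02:1f.3 has devfn 0xfb and source-id
 * 0x02fb.  qdep is the device's ATS invalidate queue depth.
 */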
1314 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1315                                   unsigned long pfn, unsigned int pages, int ih, int map)
1316 {
1317         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1318         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1319
1320         BUG_ON(pages == 0);
1321
1322         if (ih)
1323                 ih = 1 << 6;
1324         /*
1325          * Fall back to a domain-selective flush if there is no PSI support or
1326          * the size is too big.
1327          * PSI requires the invalidation size to be 2^x pages and the base
1328          * address to be naturally aligned to that size.
1329          */
1330         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1331                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1332                                                 DMA_TLB_DSI_FLUSH);
1333         else
1334                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1335                                                 DMA_TLB_PSI_FLUSH);
1336
1337         /*
1338          * In caching mode, changes of pages from non-present to present require
1339          * flush. However, device IOTLB doesn't need to be flushed in this case.
1340          */
1341         if (!cap_caching_mode(iommu->cap) || !map)
1342                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1343 }
1344
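/*
 * Worked example (editorial): for a 5-page flush, mask =
 * ilog2(__roundup_pow_of_two(5)) = 3, so the PSI invalidation covers the
 * eight-page (2^3) naturally aligned region containing addr.
 */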
1345 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1346 {
1347         u32 pmen;
1348         unsigned long flags;
1349
1350         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1351         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1352         pmen &= ~DMA_PMEN_EPM;
1353         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1354
1355         /* wait for the protected region status bit to clear */
1356         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1357                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1358
1359         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1360 }
1361
1362 static int iommu_enable_translation(struct intel_iommu *iommu)
1363 {
1364         u32 sts;
1365         unsigned long flags;
1366
1367         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1368         iommu->gcmd |= DMA_GCMD_TE;
1369         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1370
1371         /* Make sure hardware completes it */
1372         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1373                       readl, (sts & DMA_GSTS_TES), sts);
1374
1375         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1376         return 0;
1377 }
1378
1379 static int iommu_disable_translation(struct intel_iommu *iommu)
1380 {
1381         u32 sts;
1382         unsigned long flag;
1383
1384         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1385         iommu->gcmd &= ~DMA_GCMD_TE;
1386         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1387
1388         /* Make sure hardware completes it */
1389         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1390                       readl, (!(sts & DMA_GSTS_TES)), sts);
1391
1392         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1393         return 0;
1394 }
1395
1396
1397 static int iommu_init_domains(struct intel_iommu *iommu)
1398 {
1399         unsigned long ndomains;
1400         unsigned long nlongs;
1401
1402         ndomains = cap_ndoms(iommu->cap);
1403         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1404                  iommu->seq_id, ndomains);
1405         nlongs = BITS_TO_LONGS(ndomains);
1406
1407         spin_lock_init(&iommu->lock);
1408
1409         /* TBD: there might be 64K domains;
1410          * consider a different allocation scheme for future chips
1411          */
1412         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1413         if (!iommu->domain_ids) {
1414                 pr_err("IOMMU%d: allocating domain id array failed\n",
1415                        iommu->seq_id);
1416                 return -ENOMEM;
1417         }
1418         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1419                         GFP_KERNEL);
1420         if (!iommu->domains) {
1421                 pr_err("IOMMU%d: allocating domain array failed\n",
1422                        iommu->seq_id);
1423                 kfree(iommu->domain_ids);
1424                 iommu->domain_ids = NULL;
1425                 return -ENOMEM;
1426         }
1427
1428         /*
1429          * If caching mode is set, then invalid translations are tagged
1430          * with domain id 0, so we need to pre-allocate it.
1431          */
1432         if (cap_caching_mode(iommu->cap))
1433                 set_bit(0, iommu->domain_ids);
1434         return 0;
1435 }
1436
1437 static void free_dmar_iommu(struct intel_iommu *iommu)
1438 {
1439         struct dmar_domain *domain;
1440         int i, count;
1441         unsigned long flags;
1442
1443         if ((iommu->domains) && (iommu->domain_ids)) {
1444                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1445                         /*
1446                          * Domain id 0 is reserved for invalid translation
1447                          * if hardware supports caching mode.
1448                          */
1449                         if (cap_caching_mode(iommu->cap) && i == 0)
1450                                 continue;
1451
1452                         domain = iommu->domains[i];
1453                         clear_bit(i, iommu->domain_ids);
1454
1455                         spin_lock_irqsave(&domain->iommu_lock, flags);
1456                         count = --domain->iommu_count;
1457                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1458                         if (count == 0)
1459                                 domain_exit(domain);
1460                 }
1461         }
1462
1463         if (iommu->gcmd & DMA_GCMD_TE)
1464                 iommu_disable_translation(iommu);
1465
1466         kfree(iommu->domains);
1467         kfree(iommu->domain_ids);
1468         iommu->domains = NULL;
1469         iommu->domain_ids = NULL;
1470
1471         g_iommus[iommu->seq_id] = NULL;
1472
1473         /* free context mapping */
1474         free_context_table(iommu);
1475 }
1476
1477 static struct dmar_domain *alloc_domain(bool vm)
1478 {
1479         /* domain id for virtual machines; it won't be set in context entries */
1480         static atomic_t vm_domid = ATOMIC_INIT(0);
1481         struct dmar_domain *domain;
1482
1483         domain = alloc_domain_mem();
1484         if (!domain)
1485                 return NULL;
1486
1487         domain->nid = -1;
1488         domain->iommu_count = 0;
1489         memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1490         domain->flags = 0;
1491         spin_lock_init(&domain->iommu_lock);
1492         INIT_LIST_HEAD(&domain->devices);
1493         if (vm) {
1494                 domain->id = atomic_inc_return(&vm_domid);
1495                 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1496         }
1497
1498         return domain;
1499 }
1500
1501 static int iommu_attach_domain(struct dmar_domain *domain,
1502                                struct intel_iommu *iommu)
1503 {
1504         int num;
1505         unsigned long ndomains;
1506         unsigned long flags;
1507
1508         ndomains = cap_ndoms(iommu->cap);
1509
1510         spin_lock_irqsave(&iommu->lock, flags);
1511
1512         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1513         if (num >= ndomains) {
1514                 spin_unlock_irqrestore(&iommu->lock, flags);
1515                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1516                 return -ENOMEM;
1517         }
1518
1519         domain->id = num;
1520         domain->iommu_count++;
1521         set_bit(num, iommu->domain_ids);
1522         set_bit(iommu->seq_id, domain->iommu_bmp);
1523         iommu->domains[num] = domain;
1524         spin_unlock_irqrestore(&iommu->lock, flags);
1525
1526         return 0;
1527 }
1528
1529 static void iommu_detach_domain(struct dmar_domain *domain,
1530                                 struct intel_iommu *iommu)
1531 {
1532         unsigned long flags;
1533         int num, ndomains;
1534
1535         spin_lock_irqsave(&iommu->lock, flags);
1536         ndomains = cap_ndoms(iommu->cap);
1537         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1538                 if (iommu->domains[num] == domain) {
1539                         clear_bit(num, iommu->domain_ids);
1540                         iommu->domains[num] = NULL;
1541                         break;
1542                 }
1543         }
1544         spin_unlock_irqrestore(&iommu->lock, flags);
1545 }
1546
1547 static struct iova_domain reserved_iova_list;
1548 static struct lock_class_key reserved_rbtree_key;
1549
1550 static int dmar_init_reserved_ranges(void)
1551 {
1552         struct pci_dev *pdev = NULL;
1553         struct iova *iova;
1554         int i;
1555
1556         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1557
1558         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1559                 &reserved_rbtree_key);
1560
1561         /* IOAPIC ranges shouldn't be accessed by DMA */
1562         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1563                 IOVA_PFN(IOAPIC_RANGE_END));
1564         if (!iova) {
1565                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1566                 return -ENODEV;
1567         }
1568
1569         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1570         for_each_pci_dev(pdev) {
1571                 struct resource *r;
1572
1573                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1574                         r = &pdev->resource[i];
1575                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1576                                 continue;
1577                         iova = reserve_iova(&reserved_iova_list,
1578                                             IOVA_PFN(r->start),
1579                                             IOVA_PFN(r->end));
1580                         if (!iova) {
1581                                 printk(KERN_ERR "Reserve iova failed\n");
1582                                 return -ENODEV;
1583                         }
1584                 }
1585         }
1586         return 0;
1587 }
1588
1589 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1590 {
1591         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1592 }
1593
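/*
 * Round a guest address width up to a width the page tables can express:
 * 12 bits of page offset plus a whole number of 9-bit levels (30, 39, 48,
 * 57, ... bits).  For example, gaw == 40 gives r == 1 and hence agaw == 48,
 * while gaw == 39 is already exact and is returned unchanged.
 */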
1594 static inline int guestwidth_to_adjustwidth(int gaw)
1595 {
1596         int agaw;
1597         int r = (gaw - 12) % 9;
1598
1599         if (r == 0)
1600                 agaw = gaw;
1601         else
1602                 agaw = gaw + 9 - r;
1603         if (agaw > 64)
1604                 agaw = 64;
1605         return agaw;
1606 }
1607
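/*
 * Initialise the iova allocator, pick an AGAW the hardware advertises in
 * SAGAW (falling forward to a wider layout if the exact one is missing,
 * e.g. a 48-bit request may end up on 57-bit tables), record coherency,
 * snooping and superpage capabilities, and allocate the top-level pgd.
 */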
1608 static int domain_init(struct dmar_domain *domain, int guest_width)
1609 {
1610         struct intel_iommu *iommu;
1611         int adjust_width, agaw;
1612         unsigned long sagaw;
1613
1614         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1615         domain_reserve_special_ranges(domain);
1616
1617         /* calculate AGAW */
1618         iommu = domain_get_iommu(domain);
1619         if (guest_width > cap_mgaw(iommu->cap))
1620                 guest_width = cap_mgaw(iommu->cap);
1621         domain->gaw = guest_width;
1622         adjust_width = guestwidth_to_adjustwidth(guest_width);
1623         agaw = width_to_agaw(adjust_width);
1624         sagaw = cap_sagaw(iommu->cap);
1625         if (!test_bit(agaw, &sagaw)) {
1626                 /* hardware doesn't support it, choose a bigger one */
1627                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1628                 agaw = find_next_bit(&sagaw, 5, agaw);
1629                 if (agaw >= 5)
1630                         return -ENODEV;
1631         }
1632         domain->agaw = agaw;
1633
1634         if (ecap_coherent(iommu->ecap))
1635                 domain->iommu_coherency = 1;
1636         else
1637                 domain->iommu_coherency = 0;
1638
1639         if (ecap_sc_support(iommu->ecap))
1640                 domain->iommu_snooping = 1;
1641         else
1642                 domain->iommu_snooping = 0;
1643
1644         if (intel_iommu_superpage)
1645                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1646         else
1647                 domain->iommu_superpage = 0;
1648
1649         domain->nid = iommu->node;
1650
1651         /* always allocate the top pgd */
1652         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1653         if (!domain->pgd)
1654                 return -ENOMEM;
1655         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1656         return 0;
1657 }
1658
1659 static void domain_exit(struct dmar_domain *domain)
1660 {
1661         struct dmar_drhd_unit *drhd;
1662         struct intel_iommu *iommu;
1663         struct page *freelist = NULL;
1664
1665         /* Domain 0 is reserved, so don't process it */
1666         if (!domain)
1667                 return;
1668
1669         /* Flush any lazy unmaps that may reference this domain */
1670         if (!intel_iommu_strict)
1671                 flush_unmaps_timeout(0);
1672
1673         /* remove associated devices */
1674         domain_remove_dev_info(domain);
1675
1676         /* destroy iovas */
1677         put_iova_domain(&domain->iovad);
1678
1679         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1680
1681         /* clear attached or cached domains */
1682         rcu_read_lock();
1683         for_each_active_iommu(iommu, drhd)
1684                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1685                     test_bit(iommu->seq_id, domain->iommu_bmp))
1686                         iommu_detach_domain(domain, iommu);
1687         rcu_read_unlock();
1688
1689         dma_free_pagelist(freelist);
1690
1691         free_domain_mem(domain);
1692 }
1693
1694 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1695                                  u8 bus, u8 devfn, int translation)
1696 {
1697         struct context_entry *context;
1698         unsigned long flags;
1699         struct intel_iommu *iommu;
1700         struct dma_pte *pgd;
1701         unsigned long num;
1702         unsigned long ndomains;
1703         int id;
1704         int agaw;
1705         struct device_domain_info *info = NULL;
1706
1707         pr_debug("Set context mapping for %02x:%02x.%d\n",
1708                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1709
1710         BUG_ON(!domain->pgd);
1711         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1712                translation != CONTEXT_TT_MULTI_LEVEL);
1713
1714         iommu = device_to_iommu(segment, bus, devfn);
1715         if (!iommu)
1716                 return -ENODEV;
1717
1718         context = device_to_context_entry(iommu, bus, devfn);
1719         if (!context)
1720                 return -ENOMEM;
1721         spin_lock_irqsave(&iommu->lock, flags);
1722         if (context_present(context)) {
1723                 spin_unlock_irqrestore(&iommu->lock, flags);
1724                 return 0;
1725         }
1726
1727         id = domain->id;
1728         pgd = domain->pgd;
1729
1730         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1731             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1732                 int found = 0;
1733
1734                 /* find an available domain id for this device in iommu */
1735                 ndomains = cap_ndoms(iommu->cap);
1736                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1737                         if (iommu->domains[num] == domain) {
1738                                 id = num;
1739                                 found = 1;
1740                                 break;
1741                         }
1742                 }
1743
1744                 if (found == 0) {
1745                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1746                         if (num >= ndomains) {
1747                                 spin_unlock_irqrestore(&iommu->lock, flags);
1748                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1749                                 return -EFAULT;
1750                         }
1751
1752                         set_bit(num, iommu->domain_ids);
1753                         iommu->domains[num] = domain;
1754                         id = num;
1755                 }
1756
1757                 /* Skip top levels of page tables for
1758                  * an iommu whose agaw is smaller than the default.
1759                  * Unnecessary for PT mode.
1760                  */
1761                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1762                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1763                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1764                                 if (!dma_pte_present(pgd)) {
1765                                         spin_unlock_irqrestore(&iommu->lock, flags);
1766                                         return -ENOMEM;
1767                                 }
1768                         }
1769                 }
1770         }
1771
1772         context_set_domain_id(context, id);
1773
1774         if (translation != CONTEXT_TT_PASS_THROUGH) {
1775                 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1776                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1777                                      CONTEXT_TT_MULTI_LEVEL;
1778         }
1779         /*
1780          * In pass through mode, AW must be programmed to indicate the largest
1781          * AGAW value supported by hardware. And ASR is ignored by hardware.
1782          */
1783         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1784                 context_set_address_width(context, iommu->msagaw);
1785         else {
1786                 context_set_address_root(context, virt_to_phys(pgd));
1787                 context_set_address_width(context, iommu->agaw);
1788         }
1789
1790         context_set_translation_type(context, translation);
1791         context_set_fault_enable(context);
1792         context_set_present(context);
1793         domain_flush_cache(domain, context, sizeof(*context));
1794
1795         /*
1796          * It's a non-present to present mapping. If hardware doesn't cache
1797          * non-present entries we only need to flush the write-buffer. If it
1798          * _does_ cache non-present entries, then it does so in the special
1799          * domain #0, which we have to flush:
1800          */
1801         if (cap_caching_mode(iommu->cap)) {
1802                 iommu->flush.flush_context(iommu, 0,
1803                                            (((u16)bus) << 8) | devfn,
1804                                            DMA_CCMD_MASK_NOBIT,
1805                                            DMA_CCMD_DEVICE_INVL);
1806                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1807         } else {
1808                 iommu_flush_write_buffer(iommu);
1809         }
1810         iommu_enable_dev_iotlb(info);
1811         spin_unlock_irqrestore(&iommu->lock, flags);
1812
1813         spin_lock_irqsave(&domain->iommu_lock, flags);
1814         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1815                 domain->iommu_count++;
1816                 if (domain->iommu_count == 1)
1817                         domain->nid = iommu->node;
1818                 domain_update_iommu_cap(domain);
1819         }
1820         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1821         return 0;
1822 }
1823
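/*
 * Set up context entries for the device itself and, if it sits behind a
 * PCIe-to-PCI bridge, for every bridge on the path up to and including
 * that bridge, since requests from devices behind a legacy bridge carry
 * the bridge's source-id.
 */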
1824 static int
1825 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1826                         int translation)
1827 {
1828         int ret;
1829         struct pci_dev *tmp, *parent;
1830
1831         ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1832                                          pdev->bus->number, pdev->devfn,
1833                                          translation);
1834         if (ret)
1835                 return ret;
1836
1837         /* dependent device mapping */
1838         tmp = pci_find_upstream_pcie_bridge(pdev);
1839         if (!tmp)
1840                 return 0;
1841         /* Secondary interface's bus number and devfn 0 */
1842         parent = pdev->bus->self;
1843         while (parent != tmp) {
1844                 ret = domain_context_mapping_one(domain,
1845                                                  pci_domain_nr(parent->bus),
1846                                                  parent->bus->number,
1847                                                  parent->devfn, translation);
1848                 if (ret)
1849                         return ret;
1850                 parent = parent->bus->self;
1851         }
1852         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1853                 return domain_context_mapping_one(domain,
1854                                         pci_domain_nr(tmp->subordinate),
1855                                         tmp->subordinate->number, 0,
1856                                         translation);
1857         else /* this is a legacy PCI bridge */
1858                 return domain_context_mapping_one(domain,
1859                                                   pci_domain_nr(tmp->bus),
1860                                                   tmp->bus->number,
1861                                                   tmp->devfn,
1862                                                   translation);
1863 }
1864
1865 static int domain_context_mapped(struct pci_dev *pdev)
1866 {
1867         int ret;
1868         struct pci_dev *tmp, *parent;
1869         struct intel_iommu *iommu;
1870
1871         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1872                                 pdev->devfn);
1873         if (!iommu)
1874                 return -ENODEV;
1875
1876         ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1877         if (!ret)
1878                 return ret;
1879         /* dependent device mapping */
1880         tmp = pci_find_upstream_pcie_bridge(pdev);
1881         if (!tmp)
1882                 return ret;
1883         /* Secondary interface's bus number and devfn 0 */
1884         parent = pdev->bus->self;
1885         while (parent != tmp) {
1886                 ret = device_context_mapped(iommu, parent->bus->number,
1887                                             parent->devfn);
1888                 if (!ret)
1889                         return ret;
1890                 parent = parent->bus->self;
1891         }
1892         if (pci_is_pcie(tmp))
1893                 return device_context_mapped(iommu, tmp->subordinate->number,
1894                                              0);
1895         else
1896                 return device_context_mapped(iommu, tmp->bus->number,
1897                                              tmp->devfn);
1898 }
1899
1900 /* Returns a number of VTD pages, but aligned to MM page size */
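/* e.g. with 4KiB pages, aligned_nrpages(0x1800, 0x2000) keeps the 0x800
   in-page offset, rounds 0x2800 up to 0x3000 and returns 3 */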
1901 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1902                                             size_t size)
1903 {
1904         host_addr &= ~PAGE_MASK;
1905         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1906 }
1907
1908 /* Return largest possible superpage level for a given mapping */
1909 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1910                                           unsigned long iov_pfn,
1911                                           unsigned long phy_pfn,
1912                                           unsigned long pages)
1913 {
1914         int support, level = 1;
1915         unsigned long pfnmerge;
1916
1917         support = domain->iommu_superpage;
1918
1919         /* To use a large page, the virtual *and* physical addresses
1920            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1921            of them will mean we have to use smaller pages. So just
1922            merge them and check both at once. */
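        /* e.g. iov_pfn 0x200 and phy_pfn 0x400 are both 512-page (2MiB)
           aligned, so a request of 512+ pages on 2MiB-capable hardware
           returns level 2; a low bit set in either pfn keeps level 1. */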
1923         pfnmerge = iov_pfn | phy_pfn;
1924
1925         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1926                 pages >>= VTD_STRIDE_SHIFT;
1927                 if (!pages)
1928                         break;
1929                 pfnmerge >>= VTD_STRIDE_SHIFT;
1930                 level++;
1931                 support--;
1932         }
1933         return level;
1934 }
1935
1936 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1937                             struct scatterlist *sg, unsigned long phys_pfn,
1938                             unsigned long nr_pages, int prot)
1939 {
1940         struct dma_pte *first_pte = NULL, *pte = NULL;
1941         phys_addr_t uninitialized_var(pteval);
1942         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1943         unsigned long sg_res;
1944         unsigned int largepage_lvl = 0;
1945         unsigned long lvl_pages = 0;
1946
1947         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1948
1949         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1950                 return -EINVAL;
1951
1952         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1953
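        /*
         * Without a scatterlist the region is physically contiguous;
         * starting sg_res at nr_pages + 1 means it can never hit zero
         * before nr_pages does, so sg_next() is never called on NULL.
         */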
1954         if (sg)
1955                 sg_res = 0;
1956         else {
1957                 sg_res = nr_pages + 1;
1958                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1959         }
1960
1961         while (nr_pages > 0) {
1962                 uint64_t tmp;
1963
1964                 if (!sg_res) {
1965                         sg_res = aligned_nrpages(sg->offset, sg->length);
1966                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1967                         sg->dma_length = sg->length;
1968                         pteval = page_to_phys(sg_page(sg)) | prot;
1969                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1970                 }
1971
1972                 if (!pte) {
1973                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1974
1975                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
1976                         if (!pte)
1977                                 return -ENOMEM;
1978                         /* It is a large page */
1979                         if (largepage_lvl > 1) {
1980                                 pteval |= DMA_PTE_LARGE_PAGE;
1981                                 /* Ensure that old small page tables are removed to make room
1982                                    for superpage, if they exist. */
1983                                 dma_pte_clear_range(domain, iov_pfn,
1984                                                     iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1985                                 dma_pte_free_pagetable(domain, iov_pfn,
1986                                                        iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1987                         } else {
1988                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1989                         }
1990
1991                 }
1992                 /* We don't need lock here, nobody else
1993                  * touches the iova range
1994                  */
1995                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1996                 if (tmp) {
1997                         static int dumps = 5;
1998                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1999                                iov_pfn, tmp, (unsigned long long)pteval);
2000                         if (dumps) {
2001                                 dumps--;
2002                                 debug_dma_dump_mappings(NULL);
2003                         }
2004                         WARN_ON(1);
2005                 }
2006
2007                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2008
2009                 BUG_ON(nr_pages < lvl_pages);
2010                 BUG_ON(sg_res < lvl_pages);
2011
2012                 nr_pages -= lvl_pages;
2013                 iov_pfn += lvl_pages;
2014                 phys_pfn += lvl_pages;
2015                 pteval += lvl_pages * VTD_PAGE_SIZE;
2016                 sg_res -= lvl_pages;
2017
2018                 /* If the next PTE would be the first in a new page, then we
2019                    need to flush the cache on the entries we've just written.
2020                    And then we'll need to recalculate 'pte', so clear it and
2021                    let it get set again in the if (!pte) block above.
2022
2023                    If we're done (!nr_pages) we need to flush the cache too.
2024
2025                    Also if we've been setting superpages, we may need to
2026                    recalculate 'pte' and switch back to smaller pages for the
2027                    end of the mapping, if the trailing size is not enough to
2028                    use another superpage (i.e. sg_res < lvl_pages). */
2029                 pte++;
2030                 if (!nr_pages || first_pte_in_page(pte) ||
2031                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2032                         domain_flush_cache(domain, first_pte,
2033                                            (void *)pte - (void *)first_pte);
2034                         pte = NULL;
2035                 }
2036
2037                 if (!sg_res && nr_pages)
2038                         sg = sg_next(sg);
2039         }
2040         return 0;
2041 }
2042
2043 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2044                                     struct scatterlist *sg, unsigned long nr_pages,
2045                                     int prot)
2046 {
2047         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2048 }
2049
2050 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2051                                      unsigned long phys_pfn, unsigned long nr_pages,
2052                                      int prot)
2053 {
2054         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2055 }
2056
2057 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2058 {
2059         if (!iommu)
2060                 return;
2061
2062         clear_context_table(iommu, bus, devfn);
2063         iommu->flush.flush_context(iommu, 0, 0, 0,
2064                                            DMA_CCMD_GLOBAL_INVL);
2065         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2066 }
2067
2068 static inline void unlink_domain_info(struct device_domain_info *info)
2069 {
2070         assert_spin_locked(&device_domain_lock);
2071         list_del(&info->link);
2072         list_del(&info->global);
2073         if (info->dev)
2074                 info->dev->dev.archdata.iommu = NULL;
2075 }
2076
2077 static void domain_remove_dev_info(struct dmar_domain *domain)
2078 {
2079         struct device_domain_info *info;
2080         unsigned long flags, flags2;
2081         struct intel_iommu *iommu;
2082
2083         spin_lock_irqsave(&device_domain_lock, flags);
2084         while (!list_empty(&domain->devices)) {
2085                 info = list_entry(domain->devices.next,
2086                         struct device_domain_info, link);
2087                 unlink_domain_info(info);
2088                 spin_unlock_irqrestore(&device_domain_lock, flags);
2089
2090                 iommu_disable_dev_iotlb(info);
2091                 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
2092                 iommu_detach_dev(iommu, info->bus, info->devfn);
2093
2094                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
2095                         iommu_detach_dependent_devices(iommu, info->dev);
2096                         /* clear this iommu in iommu_bmp, update iommu count
2097                          * and capabilities
2098                          */
2099                         spin_lock_irqsave(&domain->iommu_lock, flags2);
2100                         if (test_and_clear_bit(iommu->seq_id,
2101                                                domain->iommu_bmp)) {
2102                                 domain->iommu_count--;
2103                                 domain_update_iommu_cap(domain);
2104                         }
2105                         spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2106                 }
2107
2108                 free_devinfo_mem(info);
2109                 spin_lock_irqsave(&device_domain_lock, flags);
2110         }
2111         spin_unlock_irqrestore(&device_domain_lock, flags);
2112 }
2113
2114 /*
2115  * find_domain
2116  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
2117  */
2118 static struct dmar_domain *
2119 find_domain(struct pci_dev *pdev)
2120 {
2121         struct device_domain_info *info;
2122
2123         /* No lock here, assumes no domain exit in normal case */
2124         info = pdev->dev.archdata.iommu;
2125         if (info)
2126                 return info->domain;
2127         return NULL;
2128 }
2129
2130 static inline struct dmar_domain *
2131 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2132 {
2133         struct device_domain_info *info;
2134
2135         list_for_each_entry(info, &device_domain_list, global)
2136                 if (info->segment == segment && info->bus == bus &&
2137                     info->devfn == devfn)
2138                         return info->domain;
2139
2140         return NULL;
2141 }
2142
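/*
 * Attach a freshly initialised domain to a device (or, for a bridge, to a
 * segment/bus/devfn alias).  If another thread attached a domain first,
 * the caller's domain is torn down and *domp is switched to the winner.
 */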
2143 static int dmar_insert_dev_info(int segment, int bus, int devfn,
2144                                 struct pci_dev *dev, struct dmar_domain **domp)
2145 {
2146         struct dmar_domain *found, *domain = *domp;
2147         struct device_domain_info *info;
2148         unsigned long flags;
2149
2150         info = alloc_devinfo_mem();
2151         if (!info)
2152                 return -ENOMEM;
2153
2154         info->segment = segment;
2155         info->bus = bus;
2156         info->devfn = devfn;
2157         info->dev = dev;
2158         info->domain = domain;
2159         if (!dev)
2160                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2161
2162         spin_lock_irqsave(&device_domain_lock, flags);
2163         if (dev)
2164                 found = find_domain(dev);
2165         else
2166                 found = dmar_search_domain_by_dev_info(segment, bus, devfn);
2167         if (found) {
2168                 spin_unlock_irqrestore(&device_domain_lock, flags);
2169                 free_devinfo_mem(info);
2170                 if (found != domain) {
2171                         domain_exit(domain);
2172                         *domp = found;
2173                 }
2174         } else {
2175                 list_add(&info->link, &domain->devices);
2176                 list_add(&info->global, &device_domain_list);
2177                 if (dev)
2178                         dev->dev.archdata.iommu = info;
2179                 spin_unlock_irqrestore(&device_domain_lock, flags);
2180         }
2181
2182         return 0;
2183 }
2184
2185 /* domain is initialized */
2186 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
2187 {
2188         struct dmar_domain *domain, *free = NULL;
2189         struct intel_iommu *iommu;
2190         struct dmar_drhd_unit *drhd;
2191         struct pci_dev *dev_tmp;
2192         unsigned long flags;
2193         int bus = 0, devfn = 0;
2194         int segment;
2195
2196         domain = find_domain(pdev);
2197         if (domain)
2198                 return domain;
2199
2200         segment = pci_domain_nr(pdev->bus);
2201
2202         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2203         if (dev_tmp) {
2204                 if (pci_is_pcie(dev_tmp)) {
2205                         bus = dev_tmp->subordinate->number;
2206                         devfn = 0;
2207                 } else {
2208                         bus = dev_tmp->bus->number;
2209                         devfn = dev_tmp->devfn;
2210                 }
2211                 spin_lock_irqsave(&device_domain_lock, flags);
2212                 domain = dmar_search_domain_by_dev_info(segment, bus, devfn);
2213                 spin_unlock_irqrestore(&device_domain_lock, flags);
2214                 /* pcie-pci bridge already has a domain, use it */
2215                 if (domain)
2216                         goto found_domain;
2217         }
2218
2219         drhd = dmar_find_matched_drhd_unit(pdev);
2220         if (!drhd) {
2221                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2222                         pci_name(pdev));
2223                 return NULL;
2224         }
2225         iommu = drhd->iommu;
2226
2227         /* Allocate and initialize a new domain for the device */
2228         domain = alloc_domain(false);
2229         if (!domain)
2230                 goto error;
2231         if (iommu_attach_domain(domain, iommu)) {
2232                 free_domain_mem(domain);
2233                 goto error;
2234         }
2235         free = domain;
2236         if (domain_init(domain, gaw))
2237                 goto error;
2238
2239         /* register pcie-to-pci device */
2240         if (dev_tmp) {
2241                 if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain))
2242                         goto error;
2243                 else
2244                         free = NULL;
2245         }
2246
2247 found_domain:
2248         if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn,
2249                                  pdev, &domain) == 0)
2250                 return domain;
2251 error:
2252         if (free)
2253                 domain_exit(free);
2254         /* recheck it here, maybe others set it */
2255         return find_domain(pdev);
2256 }
2257
2258 static int iommu_identity_mapping;
2259 #define IDENTMAP_ALL            1
2260 #define IDENTMAP_GFX            2
2261 #define IDENTMAP_AZALIA         4
2262
2263 static int iommu_domain_identity_map(struct dmar_domain *domain,
2264                                      unsigned long long start,
2265                                      unsigned long long end)
2266 {
2267         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2268         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2269
2270         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2271                           dma_to_mm_pfn(last_vpfn))) {
2272                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2273                 return -ENOMEM;
2274         }
2275
2276         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2277                  start, end, domain->id);
2278         /*
2279          * RMRR range might have overlap with physical memory range,
2280          * clear it first
2281          */
2282         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2283
2284         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2285                                   last_vpfn - first_vpfn + 1,
2286                                   DMA_PTE_READ|DMA_PTE_WRITE);
2287 }
2288
2289 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2290                                       unsigned long long start,
2291                                       unsigned long long end)
2292 {
2293         struct dmar_domain *domain;
2294         int ret;
2295
2296         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2297         if (!domain)
2298                 return -ENOMEM;
2299
2300         /* For _hardware_ passthrough, don't bother. But for software
2301            passthrough, we do it anyway -- it may indicate a memory
2302            range which is reserved in E820 and so didn't get set up in
2303            si_domain to start with */
2304         if (domain == si_domain && hw_pass_through) {
2305                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2306                        pci_name(pdev), start, end);
2307                 return 0;
2308         }
2309
2310         printk(KERN_INFO
2311                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2312                pci_name(pdev), start, end);
2313
2314         if (end < start) {
2315                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2316                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2317                         dmi_get_system_info(DMI_BIOS_VENDOR),
2318                         dmi_get_system_info(DMI_BIOS_VERSION),
2319                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2320                 ret = -EIO;
2321                 goto error;
2322         }
2323
2324         if (end >> agaw_to_width(domain->agaw)) {
2325                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2326                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2327                      agaw_to_width(domain->agaw),
2328                      dmi_get_system_info(DMI_BIOS_VENDOR),
2329                      dmi_get_system_info(DMI_BIOS_VERSION),
2330                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2331                 ret = -EIO;
2332                 goto error;
2333         }
2334
2335         ret = iommu_domain_identity_map(domain, start, end);
2336         if (ret)
2337                 goto error;
2338
2339         /* context entry init */
2340         ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2341         if (ret)
2342                 goto error;
2343
2344         return 0;
2345
2346  error:
2347         domain_exit(domain);
2348         return ret;
2349 }
2350
2351 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2352         struct pci_dev *pdev)
2353 {
2354         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2355                 return 0;
2356         return iommu_prepare_identity_map(pdev, rmrr->base_address,
2357                 rmrr->end_address);
2358 }
2359
2360 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2361 static inline void iommu_prepare_isa(void)
2362 {
2363         struct pci_dev *pdev;
2364         int ret;
2365
2366         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2367         if (!pdev)
2368                 return;
2369
2370         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2371         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2372
2373         if (ret)
2374                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2375                        "floppy might not work\n");
2376
2377 }
2378 #else
2379 static inline void iommu_prepare_isa(void)
2380 {
2381         return;
2382 }
2383 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2384
2385 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2386
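/*
 * Build the static identity (si) domain: attach it to every active IOMMU
 * and, unless hardware passthrough is in use, 1:1 map every usable RAM
 * range of every online node.
 */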
2387 static int __init si_domain_init(int hw)
2388 {
2389         struct dmar_drhd_unit *drhd;
2390         struct intel_iommu *iommu;
2391         int nid, ret = 0;
2392
2393         si_domain = alloc_domain(false);
2394         if (!si_domain)
2395                 return -EFAULT;
2396
2397         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2398
2399         for_each_active_iommu(iommu, drhd) {
2400                 ret = iommu_attach_domain(si_domain, iommu);
2401                 if (ret) {
2402                         domain_exit(si_domain);
2403                         return -EFAULT;
2404                 }
2405         }
2406
2407         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2408                 domain_exit(si_domain);
2409                 return -EFAULT;
2410         }
2411
2412         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2413                  si_domain->id);
2414
2415         if (hw)
2416                 return 0;
2417
2418         for_each_online_node(nid) {
2419                 unsigned long start_pfn, end_pfn;
2420                 int i;
2421
2422                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2423                         ret = iommu_domain_identity_map(si_domain,
2424                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2425                         if (ret)
2426                                 return ret;
2427                 }
2428         }
2429
2430         return 0;
2431 }
2432
2433 static int identity_mapping(struct pci_dev *pdev)
2434 {
2435         struct device_domain_info *info;
2436
2437         if (likely(!iommu_identity_mapping))
2438                 return 0;
2439
2440         info = pdev->dev.archdata.iommu;
2441         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2442                 return (info->domain == si_domain);
2443
2444         return 0;
2445 }
2446
2447 static int domain_add_dev_info(struct dmar_domain *domain,
2448                                struct pci_dev *pdev,
2449                                int translation)
2450 {
2451         struct device_domain_info *info;
2452         unsigned long flags;
2453         int ret;
2454
2455         info = alloc_devinfo_mem();
2456         if (!info)
2457                 return -ENOMEM;
2458
2459         info->segment = pci_domain_nr(pdev->bus);
2460         info->bus = pdev->bus->number;
2461         info->devfn = pdev->devfn;
2462         info->dev = pdev;
2463         info->domain = domain;
2464
2465         spin_lock_irqsave(&device_domain_lock, flags);
2466         list_add(&info->link, &domain->devices);
2467         list_add(&info->global, &device_domain_list);
2468         pdev->dev.archdata.iommu = info;
2469         spin_unlock_irqrestore(&device_domain_lock, flags);
2470
2471         ret = domain_context_mapping(domain, pdev, translation);
2472         if (ret) {
2473                 spin_lock_irqsave(&device_domain_lock, flags);
2474                 unlink_domain_info(info);
2475                 spin_unlock_irqrestore(&device_domain_lock, flags);
2476                 free_devinfo_mem(info);
2477                 return ret;
2478         }
2479
2480         return 0;
2481 }
2482
2483 static bool device_has_rmrr(struct pci_dev *dev)
2484 {
2485         struct dmar_rmrr_unit *rmrr;
2486         struct device *tmp;
2487         int i;
2488
2489         rcu_read_lock();
2490         for_each_rmrr_units(rmrr) {
2491                 /*
2492                  * Return TRUE if this RMRR contains the device that
2493                  * is passed in.
2494                  */
2495                 for_each_active_dev_scope(rmrr->devices,
2496                                           rmrr->devices_cnt, i, tmp)
2497                         if (tmp == &dev->dev) {
2498                                 rcu_read_unlock();
2499                                 return true;
2500                         }
2501         }
2502         rcu_read_unlock();
2503         return false;
2504 }
2505
2506 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2507 {
2508
2509         /*
2510          * We want to prevent any device associated with an RMRR from
2511          * getting placed into the SI Domain. This is done because
2512          * problems exist when devices are moved in and out of domains
2513          * and their respective RMRR info is lost. We exempt USB devices
2514          * from this process due to their usage of RMRRs that are known
2515          * to not be needed after BIOS hand-off to OS.
2516          */
2517         if (device_has_rmrr(pdev) &&
2518             (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2519                 return 0;
2520
2521         if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2522                 return 1;
2523
2524         if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2525                 return 1;
2526
2527         if (!(iommu_identity_mapping & IDENTMAP_ALL))
2528                 return 0;
2529
2530         /*
2531          * We want to start off with all devices in the 1:1 domain, and
2532          * take them out later if we find they can't access all of memory.
2533          *
2534          * However, we can't do this for PCI devices behind bridges,
2535          * because all PCI devices behind the same bridge will end up
2536          * with the same source-id on their transactions.
2537          *
2538          * Practically speaking, we can't change things around for these
2539          * devices at run-time, because we can't be sure there'll be no
2540          * DMA transactions in flight for any of their siblings.
2541          * 
2542          * So PCI devices (unless they're on the root bus) as well as
2543          * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2544          * the 1:1 domain, just in _case_ one of their siblings turns out
2545          * not to be able to map all of memory.
2546          */
2547         if (!pci_is_pcie(pdev)) {
2548                 if (!pci_is_root_bus(pdev->bus))
2549                         return 0;
2550                 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2551                         return 0;
2552         } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2553                 return 0;
2554
2555         /* 
2556          * At boot time, we don't yet know if devices will be 64-bit capable.
2557          * Assume that they will -- if they turn out not to be, then we can 
2558          * take them out of the 1:1 domain later.
2559          */
2560         if (!startup) {
2561                 /*
2562                  * If the device's dma_mask is less than the system's memory
2563                  * size then this is not a candidate for identity mapping.
2564                  */
2565                 u64 dma_mask = pdev->dma_mask;
2566
2567                 if (pdev->dev.coherent_dma_mask &&
2568                     pdev->dev.coherent_dma_mask < dma_mask)
2569                         dma_mask = pdev->dev.coherent_dma_mask;
2570
2571                 return dma_mask >= dma_get_required_mask(&pdev->dev);
2572         }
2573
2574         return 1;
2575 }
2576
2577 static int __init iommu_prepare_static_identity_mapping(int hw)
2578 {
2579         struct pci_dev *pdev = NULL;
2580         int ret;
2581
2582         ret = si_domain_init(hw);
2583         if (ret)
2584                 return -EFAULT;
2585
2586         for_each_pci_dev(pdev) {
2587                 if (iommu_should_identity_map(pdev, 1)) {
2588                         ret = domain_add_dev_info(si_domain, pdev,
2589                                              hw ? CONTEXT_TT_PASS_THROUGH :
2590                                                   CONTEXT_TT_MULTI_LEVEL);
2591                         if (ret) {
2592                                 /* device not associated with an iommu */
2593                                 if (ret == -ENODEV)
2594                                         continue;
2595                                 return ret;
2596                         }
2597                         pr_info("IOMMU: %s identity mapping for device %s\n",
2598                                 hw ? "hardware" : "software", pci_name(pdev));
2599                 }
2600         }
2601
2602         return 0;
2603 }
2604
2605 static int __init init_dmars(void)
2606 {
2607         struct dmar_drhd_unit *drhd;
2608         struct dmar_rmrr_unit *rmrr;
2609         struct device *dev;
2610         struct intel_iommu *iommu;
2611         int i, ret;
2612
2613         /*
2614          * for each drhd
2615          *    allocate root
2616          *    initialize and program root entry to not present
2617          * endfor
2618          */
2619         for_each_drhd_unit(drhd) {
2620                 /*
2621                  * lock not needed as this is only incremented in the
2622                  * single-threaded kernel __init code path; all other
2623                  * accesses are read only
2624                  */
2625                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2626                         g_num_of_iommus++;
2627                         continue;
2628                 }
2629                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2630                           IOMMU_UNITS_SUPPORTED);
2631         }
2632
2633         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2634                         GFP_KERNEL);
2635         if (!g_iommus) {
2636                 printk(KERN_ERR "Allocating global iommu array failed\n");
2637                 ret = -ENOMEM;
2638                 goto error;
2639         }
2640
2641         deferred_flush = kzalloc(g_num_of_iommus *
2642                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2643         if (!deferred_flush) {
2644                 ret = -ENOMEM;
2645                 goto free_g_iommus;
2646         }
2647
2648         for_each_active_iommu(iommu, drhd) {
2649                 g_iommus[iommu->seq_id] = iommu;
2650
2651                 ret = iommu_init_domains(iommu);
2652                 if (ret)
2653                         goto free_iommu;
2654
2655                 /*
2656                  * TBD:
2657                  * we could share the same root & context tables
2658                  * among all IOMMU's. Need to Split it later.
2659                  */
2660                 ret = iommu_alloc_root_entry(iommu);
2661                 if (ret) {
2662                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2663                         goto free_iommu;
2664                 }
2665                 if (!ecap_pass_through(iommu->ecap))
2666                         hw_pass_through = 0;
2667         }
2668
2669         /*
2670          * Start from a sane iommu hardware state.
2671          */
2672         for_each_active_iommu(iommu, drhd) {
2673                 /*
2674                  * If the queued invalidation is already initialized by us
2675                  * (for example, while enabling interrupt-remapping) then
2676                  * we got the things already rolling from a sane state.
2677                  */
2678                 if (iommu->qi)
2679                         continue;
2680
2681                 /*
2682                  * Clear any previous faults.
2683                  */
2684                 dmar_fault(-1, iommu);
2685                 /*
2686                  * Disable queued invalidation if supported and already enabled
2687                  * before OS handover.
2688                  */
2689                 dmar_disable_qi(iommu);
2690         }
2691
2692         for_each_active_iommu(iommu, drhd) {
2693                 if (dmar_enable_qi(iommu)) {
2694                         /*
2695                          * Queued Invalidate not enabled, use Register Based
2696                          * Invalidate
2697                          */
2698                         iommu->flush.flush_context = __iommu_flush_context;
2699                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2700                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2701                                "invalidation\n",
2702                                 iommu->seq_id,
2703                                (unsigned long long)drhd->reg_base_addr);
2704                 } else {
2705                         iommu->flush.flush_context = qi_flush_context;
2706                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2707                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2708                                "invalidation\n",
2709                                 iommu->seq_id,
2710                                (unsigned long long)drhd->reg_base_addr);
2711                 }
2712         }
2713
2714         if (iommu_pass_through)
2715                 iommu_identity_mapping |= IDENTMAP_ALL;
2716
2717 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2718         iommu_identity_mapping |= IDENTMAP_GFX;
2719 #endif
2720
2721         check_tylersburg_isoch();
2722
2723         /*
2724          * If pass through is not set or not enabled, set up context entries
2725          * for identity mappings for rmrr, gfx, and isa, and possibly fall
2726          * back to static identity mapping if iommu_identity_mapping is set.
2727          */
2728         if (iommu_identity_mapping) {
2729                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2730                 if (ret) {
2731                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2732                         goto free_iommu;
2733                 }
2734         }
2735         /*
2736          * For each rmrr
2737          *   for each dev attached to rmrr
2738          *   do
2739          *     locate drhd for dev, alloc domain for dev
2740          *     allocate free domain
2741          *     allocate page table entries for rmrr
2742          *     if context not allocated for bus
2743          *           allocate and init context
2744          *           set present in root table for this bus
2745          *     init context with domain, translation etc
2746          *    endfor
2747          * endfor
2748          */
2749         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2750         for_each_rmrr_units(rmrr) {
2751                 /* some BIOSes list non-existent devices in the DMAR table. */
2752                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2753                                           i, dev) {
2754                         if (!dev_is_pci(dev))
2755                                 continue;
2756                         ret = iommu_prepare_rmrr_dev(rmrr, to_pci_dev(dev));
2757                         if (ret)
2758                                 printk(KERN_ERR
2759                                        "IOMMU: mapping reserved region failed\n");
2760                 }
2761         }
2762
2763         iommu_prepare_isa();
2764
2765         /*
2766          * for each drhd
2767          *   enable fault log
2768          *   global invalidate context cache
2769          *   global invalidate iotlb
2770          *   enable translation
2771          */
2772         for_each_iommu(iommu, drhd) {
2773                 if (drhd->ignored) {
2774                         /*
2775                          * we always have to disable PMRs or DMA may fail on
2776                          * this device
2777                          */
2778                         if (force_on)
2779                                 iommu_disable_protect_mem_regions(iommu);
2780                         continue;
2781                 }
2782
2783                 iommu_flush_write_buffer(iommu);
2784
2785                 ret = dmar_set_interrupt(iommu);
2786                 if (ret)
2787                         goto free_iommu;
2788
2789                 iommu_set_root_entry(iommu);
2790
2791                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2792                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2793
2794                 ret = iommu_enable_translation(iommu);
2795                 if (ret)
2796                         goto free_iommu;
2797
2798                 iommu_disable_protect_mem_regions(iommu);
2799         }
2800
2801         return 0;
2802
2803 free_iommu:
2804         for_each_active_iommu(iommu, drhd)
2805                 free_dmar_iommu(iommu);
2806         kfree(deferred_flush);
2807 free_g_iommus:
2808         kfree(g_iommus);
2809 error:
2810         return ret;
2811 }
2812
2813 /* This takes a number of _MM_ pages, not VTD pages */
2814 static struct iova *intel_alloc_iova(struct device *dev,
2815                                      struct dmar_domain *domain,
2816                                      unsigned long nrpages, uint64_t dma_mask)
2817 {
2818         struct pci_dev *pdev = to_pci_dev(dev);
2819         struct iova *iova = NULL;
2820
2821         /* Restrict dma_mask to the width that the iommu can handle */
2822         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2823
2824         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2825                 /*
2826                  * First try to allocate an io virtual address in
2827                  * DMA_BIT_MASK(32) and if that fails then try allocating
2828                  * from higher range
2829                  */
2830                 iova = alloc_iova(&domain->iovad, nrpages,
2831                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2832                 if (iova)
2833                         return iova;
2834         }
2835         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2836         if (unlikely(!iova)) {
2837                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2838                        nrpages, pci_name(pdev));
2839                 return NULL;
2840         }
2841
2842         return iova;
2843 }
2844
2845 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2846 {
2847         struct dmar_domain *domain;
2848         int ret;
2849
2850         domain = get_domain_for_dev(pdev,
2851                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2852         if (!domain) {
2853                 printk(KERN_ERR
2854                         "Allocating domain for %s failed\n", pci_name(pdev));
2855                 return NULL;
2856         }
2857
2858         /* make sure context mapping is ok */
2859         if (unlikely(!domain_context_mapped(pdev))) {
2860                 ret = domain_context_mapping(domain, pdev,
2861                                              CONTEXT_TT_MULTI_LEVEL);
2862                 if (ret) {
2863                         printk(KERN_ERR
2864                                 "Domain context map for %s failed\n",
2865                                 pci_name(pdev));
2866                         return NULL;
2867                 }
2868         }
2869
2870         return domain;
2871 }
2872
2873 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2874 {
2875         struct device_domain_info *info;
2876
2877         /* No lock here, assumes no domain exit in normal case */
2878         info = dev->dev.archdata.iommu;
2879         if (likely(info))
2880                 return info->domain;
2881
2882         return __get_valid_domain_for_dev(dev);
2883 }
2884
2885 static int iommu_dummy(struct device *dev)
2886 {
2887         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2888 }
2889
2890 /* Check if the device needs to go through the non-identity map and unmap process. */
2891 static int iommu_no_mapping(struct device *dev)
2892 {
2893         struct pci_dev *pdev;
2894         int found;
2895
2896         if (unlikely(!dev_is_pci(dev)))
2897                 return 1;
2898
2899         if (iommu_dummy(dev))
2900                 return 1;
2901
2902         if (!iommu_identity_mapping)
2903                 return 0;
2904
2905         pdev = to_pci_dev(dev);
2906         found = identity_mapping(pdev);
2907         if (found) {
2908                 if (iommu_should_identity_map(pdev, 0))
2909                         return 1;
2910                 else {
2911                         /*
2912                          * The 32 bit DMA device is removed from si_domain and
2913                          * falls back to non-identity mapping.
2914                          */
2915                         domain_remove_one_dev_info(si_domain, pdev);
2916                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2917                                pci_name(pdev));
2918                         return 0;
2919                 }
2920         } else {
2921                 /*
2922                          * If a 64 bit DMA device is detached from a VM, the
2923                          * device is put into si_domain for identity mapping.
2924                  */
2925                 if (iommu_should_identity_map(pdev, 0)) {
2926                         int ret;
2927                         ret = domain_add_dev_info(si_domain, pdev,
2928                                                   hw_pass_through ?
2929                                                   CONTEXT_TT_PASS_THROUGH :
2930                                                   CONTEXT_TT_MULTI_LEVEL);
2931                         if (!ret) {
2932                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2933                                        pci_name(pdev));
2934                                 return 1;
2935                         }
2936                 }
2937         }
2938
2939         return 0;
2940 }
2941
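/*
 * Core map path for the DMA API: look up (or create) the device's domain,
 * allocate an iova covering aligned_nrpages() VT-d pages, install the
 * page-table entries and then flush the IOTLB (caching mode) or just the
 * write buffer.
 */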
2942 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2943                                      size_t size, int dir, u64 dma_mask)
2944 {
2945         struct pci_dev *pdev = to_pci_dev(hwdev);
2946         struct dmar_domain *domain;
2947         phys_addr_t start_paddr;
2948         struct iova *iova;
2949         int prot = 0;
2950         int ret;
2951         struct intel_iommu *iommu;
2952         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2953
2954         BUG_ON(dir == DMA_NONE);
2955
2956         if (iommu_no_mapping(hwdev))
2957                 return paddr;
2958
2959         domain = get_valid_domain_for_dev(pdev);
2960         if (!domain)
2961                 return 0;
2962
2963         iommu = domain_get_iommu(domain);
2964         size = aligned_nrpages(paddr, size);
2965
2966         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2967         if (!iova)
2968                 goto error;
2969
2970         /*
2971          * Check if DMAR supports zero-length reads on write only
2972          * mappings..
2973          */
2974         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2975                         !cap_zlr(iommu->cap))
2976                 prot |= DMA_PTE_READ;
2977         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2978                 prot |= DMA_PTE_WRITE;
2979         /*
2980          * paddr ~ paddr + size may cover partial pages, so map whole pages.
2981          * Note: if two parts of one page are mapped separately, we might
2982          * end up with two guest addresses mapping to the same host paddr,
2983          * but this is not a big problem.
2984          */
2985         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2986                                  mm_to_dma_pfn(paddr_pfn), size, prot);
2987         if (ret)
2988                 goto error;
2989
2990         /* it's a non-present to present mapping. Only flush if caching mode */
2991         if (cap_caching_mode(iommu->cap))
2992                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
2993         else
2994                 iommu_flush_write_buffer(iommu);
2995
2996         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2997         start_paddr += paddr & ~PAGE_MASK;
2998         return start_paddr;
2999
3000 error:
3001         if (iova)
3002                 __free_iova(&domain->iovad, iova);
3003         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3004                 pci_name(pdev), size, (unsigned long long)paddr, dir);
3005         return 0;
3006 }
3007
3008 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3009                                  unsigned long offset, size_t size,
3010                                  enum dma_data_direction dir,
3011                                  struct dma_attrs *attrs)
3012 {
3013         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3014                                   dir, to_pci_dev(dev)->dma_mask);
3015 }
3016
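/*
 * Deferred ("lazy") unmapping: unless intel_iommu_strict is set, unmapped
 * IOVAs are not invalidated immediately.  add_unmap() below queues them in
 * the per-IOMMU deferred_flush[] arrays and flush_unmaps() drains the
 * queues in one go, either when HIGH_WATER_MARK entries have accumulated
 * or when the 10ms unmap_timer fires.  This batches the relatively
 * expensive IOTLB invalidations, at the cost of a short window during
 * which a device could still use a stale translation.
 */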
3017 static void flush_unmaps(void)
3018 {
3019         int i, j;
3020
3021         timer_on = 0;
3022
3023         /* just flush them all */
3024         for (i = 0; i < g_num_of_iommus; i++) {
3025                 struct intel_iommu *iommu = g_iommus[i];
3026                 if (!iommu)
3027                         continue;
3028
3029                 if (!deferred_flush[i].next)
3030                         continue;
3031
3032                 /* In caching mode, global flushes make emulation expensive */
3033                 if (!cap_caching_mode(iommu->cap))
3034                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3035                                          DMA_TLB_GLOBAL_FLUSH);
3036                 for (j = 0; j < deferred_flush[i].next; j++) {
3037                         unsigned long mask;
3038                         struct iova *iova = deferred_flush[i].iova[j];
3039                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3040
3041                         /* On real hardware multiple invalidations are expensive */
3042                         if (cap_caching_mode(iommu->cap))
3043                                 iommu_flush_iotlb_psi(iommu, domain->id,
3044                                         iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3045                                         !deferred_flush[i].freelist[j], 0);
3046                         else {
3047                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3048                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3049                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3050                         }
3051                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3052                         if (deferred_flush[i].freelist[j])
3053                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3054                 }
3055                 deferred_flush[i].next = 0;
3056         }
3057
3058         list_size = 0;
3059 }
3060
3061 static void flush_unmaps_timeout(unsigned long data)
3062 {
3063         unsigned long flags;
3064
3065         spin_lock_irqsave(&async_umap_flush_lock, flags);
3066         flush_unmaps();
3067         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3068 }
3069
3070 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3071 {
3072         unsigned long flags;
3073         int next, iommu_id;
3074         struct intel_iommu *iommu;
3075
3076         spin_lock_irqsave(&async_umap_flush_lock, flags);
3077         if (list_size == HIGH_WATER_MARK)
3078                 flush_unmaps();
3079
3080         iommu = domain_get_iommu(dom);
3081         iommu_id = iommu->seq_id;
3082
3083         next = deferred_flush[iommu_id].next;
3084         deferred_flush[iommu_id].domain[next] = dom;
3085         deferred_flush[iommu_id].iova[next] = iova;
3086         deferred_flush[iommu_id].freelist[next] = freelist;
3087         deferred_flush[iommu_id].next++;
3088
3089         if (!timer_on) {
3090                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3091                 timer_on = 1;
3092         }
3093         list_size++;
3094         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3095 }
3096
3097 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3098                              size_t size, enum dma_data_direction dir,
3099                              struct dma_attrs *attrs)
3100 {
3101         struct pci_dev *pdev = to_pci_dev(dev);
3102         struct dmar_domain *domain;
3103         unsigned long start_pfn, last_pfn;
3104         struct iova *iova;
3105         struct intel_iommu *iommu;
3106         struct page *freelist;
3107
3108         if (iommu_no_mapping(dev))
3109                 return;
3110
3111         domain = find_domain(pdev);
3112         BUG_ON(!domain);
3113
3114         iommu = domain_get_iommu(domain);
3115
3116         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3117         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3118                       (unsigned long long)dev_addr))
3119                 return;
3120
3121         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3122         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3123
3124         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3125                  pci_name(pdev), start_pfn, last_pfn);
3126
3127         freelist = domain_unmap(domain, start_pfn, last_pfn);
3128
3129         if (intel_iommu_strict) {
3130                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3131                                       last_pfn - start_pfn + 1, !freelist, 0);
3132                 /* free iova */
3133                 __free_iova(&domain->iovad, iova);
3134                 dma_free_pagelist(freelist);
3135         } else {
3136                 add_unmap(domain, iova, freelist);
3137                 /*
3138                  * queue up the release of the unmap to save the roughly 1/6
3139                  * of the CPU time used up by the iotlb flush operation...
3140                  */
3141         }
3142 }
3143
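/*
 * Coherent allocations are ordinary page allocations funnelled through
 * __intel_map_single() with DMA_BIDIRECTIONAL.  Note the GFP handling: for
 * a translated device the GFP_DMA/GFP_DMA32 hints are dropped, since the
 * IOMMU can hand out an IOVA within the coherent mask no matter where the
 * pages live; for a device in passthrough the allocation itself must
 * honour the coherent mask, so the zone restrictions are put back.
 */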
3144 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
3145                                   dma_addr_t *dma_handle, gfp_t flags,
3146                                   struct dma_attrs *attrs)
3147 {
3148         void *vaddr;
3149         int order;
3150
3151         size = PAGE_ALIGN(size);
3152         order = get_order(size);
3153
3154         if (!iommu_no_mapping(hwdev))
3155                 flags &= ~(GFP_DMA | GFP_DMA32);
3156         else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3157                 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3158                         flags |= GFP_DMA;
3159                 else
3160                         flags |= GFP_DMA32;
3161         }
3162
3163         vaddr = (void *)__get_free_pages(flags, order);
3164         if (!vaddr)
3165                 return NULL;
3166         memset(vaddr, 0, size);
3167
3168         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3169                                          DMA_BIDIRECTIONAL,
3170                                          hwdev->coherent_dma_mask);
3171         if (*dma_handle)
3172                 return vaddr;
3173         free_pages((unsigned long)vaddr, order);
3174         return NULL;
3175 }
3176
3177 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3178                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3179 {
3180         int order;
3181
3182         size = PAGE_ALIGN(size);
3183         order = get_order(size);
3184
3185         intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3186         free_pages((unsigned long)vaddr, order);
3187 }
3188
3189 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3190                            int nelems, enum dma_data_direction dir,
3191                            struct dma_attrs *attrs)
3192 {
3193         struct pci_dev *pdev = to_pci_dev(hwdev);
3194         struct dmar_domain *domain;
3195         unsigned long start_pfn, last_pfn;
3196         struct iova *iova;
3197         struct intel_iommu *iommu;
3198         struct page *freelist;
3199
3200         if (iommu_no_mapping(hwdev))
3201                 return;
3202
3203         domain = find_domain(pdev);
3204         BUG_ON(!domain);
3205
3206         iommu = domain_get_iommu(domain);
3207
3208         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3209         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3210                       (unsigned long long)sglist[0].dma_address))
3211                 return;
3212
3213         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3214         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3215
3216         freelist = domain_unmap(domain, start_pfn, last_pfn);
3217
3218         if (intel_iommu_strict) {
3219                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3220                                       last_pfn - start_pfn + 1, !freelist, 0);
3221                 /* free iova */
3222                 __free_iova(&domain->iovad, iova);
3223                 dma_free_pagelist(freelist);
3224         } else {
3225                 add_unmap(domain, iova, freelist);
3226                 /*
3227                  * queue up the release of the unmap to save the roughly 1/6
3228                  * of the CPU time used up by the iotlb flush operation...
3229                  */
3230         }
3231 }
3232
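/*
 * Passthrough scatter-gather path: when iommu_no_mapping() says the device
 * bypasses translation, each segment's DMA address is simply the physical
 * address of its page plus the offset - no IOVA allocation and no page
 * table updates are involved.
 */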
3233 static int intel_nontranslate_map_sg(struct device *hwdev,
3234         struct scatterlist *sglist, int nelems, int dir)
3235 {
3236         int i;
3237         struct scatterlist *sg;
3238
3239         for_each_sg(sglist, sg, nelems, i) {
3240                 BUG_ON(!sg_page(sg));
3241                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3242                 sg->dma_length = sg->length;
3243         }
3244         return nelems;
3245 }
3246
3247 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3248                         enum dma_data_direction dir, struct dma_attrs *attrs)
3249 {
3250         int i;
3251         struct pci_dev *pdev = to_pci_dev(hwdev);
3252         struct dmar_domain *domain;
3253         size_t size = 0;
3254         int prot = 0;
3255         struct iova *iova = NULL;
3256         int ret;
3257         struct scatterlist *sg;
3258         unsigned long start_vpfn;
3259         struct intel_iommu *iommu;
3260
3261         BUG_ON(dir == DMA_NONE);
3262         if (iommu_no_mapping(hwdev))
3263                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3264
3265         domain = get_valid_domain_for_dev(pdev);
3266         if (!domain)
3267                 return 0;
3268
3269         iommu = domain_get_iommu(domain);
3270
3271         for_each_sg(sglist, sg, nelems, i)
3272                 size += aligned_nrpages(sg->offset, sg->length);
3273
3274         iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3275                                 pdev->dma_mask);
3276         if (!iova) {
3277                 sglist->dma_length = 0;
3278                 return 0;
3279         }
3280
3281         /*
3282          * Check if DMAR supports zero-length reads on write only
3283          * mappings..
3284          */
3285         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3286                         !cap_zlr(iommu->cap))
3287                 prot |= DMA_PTE_READ;
3288         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3289                 prot |= DMA_PTE_WRITE;
3290
3291         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3292
3293         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3294         if (unlikely(ret)) {
3295                 /*  clear the page */
3296                 dma_pte_clear_range(domain, start_vpfn,
3297                                     start_vpfn + size - 1);
3298                 /* free page tables */
3299                 dma_pte_free_pagetable(domain, start_vpfn,
3300                                        start_vpfn + size - 1);
3301                 /* free iova */
3302                 __free_iova(&domain->iovad, iova);
3303                 return 0;
3304         }
3305
3306         /* it's a non-present to present mapping. Only flush if caching mode */
3307         if (cap_caching_mode(iommu->cap))
3308                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3309         else
3310                 iommu_flush_write_buffer(iommu);
3311
3312         return nelems;
3313 }
3314
3315 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3316 {
3317         return !dma_addr;
3318 }
3319
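/*
 * Installed as the global dma_ops in intel_iommu_init(), so once the IOMMU
 * is up every streaming and coherent DMA API call from a PCI driver is
 * routed through the mapping/unmapping paths above.
 */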
3320 struct dma_map_ops intel_dma_ops = {
3321         .alloc = intel_alloc_coherent,
3322         .free = intel_free_coherent,
3323         .map_sg = intel_map_sg,
3324         .unmap_sg = intel_unmap_sg,
3325         .map_page = intel_map_page,
3326         .unmap_page = intel_unmap_page,
3327         .mapping_error = intel_mapping_error,
3328 };
3329
3330 static inline int iommu_domain_cache_init(void)
3331 {
3332         int ret = 0;
3333
3334         iommu_domain_cache = kmem_cache_create("iommu_domain",
3335                                          sizeof(struct dmar_domain),
3336                                          0,
3337                                          SLAB_HWCACHE_ALIGN,
3338
3339                                          NULL);
3340         if (!iommu_domain_cache) {
3341                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3342                 ret = -ENOMEM;
3343         }
3344
3345         return ret;
3346 }
3347
3348 static inline int iommu_devinfo_cache_init(void)
3349 {
3350         int ret = 0;
3351
3352         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3353                                          sizeof(struct device_domain_info),
3354                                          0,
3355                                          SLAB_HWCACHE_ALIGN,
3356                                          NULL);
3357         if (!iommu_devinfo_cache) {
3358                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3359                 ret = -ENOMEM;
3360         }
3361
3362         return ret;
3363 }
3364
3365 static inline int iommu_iova_cache_init(void)
3366 {
3367         int ret = 0;
3368
3369         iommu_iova_cache = kmem_cache_create("iommu_iova",
3370                                          sizeof(struct iova),
3371                                          0,
3372                                          SLAB_HWCACHE_ALIGN,
3373                                          NULL);
3374         if (!iommu_iova_cache) {
3375                 printk(KERN_ERR "Couldn't create iova cache\n");
3376                 ret = -ENOMEM;
3377         }
3378
3379         return ret;
3380 }
3381
3382 static int __init iommu_init_mempool(void)
3383 {
3384         int ret;
3385         ret = iommu_iova_cache_init();
3386         if (ret)
3387                 return ret;
3388
3389         ret = iommu_domain_cache_init();
3390         if (ret)
3391                 goto domain_error;
3392
3393         ret = iommu_devinfo_cache_init();
3394         if (!ret)
3395                 return ret;
3396
3397         kmem_cache_destroy(iommu_domain_cache);
3398 domain_error:
3399         kmem_cache_destroy(iommu_iova_cache);
3400
3401         return -ENOMEM;
3402 }
3403
3404 static void __init iommu_exit_mempool(void)
3405 {
3406         kmem_cache_destroy(iommu_devinfo_cache);
3407         kmem_cache_destroy(iommu_domain_cache);
3408         kmem_cache_destroy(iommu_iova_cache);
3409
3410 }
3411
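/*
 * Quirk for the Sandy Bridge IOAT (QuickData) DMA engine: if the BIOS
 * reports the device under the wrong DMAR unit, tag it with
 * DUMMY_DEVICE_DOMAIN_INFO so that iommu_dummy()/iommu_no_mapping() make
 * the DMA API treat it as untranslated instead of programming the wrong
 * IOMMU on its behalf.
 */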
3412 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3413 {
3414         struct dmar_drhd_unit *drhd;
3415         u32 vtbar;
3416         int rc;
3417
3418         /* We know that this device on this chipset has its own IOMMU.
3419          * If we find it under a different IOMMU, then the BIOS is lying
3420          * to us. Hope that the IOMMU for this device is actually
3421          * disabled, and it needs no translation...
3422          */
3423         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3424         if (rc) {
3425                 /* "can't" happen */
3426                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3427                 return;
3428         }
3429         vtbar &= 0xffff0000;
3430
3431         /* we know that this iommu should be at offset 0xa000 from vtbar */
3432         drhd = dmar_find_matched_drhd_unit(pdev);
3433         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3434                             TAINT_FIRMWARE_WORKAROUND,
3435                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3436                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3437 }
3438 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3439
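/*
 * Drop DMAR units we will never use: a unit whose device scope contains no
 * active devices is simply ignored, and a unit that covers *only* graphics
 * devices is either kept (setting intel_iommu_gfx_mapped) or ignored with
 * its devices tagged DUMMY_DEVICE_DOMAIN_INFO, depending on dmar_map_gfx.
 */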
3440 static void __init init_no_remapping_devices(void)
3441 {
3442         struct dmar_drhd_unit *drhd;
3443         struct device *dev;
3444         int i;
3445
3446         for_each_drhd_unit(drhd) {
3447                 if (!drhd->include_all) {
3448                         for_each_active_dev_scope(drhd->devices,
3449                                                   drhd->devices_cnt, i, dev)
3450                                 break;
3451                         /* ignore DMAR unit if no devices exist */
3452                         if (i == drhd->devices_cnt)
3453                                 drhd->ignored = 1;
3454                 }
3455         }
3456
3457         for_each_active_drhd_unit(drhd) {
3458                 if (drhd->include_all)
3459                         continue;
3460
3461                 for_each_active_dev_scope(drhd->devices,
3462                                           drhd->devices_cnt, i, dev)
3463                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3464                                 break;
3465                 if (i < drhd->devices_cnt)
3466                         continue;
3467
3468                 /* This IOMMU has *only* gfx devices. Either bypass it or
3469                    set the gfx_mapped flag, as appropriate */
3470                 if (dmar_map_gfx) {
3471                         intel_iommu_gfx_mapped = 1;
3472                 } else {
3473                         drhd->ignored = 1;
3474                         for_each_active_dev_scope(drhd->devices,
3475                                                   drhd->devices_cnt, i, dev)
3476                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3477                 }
3478         }
3479 }
3480
3481 #ifdef CONFIG_SUSPEND
3482 static int init_iommu_hw(void)
3483 {
3484         struct dmar_drhd_unit *drhd;
3485         struct intel_iommu *iommu = NULL;
3486
3487         for_each_active_iommu(iommu, drhd)
3488                 if (iommu->qi)
3489                         dmar_reenable_qi(iommu);
3490
3491         for_each_iommu(iommu, drhd) {
3492                 if (drhd->ignored) {
3493                         /*
3494                          * we always have to disable PMRs or DMA may fail on
3495                          * this device
3496                          */
3497                         if (force_on)
3498                                 iommu_disable_protect_mem_regions(iommu);
3499                         continue;
3500                 }
3501
3502                 iommu_flush_write_buffer(iommu);
3503
3504                 iommu_set_root_entry(iommu);
3505
3506                 iommu->flush.flush_context(iommu, 0, 0, 0,
3507                                            DMA_CCMD_GLOBAL_INVL);
3508                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3509                                          DMA_TLB_GLOBAL_FLUSH);
3510                 if (iommu_enable_translation(iommu))
3511                         return 1;
3512                 iommu_disable_protect_mem_regions(iommu);
3513         }
3514
3515         return 0;
3516 }
3517
3518 static void iommu_flush_all(void)
3519 {
3520         struct dmar_drhd_unit *drhd;
3521         struct intel_iommu *iommu;
3522
3523         for_each_active_iommu(iommu, drhd) {
3524                 iommu->flush.flush_context(iommu, 0, 0, 0,
3525                                            DMA_CCMD_GLOBAL_INVL);
3526                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3527                                          DMA_TLB_GLOBAL_FLUSH);
3528         }
3529 }
3530
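/*
 * Suspend path: translation is disabled and the four fault event registers
 * (FECTL/FEDATA/FEADDR/FEUADDR) are saved per IOMMU; iommu_resume() below
 * re-runs init_iommu_hw() and writes the saved values back before freeing
 * the save area.
 */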
3531 static int iommu_suspend(void)
3532 {
3533         struct dmar_drhd_unit *drhd;
3534         struct intel_iommu *iommu = NULL;
3535         unsigned long flag;
3536
3537         for_each_active_iommu(iommu, drhd) {
3538                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3539                                                  GFP_ATOMIC);
3540                 if (!iommu->iommu_state)
3541                         goto nomem;
3542         }
3543
3544         iommu_flush_all();
3545
3546         for_each_active_iommu(iommu, drhd) {
3547                 iommu_disable_translation(iommu);
3548
3549                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3550
3551                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3552                         readl(iommu->reg + DMAR_FECTL_REG);
3553                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3554                         readl(iommu->reg + DMAR_FEDATA_REG);
3555                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3556                         readl(iommu->reg + DMAR_FEADDR_REG);
3557                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3558                         readl(iommu->reg + DMAR_FEUADDR_REG);
3559
3560                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3561         }
3562         return 0;
3563
3564 nomem:
3565         for_each_active_iommu(iommu, drhd)
3566                 kfree(iommu->iommu_state);
3567
3568         return -ENOMEM;
3569 }
3570
3571 static void iommu_resume(void)
3572 {
3573         struct dmar_drhd_unit *drhd;
3574         struct intel_iommu *iommu = NULL;
3575         unsigned long flag;
3576
3577         if (init_iommu_hw()) {
3578                 if (force_on)
3579                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3580                 else
3581                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3582                 return;
3583         }
3584
3585         for_each_active_iommu(iommu, drhd) {
3586
3587                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3588
3589                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3590                         iommu->reg + DMAR_FECTL_REG);
3591                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3592                         iommu->reg + DMAR_FEDATA_REG);
3593                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3594                         iommu->reg + DMAR_FEADDR_REG);
3595                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3596                         iommu->reg + DMAR_FEUADDR_REG);
3597
3598                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3599         }
3600
3601         for_each_active_iommu(iommu, drhd)
3602                 kfree(iommu->iommu_state);
3603 }
3604
3605 static struct syscore_ops iommu_syscore_ops = {
3606         .resume         = iommu_resume,
3607         .suspend        = iommu_suspend,
3608 };
3609
3610 static void __init init_iommu_pm_ops(void)
3611 {
3612         register_syscore_ops(&iommu_syscore_ops);
3613 }
3614
3615 #else
3616 static inline void init_iommu_pm_ops(void) {}
3617 #endif  /* CONFIG_SUSPEND */
3618
3619
3620 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3621 {
3622         struct acpi_dmar_reserved_memory *rmrr;
3623         struct dmar_rmrr_unit *rmrru;
3624
3625         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3626         if (!rmrru)
3627                 return -ENOMEM;
3628
3629         rmrru->hdr = header;
3630         rmrr = (struct acpi_dmar_reserved_memory *)header;
3631         rmrru->base_address = rmrr->base_address;
3632         rmrru->end_address = rmrr->end_address;
3633         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3634                                 ((void *)rmrr) + rmrr->header.length,
3635                                 &rmrru->devices_cnt);
3636         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3637                 kfree(rmrru);
3638                 return -ENOMEM;
3639         }
3640
3641         list_add(&rmrru->list, &dmar_rmrr_units);
3642
3643         return 0;
3644 }
3645
3646 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3647 {
3648         struct acpi_dmar_atsr *atsr;
3649         struct dmar_atsr_unit *atsru;
3650
3651         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3652         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3653         if (!atsru)
3654                 return -ENOMEM;
3655
3656         atsru->hdr = hdr;
3657         atsru->include_all = atsr->flags & 0x1;
3658         if (!atsru->include_all) {
3659                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3660                                 (void *)atsr + atsr->header.length,
3661                                 &atsru->devices_cnt);
3662                 if (atsru->devices_cnt && atsru->devices == NULL) {
3663                         kfree(atsru);
3664                         return -ENOMEM;
3665                 }
3666         }
3667
3668         list_add_rcu(&atsru->list, &dmar_atsr_units);
3669
3670         return 0;
3671 }
3672
3673 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3674 {
3675         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3676         kfree(atsru);
3677 }
3678
3679 static void intel_iommu_free_dmars(void)
3680 {
3681         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3682         struct dmar_atsr_unit *atsru, *atsr_n;
3683
3684         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3685                 list_del(&rmrru->list);
3686                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3687                 kfree(rmrru);
3688         }
3689
3690         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3691                 list_del(&atsru->list);
3692                 intel_iommu_free_atsr(atsru);
3693         }
3694 }
3695
3696 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3697 {
3698         int i, ret = 1;
3699         struct pci_bus *bus;
3700         struct pci_dev *bridge = NULL;
3701         struct device *tmp;
3702         struct acpi_dmar_atsr *atsr;
3703         struct dmar_atsr_unit *atsru;
3704
3705         dev = pci_physfn(dev);
3706         for (bus = dev->bus; bus; bus = bus->parent) {
3707                 bridge = bus->self;
3708                 if (!bridge || !pci_is_pcie(bridge) ||
3709                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3710                         return 0;
3711                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3712                         break;
3713         }
3714         if (!bridge)
3715                 return 0;
3716
3717         rcu_read_lock();
3718         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3719                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3720                 if (atsr->segment != pci_domain_nr(dev->bus))
3721                         continue;
3722
3723                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3724                         if (tmp == &bridge->dev)
3725                                 goto out;
3726
3727                 if (atsru->include_all)
3728                         goto out;
3729         }
3730         ret = 0;
3731 out:
3732         rcu_read_unlock();
3733
3734         return ret;
3735 }
3736
3737 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3738 {
3739         int ret = 0;
3740         struct dmar_rmrr_unit *rmrru;
3741         struct dmar_atsr_unit *atsru;
3742         struct acpi_dmar_atsr *atsr;
3743         struct acpi_dmar_reserved_memory *rmrr;
3744
3745         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3746                 return 0;
3747
3748         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3749                 rmrr = container_of(rmrru->hdr,
3750                                     struct acpi_dmar_reserved_memory, header);
3751                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3752                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3753                                 ((void *)rmrr) + rmrr->header.length,
3754                                 rmrr->segment, rmrru->devices,
3755                                 rmrru->devices_cnt);
3756                         if (ret > 0)
3757                                 break;
3758                         else if (ret < 0)
3759                                 return ret;
3760                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3761                         if (dmar_remove_dev_scope(info, rmrr->segment,
3762                                 rmrru->devices, rmrru->devices_cnt))
3763                                 break;
3764                 }
3765         }
3766
3767         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3768                 if (atsru->include_all)
3769                         continue;
3770
3771                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3772                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3773                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3774                                         (void *)atsr + atsr->header.length,
3775                                         atsr->segment, atsru->devices,
3776                                         atsru->devices_cnt);
3777                         if (ret > 0)
3778                                 break;
3779                         else if (ret < 0)
3780                                 return ret;
3781                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3782                         if (dmar_remove_dev_scope(info, atsr->segment,
3783                                         atsru->devices, atsru->devices_cnt))
3784                                 break;
3785                 }
3786         }
3787
3788         return 0;
3789 }
3790
3791 /*
3792  * Here we only respond to a device being unbound from its driver.
3793  *
3794  * A newly added device is not attached to its DMAR domain here yet. That will
3795  * happen when the device is first mapped to an iova.
3796  */
3797 static int device_notifier(struct notifier_block *nb,
3798                                   unsigned long action, void *data)
3799 {
3800         struct device *dev = data;
3801         struct pci_dev *pdev = to_pci_dev(dev);
3802         struct dmar_domain *domain;
3803
3804         if (iommu_dummy(dev))
3805                 return 0;
3806
3807         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3808             action != BUS_NOTIFY_DEL_DEVICE)
3809                 return 0;
3810
3811         domain = find_domain(pdev);
3812         if (!domain)
3813                 return 0;
3814
3815         down_read(&dmar_global_lock);
3816         domain_remove_one_dev_info(domain, pdev);
3817         if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3818             !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3819             list_empty(&domain->devices))
3820                 domain_exit(domain);
3821         up_read(&dmar_global_lock);
3822
3823         return 0;
3824 }
3825
3826 static struct notifier_block device_nb = {
3827         .notifier_call = device_notifier,
3828 };
3829
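/*
 * Memory hotplug support for the static identity (si) domain: a range going
 * online is added to the identity map before it becomes visible for DMA,
 * and an offlined range is unmapped and flushed from every active IOMMU.
 * The notifier is only registered when si_domain is in use without hardware
 * passthrough (see intel_iommu_init()).
 */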
3830 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3831                                        unsigned long val, void *v)
3832 {
3833         struct memory_notify *mhp = v;
3834         unsigned long long start, end;
3835         unsigned long start_vpfn, last_vpfn;
3836
3837         switch (val) {
3838         case MEM_GOING_ONLINE:
3839                 start = mhp->start_pfn << PAGE_SHIFT;
3840                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3841                 if (iommu_domain_identity_map(si_domain, start, end)) {
3842                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3843                                 start, end);
3844                         return NOTIFY_BAD;
3845                 }
3846                 break;
3847
3848         case MEM_OFFLINE:
3849         case MEM_CANCEL_ONLINE:
3850                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3851                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3852                 while (start_vpfn <= last_vpfn) {
3853                         struct iova *iova;
3854                         struct dmar_drhd_unit *drhd;
3855                         struct intel_iommu *iommu;
3856                         struct page *freelist;
3857
3858                         iova = find_iova(&si_domain->iovad, start_vpfn);
3859                         if (iova == NULL) {
3860                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3861                                          start_vpfn);
3862                                 break;
3863                         }
3864
3865                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3866                                                      start_vpfn, last_vpfn);
3867                         if (iova == NULL) {
3868                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3869                                         start_vpfn, last_vpfn);
3870                                 return NOTIFY_BAD;
3871                         }
3872
3873                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3874                                                iova->pfn_hi);
3875
3876                         rcu_read_lock();
3877                         for_each_active_iommu(iommu, drhd)
3878                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3879                                         iova->pfn_lo,
3880                                         iova->pfn_hi - iova->pfn_lo + 1,
3881                                         !freelist, 0);
3882                         rcu_read_unlock();
3883                         dma_free_pagelist(freelist);
3884
3885                         start_vpfn = iova->pfn_hi + 1;
3886                         free_iova_mem(iova);
3887                 }
3888                 break;
3889         }
3890
3891         return NOTIFY_OK;
3892 }
3893
3894 static struct notifier_block intel_iommu_memory_nb = {
3895         .notifier_call = intel_iommu_memory_notifier,
3896         .priority = 0
3897 };
3898
3899 int __init intel_iommu_init(void)
3900 {
3901         int ret = -ENODEV;
3902         struct dmar_drhd_unit *drhd;
3903         struct intel_iommu *iommu;
3904
3905         /* VT-d is required for a TXT/tboot launch, so enforce that */
3906         force_on = tboot_force_iommu();
3907
3908         if (iommu_init_mempool()) {
3909                 if (force_on)
3910                         panic("tboot: Failed to initialize iommu memory\n");
3911                 return -ENOMEM;
3912         }
3913
3914         down_write(&dmar_global_lock);
3915         if (dmar_table_init()) {
3916                 if (force_on)
3917                         panic("tboot: Failed to initialize DMAR table\n");
3918                 goto out_free_dmar;
3919         }
3920
3921         /*
3922          * Disable translation if already enabled prior to OS handover.
3923          */
3924         for_each_active_iommu(iommu, drhd)
3925                 if (iommu->gcmd & DMA_GCMD_TE)
3926                         iommu_disable_translation(iommu);
3927
3928         if (dmar_dev_scope_init() < 0) {
3929                 if (force_on)
3930                         panic("tboot: Failed to initialize DMAR device scope\n");
3931                 goto out_free_dmar;
3932         }
3933
3934         if (no_iommu || dmar_disabled)
3935                 goto out_free_dmar;
3936
3937         if (list_empty(&dmar_rmrr_units))
3938                 printk(KERN_INFO "DMAR: No RMRR found\n");
3939
3940         if (list_empty(&dmar_atsr_units))
3941                 printk(KERN_INFO "DMAR: No ATSR found\n");
3942
3943         if (dmar_init_reserved_ranges()) {
3944                 if (force_on)
3945                         panic("tboot: Failed to reserve iommu ranges\n");
3946                 goto out_free_reserved_range;
3947         }
3948
3949         init_no_remapping_devices();
3950
3951         ret = init_dmars();
3952         if (ret) {
3953                 if (force_on)
3954                         panic("tboot: Failed to initialize DMARs\n");
3955                 printk(KERN_ERR "IOMMU: dmar init failed\n");
3956                 goto out_free_reserved_range;
3957         }
3958         up_write(&dmar_global_lock);
3959         printk(KERN_INFO
3960         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3961
3962         init_timer(&unmap_timer);
3963 #ifdef CONFIG_SWIOTLB
3964         swiotlb = 0;
3965 #endif
3966         dma_ops = &intel_dma_ops;
3967
3968         init_iommu_pm_ops();
3969
3970         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3971         bus_register_notifier(&pci_bus_type, &device_nb);
3972         if (si_domain && !hw_pass_through)
3973                 register_memory_notifier(&intel_iommu_memory_nb);
3974
3975         intel_iommu_enabled = 1;
3976
3977         return 0;
3978
3979 out_free_reserved_range:
3980         put_iova_domain(&reserved_iova_list);
3981 out_free_dmar:
3982         intel_iommu_free_dmars();
3983         up_write(&dmar_global_lock);
3984         iommu_exit_mempool();
3985         return ret;
3986 }
3987
3988 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3989                                            struct pci_dev *pdev)
3990 {
3991         struct pci_dev *tmp, *parent;
3992
3993         if (!iommu || !pdev)
3994                 return;
3995
3996         /* dependent device detach */
3997         tmp = pci_find_upstream_pcie_bridge(pdev);
3998         /* Secondary interface's bus number and devfn 0 */
3999         if (tmp) {
4000                 parent = pdev->bus->self;
4001                 while (parent != tmp) {
4002                         iommu_detach_dev(iommu, parent->bus->number,
4003                                          parent->devfn);
4004                         parent = parent->bus->self;
4005                 }
4006                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
4007                         iommu_detach_dev(iommu,
4008                                 tmp->subordinate->number, 0);
4009                 else /* this is a legacy PCI bridge */
4010                         iommu_detach_dev(iommu, tmp->bus->number,
4011                                          tmp->devfn);
4012         }
4013 }
4014
4015 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4016                                           struct pci_dev *pdev)
4017 {
4018         struct device_domain_info *info, *tmp;
4019         struct intel_iommu *iommu;
4020         unsigned long flags;
4021         int found = 0;
4022
4023         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4024                                 pdev->devfn);
4025         if (!iommu)
4026                 return;
4027
4028         spin_lock_irqsave(&device_domain_lock, flags);
4029         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4030                 if (info->segment == pci_domain_nr(pdev->bus) &&
4031                     info->bus == pdev->bus->number &&
4032                     info->devfn == pdev->devfn) {
4033                         unlink_domain_info(info);
4034                         spin_unlock_irqrestore(&device_domain_lock, flags);
4035
4036                         iommu_disable_dev_iotlb(info);
4037                         iommu_detach_dev(iommu, info->bus, info->devfn);
4038                         iommu_detach_dependent_devices(iommu, pdev);
4039                         free_devinfo_mem(info);
4040
4041                         spin_lock_irqsave(&device_domain_lock, flags);
4042
4043                         if (found)
4044                                 break;
4045                         else
4046                                 continue;
4047                 }
4048
4049                 /* if there are no other devices under the same iommu
4050                  * owned by this domain, clear this iommu in iommu_bmp,
4051                  * update iommu count and coherency
4052                  */
4053                 if (iommu == device_to_iommu(info->segment, info->bus,
4054                                             info->devfn))
4055                         found = 1;
4056         }
4057
4058         spin_unlock_irqrestore(&device_domain_lock, flags);
4059
4060         if (found == 0) {
4061                 unsigned long tmp_flags;
4062                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
4063                 clear_bit(iommu->seq_id, domain->iommu_bmp);
4064                 domain->iommu_count--;
4065                 domain_update_iommu_cap(domain);
4066                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
4067
4068                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4069                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4070                         spin_lock_irqsave(&iommu->lock, tmp_flags);
4071                         clear_bit(domain->id, iommu->domain_ids);
4072                         iommu->domains[domain->id] = NULL;
4073                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4074                 }
4075         }
4076 }
4077
4078 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4079 {
4080         int adjust_width;
4081
4082         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4083         domain_reserve_special_ranges(domain);
4084
4085         /* calculate AGAW */
4086         domain->gaw = guest_width;
4087         adjust_width = guestwidth_to_adjustwidth(guest_width);
4088         domain->agaw = width_to_agaw(adjust_width);
4089
4090         domain->iommu_coherency = 0;
4091         domain->iommu_snooping = 0;
4092         domain->iommu_superpage = 0;
4093         domain->max_addr = 0;
4094         domain->nid = -1;
4095
4096         /* always allocate the top pgd */
4097         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4098         if (!domain->pgd)
4099                 return -ENOMEM;
4100         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4101         return 0;
4102 }
4103
4104 static int intel_iommu_domain_init(struct iommu_domain *domain)
4105 {
4106         struct dmar_domain *dmar_domain;
4107
4108         dmar_domain = alloc_domain(true);
4109         if (!dmar_domain) {
4110                 printk(KERN_ERR
4111                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4112                 return -ENOMEM;
4113         }
4114         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4115                 printk(KERN_ERR
4116                         "intel_iommu_domain_init() failed\n");
4117                 domain_exit(dmar_domain);
4118                 return -ENOMEM;
4119         }
4120         domain_update_iommu_cap(dmar_domain);
4121         domain->priv = dmar_domain;
4122
4123         domain->geometry.aperture_start = 0;
4124         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4125         domain->geometry.force_aperture = true;
4126
4127         return 0;
4128 }
4129
4130 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4131 {
4132         struct dmar_domain *dmar_domain = domain->priv;
4133
4134         domain->priv = NULL;
4135         domain_exit(dmar_domain);
4136 }
4137
4138 static int intel_iommu_attach_device(struct iommu_domain *domain,
4139                                      struct device *dev)
4140 {
4141         struct dmar_domain *dmar_domain = domain->priv;
4142         struct pci_dev *pdev = to_pci_dev(dev);
4143         struct intel_iommu *iommu;
4144         int addr_width;
4145
4146         /* normally pdev is not mapped */
4147         if (unlikely(domain_context_mapped(pdev))) {
4148                 struct dmar_domain *old_domain;
4149
4150                 old_domain = find_domain(pdev);
4151                 if (old_domain) {
4152                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4153                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4154                                 domain_remove_one_dev_info(old_domain, pdev);
4155                         else
4156                                 domain_remove_dev_info(old_domain);
4157                 }
4158         }
4159
4160         iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4161                                 pdev->devfn);
4162         if (!iommu)
4163                 return -ENODEV;
4164
4165         /* check if this iommu agaw is sufficient for max mapped address */
4166         addr_width = agaw_to_width(iommu->agaw);
4167         if (addr_width > cap_mgaw(iommu->cap))
4168                 addr_width = cap_mgaw(iommu->cap);
4169
4170         if (dmar_domain->max_addr > (1LL << addr_width)) {
4171                 printk(KERN_ERR "%s: iommu width (%d) is not "
4172                        "sufficient for the mapped address (%llx)\n",
4173                        __func__, addr_width, dmar_domain->max_addr);
4174                 return -EFAULT;
4175         }
4176         dmar_domain->gaw = addr_width;
4177
4178         /*
4179          * Knock out extra levels of page tables if necessary
4180          */
4181         while (iommu->agaw < dmar_domain->agaw) {
4182                 struct dma_pte *pte;
4183
4184                 pte = dmar_domain->pgd;
4185                 if (dma_pte_present(pte)) {
4186                         dmar_domain->pgd = (struct dma_pte *)
4187                                 phys_to_virt(dma_pte_addr(pte));
4188                         free_pgtable_page(pte);
4189                 }
4190                 dmar_domain->agaw--;
4191         }
4192
4193         return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4194 }
4195
4196 static void intel_iommu_detach_device(struct iommu_domain *domain,
4197                                       struct device *dev)
4198 {
4199         struct dmar_domain *dmar_domain = domain->priv;
4200         struct pci_dev *pdev = to_pci_dev(dev);
4201
4202         domain_remove_one_dev_info(dmar_domain, pdev);
4203 }
4204
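/*
 * map/unmap callbacks for the generic IOMMU API, used e.g. by VFIO/KVM
 * device assignment.  A caller would use them roughly like this
 * (illustration only, not part of this file; dev/iova/phys/size are the
 * caller's own values):
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	iommu_attach_device(dom, dev);
 *	iommu_map(dom, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
 *
 * intel_iommu_map() translates the IOMMU_* protection flags into DMA_PTE_*
 * bits and refuses mappings beyond the domain's address width.
 */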
4205 static int intel_iommu_map(struct iommu_domain *domain,
4206                            unsigned long iova, phys_addr_t hpa,
4207                            size_t size, int iommu_prot)
4208 {
4209         struct dmar_domain *dmar_domain = domain->priv;
4210         u64 max_addr;
4211         int prot = 0;
4212         int ret;
4213
4214         if (iommu_prot & IOMMU_READ)
4215                 prot |= DMA_PTE_READ;
4216         if (iommu_prot & IOMMU_WRITE)
4217                 prot |= DMA_PTE_WRITE;
4218         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4219                 prot |= DMA_PTE_SNP;
4220
4221         max_addr = iova + size;
4222         if (dmar_domain->max_addr < max_addr) {
4223                 u64 end;
4224
4225                 /* check if minimum agaw is sufficient for mapped address */
4226                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4227                 if (end < max_addr) {
4228                         printk(KERN_ERR "%s: iommu width (%d) is not "
4229                                "sufficient for the mapped address (%llx)\n",
4230                                __func__, dmar_domain->gaw, max_addr);
4231                         return -EFAULT;
4232                 }
4233                 dmar_domain->max_addr = max_addr;
4234         }
4235         /* Round up size to next multiple of PAGE_SIZE, if it and
4236            the low bits of hpa would take us onto the next page */
4237         size = aligned_nrpages(hpa, size);
4238         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4239                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4240         return ret;
4241 }
4242
4243 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4244                                 unsigned long iova, size_t size)
4245 {
4246         struct dmar_domain *dmar_domain = domain->priv;
4247         struct page *freelist = NULL;
4248         struct intel_iommu *iommu;
4249         unsigned long start_pfn, last_pfn;
4250         unsigned int npages;
4251         int iommu_id, num, ndomains, level = 0;
4252
4253         /* Cope with horrid API which requires us to unmap more than the
4254            size argument if it happens to be a large-page mapping. */
4255         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4256                 BUG();
4257
4258         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4259                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4260
4261         start_pfn = iova >> VTD_PAGE_SHIFT;
4262         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4263
4264         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4265
4266         npages = last_pfn - start_pfn + 1;
4267
4268         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4269                iommu = g_iommus[iommu_id];
4270
4271                /*
4272                 * find bit position of dmar_domain
4273                 */
4274                ndomains = cap_ndoms(iommu->cap);
4275                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4276                        if (iommu->domains[num] == dmar_domain)
4277                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4278                                                      npages, !freelist, 0);
4279                }
4280
4281         }
4282
4283         dma_free_pagelist(freelist);
4284
4285         if (dmar_domain->max_addr == iova + size)
4286                 dmar_domain->max_addr = iova;
4287
4288         return size;
4289 }
4290
4291 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4292                                             dma_addr_t iova)
4293 {
4294         struct dmar_domain *dmar_domain = domain->priv;
4295         struct dma_pte *pte;
4296         int level = 0;
4297         u64 phys = 0;
4298
4299         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4300         if (pte)
4301                 phys = dma_pte_addr(pte);
4302
4303         return phys;
4304 }
4305
4306 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4307                                       unsigned long cap)
4308 {
4309         struct dmar_domain *dmar_domain = domain->priv;
4310
4311         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4312                 return dmar_domain->iommu_snooping;
4313         if (cap == IOMMU_CAP_INTR_REMAP)
4314                 return irq_remapping_enabled;
4315
4316         return 0;
4317 }
4318
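/*
 * ACS features a device must support to sit in its own IOMMU group: Source
 * Validation, Request Redirect, Completion Redirect and Upstream Forwarding.
 * Without them, peer-to-peer traffic can bypass the IOMMU, so
 * intel_iommu_add_device() below places devices that are not isolated from
 * each other into the same group.
 */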
4319 #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4320
4321 static int intel_iommu_add_device(struct device *dev)
4322 {
4323         struct pci_dev *pdev = to_pci_dev(dev);
4324         struct pci_dev *bridge, *dma_pdev = NULL;
4325         struct iommu_group *group;
4326         int ret;
4327
4328         if (!device_to_iommu(pci_domain_nr(pdev->bus),
4329                              pdev->bus->number, pdev->devfn))
4330                 return -ENODEV;
4331
4332         bridge = pci_find_upstream_pcie_bridge(pdev);
4333         if (bridge) {
4334                 if (pci_is_pcie(bridge))
4335                         dma_pdev = pci_get_domain_bus_and_slot(
4336                                                 pci_domain_nr(pdev->bus),
4337                                                 bridge->subordinate->number, 0);
4338                 if (!dma_pdev)
4339                         dma_pdev = pci_dev_get(bridge);
4340         } else
4341                 dma_pdev = pci_dev_get(pdev);
4342
4343         /* Account for quirked devices */
4344         swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4345
4346         /*
4347          * If it's a multifunction device that does not support our
4348          * required ACS flags, add it to the same group as the lowest numbered
4349          * function that also does not support the required ACS flags.
4350          */
4351         if (dma_pdev->multifunction &&
4352             !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4353                 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4354
4355                 for (i = 0; i < 8; i++) {
4356                         struct pci_dev *tmp;
4357
4358                         tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4359                         if (!tmp)
4360                                 continue;
4361
4362                         if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4363                                 swap_pci_ref(&dma_pdev, tmp);
4364                                 break;
4365                         }
4366                         pci_dev_put(tmp);
4367                 }
4368         }
4369
4370         /*
4371          * Devices on the root bus go through the iommu.  If that's not us,
4372          * find the next upstream device and test ACS up to the root bus.
4373          * Finding the next device may require skipping virtual buses.
4374          */
4375         while (!pci_is_root_bus(dma_pdev->bus)) {
4376                 struct pci_bus *bus = dma_pdev->bus;
4377
4378                 while (!bus->self) {
4379                         if (!pci_is_root_bus(bus))
4380                                 bus = bus->parent;
4381                         else
4382                                 goto root_bus;
4383                 }
4384
4385                 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4386                         break;
4387
4388                 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4389         }
4390
4391 root_bus:
4392         group = iommu_group_get(&dma_pdev->dev);
4393         pci_dev_put(dma_pdev);
4394         if (!group) {
4395                 group = iommu_group_alloc();
4396                 if (IS_ERR(group))
4397                         return PTR_ERR(group);
4398         }
4399
4400         ret = iommu_group_add_device(group, dev);
4401
4402         iommu_group_put(group);
4403         return ret;
4404 }
4405
4406 static void intel_iommu_remove_device(struct device *dev)
4407 {
4408         iommu_group_remove_device(dev);
4409 }
4410
4411 static struct iommu_ops intel_iommu_ops = {
4412         .domain_init    = intel_iommu_domain_init,
4413         .domain_destroy = intel_iommu_domain_destroy,
4414         .attach_dev     = intel_iommu_attach_device,
4415         .detach_dev     = intel_iommu_detach_device,
4416         .map            = intel_iommu_map,
4417         .unmap          = intel_iommu_unmap,
4418         .iova_to_phys   = intel_iommu_iova_to_phys,
4419         .domain_has_cap = intel_iommu_domain_has_cap,
4420         .add_device     = intel_iommu_add_device,
4421         .remove_device  = intel_iommu_remove_device,
4422         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4423 };
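/*
 * Editor's note (a sketch, not a quotation from this file): the ops table
 * above only takes effect once it is registered with the IOMMU core for
 * the PCI bus.  In kernels of this generation that is done with
 * bus_set_iommu(), presumably from intel_iommu_init() elsewhere in this
 * file, roughly:
 *
 *	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
 *
 * After that, iommu_domain_alloc(&pci_bus_type), iommu_attach_device(),
 * iommu_map() and friends dispatch into the callbacks listed above.
 */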
4424
4425 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4426 {
4427         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4428         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4429         dmar_map_gfx = 0;
4430 }
4431
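/*
 * Editor's note: DECLARE_PCI_FIXUP_HEADER() arranges for the quirk above
 * to run as an early "header" fixup.  The PCI core calls it for each
 * matching vendor/device ID while the device's config header is parsed
 * during enumeration, which is before the VT-d initialization that
 * consumes dmar_map_gfx.
 */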
4432 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4433 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4434 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4435 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4436 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4437 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4438 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4439
4440 static void quirk_iommu_rwbf(struct pci_dev *dev)
4441 {
4442         /*
4443          * Mobile 4 Series Chipset neglects to set RWBF capability,
4444          * but needs it. Same seems to hold for the desktop versions.
4445          */
4446         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4447         rwbf_quirk = 1;
4448 }
4449
4450 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4451 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4452 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4453 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4454 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4455 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4456 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4457
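/*
 * Editor's note: GGC is understood to be the graphics control register in
 * the host bridge's PCI config space (offset 0x52 on the parts matched
 * below).  The field masked out here reports how much memory the BIOS set
 * aside for the graphics GTT and whether a VT-capable GTT was allocated
 * at all; the individual encodings are inferred from the macro names and
 * chipset documentation rather than from anything else in this excerpt.
 */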
4458 #define GGC 0x52
4459 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4460 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4461 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4462 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4463 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4464 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4465 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4466 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4467
4468 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4469 {
4470         unsigned short ggc;
4471
4472         if (pci_read_config_word(dev, GGC, &ggc))
4473                 return;
4474
4475         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4476                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4477                 dmar_map_gfx = 0;
4478         } else if (dmar_map_gfx) {
4479                 /* we have to ensure the gfx device is idle before we flush */
4480                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4481                 intel_iommu_strict = 1;
4482         }
4483 }
4484 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4485 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4486 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4487 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4488
4489 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4490    ISOCH DMAR unit for the Azalia sound device, but not give it any
4491    TLB entries, which causes it to deadlock. Check for that.  We do
4492    this in a function called from init_dmars(), instead of in a PCI
4493    quirk, because we don't want to print the obnoxious "BIOS broken"
4494    message if VT-d is actually disabled.
4495 */
4496 static void __init check_tylersburg_isoch(void)
4497 {
4498         struct pci_dev *pdev;
4499         uint32_t vtisochctrl;
4500
4501         /* If there's no Azalia in the system anyway, forget it. */
4502         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4503         if (!pdev)
4504                 return;
4505         pci_dev_put(pdev);
4506
4507         /* System Management Registers. Might be hidden, in which case
4508            we can't do the sanity check. But that's OK, because the
4509            known-broken BIOSes _don't_ actually hide it, so far. */
4510         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4511         if (!pdev)
4512                 return;
4513
4514         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4515                 pci_dev_put(pdev);
4516                 return;
4517         }
4518
4519         pci_dev_put(pdev);
4520
4521         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4522         if (vtisochctrl & 1)
4523                 return;
4524
4525         /* Drop all bits other than the number of TLB entries */
4526         vtisochctrl &= 0x1c;
4527
4528         /* If we have the recommended number of TLB entries (16), fine. */
4529         if (vtisochctrl == 0x10)
4530                 return;
4531
4532         /* Zero TLB entries? You get to ride the short bus to school. */
4533         if (!vtisochctrl) {
4534                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4535                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4536                      dmi_get_system_info(DMI_BIOS_VENDOR),
4537                      dmi_get_system_info(DMI_BIOS_VERSION),
4538                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4539                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4540                 return;
4541         }
4542
4543         printk(KERN_WARNING "DMAR: Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4544                vtisochctrl);
4545 }