iommu/vt-d: Make device_to_iommu() cope with non-PCI devices
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a22c86c..cfc5eef 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, Intel Corporation.
+ * Copyright © 2006-2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Shaohua Li <shaohua.li@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- * Author: Fenghua Yu <fenghua.yu@intel.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>,
+ *          Ashok Raj <ashok.raj@intel.com>,
+ *          Shaohua Li <shaohua.li@intel.com>,
+ *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
+ *          Fenghua Yu <fenghua.yu@intel.com>
  */
 
 #include <linux/init.h>
@@ -33,6 +29,7 @@
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
+#include <linux/memory.h>
 #include <linux/timer.h>
 #include <linux/iova.h>
 #include <linux/iommu.h>
@@ -372,14 +369,36 @@ struct dmar_domain {
 struct device_domain_info {
        struct list_head link;  /* link to domain siblings */
        struct list_head global; /* link to global list */
-       int segment;            /* PCI domain */
        u8 bus;                 /* PCI bus number */
        u8 devfn;               /* PCI devfn number */
-       struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
+       struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
        struct intel_iommu *iommu; /* IOMMU used by this device */
        struct dmar_domain *domain; /* pointer to domain */
 };
 
+struct dmar_rmrr_unit {
+       struct list_head list;          /* list of rmrr units   */
+       struct acpi_dmar_header *hdr;   /* ACPI header          */
+       u64     base_address;           /* reserved base address */
+       u64     end_address;            /* reserved end address */
+       struct dmar_dev_scope *devices; /* target devices */
+       int     devices_cnt;            /* target device count */
+};
+
+struct dmar_atsr_unit {
+       struct list_head list;          /* list of ATSR units */
+       struct acpi_dmar_header *hdr;   /* ACPI header */
+       struct dmar_dev_scope *devices; /* target devices */
+       int devices_cnt;                /* target device count */
+       u8 include_all:1;               /* include all ports */
+};
+
+static LIST_HEAD(dmar_atsr_units);
+static LIST_HEAD(dmar_rmrr_units);
+
+#define for_each_rmrr_units(rmrr) \
+       list_for_each_entry(rmrr, &dmar_rmrr_units, list)
+
 static void flush_unmaps_timeout(unsigned long data);
 
 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
@@ -389,6 +408,7 @@ struct deferred_flush_tables {
        int next;
        struct iova *iova[HIGH_WATER_MARK];
        struct dmar_domain *domain[HIGH_WATER_MARK];
+       struct page *freelist[HIGH_WATER_MARK];
 };
 
 static struct deferred_flush_tables *deferred_flush;
@@ -402,7 +422,12 @@ static LIST_HEAD(unmaps_to_do);
 static int timer_on;
 static long list_size;
 
+static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
+static void domain_remove_one_dev_info(struct dmar_domain *domain,
+                                      struct pci_dev *pdev);
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+                                          struct device *dev);
 
 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 int dmar_disabled = 0;
@@ -566,18 +591,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 
 static void domain_update_iommu_coherency(struct dmar_domain *domain)
 {
-       int i;
-
-       i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       int i, found = 0;
 
-       domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
+       domain->iommu_coherency = 1;
 
        for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
+               found = 1;
                if (!ecap_coherent(g_iommus[i]->ecap)) {
                        domain->iommu_coherency = 0;
                        break;
                }
        }
+       if (found)
+               return;
+
+       /* No hardware attached; use lowest common denominator */
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!ecap_coherent(iommu->ecap)) {
+                       domain->iommu_coherency = 0;
+                       break;
+               }
+       }
+       rcu_read_unlock();
 }
 
 static void domain_update_iommu_snooping(struct dmar_domain *domain)
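The fallback path above walks the global DRHD list under rcu_read_lock(); the same pattern recurs throughout this patch, since the DMAR unit list is now RCU-protected and units can appear or disappear at runtime. A minimal sketch of the assumed locking rule:

	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* Every for_each_active_iommu() walk must sit inside an RCU
	 * read-side critical section. */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		/* safe to inspect iommu->cap and iommu->ecap here */
	}
	rcu_read_unlock();
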
@@ -606,12 +644,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain)
        }
 
        /* set iommu_superpage to the smallest common denominator */
+       rcu_read_lock();
        for_each_active_iommu(iommu, drhd) {
                mask &= cap_super_page_val(iommu->cap);
                if (!mask) {
                        break;
                }
        }
+       rcu_read_unlock();
+
        domain->iommu_superpage = fls(mask);
 }
 
@@ -623,32 +664,56 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
        domain_update_iommu_superpage(domain);
 }
 
-static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
+static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
        struct dmar_drhd_unit *drhd = NULL;
+       struct intel_iommu *iommu;
+       struct device *tmp;
+       struct pci_dev *ptmp, *pdev = NULL;
+       u16 segment;
        int i;
 
-       for_each_active_drhd_unit(drhd) {
-               if (segment != drhd->segment)
+       if (dev_is_pci(dev)) {
+               pdev = to_pci_dev(dev);
+               segment = pci_domain_nr(pdev->bus);
+       } else if (ACPI_COMPANION(dev))
+               dev = &ACPI_COMPANION(dev)->dev;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (pdev && segment != drhd->segment)
                        continue;
 
-               for (i = 0; i < drhd->devices_cnt; i++) {
-                       if (drhd->devices[i] &&
-                           drhd->devices[i]->bus->number == bus &&
-                           drhd->devices[i]->devfn == devfn)
-                               return drhd->iommu;
-                       if (drhd->devices[i] &&
-                           drhd->devices[i]->subordinate &&
-                           drhd->devices[i]->subordinate->number <= bus &&
-                           drhd->devices[i]->subordinate->busn_res.end >= bus)
-                               return drhd->iommu;
+               for_each_active_dev_scope(drhd->devices,
+                                         drhd->devices_cnt, i, tmp) {
+                       if (tmp == dev) {
+                               *bus = drhd->devices[i].bus;
+                               *devfn = drhd->devices[i].devfn;
+                               goto out;
+                       }
+
+                       if (!pdev || !dev_is_pci(tmp))
+                               continue;
+
+                       ptmp = to_pci_dev(tmp);
+                       if (ptmp->subordinate &&
+                           ptmp->subordinate->number <= pdev->bus->number &&
+                           ptmp->subordinate->busn_res.end >= pdev->bus->number)
+                               goto got_pdev;
                }
 
-               if (drhd->include_all)
-                       return drhd->iommu;
+               if (pdev && drhd->include_all) {
+               got_pdev:
+                       *bus = pdev->bus->number;
+                       *devfn = pdev->devfn;
+                       goto out;
+               }
        }
+       iommu = NULL;
+ out:
+       rcu_read_unlock();
 
-       return NULL;
+       return iommu;
 }
 
 static void domain_flush_cache(struct dmar_domain *domain,
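The reworked device_to_iommu() is the heart of this change: callers now pass a plain struct device (PCI or ACPI-enumerated) and receive the (bus, devfn) request ID that the returned IOMMU will actually decode. A minimal caller sketch; example_map_context() is hypothetical, but domain_context_mapping_one() is the helper as modified later in this patch:

	/* Hypothetical caller illustrating the new contract. */
	static int example_map_context(struct dmar_domain *domain,
				       struct device *dev)
	{
		struct intel_iommu *iommu;
		u8 bus, devfn;

		iommu = device_to_iommu(dev, &bus, &devfn);
		if (!iommu)
			return -ENODEV;

		/* bus/devfn now identify 'dev' in the returned IOMMU's
		 * context table, whatever bus type 'dev' is on. */
		return domain_context_mapping_one(domain, iommu, bus, devfn,
						  CONTEXT_TT_MULTI_LEVEL);
	}
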
@@ -748,7 +813,7 @@ out:
 }
 
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
-                                     unsigned long pfn, int target_level)
+                                     unsigned long pfn, int *target_level)
 {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
        struct dma_pte *parent, *pte = NULL;
@@ -763,14 +828,14 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 
        parent = domain->pgd;
 
-       while (level > 0) {
+       while (1) {
                void *tmp_page;
 
                offset = pfn_level_offset(pfn, level);
                pte = &parent[offset];
-               if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
+               if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
                        break;
-               if (level == target_level)
+               if (level == *target_level)
                        break;
 
                if (!dma_pte_present(pte)) {
@@ -791,10 +856,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
                                domain_flush_cache(domain, pte, sizeof(*pte));
                        }
                }
+               if (level == 1)
+                       break;
+
                parent = phys_to_virt(dma_pte_addr(pte));
                level--;
        }
 
+       if (!*target_level)
+               *target_level = level;
+
        return pte;
 }
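With target_level now passed by reference, a caller can request "any level" by passing 0 and learn where the walk stopped; lookup paths elsewhere in this file rely on that to detect superpages. A sketch of the assumed calling convention:

	int level = 0;	/* 0: stop wherever the walk ends */
	struct dma_pte *pte;

	pte = pfn_to_dma_pte(domain, iov_pfn, &level);
	/* On return, 'level' reports where the walk stopped: 1 for a
	 * normal PTE, higher if it hit a superpage or a non-present
	 * entry. */
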
 
@@ -832,7 +903,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 }
 
 /* clear last level pte, a tlb flush should be followed */
-static int dma_pte_clear_range(struct dmar_domain *domain,
+static void dma_pte_clear_range(struct dmar_domain *domain,
                                unsigned long start_pfn,
                                unsigned long last_pfn)
 {
@@ -862,8 +933,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
                                   (void *)pte - (void *)first_pte);
 
        } while (start_pfn && start_pfn <= last_pfn);
-
-       return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
 }
 
 static void dma_pte_free_level(struct dmar_domain *domain, int level,
@@ -921,6 +990,123 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
        }
 }
 
+/* When a page at a given level is being unlinked from its parent, we don't
+   need to *modify* it at all. All we need to do is make a list of all the
+   pages which can be freed just as soon as we've flushed the IOTLB and we
+   know the hardware page-walk will no longer touch them.
+   The 'pte' argument is the *parent* PTE, pointing to the page that is to
+   be freed. */
+static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
+                                           int level, struct dma_pte *pte,
+                                           struct page *freelist)
+{
+       struct page *pg;
+
+       pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
+       pg->freelist = freelist;
+       freelist = pg;
+
+       if (level == 1)
+               return freelist;
+
+       for (pte = page_address(pg); !first_pte_in_page(pte); pte++) {
+               if (dma_pte_present(pte) && !dma_pte_superpage(pte))
+                       freelist = dma_pte_list_pagetables(domain, level - 1,
+                                                          pte, freelist);
+       }
+
+       return freelist;
+}
+
+static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+                                       struct dma_pte *pte, unsigned long pfn,
+                                       unsigned long start_pfn,
+                                       unsigned long last_pfn,
+                                       struct page *freelist)
+{
+       struct dma_pte *first_pte = NULL, *last_pte = NULL;
+
+       pfn = max(start_pfn, pfn);
+       pte = &pte[pfn_level_offset(pfn, level)];
+
+       do {
+               unsigned long level_pfn;
+
+               if (!dma_pte_present(pte))
+                       goto next;
+
+               level_pfn = pfn & level_mask(level);
+
+               /* If range covers entire pagetable, free it */
+               if (start_pfn <= level_pfn &&
+                   last_pfn >= level_pfn + level_size(level) - 1) {
+                       /* These suborbinate page tables are going away entirely. Don't
+                          bother to clear them; we're just going to *free* them. */
+                       if (level > 1 && !dma_pte_superpage(pte))
+                               freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
+
+                       dma_clear_pte(pte);
+                       if (!first_pte)
+                               first_pte = pte;
+                       last_pte = pte;
+               } else if (level > 1) {
+                       /* Recurse down into a level that isn't *entirely* obsolete */
+                       freelist = dma_pte_clear_level(domain, level - 1,
+                                                      phys_to_virt(dma_pte_addr(pte)),
+                                                      level_pfn, start_pfn, last_pfn,
+                                                      freelist);
+               }
+next:
+               pfn += level_size(level);
+       } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+
+       if (first_pte)
+               domain_flush_cache(domain, first_pte,
+                                  (void *)++last_pte - (void *)first_pte);
+
+       return freelist;
+}
+
+/* We can't just free the pages because the IOMMU may still be walking
+   the page tables, and may have cached the intermediate levels. The
+   pages can only be freed after the IOTLB flush has been done. */
+struct page *domain_unmap(struct dmar_domain *domain,
+                         unsigned long start_pfn,
+                         unsigned long last_pfn)
+{
+       int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+       struct page *freelist = NULL;
+
+       BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+       BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
+       BUG_ON(start_pfn > last_pfn);
+
+       /* we don't need a lock here; nobody else touches the iova range */
+       freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
+                                      domain->pgd, 0, start_pfn, last_pfn, NULL);
+
+       /* free pgd */
+       if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
+               struct page *pgd_page = virt_to_page(domain->pgd);
+               pgd_page->freelist = freelist;
+               freelist = pgd_page;
+
+               domain->pgd = NULL;
+       }
+
+       return freelist;
+}
+
+void dma_free_pagelist(struct page *freelist)
+{
+       struct page *pg;
+
+       while ((pg = freelist)) {
+               freelist = pg->freelist;
+               free_pgtable_page(page_address(pg));
+       }
+}
+
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
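domain_unmap() and dma_free_pagelist() split tear-down into a "collect" phase and a "free" phase, so the hardware can never walk a freed page table. A sketch of the intended strict-flush sequence, matching the intel_unmap_page() changes later in this diff:

	struct page *freelist;

	/* 1. Clear leaf PTEs and collect now-unreachable table pages. */
	freelist = domain_unmap(domain, start_pfn, last_pfn);
	/* 2. Flush the IOTLB; the invalidation hint may be set only if
	 *    no page-table pages were freed. */
	iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
			      last_pfn - start_pfn + 1, !freelist, 0);
	/* 3. Only now is it safe to return the pages to the allocator. */
	dma_free_pagelist(freelist);
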
@@ -1030,7 +1216,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
                break;
        case DMA_TLB_PSI_FLUSH:
                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
-               /* Note: always flush non-leaf currently */
+               /* IH bit is passed in as part of the address */
                val_iva = size_order | addr;
                break;
        default:
@@ -1069,13 +1255,14 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
                        (unsigned long long)DMA_TLB_IAIG(val));
 }
 
-static struct device_domain_info *iommu_support_dev_iotlb(
-       struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
+static struct device_domain_info *
+iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
+                        u8 bus, u8 devfn)
 {
        int found = 0;
        unsigned long flags;
        struct device_domain_info *info;
-       struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
+       struct pci_dev *pdev;
 
        if (!ecap_dev_iotlb_support(iommu->ecap))
                return NULL;
@@ -1091,34 +1278,35 @@ static struct device_domain_info *iommu_support_dev_iotlb(
                }
        spin_unlock_irqrestore(&device_domain_lock, flags);
 
-       if (!found || !info->dev)
+       if (!found || !info->dev || !dev_is_pci(info->dev))
                return NULL;
 
-       if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
-               return NULL;
+       pdev = to_pci_dev(info->dev);
 
-       if (!dmar_find_matched_atsr_unit(info->dev))
+       if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
                return NULL;
 
-       info->iommu = iommu;
+       if (!dmar_find_matched_atsr_unit(pdev))
+               return NULL;
 
        return info;
 }
 
 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 {
-       if (!info)
+       if (!info || !dev_is_pci(info->dev))
                return;
 
-       pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
+       pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
 }
 
 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
 {
-       if (!info->dev || !pci_ats_enabled(info->dev))
+       if (!info->dev || !dev_is_pci(info->dev) ||
+           !pci_ats_enabled(to_pci_dev(info->dev)))
                return;
 
-       pci_disable_ats(info->dev);
+       pci_disable_ats(to_pci_dev(info->dev));
 }
 
 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -1130,24 +1318,31 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 
        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_entry(info, &domain->devices, link) {
-               if (!info->dev || !pci_ats_enabled(info->dev))
+               struct pci_dev *pdev;
+               if (!info->dev || !dev_is_pci(info->dev))
+                       continue;
+
+               pdev = to_pci_dev(info->dev);
+               if (!pci_ats_enabled(pdev))
                        continue;
 
                sid = info->bus << 8 | info->devfn;
-               qdep = pci_ats_queue_depth(info->dev);
+               qdep = pci_ats_queue_depth(pdev);
                qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-                                 unsigned long pfn, unsigned int pages, int map)
+                                 unsigned long pfn, unsigned int pages, int ih, int map)
 {
        unsigned int mask = ilog2(__roundup_pow_of_two(pages));
        uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
 
        BUG_ON(pages == 0);
 
+       if (ih)
+               ih = 1 << 6;
        /*
         * Fallback to domain selective flush if no PSI support or the size is
         * too big.
@@ -1158,7 +1353,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
                iommu->flush.flush_iotlb(iommu, did, 0, 0,
                                                DMA_TLB_DSI_FLUSH);
        else
-               iommu->flush.flush_iotlb(iommu, did, addr, mask,
+               iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
                                                DMA_TLB_PSI_FLUSH);
 
        /*
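The new 'ih' argument carries the Invalidation Hint of a page-selective IOTLB invalidation. Per the VT-d specification it occupies bit 6 of the invalidation address, which is why the flag is simply OR-ed into 'addr'. A sketch under that assumption:

	u64 addr = (u64)pfn << VTD_PAGE_SHIFT;

	/* Bit 6 = Invalidation Hint: when set, cached non-leaf
	 * (page-structure) entries may be retained, because only leaf
	 * PTEs were modified. */
	if (ih)
		addr |= 1 << 6;
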
@@ -1261,10 +1456,6 @@ static int iommu_init_domains(struct intel_iommu *iommu)
        return 0;
 }
 
-
-static void domain_exit(struct dmar_domain *domain);
-static void vm_domain_exit(struct dmar_domain *domain);
-
 static void free_dmar_iommu(struct intel_iommu *iommu)
 {
        struct dmar_domain *domain;
@@ -1273,18 +1464,21 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 
        if ((iommu->domains) && (iommu->domain_ids)) {
                for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
+                       /*
+                        * Domain id 0 is reserved for invalid translation
+                        * if hardware supports caching mode.
+                        */
+                       if (cap_caching_mode(iommu->cap) && i == 0)
+                               continue;
+
                        domain = iommu->domains[i];
                        clear_bit(i, iommu->domain_ids);
 
                        spin_lock_irqsave(&domain->iommu_lock, flags);
                        count = --domain->iommu_count;
                        spin_unlock_irqrestore(&domain->iommu_lock, flags);
-                       if (count == 0) {
-                               if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
-                                       vm_domain_exit(domain);
-                               else
-                                       domain_exit(domain);
-                       }
+                       if (count == 0)
+                               domain_exit(domain);
                }
        }
 
@@ -1298,21 +1492,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 
        g_iommus[iommu->seq_id] = NULL;
 
-       /* if all iommus are freed, free g_iommus */
-       for (i = 0; i < g_num_of_iommus; i++) {
-               if (g_iommus[i])
-                       break;
-       }
-
-       if (i == g_num_of_iommus)
-               kfree(g_iommus);
-
        /* free context mapping */
        free_context_table(iommu);
 }
 
-static struct dmar_domain *alloc_domain(void)
+static struct dmar_domain *alloc_domain(bool vm)
 {
+       /* domain id for virtual machine; it won't be set in context */
+       static atomic_t vm_domid = ATOMIC_INIT(0);
        struct dmar_domain *domain;
 
        domain = alloc_domain_mem();
@@ -1320,8 +1507,15 @@ static struct dmar_domain *alloc_domain(void)
                return NULL;
 
        domain->nid = -1;
+       domain->iommu_count = 0;
        memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
        domain->flags = 0;
+       spin_lock_init(&domain->iommu_lock);
+       INIT_LIST_HEAD(&domain->devices);
+       if (vm) {
+               domain->id = atomic_inc_return(&vm_domid);
+               domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
+       }
 
        return domain;
 }
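alloc_domain() now takes a flag separating hardware-backed domains from virtual-machine domains; the latter draw their ids from the private vm_domid counter and never have that id written into context entries. Assumed call sites, for illustration:

	struct dmar_domain *dma_dom, *vm_dom;

	dma_dom = alloc_domain(false);	/* id assigned later by
					 * iommu_attach_domain() */
	vm_dom = alloc_domain(true);	/* private id from vm_domid;
					 * DOMAIN_FLAG_VIRTUAL_MACHINE set */
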
@@ -1345,6 +1539,7 @@ static int iommu_attach_domain(struct dmar_domain *domain,
        }
 
        domain->id = num;
+       domain->iommu_count++;
        set_bit(num, iommu->domain_ids);
        set_bit(iommu->seq_id, domain->iommu_bmp);
        iommu->domains[num] = domain;
@@ -1358,22 +1553,16 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 {
        unsigned long flags;
        int num, ndomains;
-       int found = 0;
 
        spin_lock_irqsave(&iommu->lock, flags);
        ndomains = cap_ndoms(iommu->cap);
        for_each_set_bit(num, iommu->domain_ids, ndomains) {
                if (iommu->domains[num] == domain) {
-                       found = 1;
+                       clear_bit(num, iommu->domain_ids);
+                       iommu->domains[num] = NULL;
                        break;
                }
        }
-
-       if (found) {
-               clear_bit(num, iommu->domain_ids);
-               clear_bit(iommu->seq_id, domain->iommu_bmp);
-               iommu->domains[num] = NULL;
-       }
        spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -1445,8 +1634,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
        unsigned long sagaw;
 
        init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-       spin_lock_init(&domain->iommu_lock);
-
        domain_reserve_special_ranges(domain);
 
        /* calculate AGAW */
@@ -1465,7 +1652,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
                        return -ENODEV;
        }
        domain->agaw = agaw;
-       INIT_LIST_HEAD(&domain->devices);
 
        if (ecap_coherent(iommu->ecap))
                domain->iommu_coherency = 1;
@@ -1477,8 +1663,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
        else
                domain->iommu_snooping = 0;
 
-       domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
-       domain->iommu_count = 1;
+       if (intel_iommu_superpage)
+               domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
+       else
+               domain->iommu_superpage = 0;
+
        domain->nid = iommu->node;
 
        /* always allocate the top pgd */
@@ -1493,6 +1682,7 @@ static void domain_exit(struct dmar_domain *domain)
 {
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
+       struct page *freelist = NULL;
 
        /* Domain 0 is reserved, so dont process it */
        if (!domain)
@@ -1502,29 +1692,33 @@ static void domain_exit(struct dmar_domain *domain)
        if (!intel_iommu_strict)
                flush_unmaps_timeout(0);
 
+       /* remove associated devices */
        domain_remove_dev_info(domain);
+
        /* destroy iovas */
        put_iova_domain(&domain->iovad);
 
-       /* clear ptes */
-       dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
-
-       /* free page tables */
-       dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+       freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
 
+       /* clear attached or cached domains */
+       rcu_read_lock();
        for_each_active_iommu(iommu, drhd)
-               if (test_bit(iommu->seq_id, domain->iommu_bmp))
+               if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
+                   test_bit(iommu->seq_id, domain->iommu_bmp))
                        iommu_detach_domain(domain, iommu);
+       rcu_read_unlock();
+
+       dma_free_pagelist(freelist);
 
        free_domain_mem(domain);
 }
 
-static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
-                                u8 bus, u8 devfn, int translation)
+static int domain_context_mapping_one(struct dmar_domain *domain,
+                                     struct intel_iommu *iommu,
+                                     u8 bus, u8 devfn, int translation)
 {
        struct context_entry *context;
        unsigned long flags;
-       struct intel_iommu *iommu;
        struct dma_pte *pgd;
        unsigned long num;
        unsigned long ndomains;
@@ -1539,10 +1733,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
        BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
               translation != CONTEXT_TT_MULTI_LEVEL);
 
-       iommu = device_to_iommu(segment, bus, devfn);
-       if (!iommu)
-               return -ENODEV;
-
        context = device_to_context_entry(iommu, bus, devfn);
        if (!context)
                return -ENOMEM;
@@ -1600,7 +1790,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
        context_set_domain_id(context, id);
 
        if (translation != CONTEXT_TT_PASS_THROUGH) {
-               info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
+               info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
                translation = info ? CONTEXT_TT_DEV_IOTLB :
                                     CONTEXT_TT_MULTI_LEVEL;
        }
@@ -1655,9 +1845,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
 {
        int ret;
        struct pci_dev *tmp, *parent;
+       struct intel_iommu *iommu;
+       u8 bus, devfn;
+
+       iommu = device_to_iommu(&pdev->dev, &bus, &devfn);
+       if (!iommu)
+               return -ENODEV;
 
-       ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
-                                        pdev->bus->number, pdev->devfn,
+       ret = domain_context_mapping_one(domain, iommu, bus, devfn,
                                         translation);
        if (ret)
                return ret;
@@ -1669,8 +1864,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
        /* Secondary interface's bus number and devfn 0 */
        parent = pdev->bus->self;
        while (parent != tmp) {
-               ret = domain_context_mapping_one(domain,
-                                                pci_domain_nr(parent->bus),
+               ret = domain_context_mapping_one(domain, iommu,
                                                 parent->bus->number,
                                                 parent->devfn, translation);
                if (ret)
@@ -1678,13 +1872,11 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
                parent = parent->bus->self;
        }
        if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
-               return domain_context_mapping_one(domain,
-                                       pci_domain_nr(tmp->subordinate),
+               return domain_context_mapping_one(domain, iommu,
                                        tmp->subordinate->number, 0,
                                        translation);
        else /* this is a legacy PCI bridge */
-               return domain_context_mapping_one(domain,
-                                                 pci_domain_nr(tmp->bus),
+               return domain_context_mapping_one(domain, iommu,
                                                  tmp->bus->number,
                                                  tmp->devfn,
                                                  translation);
@@ -1695,13 +1887,13 @@ static int domain_context_mapped(struct pci_dev *pdev)
        int ret;
        struct pci_dev *tmp, *parent;
        struct intel_iommu *iommu;
+       u8 bus, devfn;
 
-       iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
-                               pdev->devfn);
+       iommu = device_to_iommu(&pdev->dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;
 
-       ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
+       ret = device_context_mapped(iommu, bus, devfn);
        if (!ret)
                return ret;
        /* dependent device mapping */
@@ -1800,7 +1992,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                if (!pte) {
                        largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
 
-                       first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
+                       first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
                        if (!pte)
                                return -ENOMEM;
                        /* It is large page*/
@@ -1899,14 +2091,13 @@ static inline void unlink_domain_info(struct device_domain_info *info)
        list_del(&info->link);
        list_del(&info->global);
        if (info->dev)
-               info->dev->dev.archdata.iommu = NULL;
+               info->dev->archdata.iommu = NULL;
 }
 
 static void domain_remove_dev_info(struct dmar_domain *domain)
 {
        struct device_domain_info *info;
-       unsigned long flags;
-       struct intel_iommu *iommu;
+       unsigned long flags, flags2;
 
        spin_lock_irqsave(&device_domain_lock, flags);
        while (!list_empty(&domain->devices)) {
@@ -1916,10 +2107,23 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
                spin_unlock_irqrestore(&device_domain_lock, flags);
 
                iommu_disable_dev_iotlb(info);
-               iommu = device_to_iommu(info->segment, info->bus, info->devfn);
-               iommu_detach_dev(iommu, info->bus, info->devfn);
-               free_devinfo_mem(info);
+               iommu_detach_dev(info->iommu, info->bus, info->devfn);
 
+               if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
+                       iommu_detach_dependent_devices(info->iommu, info->dev);
+                       /* clear this iommu in iommu_bmp, update iommu count
+                        * and capabilities
+                        */
+                       spin_lock_irqsave(&domain->iommu_lock, flags2);
+                       if (test_and_clear_bit(info->iommu->seq_id,
+                                              domain->iommu_bmp)) {
+                               domain->iommu_count--;
+                               domain_update_iommu_cap(domain);
+                       }
+                       spin_unlock_irqrestore(&domain->iommu_lock, flags2);
+               }
+
+               free_devinfo_mem(info);
                spin_lock_irqsave(&device_domain_lock, flags);
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1927,34 +2131,91 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
 
 /*
  * find_domain
- * Note: we use struct pci_dev->dev.archdata.iommu stores the info
+ * Note: we use struct device->archdata.iommu to store the info
  */
-static struct dmar_domain *
-find_domain(struct pci_dev *pdev)
+static struct dmar_domain *find_domain(struct device *dev)
 {
        struct device_domain_info *info;
 
        /* No lock here, assumes no domain exit in normal case */
-       info = pdev->dev.archdata.iommu;
+       info = dev->archdata.iommu;
        if (info)
                return info->domain;
        return NULL;
 }
 
+static inline struct device_domain_info *
+dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
+{
+       struct device_domain_info *info;
+
+       list_for_each_entry(info, &device_domain_list, global)
+               if (info->iommu->segment == segment && info->bus == bus &&
+                   info->devfn == devfn)
+                       return info;
+
+       return NULL;
+}
+
+static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
+                                               int bus, int devfn,
+                                               struct device *dev,
+                                               struct dmar_domain *domain)
+{
+       struct dmar_domain *found = NULL;
+       struct device_domain_info *info;
+       unsigned long flags;
+
+       info = alloc_devinfo_mem();
+       if (!info)
+               return NULL;
+
+       info->bus = bus;
+       info->devfn = devfn;
+       info->dev = dev;
+       info->domain = domain;
+       info->iommu = iommu;
+       if (!dev)
+               domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       if (dev)
+               found = find_domain(dev);
+       else {
+               struct device_domain_info *info2;
+               info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+               if (info2)
+                       found = info2->domain;
+       }
+       if (found) {
+               spin_unlock_irqrestore(&device_domain_lock, flags);
+               free_devinfo_mem(info);
+               /* Caller must free the original domain */
+               return found;
+       }
+
+       list_add(&info->link, &domain->devices);
+       list_add(&info->global, &device_domain_list);
+       if (dev)
+               dev->archdata.iommu = info;
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return domain;
+}
+
 /* domain is initialized */
 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 {
-       struct dmar_domain *domain, *found = NULL;
-       struct intel_iommu *iommu;
+       struct dmar_domain *domain, *free = NULL;
+       struct intel_iommu *iommu = NULL;
+       struct device_domain_info *info;
        struct dmar_drhd_unit *drhd;
-       struct device_domain_info *info, *tmp;
        struct pci_dev *dev_tmp;
        unsigned long flags;
        int bus = 0, devfn = 0;
        int segment;
-       int ret;
 
-       domain = find_domain(pdev);
+       domain = find_domain(&pdev->dev);
        if (domain)
                return domain;
 
@@ -1970,112 +2231,52 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
                        devfn = dev_tmp->devfn;
                }
                spin_lock_irqsave(&device_domain_lock, flags);
-               list_for_each_entry(info, &device_domain_list, global) {
-                       if (info->segment == segment &&
-                           info->bus == bus && info->devfn == devfn) {
-                               found = info->domain;
-                               break;
-                       }
+               info = dmar_search_domain_by_dev_info(segment, bus, devfn);
+               if (info) {
+                       iommu = info->iommu;
+                       domain = info->domain;
                }
                spin_unlock_irqrestore(&device_domain_lock, flags);
-               /* pcie-pci bridge already has a domain, uses it */
-               if (found) {
-                       domain = found;
+               if (info)
                        goto found_domain;
-               }
        }
 
-       domain = alloc_domain();
-       if (!domain)
-               goto error;
-
-       /* Allocate new domain for the device */
        drhd = dmar_find_matched_drhd_unit(pdev);
        if (!drhd) {
                printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
                        pci_name(pdev));
-               free_domain_mem(domain);
                return NULL;
        }
        iommu = drhd->iommu;
 
-       ret = iommu_attach_domain(domain, iommu);
-       if (ret) {
+       /* Allocate and initialize a new domain for the device */
+       domain = alloc_domain(false);
+       if (!domain)
+               goto error;
+       if (iommu_attach_domain(domain, iommu)) {
                free_domain_mem(domain);
                goto error;
        }
-
-       if (domain_init(domain, gaw)) {
-               domain_exit(domain);
+       free = domain;
+       if (domain_init(domain, gaw))
                goto error;
-       }
 
        /* register pcie-to-pci device */
        if (dev_tmp) {
-               info = alloc_devinfo_mem();
-               if (!info) {
-                       domain_exit(domain);
+               domain = dmar_insert_dev_info(iommu, bus, devfn, NULL,
+                                             domain);
+               if (!domain)
                        goto error;
-               }
-               info->segment = segment;
-               info->bus = bus;
-               info->devfn = devfn;
-               info->dev = NULL;
-               info->domain = domain;
-               /* This domain is shared by devices under p2p bridge */
-               domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
-
-               /* pcie-to-pci bridge already has a domain, uses it */
-               found = NULL;
-               spin_lock_irqsave(&device_domain_lock, flags);
-               list_for_each_entry(tmp, &device_domain_list, global) {
-                       if (tmp->segment == segment &&
-                           tmp->bus == bus && tmp->devfn == devfn) {
-                               found = tmp->domain;
-                               break;
-                       }
-               }
-               if (found) {
-                       spin_unlock_irqrestore(&device_domain_lock, flags);
-                       free_devinfo_mem(info);
-                       domain_exit(domain);
-                       domain = found;
-               } else {
-                       list_add(&info->link, &domain->devices);
-                       list_add(&info->global, &device_domain_list);
-                       spin_unlock_irqrestore(&device_domain_lock, flags);
-               }
        }
 
 found_domain:
-       info = alloc_devinfo_mem();
-       if (!info)
-               goto error;
-       info->segment = segment;
-       info->bus = pdev->bus->number;
-       info->devfn = pdev->devfn;
-       info->dev = pdev;
-       info->domain = domain;
-       spin_lock_irqsave(&device_domain_lock, flags);
-       /* somebody is fast */
-       found = find_domain(pdev);
-       if (found != NULL) {
-               spin_unlock_irqrestore(&device_domain_lock, flags);
-               if (found != domain) {
-                       domain_exit(domain);
-                       domain = found;
-               }
-               free_devinfo_mem(info);
-               return domain;
-       }
-       list_add(&info->link, &domain->devices);
-       list_add(&info->global, &device_domain_list);
-       pdev->dev.archdata.iommu = info;
-       spin_unlock_irqrestore(&device_domain_lock, flags);
-       return domain;
+       domain = dmar_insert_dev_info(iommu, pdev->bus->number,
+                                     pdev->devfn, &pdev->dev, domain);
 error:
-       /* recheck it here, maybe others set it */
-       return find_domain(pdev);
+       if (free != domain)
+               domain_exit(free);
+
+       return domain;
 }
 
 static int iommu_identity_mapping;
@@ -2213,10 +2414,12 @@ static int __init si_domain_init(int hw)
        struct intel_iommu *iommu;
        int nid, ret = 0;
 
-       si_domain = alloc_domain();
+       si_domain = alloc_domain(false);
        if (!si_domain)
                return -EFAULT;
 
+       si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
+
        for_each_active_iommu(iommu, drhd) {
                ret = iommu_attach_domain(si_domain, iommu);
                if (ret) {
@@ -2230,7 +2433,6 @@ static int __init si_domain_init(int hw)
                return -EFAULT;
        }
 
-       si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
        pr_debug("IOMMU: identity mapping domain is domain %d\n",
                 si_domain->id);
 
@@ -2252,16 +2454,14 @@ static int __init si_domain_init(int hw)
        return 0;
 }
 
-static void domain_remove_one_dev_info(struct dmar_domain *domain,
-                                         struct pci_dev *pdev);
-static int identity_mapping(struct pci_dev *pdev)
+static int identity_mapping(struct device *dev)
 {
        struct device_domain_info *info;
 
        if (likely(!iommu_identity_mapping))
                return 0;
 
-       info = pdev->dev.archdata.iommu;
+       info = dev->archdata.iommu;
        if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
                return (info->domain == si_domain);
 
@@ -2272,32 +2472,22 @@ static int domain_add_dev_info(struct dmar_domain *domain,
                               struct pci_dev *pdev,
                               int translation)
 {
-       struct device_domain_info *info;
-       unsigned long flags;
+       struct dmar_domain *ndomain;
+       struct intel_iommu *iommu;
+       u8 bus, devfn;
        int ret;
 
-       info = alloc_devinfo_mem();
-       if (!info)
-               return -ENOMEM;
-
-       info->segment = pci_domain_nr(pdev->bus);
-       info->bus = pdev->bus->number;
-       info->devfn = pdev->devfn;
-       info->dev = pdev;
-       info->domain = domain;
+       iommu = device_to_iommu(&pdev->dev, &bus, &devfn);
+       if (!iommu)
+               return -ENODEV;
 
-       spin_lock_irqsave(&device_domain_lock, flags);
-       list_add(&info->link, &domain->devices);
-       list_add(&info->global, &device_domain_list);
-       pdev->dev.archdata.iommu = info;
-       spin_unlock_irqrestore(&device_domain_lock, flags);
+       ndomain = dmar_insert_dev_info(iommu, bus, devfn, &pdev->dev, domain);
+       if (ndomain != domain)
+               return -EBUSY;
 
        ret = domain_context_mapping(domain, pdev, translation);
        if (ret) {
-               spin_lock_irqsave(&device_domain_lock, flags);
-               unlink_domain_info(info);
-               spin_unlock_irqrestore(&device_domain_lock, flags);
-               free_devinfo_mem(info);
+               domain_remove_one_dev_info(domain, pdev);
                return ret;
        }
 
@@ -2307,18 +2497,23 @@ static int domain_add_dev_info(struct dmar_domain *domain,
 static bool device_has_rmrr(struct pci_dev *dev)
 {
        struct dmar_rmrr_unit *rmrr;
+       struct device *tmp;
        int i;
 
+       rcu_read_lock();
        for_each_rmrr_units(rmrr) {
-               for (i = 0; i < rmrr->devices_cnt; i++) {
-                       /*
-                        * Return TRUE if this RMRR contains the device that
-                        * is passed in.
-                        */
-                       if (rmrr->devices[i] == dev)
+               /*
+                * Return TRUE if this RMRR contains the device that
+                * is passed in.
+                */
+               for_each_active_dev_scope(rmrr->devices,
+                                         rmrr->devices_cnt, i, tmp)
+                       if (tmp == &dev->dev) {
+                               rcu_read_unlock();
                                return true;
-               }
+                       }
        }
+       rcu_read_unlock();
        return false;
 }
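device_has_rmrr() now matches struct device pointers and iterates with for_each_active_dev_scope(), which visits only scope slots that have a device bound, so BIOS-listed but absent devices are skipped automatically. A usage sketch, with semantics inferred from the call sites in this patch:

	struct device *tmp;
	int i;

	/* 'i' indexes the scope array and 'tmp' is the device bound to
	 * slot i; empty slots never reach the loop body. As elsewhere,
	 * the walk must run under rcu_read_lock(). */
	rcu_read_lock();
	for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp)
		if (tmp == &pdev->dev)
			break;	/* pdev is covered by this RMRR */
	rcu_read_unlock();
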
 
@@ -2425,7 +2620,7 @@ static int __init init_dmars(void)
 {
        struct dmar_drhd_unit *drhd;
        struct dmar_rmrr_unit *rmrr;
-       struct pci_dev *pdev;
+       struct device *dev;
        struct intel_iommu *iommu;
        int i, ret;
 
@@ -2461,7 +2656,7 @@ static int __init init_dmars(void)
                sizeof(struct deferred_flush_tables), GFP_KERNEL);
        if (!deferred_flush) {
                ret = -ENOMEM;
-               goto error;
+               goto free_g_iommus;
        }
 
        for_each_active_iommu(iommu, drhd) {
@@ -2469,7 +2664,7 @@ static int __init init_dmars(void)
 
                ret = iommu_init_domains(iommu);
                if (ret)
-                       goto error;
+                       goto free_iommu;
 
                /*
                 * TBD:
@@ -2479,7 +2674,7 @@ static int __init init_dmars(void)
                ret = iommu_alloc_root_entry(iommu);
                if (ret) {
                        printk(KERN_ERR "IOMMU: allocate root entry failed\n");
-                       goto error;
+                       goto free_iommu;
                }
                if (!ecap_pass_through(iommu->ecap))
                        hw_pass_through = 0;
@@ -2548,7 +2743,7 @@ static int __init init_dmars(void)
                ret = iommu_prepare_static_identity_mapping(hw_pass_through);
                if (ret) {
                        printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
-                       goto error;
+                       goto free_iommu;
                }
        }
        /*
@@ -2567,15 +2762,12 @@ static int __init init_dmars(void)
         */
        printk(KERN_INFO "IOMMU: Setting RMRR:\n");
        for_each_rmrr_units(rmrr) {
-               for (i = 0; i < rmrr->devices_cnt; i++) {
-                       pdev = rmrr->devices[i];
-                       /*
-                        * some BIOS lists non-exist devices in DMAR
-                        * table.
-                        */
-                       if (!pdev)
+               /* Some BIOSes list non-existent devices in the DMAR table. */
+               for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+                                         i, dev) {
+                       if (!dev_is_pci(dev))
                                continue;
-                       ret = iommu_prepare_rmrr_dev(rmrr, pdev);
+                       ret = iommu_prepare_rmrr_dev(rmrr, to_pci_dev(dev));
                        if (ret)
                                printk(KERN_ERR
                                       "IOMMU: mapping reserved region failed\n");
@@ -2606,7 +2798,7 @@ static int __init init_dmars(void)
 
                ret = dmar_set_interrupt(iommu);
                if (ret)
-                       goto error;
+                       goto free_iommu;
 
                iommu_set_root_entry(iommu);
 
@@ -2615,17 +2807,20 @@ static int __init init_dmars(void)
 
                ret = iommu_enable_translation(iommu);
                if (ret)
-                       goto error;
+                       goto free_iommu;
 
                iommu_disable_protect_mem_regions(iommu);
        }
 
        return 0;
-error:
+
+free_iommu:
        for_each_active_iommu(iommu, drhd)
                free_dmar_iommu(iommu);
        kfree(deferred_flush);
+free_g_iommus:
        kfree(g_iommus);
+error:
        return ret;
 }
 
@@ -2701,9 +2896,9 @@ static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
        return __get_valid_domain_for_dev(dev);
 }
 
-static int iommu_dummy(struct pci_dev *pdev)
+static int iommu_dummy(struct device *dev)
 {
-       return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+       return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
 /* Check if the pdev needs to go through non-identity map and unmap process.*/
@@ -2715,14 +2910,14 @@ static int iommu_no_mapping(struct device *dev)
        if (unlikely(!dev_is_pci(dev)))
                return 1;
 
-       pdev = to_pci_dev(dev);
-       if (iommu_dummy(pdev))
+       if (iommu_dummy(dev))
                return 1;
 
        if (!iommu_identity_mapping)
                return 0;
 
-       found = identity_mapping(pdev);
+       pdev = to_pci_dev(dev);
+       found = identity_mapping(dev);
        if (found) {
                if (iommu_should_identity_map(pdev, 0))
                        return 1;
@@ -2808,7 +3003,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 
        /* it's a non-present to present mapping. Only flush if caching mode */
        if (cap_caching_mode(iommu->cap))
-               iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
+               iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
        else
                iommu_flush_write_buffer(iommu);
 
@@ -2860,13 +3055,16 @@ static void flush_unmaps(void)
                        /* On real hardware multiple invalidations are expensive */
                        if (cap_caching_mode(iommu->cap))
                                iommu_flush_iotlb_psi(iommu, domain->id,
-                               iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
+                                       iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
+                                       !deferred_flush[i].freelist[j], 0);
                        else {
                                mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
                                iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
                                                (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
                        }
                        __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
+                       if (deferred_flush[i].freelist[j])
+                               dma_free_pagelist(deferred_flush[i].freelist[j]);
                }
                deferred_flush[i].next = 0;
        }
@@ -2883,7 +3081,7 @@ static void flush_unmaps_timeout(unsigned long data)
        spin_unlock_irqrestore(&async_umap_flush_lock, flags);
 }
 
-static void add_unmap(struct dmar_domain *dom, struct iova *iova)
+static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
 {
        unsigned long flags;
        int next, iommu_id;
@@ -2899,6 +3097,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
        next = deferred_flush[iommu_id].next;
        deferred_flush[iommu_id].domain[next] = dom;
        deferred_flush[iommu_id].iova[next] = iova;
+       deferred_flush[iommu_id].freelist[next] = freelist;
        deferred_flush[iommu_id].next++;
 
        if (!timer_on) {
@@ -2918,11 +3117,12 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
        unsigned long start_pfn, last_pfn;
        struct iova *iova;
        struct intel_iommu *iommu;
+       struct page *freelist;
 
        if (iommu_no_mapping(dev))
                return;
 
-       domain = find_domain(pdev);
+       domain = find_domain(dev);
        BUG_ON(!domain);
 
        iommu = domain_get_iommu(domain);
@@ -2938,19 +3138,16 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
        pr_debug("Device %s unmapping: pfn %lx-%lx\n",
                 pci_name(pdev), start_pfn, last_pfn);
 
-       /*  clear the whole page */
-       dma_pte_clear_range(domain, start_pfn, last_pfn);
-
-       /* free page tables */
-       dma_pte_free_pagetable(domain, start_pfn, last_pfn);
+       freelist = domain_unmap(domain, start_pfn, last_pfn);
 
        if (intel_iommu_strict) {
                iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
-                                     last_pfn - start_pfn + 1, 0);
+                                     last_pfn - start_pfn + 1, !freelist, 0);
                /* free iova */
                __free_iova(&domain->iovad, iova);
+               dma_free_pagelist(freelist);
        } else {
-               add_unmap(domain, iova);
+               add_unmap(domain, iova, freelist);
                /*
                 * queue up the release of the unmap to save the 1/6th of the
                 * cpu used up by the iotlb flush operation...
@@ -3007,16 +3204,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
                           int nelems, enum dma_data_direction dir,
                           struct dma_attrs *attrs)
 {
-       struct pci_dev *pdev = to_pci_dev(hwdev);
        struct dmar_domain *domain;
        unsigned long start_pfn, last_pfn;
        struct iova *iova;
        struct intel_iommu *iommu;
+       struct page *freelist;
 
        if (iommu_no_mapping(hwdev))
                return;
 
-       domain = find_domain(pdev);
+       domain = find_domain(hwdev);
        BUG_ON(!domain);
 
        iommu = domain_get_iommu(domain);
@@ -3029,19 +3226,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
        start_pfn = mm_to_dma_pfn(iova->pfn_lo);
        last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
 
-       /*  clear the whole page */
-       dma_pte_clear_range(domain, start_pfn, last_pfn);
-
-       /* free page tables */
-       dma_pte_free_pagetable(domain, start_pfn, last_pfn);
+       freelist = domain_unmap(domain, start_pfn, last_pfn);
 
        if (intel_iommu_strict) {
                iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
-                                     last_pfn - start_pfn + 1, 0);
+                                     last_pfn - start_pfn + 1, !freelist, 0);
                /* free iova */
                __free_iova(&domain->iovad, iova);
+               dma_free_pagelist(freelist);
        } else {
-               add_unmap(domain, iova);
+               add_unmap(domain, iova, freelist);
                /*
                 * queue up the release of the unmap to save the 1/6th of the
                 * cpu used up by the iotlb flush operation...
@@ -3124,7 +3318,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 
        /* it's a non-present to present mapping. Only flush if caching mode */
        if (cap_caching_mode(iommu->cap))
-               iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
+               iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
        else
                iommu_flush_write_buffer(iommu);
 
@@ -3259,29 +3453,28 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir
 static void __init init_no_remapping_devices(void)
 {
        struct dmar_drhd_unit *drhd;
+       struct device *dev;
+       int i;
 
        for_each_drhd_unit(drhd) {
                if (!drhd->include_all) {
-                       int i;
-                       for (i = 0; i < drhd->devices_cnt; i++)
-                               if (drhd->devices[i] != NULL)
-                                       break;
-                       /* ignore DMAR unit if no pci devices exist */
+                       for_each_active_dev_scope(drhd->devices,
+                                                 drhd->devices_cnt, i, dev)
+                               break;
+                       /* ignore DMAR unit if no devices exist */
                        if (i == drhd->devices_cnt)
                                drhd->ignored = 1;
                }
        }
 
        for_each_active_drhd_unit(drhd) {
-               int i;
                if (drhd->include_all)
                        continue;
 
-               for (i = 0; i < drhd->devices_cnt; i++)
-                       if (drhd->devices[i] &&
-                           !IS_GFX_DEVICE(drhd->devices[i]))
+               for_each_active_dev_scope(drhd->devices,
+                                         drhd->devices_cnt, i, dev)
+                       if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
                                break;
-
                if (i < drhd->devices_cnt)
                        continue;
 
@@ -3291,11 +3484,9 @@ static void __init init_no_remapping_devices(void)
                        intel_iommu_gfx_mapped = 1;
                } else {
                        drhd->ignored = 1;
-                       for (i = 0; i < drhd->devices_cnt; i++) {
-                               if (!drhd->devices[i])
-                                       continue;
-                               drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
-                       }
+                       for_each_active_dev_scope(drhd->devices,
+                                                 drhd->devices_cnt, i, dev)
+                               dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
                }
        }
 }
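The open-coded scans over drhd->devices[] above become dev-scope iterators, which walk generic struct device slots and skip ones not yet bound to a device; hence the dev_is_pci() check before to_pci_dev(). Roughly, under the assumption that the dmar.h helpers expand like this (treat the exact expansion as a reconstruction):

struct dmar_dev_scope {
        struct device *dev;     /* NULL until the device is discovered */
        u8 bus;
        u8 devfn;
};

#define for_each_dev_scope(devs, cnt, i, d)                             \
        for ((i) = 0; ((d) = (i) < (cnt) ? (devs)[(i)].dev : NULL,      \
             (i) < (cnt)); (i)++)

#define for_each_active_dev_scope(devs, cnt, i, d)                      \
        for_each_dev_scope((devs), (cnt), (i), (d))                     \
                if (!(d)) { continue; } else

With this expansion, a bare "break" in the body leaves i == devices_cnt when no active device was found, which is exactly what the checks above rely on.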
@@ -3438,13 +3629,6 @@ static void __init init_iommu_pm_ops(void)
 static inline void init_iommu_pm_ops(void) {}
 #endif /* CONFIG_PM */
 
-LIST_HEAD(dmar_rmrr_units);
-
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
-       list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
 
 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 {
@@ -3459,25 +3643,19 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
        rmrr = (struct acpi_dmar_reserved_memory *)header;
        rmrru->base_address = rmrr->base_address;
        rmrru->end_address = rmrr->end_address;
+       rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
+                               ((void *)rmrr) + rmrr->header.length,
+                               &rmrru->devices_cnt);
+       if (rmrru->devices_cnt && rmrru->devices == NULL) {
+               kfree(rmrru);
+               return -ENOMEM;
+       }
 
-       dmar_register_rmrr_unit(rmrru);
-       return 0;
-}
-
-static int __init
-rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
-{
-       struct acpi_dmar_reserved_memory *rmrr;
+       list_add(&rmrru->list, &dmar_rmrr_units);
 
-       rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
-       return dmar_parse_dev_scope((void *)(rmrr + 1),
-                                   ((void *)rmrr) + rmrr->header.length,
-                                   &rmrru->devices_cnt, &rmrru->devices,
-                                   rmrr->segment);
+       return 0;
 }
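RMRR parsing no longer resolves PCI devices at boot: it only sizes and allocates the scope array, and dmar_iommu_notify_scope_dev() below binds real devices into the slots as they are enumerated or hot-added. A sketch of what the allocator is assumed to do:

void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
{
        struct acpi_dmar_device_scope *scope;

        /* Count the scope entries we can represent ... */
        *cnt = 0;
        for (scope = start; (void *)scope < end;
             scope = (void *)scope + scope->length)
                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
                        (*cnt)++;

        if (!*cnt)
                return NULL;

        /* ... and allocate one (initially empty) slot per entry. */
        return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
}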
 
-static LIST_HEAD(dmar_atsr_units);
-
 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
 {
        struct acpi_dmar_atsr *atsr;
@@ -3490,26 +3668,21 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
 
        atsru->hdr = hdr;
        atsru->include_all = atsr->flags & 0x1;
+       if (!atsru->include_all) {
+               atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
+                               (void *)atsr + atsr->header.length,
+                               &atsru->devices_cnt);
+               if (atsru->devices_cnt && atsru->devices == NULL) {
+                       kfree(atsru);
+                       return -ENOMEM;
+               }
+       }
 
-       list_add(&atsru->list, &dmar_atsr_units);
+       list_add_rcu(&atsru->list, &dmar_atsr_units);
 
        return 0;
 }
 
-static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
-{
-       struct acpi_dmar_atsr *atsr;
-
-       if (atsru->include_all)
-               return 0;
-
-       atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
-       return dmar_parse_dev_scope((void *)(atsr + 1),
-                                   (void *)atsr + atsr->header.length,
-                                   &atsru->devices_cnt, &atsru->devices,
-                                   atsr->segment);
-}
-
 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
 {
        dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
@@ -3535,62 +3708,97 @@ static void intel_iommu_free_dmars(void)
 
 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
 {
-       int i;
+       int i, ret = 1;
        struct pci_bus *bus;
+       struct pci_dev *bridge = NULL;
+       struct device *tmp;
        struct acpi_dmar_atsr *atsr;
        struct dmar_atsr_unit *atsru;
 
        dev = pci_physfn(dev);
-
-       list_for_each_entry(atsru, &dmar_atsr_units, list) {
-               atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
-               if (atsr->segment == pci_domain_nr(dev->bus))
-                       goto found;
-       }
-
-       return 0;
-
-found:
        for (bus = dev->bus; bus; bus = bus->parent) {
-               struct pci_dev *bridge = bus->self;
-
+               bridge = bus->self;
                if (!bridge || !pci_is_pcie(bridge) ||
                    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
                        return 0;
-
-               if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
-                       for (i = 0; i < atsru->devices_cnt; i++)
-                               if (atsru->devices[i] == bridge)
-                                       return 1;
+               if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
                        break;
-               }
        }
+       if (!bridge)
+               return 0;
 
-       if (atsru->include_all)
-               return 1;
+       rcu_read_lock();
+       list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
+               atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+               if (atsr->segment != pci_domain_nr(dev->bus))
+                       continue;
 
-       return 0;
+               for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
+                       if (tmp == &bridge->dev)
+                               goto out;
+
+               if (atsru->include_all)
+                       goto out;
+       }
+       ret = 0;
+out:
+       rcu_read_unlock();
+
+       return ret;
 }
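The rewritten lookup first walks up to the device's root port, then matches it (or an include_all unit) against the ATSR scopes under RCU instead of the old segment-first goto. A hedged usage sketch of the kind of caller this serves; the gating condition is the point, the surrounding code is illustrative:

/* Illustrative only: enable ATS iff the device advertises the
 * capability and firmware declared ATS support for its root port. */
if (pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
    dmar_find_matched_atsr_unit(pdev))
        pci_enable_ats(pdev, VTD_PAGE_SHIFT);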
 
-int __init dmar_parse_rmrr_atsr_dev(void)
+int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
 {
-       struct dmar_rmrr_unit *rmrr;
-       struct dmar_atsr_unit *atsr;
        int ret = 0;
+       struct dmar_rmrr_unit *rmrru;
+       struct dmar_atsr_unit *atsru;
+       struct acpi_dmar_atsr *atsr;
+       struct acpi_dmar_reserved_memory *rmrr;
 
-       list_for_each_entry(rmrr, &dmar_rmrr_units, list) {
-               ret = rmrr_parse_dev(rmrr);
-               if (ret)
-                       return ret;
+       if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
+               return 0;
+
+       list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
+               rmrr = container_of(rmrru->hdr,
+                                   struct acpi_dmar_reserved_memory, header);
+               if (info->event == BUS_NOTIFY_ADD_DEVICE) {
+                       ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
+                               ((void *)rmrr) + rmrr->header.length,
+                               rmrr->segment, rmrru->devices,
+                               rmrru->devices_cnt);
+                       if (ret > 0)
+                               break;
+                       else if (ret < 0)
+                               return ret;
+               } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+                       if (dmar_remove_dev_scope(info, rmrr->segment,
+                               rmrru->devices, rmrru->devices_cnt))
+                               break;
+               }
        }
 
-       list_for_each_entry(atsr, &dmar_atsr_units, list) {
-               ret = atsr_parse_dev(atsr);
-               if (ret)
-                       return ret;
+       list_for_each_entry(atsru, &dmar_atsr_units, list) {
+               if (atsru->include_all)
+                       continue;
+
+               atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
+               if (info->event == BUS_NOTIFY_ADD_DEVICE) {
+                       ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
+                                       (void *)atsr + atsr->header.length,
+                                       atsr->segment, atsru->devices,
+                                       atsru->devices_cnt);
+                       if (ret > 0)
+                               break;
+                       else if (ret < 0)
+                               return ret;
+               } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+                       if (dmar_remove_dev_scope(info, atsr->segment,
+                                       atsru->devices, atsru->devices_cnt))
+                               break;
+               }
        }
 
-       return ret;
+       return 0;
 }
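This function replaces the one-shot dmar_parse_rmrr_atsr_dev(): RMRR and ATSR scopes are now populated and pruned incrementally from the PCI bus notifier. dmar_insert_dev_scope() is assumed to return >0 for "claimed a slot", 0 for "no match, try the next unit" and <0 on error, which is what the break/return pattern above relies on. The notify path feeding it presumably looks like this (reconstruction from the dmar code, not part of this hunk):

static int dmar_pci_bus_notifier(struct notifier_block *nb,
                                 unsigned long action, void *data)
{
        struct pci_dev *pdev = to_pci_dev(data);
        struct dmar_pci_notify_info *info;

        if (action != BUS_NOTIFY_ADD_DEVICE &&
            action != BUS_NOTIFY_DEL_DEVICE)
                return NOTIFY_DONE;

        /* Snapshot segment and bus path once for all consumers. */
        info = dmar_alloc_pci_notify_info(pdev, action);
        if (!info)
                return NOTIFY_DONE;

        down_write(&dmar_global_lock);
        if (action == BUS_NOTIFY_ADD_DEVICE)
                dmar_pci_bus_add_dev(info);
        else
                dmar_pci_bus_del_dev(info);
        up_write(&dmar_global_lock);
        dmar_free_pci_notify_info(info);

        return NOTIFY_OK;
}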
 
 /*
@@ -3606,21 +3814,24 @@ static int device_notifier(struct notifier_block *nb,
        struct pci_dev *pdev = to_pci_dev(dev);
        struct dmar_domain *domain;
 
-       if (iommu_no_mapping(dev))
+       if (iommu_dummy(dev))
                return 0;
 
-       domain = find_domain(pdev);
-       if (!domain)
+       if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
+           action != BUS_NOTIFY_DEL_DEVICE)
                return 0;
 
-       if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
-               domain_remove_one_dev_info(domain, pdev);
+       domain = find_domain(dev);
+       if (!domain)
+               return 0;
 
-               if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
-                   !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
-                   list_empty(&domain->devices))
-                       domain_exit(domain);
-       }
+       down_read(&dmar_global_lock);
+       domain_remove_one_dev_info(domain, pdev);
+       if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
+           !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
+           list_empty(&domain->devices))
+               domain_exit(domain);
+       up_read(&dmar_global_lock);
 
        return 0;
 }
@@ -3629,6 +3840,75 @@ static struct notifier_block device_nb = {
        .notifier_call = device_notifier,
 };
 
+static int intel_iommu_memory_notifier(struct notifier_block *nb,
+                                      unsigned long val, void *v)
+{
+       struct memory_notify *mhp = v;
+       unsigned long long start, end;
+       unsigned long start_vpfn, last_vpfn;
+
+       switch (val) {
+       case MEM_GOING_ONLINE:
+               start = mhp->start_pfn << PAGE_SHIFT;
+               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
+               if (iommu_domain_identity_map(si_domain, start, end)) {
+                       pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
+                               start, end);
+                       return NOTIFY_BAD;
+               }
+               break;
+
+       case MEM_OFFLINE:
+       case MEM_CANCEL_ONLINE:
+               start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+               last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
+               while (start_vpfn <= last_vpfn) {
+                       struct iova *iova;
+                       struct dmar_drhd_unit *drhd;
+                       struct intel_iommu *iommu;
+                       struct page *freelist;
+
+                       iova = find_iova(&si_domain->iovad, start_vpfn);
+                       if (iova == NULL) {
+                               pr_debug("dmar: failed to get IOVA for PFN %lx\n",
+                                        start_vpfn);
+                               break;
+                       }
+
+                       iova = split_and_remove_iova(&si_domain->iovad, iova,
+                                                    start_vpfn, last_vpfn);
+                       if (iova == NULL) {
+                               pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
+                                       start_vpfn, last_vpfn);
+                               return NOTIFY_BAD;
+                       }
+
+                       freelist = domain_unmap(si_domain, iova->pfn_lo,
+                                              iova->pfn_hi);
+
+                       rcu_read_lock();
+                       for_each_active_iommu(iommu, drhd)
+                               iommu_flush_iotlb_psi(iommu, si_domain->id,
+                                       iova->pfn_lo,
+                                       iova->pfn_hi - iova->pfn_lo + 1,
+                                       !freelist, 0);
+                       rcu_read_unlock();
+                       dma_free_pagelist(freelist);
+
+                       start_vpfn = iova->pfn_hi + 1;
+                       free_iova_mem(iova);
+               }
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block intel_iommu_memory_nb = {
+       .notifier_call = intel_iommu_memory_notifier,
+       .priority = 0
+};
+
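The notifier converts memory-hotplug PFNs (PAGE_SHIFT granularity) into VT-d DMA PFNs before touching the si_domain identity map. For reference, the conversion helper earlier in this file is just a shift (assuming the usual definitions):

/* VTD_PAGE_SHIFT is 12, so with 4KiB kernel pages this is a no-op. */
static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
        return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}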
 int __init intel_iommu_init(void)
 {
        int ret = -ENODEV;
@@ -3638,6 +3918,13 @@ int __init intel_iommu_init(void)
        /* VT-d is required for a TXT/tboot launch, so enforce that */
        force_on = tboot_force_iommu();
 
+       if (iommu_init_mempool()) {
+               if (force_on)
+                       panic("tboot: Failed to initialize iommu memory\n");
+               return -ENOMEM;
+       }
+
+       down_write(&dmar_global_lock);
        if (dmar_table_init()) {
                if (force_on)
                        panic("tboot: Failed to initialize DMAR table\n");
@@ -3660,12 +3947,6 @@ int __init intel_iommu_init(void)
        if (no_iommu || dmar_disabled)
                goto out_free_dmar;
 
-       if (iommu_init_mempool()) {
-               if (force_on)
-                       panic("tboot: Failed to initialize iommu memory\n");
-               goto out_free_dmar;
-       }
-
        if (list_empty(&dmar_rmrr_units))
                printk(KERN_INFO "DMAR: No RMRR found\n");
 
@@ -3675,7 +3956,7 @@ int __init intel_iommu_init(void)
        if (dmar_init_reserved_ranges()) {
                if (force_on)
                        panic("tboot: Failed to reserve iommu ranges\n");
-               goto out_free_mempool;
+               goto out_free_reserved_range;
        }
 
        init_no_remapping_devices();
@@ -3687,6 +3968,7 @@ int __init intel_iommu_init(void)
                printk(KERN_ERR "IOMMU: dmar init failed\n");
                goto out_free_reserved_range;
        }
+       up_write(&dmar_global_lock);
        printk(KERN_INFO
        "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
 
@@ -3699,8 +3981,9 @@ int __init intel_iommu_init(void)
        init_iommu_pm_ops();
 
        bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
-
        bus_register_notifier(&pci_bus_type, &device_nb);
+       if (si_domain && !hw_pass_through)
+               register_memory_notifier(&intel_iommu_memory_nb);
 
        intel_iommu_enabled = 1;
 
@@ -3708,21 +3991,23 @@ int __init intel_iommu_init(void)
 
 out_free_reserved_range:
        put_iova_domain(&reserved_iova_list);
-out_free_mempool:
-       iommu_exit_mempool();
 out_free_dmar:
        intel_iommu_free_dmars();
+       up_write(&dmar_global_lock);
+       iommu_exit_mempool();
        return ret;
 }
 
 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
-                                          struct pci_dev *pdev)
+                                          struct device *dev)
 {
-       struct pci_dev *tmp, *parent;
+       struct pci_dev *tmp, *parent, *pdev;
 
-       if (!iommu || !pdev)
+       if (!iommu || !dev || !dev_is_pci(dev))
                return;
 
+       pdev = to_pci_dev(dev);
+
        /* dependent device detach */
        tmp = pci_find_upstream_pcie_bridge(pdev);
        /* Secondary interface's bus number and devfn 0 */
@@ -3749,15 +4034,15 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
        struct intel_iommu *iommu;
        unsigned long flags;
        int found = 0;
+       u8 bus, devfn;
 
-       iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
-                               pdev->devfn);
+       iommu = device_to_iommu(&pdev->dev, &bus, &devfn);
        if (!iommu)
                return;
 
        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_entry_safe(info, tmp, &domain->devices, link) {
-               if (info->segment == pci_domain_nr(pdev->bus) &&
+               if (info->iommu->segment == pci_domain_nr(pdev->bus) &&
                    info->bus == pdev->bus->number &&
                    info->devfn == pdev->devfn) {
                        unlink_domain_info(info);
@@ -3765,7 +4050,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
 
                        iommu_disable_dev_iotlb(info);
                        iommu_detach_dev(iommu, info->bus, info->devfn);
-                       iommu_detach_dependent_devices(iommu, pdev);
+                       iommu_detach_dependent_devices(iommu, &pdev->dev);
                        free_devinfo_mem(info);
 
                        spin_lock_irqsave(&device_domain_lock, flags);
@@ -3780,8 +4065,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
                 * owned by this domain, clear this iommu in iommu_bmp
                 * update iommu count and coherency
                 */
-               if (iommu == device_to_iommu(info->segment, info->bus,
-                                           info->devfn))
+               if (info->iommu == iommu)
                        found = 1;
        }
 
@@ -3805,67 +4089,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
        }
 }
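This is a consumer of the reworked device_to_iommu(), which now takes a bare struct device and reports back the bus/devfn it resolved, so devices named in a DMAR device scope no longer have to be PCI. A simplified sketch of the assumed lookup (the real helper also matches devices behind PCI bridges):

static struct intel_iommu *device_to_iommu(struct device *dev,
                                           u8 *bus, u8 *devfn)
{
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
        struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
        struct device *tmp;
        int i;

        rcu_read_lock();
        for_each_active_iommu(iommu, drhd) {
                if (pdev && drhd->segment != pci_domain_nr(pdev->bus))
                        continue;

                /* Exact dev-scope match works for PCI and non-PCI alike. */
                for_each_active_dev_scope(drhd->devices,
                                          drhd->devices_cnt, i, tmp)
                        if (tmp == dev) {
                                *bus = drhd->devices[i].bus;
                                *devfn = drhd->devices[i].devfn;
                                goto out;
                        }

                /* Catch-all units can only place PCI devices. */
                if (pdev && drhd->include_all) {
                        *bus = pdev->bus->number;
                        *devfn = pdev->devfn;
                        goto out;
                }
        }
        iommu = NULL;
out:
        rcu_read_unlock();
        return iommu;
}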
 
-static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
-{
-       struct device_domain_info *info;
-       struct intel_iommu *iommu;
-       unsigned long flags1, flags2;
-
-       spin_lock_irqsave(&device_domain_lock, flags1);
-       while (!list_empty(&domain->devices)) {
-               info = list_entry(domain->devices.next,
-                       struct device_domain_info, link);
-               unlink_domain_info(info);
-               spin_unlock_irqrestore(&device_domain_lock, flags1);
-
-               iommu_disable_dev_iotlb(info);
-               iommu = device_to_iommu(info->segment, info->bus, info->devfn);
-               iommu_detach_dev(iommu, info->bus, info->devfn);
-               iommu_detach_dependent_devices(iommu, info->dev);
-
-               /* clear this iommu in iommu_bmp, update iommu count
-                * and capabilities
-                */
-               spin_lock_irqsave(&domain->iommu_lock, flags2);
-               if (test_and_clear_bit(iommu->seq_id,
-                                      domain->iommu_bmp)) {
-                       domain->iommu_count--;
-                       domain_update_iommu_cap(domain);
-               }
-               spin_unlock_irqrestore(&domain->iommu_lock, flags2);
-
-               free_devinfo_mem(info);
-               spin_lock_irqsave(&device_domain_lock, flags1);
-       }
-       spin_unlock_irqrestore(&device_domain_lock, flags1);
-}
-
-/* domain id for virtual machine, it won't be set in context */
-static atomic_t vm_domid = ATOMIC_INIT(0);
-
-static struct dmar_domain *iommu_alloc_vm_domain(void)
-{
-       struct dmar_domain *domain;
-
-       domain = alloc_domain_mem();
-       if (!domain)
-               return NULL;
-
-       domain->id = atomic_inc_return(&vm_domid);
-       domain->nid = -1;
-       memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
-       domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
-
-       return domain;
-}
-
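With the VM-only allocator and teardown gone, alloc_domain() and domain_exit() serve both native DMA domains and VM domains; that is also why md_domain_init() below drops its list and lock initialisation. Assumed shape of the unified allocator (the bool flag comes from this series, the body is my reconstruction):

static struct dmar_domain *alloc_domain(bool vm)
{
        static atomic_t vm_domid = ATOMIC_INIT(0);
        struct dmar_domain *domain;

        domain = alloc_domain_mem();
        if (!domain)
                return NULL;

        domain->nid = -1;
        domain->iommu_count = 0;
        memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
        domain->flags = 0;
        spin_lock_init(&domain->iommu_lock);
        INIT_LIST_HEAD(&domain->devices);
        if (vm) {
                domain->id = atomic_inc_return(&vm_domid);
                domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
        }

        return domain;
}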
 static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
        int adjust_width;
 
        init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
-       spin_lock_init(&domain->iommu_lock);
-
        domain_reserve_special_ranges(domain);
 
        /* calculate AGAW */
@@ -3873,9 +4101,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        adjust_width = guestwidth_to_adjustwidth(guest_width);
        domain->agaw = width_to_agaw(adjust_width);
 
-       INIT_LIST_HEAD(&domain->devices);
-
-       domain->iommu_count = 0;
        domain->iommu_coherency = 0;
        domain->iommu_snooping = 0;
        domain->iommu_superpage = 0;
@@ -3890,53 +4115,11 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        return 0;
 }
 
-static void iommu_free_vm_domain(struct dmar_domain *domain)
-{
-       unsigned long flags;
-       struct dmar_drhd_unit *drhd;
-       struct intel_iommu *iommu;
-       unsigned long i;
-       unsigned long ndomains;
-
-       for_each_active_iommu(iommu, drhd) {
-               ndomains = cap_ndoms(iommu->cap);
-               for_each_set_bit(i, iommu->domain_ids, ndomains) {
-                       if (iommu->domains[i] == domain) {
-                               spin_lock_irqsave(&iommu->lock, flags);
-                               clear_bit(i, iommu->domain_ids);
-                               iommu->domains[i] = NULL;
-                               spin_unlock_irqrestore(&iommu->lock, flags);
-                               break;
-                       }
-               }
-       }
-}
-
-static void vm_domain_exit(struct dmar_domain *domain)
-{
-       /* Domain 0 is reserved, so dont process it */
-       if (!domain)
-               return;
-
-       vm_domain_remove_all_dev_info(domain);
-       /* destroy iovas */
-       put_iova_domain(&domain->iovad);
-
-       /* clear ptes */
-       dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
-
-       /* free page tables */
-       dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
-
-       iommu_free_vm_domain(domain);
-       free_domain_mem(domain);
-}
-
 static int intel_iommu_domain_init(struct iommu_domain *domain)
 {
        struct dmar_domain *dmar_domain;
 
-       dmar_domain = iommu_alloc_vm_domain();
+       dmar_domain = alloc_domain(true);
        if (!dmar_domain) {
                printk(KERN_ERR
                        "intel_iommu_domain_init: dmar_domain == NULL\n");
@@ -3945,7 +4128,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
        if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
                printk(KERN_ERR
                        "intel_iommu_domain_init() failed\n");
-               vm_domain_exit(dmar_domain);
+               domain_exit(dmar_domain);
                return -ENOMEM;
        }
        domain_update_iommu_cap(dmar_domain);
@@ -3963,7 +4146,7 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)
        struct dmar_domain *dmar_domain = domain->priv;
 
        domain->priv = NULL;
-       vm_domain_exit(dmar_domain);
+       domain_exit(dmar_domain);
 }
 
 static int intel_iommu_attach_device(struct iommu_domain *domain,
@@ -3973,12 +4156,13 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
        struct pci_dev *pdev = to_pci_dev(dev);
        struct intel_iommu *iommu;
        int addr_width;
+       u8 bus, devfn;
 
        /* normally pdev is not mapped */
        if (unlikely(domain_context_mapped(pdev))) {
                struct dmar_domain *old_domain;
 
-               old_domain = find_domain(pdev);
+               old_domain = find_domain(dev);
                if (old_domain) {
                        if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
                            dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
@@ -3988,8 +4172,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
                }
        }
 
-       iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
-                               pdev->devfn);
+       iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;
 
@@ -4072,18 +4255,51 @@ static int intel_iommu_map(struct iommu_domain *domain,
 }
 
 static size_t intel_iommu_unmap(struct iommu_domain *domain,
-                            unsigned long iova, size_t size)
+                               unsigned long iova, size_t size)
 {
        struct dmar_domain *dmar_domain = domain->priv;
-       int order;
+       struct page *freelist = NULL;
+       struct intel_iommu *iommu;
+       unsigned long start_pfn, last_pfn;
+       unsigned int npages;
+       int iommu_id, num, ndomains, level = 0;
+
+       /* Cope with horrid API which requires us to unmap more than the
+          size argument if it happens to be a large-page mapping. */
+       if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
+               BUG();
+
+       if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
+               size = VTD_PAGE_SIZE << level_to_offset_bits(level);
+
+       start_pfn = iova >> VTD_PAGE_SHIFT;
+       last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
+
+       freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
+
+       npages = last_pfn - start_pfn + 1;
+
+       for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
+               iommu = g_iommus[iommu_id];
+
+               /*
+                * find bit position of dmar_domain
+                */
+               ndomains = cap_ndoms(iommu->cap);
+               for_each_set_bit(num, iommu->domain_ids, ndomains) {
+                       if (iommu->domains[num] == dmar_domain)
+                               iommu_flush_iotlb_psi(iommu, num, start_pfn,
+                                                    npages, !freelist, 0);
+               }
+       }
 
-       order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
-                           (iova + size - 1) >> VTD_PAGE_SHIFT);
+       dma_free_pagelist(freelist);
 
        if (dmar_domain->max_addr == iova + size)
                dmar_domain->max_addr = iova;
 
-       return PAGE_SIZE << order;
+       return size;
 }
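The rounding at the top of intel_iommu_unmap() exists because a PTE at level N maps VTD_PAGE_SIZE << ((N - 1) * 9) bytes and a superpage cannot be partially cleared, so the request is widened to the whole mapping and the widened size is returned. Worked example under the usual geometry:

/* Assuming LEVEL_STRIDE == 9 and VTD_PAGE_SIZE == 4096:
 *   level 1: 4KiB << 0  == 4KiB  (ordinary page)
 *   level 2: 4KiB << 9  == 2MiB  (superpage)
 *   level 3: 4KiB << 18 == 1GiB  (superpage)
 * Unmapping 4KiB inside a 2MiB mapping therefore unmaps 2MiB.
 */
size = VTD_PAGE_SIZE << level_to_offset_bits(level);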
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -4091,9 +4307,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 {
        struct dmar_domain *dmar_domain = domain->priv;
        struct dma_pte *pte;
+       int level = 0;
        u64 phys = 0;
 
-       pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
+       pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
        if (pte)
                phys = dma_pte_addr(pte);
 
@@ -4121,9 +4338,9 @@ static int intel_iommu_add_device(struct device *dev)
        struct pci_dev *bridge, *dma_pdev = NULL;
        struct iommu_group *group;
        int ret;
+       u8 bus, devfn;
 
-       if (!device_to_iommu(pci_domain_nr(pdev->bus),
-                            pdev->bus->number, pdev->devfn))
+       if (!device_to_iommu(dev, &bus, &devfn))
                return -ENODEV;
 
        bridge = pci_find_upstream_pcie_bridge(pdev);