Merge tag 'iommu-updates-v5.6' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
[linux-2.6-microblaze.git] drivers/iommu/intel-iommu.c
index 35a4a3a..9dc3767 100644
@@ -307,6 +307,20 @@ static int hw_pass_through = 1;
  */
 #define DOMAIN_FLAG_LOSE_CHILDREN              BIT(1)
 
+/*
+ * When VT-d works in scalable mode, it allows DMA translation to
+ * happen through either the first-level or the second-level page
+ * table. This bit marks that DMA translation for the domain goes
+ * through the first-level page table; otherwise it goes through the
+ * second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL            BIT(2)
+
+/*
+ * The domain represents a virtual machine which requires IOMMU nested
+ * translation mode support.
+ */
+#define DOMAIN_FLAG_NESTING_MODE               BIT(3)
+
 #define for_each_domain_iommu(idx, domain)                     \
        for (idx = 0; idx < g_num_of_iommus; idx++)             \
                if (domain->iommu_refcnt[idx])
@@ -355,9 +369,14 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 int dmar_disabled = 0;
 #else
 int dmar_disabled = 1;
-#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
+#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
 
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+int intel_iommu_sm = 1;
+#else
 int intel_iommu_sm;
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+
 int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 
@@ -368,7 +387,6 @@ static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int intel_no_bounce;
 
-#define IDENTMAP_ALL           1
 #define IDENTMAP_GFX           2
 #define IDENTMAP_AZALIA                4
 
@@ -377,7 +395,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
-static DEFINE_SPINLOCK(device_domain_lock);
+DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
@@ -552,6 +570,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
        return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
 }
 
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+       return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
 static inline int domain_pfn_supported(struct dmar_domain *domain,
                                       unsigned long pfn)
 {
@@ -661,11 +684,12 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
        return ret;
 }
 
-static int domain_update_iommu_superpage(struct intel_iommu *skip)
+static int domain_update_iommu_superpage(struct dmar_domain *domain,
+                                        struct intel_iommu *skip)
 {
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
-       int mask = 0xf;
+       int mask = 0x3;
 
        if (!intel_iommu_superpage) {
                return 0;
@@ -675,7 +699,13 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip)
        rcu_read_lock();
        for_each_active_iommu(iommu, drhd) {
                if (iommu != skip) {
-                       mask &= cap_super_page_val(iommu->cap);
+                       if (domain && domain_use_first_level(domain)) {
+                               if (!cap_fl1gp_support(iommu->cap))
+                                       mask = 0x1;
+                       } else {
+                               mask &= cap_super_page_val(iommu->cap);
+                       }
+
                        if (!mask)
                                break;
                }
@@ -690,7 +720,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
        domain_update_iommu_coherency(domain);
        domain->iommu_snooping = domain_update_iommu_snooping(NULL);
-       domain->iommu_superpage = domain_update_iommu_superpage(NULL);
+       domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
 }
 
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -907,6 +937,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 
                        domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
                        pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
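+                       /* Non-leaf first-level entries are marked execute-disable. */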
+                       if (domain_use_first_level(domain))
+                               pteval |= DMA_FL_PTE_XD;
                        if (cmpxchg64(&pte->val, 0ULL, pteval))
                                /* Someone else set it while we were thinking; use theirs. */
                                free_pgtable_page(tmp_page);
@@ -1477,6 +1509,20 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
        spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
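+/*
+ * Flush the PASID-based IOTLB for a first-level domain: cover the
+ * default PASID used by auxiliary domains, if one is set, and the
+ * PASID_RID2PASID entry used by devices attached to the domain.
+ */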
+static void domain_flush_piotlb(struct intel_iommu *iommu,
+                               struct dmar_domain *domain,
+                               u64 addr, unsigned long npages, bool ih)
+{
+       u16 did = domain->iommu_did[iommu->seq_id];
+
+       if (domain->default_pasid)
+               qi_flush_piotlb(iommu, did, domain->default_pasid,
+                               addr, npages, ih);
+
+       if (!list_empty(&domain->devices))
+               qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
+}
+
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
                                  struct dmar_domain *domain,
                                  unsigned long pfn, unsigned int pages,
@@ -1490,18 +1536,23 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 
        if (ih)
                ih = 1 << 6;
-       /*
-        * Fallback to domain selective flush if no PSI support or the size is
-        * too big.
-        * PSI requires page size to be 2 ^ x, and the base address is naturally
-        * aligned to the size
-        */
-       if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
-               iommu->flush.flush_iotlb(iommu, did, 0, 0,
-                                               DMA_TLB_DSI_FLUSH);
-       else
-               iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
-                                               DMA_TLB_PSI_FLUSH);
+
+       if (domain_use_first_level(domain)) {
+               domain_flush_piotlb(iommu, domain, addr, pages, ih);
+       } else {
+               /*
+                * Fallback to domain selective flush if no PSI support or
+                * the size is too big. PSI requires page size to be 2 ^ x,
+                * and the base address is naturally aligned to the size.
+                */
+               if (!cap_pgsel_inv(iommu->cap) ||
+                   mask > cap_max_amask_val(iommu->cap))
+                       iommu->flush.flush_iotlb(iommu, did, 0, 0,
+                                                       DMA_TLB_DSI_FLUSH);
+               else
+                       iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
+                                                       DMA_TLB_PSI_FLUSH);
+       }
 
        /*
         * In caching mode, changes of pages from non-present to present require
@@ -1516,8 +1567,11 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
                                        struct dmar_domain *domain,
                                        unsigned long pfn, unsigned int pages)
 {
-       /* It's a non-present to present mapping. Only flush if caching mode */
-       if (cap_caching_mode(iommu->cap))
+       /*
+        * It's a non-present to present mapping. Only flush if the IOMMU
+        * is in caching mode and the domain uses second-level translation.
+        */
+       if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
                iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
        else
                iommu_flush_write_buffer(iommu);
@@ -1534,7 +1588,11 @@ static void iommu_flush_iova(struct iova_domain *iovad)
                struct intel_iommu *iommu = g_iommus[idx];
                u16 did = domain->iommu_did[iommu->seq_id];
 
-               iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+               if (domain_use_first_level(domain))
+                       domain_flush_piotlb(iommu, domain, 0, -1, 0);
+               else
+                       iommu->flush.flush_iotlb(iommu, did, 0, 0,
+                                                DMA_TLB_DSI_FLUSH);
 
                if (!cap_caching_mode(iommu->cap))
                        iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
@@ -1703,6 +1761,33 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 #endif
 }
 
+/*
+ * Check and return whether first level is used by default for
+ * DMA translation.
+ */
+static bool first_level_by_default(void)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       static int first_level_support = -1;
+
+       if (likely(first_level_support != -1))
+               return first_level_support;
+
+       first_level_support = 1;
+
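+       /*
+        * First level can be used by default only if every IOMMU
+        * supports scalable mode and first-level translation.
+        */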
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
+                       first_level_support = 0;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return first_level_support;
+}
+
 static struct dmar_domain *alloc_domain(int flags)
 {
        struct dmar_domain *domain;
@@ -1714,6 +1799,8 @@ static struct dmar_domain *alloc_domain(int flags)
        memset(domain, 0, sizeof(*domain));
        domain->nid = NUMA_NO_NODE;
        domain->flags = flags;
+       if (first_level_by_default())
+               domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
        domain->has_iotlb_device = false;
        INIT_LIST_HEAD(&domain->devices);
 
@@ -1843,14 +1930,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
 {
        int adjust_width, agaw;
        unsigned long sagaw;
-       int err;
+       int ret;
 
        init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
 
-       err = init_iova_flush_queue(&domain->iovad,
-                                   iommu_flush_iova, iova_entry_free);
-       if (err)
-               return err;
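+       /*
+        * In non-strict mode, set up the IOVA flush queue so that IOTLB
+        * flushes for unmapped ranges can be deferred and batched. A
+        * setup failure is not fatal.
+        */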
+       if (!intel_iommu_strict) {
+               ret = init_iova_flush_queue(&domain->iovad,
+                                           iommu_flush_iova, iova_entry_free);
+               if (ret)
+                       pr_info("iova flush queue initialization failed\n");
+       }
 
        domain_reserve_special_ranges(domain);
 
@@ -2223,17 +2312,20 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
        unsigned long sg_res = 0;
        unsigned int largepage_lvl = 0;
        unsigned long lvl_pages = 0;
+       u64 attr;
 
        BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
 
        if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
                return -EINVAL;
 
-       prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+       attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
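+       /*
+        * First-level PTEs carry an explicit present bit and are marked
+        * execute-disable (XD) for DMA mappings.
+        */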
+       if (domain_use_first_level(domain))
+               attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
 
        if (!sg) {
                sg_res = nr_pages;
-               pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+               pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
        }
 
        while (nr_pages > 0) {
@@ -2245,7 +2337,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                        sg_res = aligned_nrpages(sg->offset, sg->length);
                        sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
                        sg->dma_length = sg->length;
-                       pteval = (sg_phys(sg) - pgoff) | prot;
+                       pteval = (sg_phys(sg) - pgoff) | attr;
                        phys_pfn = pteval >> VTD_PAGE_SHIFT;
                }
 
@@ -2414,7 +2506,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
        spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
-static struct dmar_domain *find_domain(struct device *dev)
+struct dmar_domain *find_domain(struct device *dev)
 {
        struct device_domain_info *info;
 
@@ -2460,6 +2552,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
        return NULL;
 }
 
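+/*
+ * Set up a scalable-mode PASID entry for @dev, using the domain's page
+ * table as the first-level translation table for @pasid.
+ */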
+static int domain_setup_first_level(struct intel_iommu *iommu,
+                                   struct dmar_domain *domain,
+                                   struct device *dev,
+                                   int pasid)
+{
+       int flags = PASID_FLAG_SUPERVISOR_MODE;
+       struct dma_pte *pgd = domain->pgd;
+       int agaw, level;
+
+       /*
+        * Skip the top levels of the page table for an IOMMU whose agaw
+        * is smaller than the domain's default. Unnecessary for PT mode.
+        */
+       for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+               pgd = phys_to_virt(dma_pte_addr(pgd));
+               if (!dma_pte_present(pgd))
+                       return -ENOMEM;
+       }
+
+       level = agaw_to_level(agaw);
+       if (level != 4 && level != 5)
+               return -EINVAL;
+
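+       /* Tell the PASID entry code whether this is a 5-level table. */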
+       flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+
+       return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+                                            domain->iommu_did[iommu->seq_id],
+                                            flags);
+}
+
 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
                                                    int bus, int devfn,
                                                    struct device *dev,
@@ -2559,6 +2681,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
                if (hw_pass_through && domain_type_is_si(domain))
                        ret = intel_pasid_setup_pass_through(iommu, domain,
                                        dev, PASID_RID2PASID);
+               else if (domain_use_first_level(domain))
+                       ret = domain_setup_first_level(iommu, domain, dev,
+                                       PASID_RID2PASID);
                else
                        ret = intel_pasid_setup_second_level(iommu, domain,
                                        dev, PASID_RID2PASID);
@@ -2764,10 +2889,8 @@ static int __init si_domain_init(int hw)
        }
 
        /*
-        * Normally we use DMA domains for devices which have RMRRs. But we
-        * loose this requirement for graphic and usb devices. Identity map
-        * the RMRRs for graphic and USB devices so that they could use the
-        * si_domain.
+        * Identity map the RMRRs so that devices with RMRRs can also use
+        * the si_domain.
         */
        for_each_rmrr_units(rmrr) {
                for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
@@ -2775,9 +2898,6 @@ static int __init si_domain_init(int hw)
                        unsigned long long start = rmrr->base_address;
                        unsigned long long end = rmrr->end_address;
 
-                       if (device_is_rmrr_locked(dev))
-                               continue;
-
                        if (WARN_ON(end < start ||
                                    end >> agaw_to_width(si_domain->agaw)))
                                continue;
@@ -2916,9 +3036,6 @@ static int device_def_domain_type(struct device *dev)
        if (dev_is_pci(dev)) {
                struct pci_dev *pdev = to_pci_dev(dev);
 
-               if (device_is_rmrr_locked(dev))
-                       return IOMMU_DOMAIN_DMA;
-
                /*
                 * Prevent any device marked as untrusted from getting
                 * placed into the statically identity mapping domain.
@@ -2956,13 +3073,9 @@ static int device_def_domain_type(struct device *dev)
                                return IOMMU_DOMAIN_DMA;
                } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
                        return IOMMU_DOMAIN_DMA;
-       } else {
-               if (device_has_rmrr(dev))
-                       return IOMMU_DOMAIN_DMA;
        }
 
-       return (iommu_identity_mapping & IDENTMAP_ALL) ?
-                       IOMMU_DOMAIN_IDENTITY : 0;
+       return 0;
 }
 
 static void intel_iommu_init_qi(struct intel_iommu *iommu)
@@ -3291,10 +3404,7 @@ static int __init init_dmars(void)
 
                if (!ecap_pass_through(iommu->ecap))
                        hw_pass_through = 0;
-#ifdef CONFIG_INTEL_IOMMU_SVM
-               if (pasid_supported(iommu))
-                       intel_svm_init(iommu);
-#endif
+               intel_svm_check(iommu);
        }
 
        /*
@@ -3309,9 +3419,6 @@ static int __init init_dmars(void)
                iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
        }
 
-       if (iommu_default_passthrough())
-               iommu_identity_mapping |= IDENTMAP_ALL;
-
 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
        dmar_map_gfx = 0;
 #endif
@@ -3384,8 +3491,21 @@ static unsigned long intel_alloc_iova(struct device *dev,
 {
        unsigned long iova_pfn;
 
-       /* Restrict dma_mask to the width that the iommu can handle */
-       dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
+       /*
+        * Restrict dma_mask to the width that the iommu can handle.
+        * First-level translation restricts the input-address to a
+        * canonical address (i.e., address bits 63:N have the same
+        * value as address bit [N-1], where N is 48-bits with 4-level
+        * paging and 57-bits with 5-level paging). Hence, skip bit
+        * [N-1].
+        */
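+       /*
+        * For example, with 4-level paging (gaw == 48) the mask is
+        * capped at DOMAIN_MAX_ADDR(47), which keeps bit 47 clear so
+        * bits 63:47 are all zero and the IOVA stays canonical.
+        */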
+       if (domain_use_first_level(domain))
+               dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
+                                dma_mask);
+       else
+               dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
+                                dma_mask);
+
        /* Ensure we reserve the whole size-aligned region */
        nrpages = __roundup_pow_of_two(nrpages);
 
@@ -3403,7 +3523,8 @@ static unsigned long intel_alloc_iova(struct device *dev,
        iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
                                   IOVA_PFN(dma_mask), true);
        if (unlikely(!iova_pfn)) {
-               dev_err(dev, "Allocating %ld-page iova failed", nrpages);
+               dev_err_once(dev, "Allocating %ld-page iova failed\n",
+                            nrpages);
                return 0;
        }
 
@@ -3771,8 +3892,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
                return 0;
        }
 
-       trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
-                    sg_phys(sglist), size << VTD_PAGE_SHIFT);
+       for_each_sg(sglist, sg, nelems, i)
+               trace_map_sg(dev, i + 1, nelems, sg);
 
        return nelems;
 }
@@ -3984,6 +4105,9 @@ bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
                sg_dma_len(sg) = sg->length;
        }
 
+       for_each_sg(sglist, sg, nelems, i)
+               trace_bounce_map_sg(dev, i + 1, nelems, sg);
+
        return nelems;
 
 out_unmap:
@@ -4312,16 +4436,31 @@ static void __init init_iommu_pm_ops(void)
 static inline void init_iommu_pm_ops(void) {}
 #endif /* CONFIG_PM */
 
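+/*
+ * An RMRR must describe a page-aligned, non-empty region and pass any
+ * architecture-specific checks before it is trusted.
+ */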
+static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
+{
+       if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
+           !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
+           rmrr->end_address <= rmrr->base_address ||
+           arch_rmrr_sanity_check(rmrr))
+               return -EINVAL;
+
+       return 0;
+}
+
 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
 {
        struct acpi_dmar_reserved_memory *rmrr;
        struct dmar_rmrr_unit *rmrru;
-       int ret;
 
        rmrr = (struct acpi_dmar_reserved_memory *)header;
-       ret = arch_rmrr_sanity_check(rmrr);
-       if (ret)
-               return ret;
+       if (rmrr_sanity_check(rmrr))
+               WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
+                          "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
+                          "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+                          rmrr->base_address, rmrr->end_address,
+                          dmi_get_system_info(DMI_BIOS_VENDOR),
+                          dmi_get_system_info(DMI_BIOS_VERSION),
+                          dmi_get_system_info(DMI_PRODUCT_VERSION));
 
        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
        if (!rmrru)
@@ -4467,7 +4606,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
                        iommu->name);
                return -ENXIO;
        }
-       sp = domain_update_iommu_superpage(iommu) - 1;
+       sp = domain_update_iommu_superpage(NULL, iommu) - 1;
        if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
                pr_warn("%s: Doesn't support large page.\n",
                        iommu->name);
@@ -4487,10 +4626,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
        if (ret)
                goto out;
 
-#ifdef CONFIG_INTEL_IOMMU_SVM
-       if (pasid_supported(iommu))
-               intel_svm_init(iommu);
-#endif
+       intel_svm_check(iommu);
 
        if (dmaru->ignored) {
                /*
@@ -4895,7 +5031,7 @@ static int __init platform_optin_force_iommu(void)
         * map for all devices except those marked as being untrusted.
         */
        if (dmar_disabled)
-               iommu_identity_mapping |= IDENTMAP_ALL;
+               iommu_set_default_passthrough(false);
 
        dmar_disabled = 0;
        no_iommu = 0;
@@ -5195,6 +5331,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 {
        struct dmar_domain *dmar_domain;
        struct iommu_domain *domain;
+       int ret;
 
        switch (type) {
        case IOMMU_DOMAIN_DMA:
@@ -5211,11 +5348,12 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
                        return NULL;
                }
 
-               if (type == IOMMU_DOMAIN_DMA &&
-                   init_iova_flush_queue(&dmar_domain->iovad,
-                                         iommu_flush_iova, iova_entry_free)) {
-                       pr_warn("iova flush queue initialization failed\n");
-                       intel_iommu_strict = 1;
+               if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
+                       ret = init_iova_flush_queue(&dmar_domain->iovad,
+                                                   iommu_flush_iova,
+                                                   iova_entry_free);
+                       if (ret)
+                               pr_info("iova flush queue initialization failed\n");
                }
 
                domain_update_iommu_cap(dmar_domain);
@@ -5281,7 +5419,7 @@ static void auxiliary_unlink_device(struct dmar_domain *domain,
        domain->auxd_refcnt--;
 
        if (!domain->auxd_refcnt && domain->default_pasid > 0)
-               intel_pasid_free_id(domain->default_pasid);
+               ioasid_free(domain->default_pasid);
 }
 
 static int aux_domain_add_dev(struct dmar_domain *domain,
@@ -5299,10 +5437,11 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
        if (domain->default_pasid <= 0) {
                int pasid;
 
-               pasid = intel_pasid_alloc_id(domain, PASID_MIN,
-                                            pci_max_pasids(to_pci_dev(dev)),
-                                            GFP_KERNEL);
-               if (pasid <= 0) {
+               /* No private data needed for the default pasid */
+               pasid = ioasid_alloc(NULL, PASID_MIN,
+                                    pci_max_pasids(to_pci_dev(dev)) - 1,
+                                    NULL);
+               if (pasid == INVALID_IOASID) {
                        pr_err("Can't allocate default pasid\n");
                        return -ENODEV;
                }
@@ -5320,8 +5459,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
                goto attach_failed;
 
        /* Setup the PASID entry for mediated devices: */
-       ret = intel_pasid_setup_second_level(iommu, domain, dev,
-                                            domain->default_pasid);
+       if (domain_use_first_level(domain))
+               ret = domain_setup_first_level(iommu, domain, dev,
+                                              domain->default_pasid);
+       else
+               ret = intel_pasid_setup_second_level(iommu, domain, dev,
+                                                    domain->default_pasid);
        if (ret)
                goto table_failed;
        spin_unlock(&iommu->lock);
@@ -5338,7 +5481,7 @@ attach_failed:
        spin_unlock(&iommu->lock);
        spin_unlock_irqrestore(&device_domain_lock, flags);
        if (!domain->auxd_refcnt && domain->default_pasid > 0)
-               intel_pasid_free_id(domain->default_pasid);
+               ioasid_free(domain->default_pasid);
 
        return ret;
 }
@@ -5592,6 +5735,24 @@ static inline bool iommu_pasid_support(void)
        return ret;
 }
 
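+/*
+ * Nested translation is usable only if every IOMMU in the system
+ * supports scalable mode and nested translation.
+ */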
+static inline bool nested_mode_support(void)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       bool ret = true;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
+                       ret = false;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
        if (cap == IOMMU_CAP_CACHE_COHERENCY)
@@ -5749,15 +5910,6 @@ static void intel_iommu_get_resv_regions(struct device *device,
        list_add_tail(&reg->list, head);
 }
 
-static void intel_iommu_put_resv_regions(struct device *dev,
-                                        struct list_head *head)
-{
-       struct iommu_resv_region *entry, *next;
-
-       list_for_each_entry_safe(entry, next, head, list)
-               kfree(entry);
-}
-
 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
 {
        struct device_domain_info *info;
@@ -5984,10 +6136,42 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
        return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
 }
 
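+/*
+ * DOMAIN_ATTR_NESTING switches an unmanaged domain to nested
+ * translation mode (and clears first-level use); it is only accepted
+ * while no device is attached to the domain.
+ */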
+static int
+intel_iommu_domain_set_attr(struct iommu_domain *domain,
+                           enum iommu_attr attr, void *data)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       unsigned long flags;
+       int ret = 0;
+
+       if (domain->type != IOMMU_DOMAIN_UNMANAGED)
+               return -EINVAL;
+
+       switch (attr) {
+       case DOMAIN_ATTR_NESTING:
+               spin_lock_irqsave(&device_domain_lock, flags);
+               if (nested_mode_support() &&
+                   list_empty(&dmar_domain->devices)) {
+                       dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+                       dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+               } else {
+                       ret = -ENODEV;
+               }
+               spin_unlock_irqrestore(&device_domain_lock, flags);
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
 const struct iommu_ops intel_iommu_ops = {
        .capable                = intel_iommu_capable,
        .domain_alloc           = intel_iommu_domain_alloc,
        .domain_free            = intel_iommu_domain_free,
+       .domain_set_attr        = intel_iommu_domain_set_attr,
        .attach_dev             = intel_iommu_attach_device,
        .detach_dev             = intel_iommu_detach_device,
        .aux_attach_dev         = intel_iommu_aux_attach_device,
@@ -5999,7 +6183,7 @@ const struct iommu_ops intel_iommu_ops = {
        .add_device             = intel_iommu_add_device,
        .remove_device          = intel_iommu_remove_device,
        .get_resv_regions       = intel_iommu_get_resv_regions,
-       .put_resv_regions       = intel_iommu_put_resv_regions,
+       .put_resv_regions       = generic_iommu_put_resv_regions,
        .apply_resv_region      = intel_iommu_apply_resv_region,
        .device_group           = intel_iommu_device_group,
        .dev_has_feat           = intel_iommu_dev_has_feat,