iommu/vt-d: Only clear real DMA device's context entries
[linux-2.6-microblaze.git] drivers/iommu/intel-iommu.c
index ef0a524..1ff45b2 100644
@@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context)
 static struct dmar_domain *si_domain;
 static int hw_pass_through = 1;
 
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY            BIT(0)
-
-/*
- * This is a DMA domain allocated through the iommu domain allocation
- * interface. But one or more devices belonging to this domain have
- * been chosen to use a private domain. We should avoid to use the
- * map/unmap/iova_to_phys APIs on it.
- */
-#define DOMAIN_FLAG_LOSE_CHILDREN              BIT(1)
-
-/*
- * When VT-d works in the scalable mode, it allows DMA translation to
- * happen through either first level or second level page table. This
- * bit marks that the DMA translation for the domain goes through the
- * first level page table, otherwise, it goes through the second level.
- */
-#define DOMAIN_FLAG_USE_FIRST_LEVEL            BIT(2)
-
-/*
- * Domain represents a virtual machine which demands iommu nested
- * translation mode support.
- */
-#define DOMAIN_FLAG_NESTING_MODE               BIT(3)
-
 #define for_each_domain_iommu(idx, domain)                     \
        for (idx = 0; idx < g_num_of_iommus; idx++)             \
                if (domain->iommu_refcnt[idx])
@@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
 static void dmar_remove_one_dev_info(struct device *dev);
 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
-                                struct device *dev);
-static int domain_detach_iommu(struct dmar_domain *domain,
-                              struct intel_iommu *iommu);
-static bool device_is_rmrr_locked(struct device *dev);
 static int intel_iommu_attach_device(struct iommu_domain *domain,
                                     struct device *dev);
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -371,11 +341,11 @@ int dmar_disabled = 0;
 int dmar_disabled = 1;
 #endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
 
-#ifdef INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
 int intel_iommu_sm = 1;
 #else
 int intel_iommu_sm;
-#endif /* INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
 
 int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
@@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
+struct device_domain_info *get_domain_info(struct device *dev)
+{
+       struct device_domain_info *info;
+
+       if (!dev)
+               return NULL;
+
+       info = dev->archdata.iommu;
+       if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
+                    info == DEFER_DEVICE_DOMAIN_INFO))
+               return NULL;
+
+       return info;
+}
+
 DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
@@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu)
                iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
 }
 
-/* Convert generic 'struct iommu_domain to private struct dmar_domain */
-static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
-{
-       return container_of(dom, struct dmar_domain, domain);
-}
-
 static int __init intel_iommu_setup(char *str)
 {
        if (!str)
@@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str)
                        pr_info("Intel-IOMMU: scalable mode supported\n");
                        intel_iommu_sm = 1;
                } else if (!strncmp(str, "tboot_noforce", 13)) {
-                       printk(KERN_INFO
-                               "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+                       pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
                        intel_iommu_tboot_noforce = 1;
                } else if (!strncmp(str, "nobounce", 8)) {
                        pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
@@ -1763,6 +1741,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
                if (ecap_prs(iommu->ecap))
                        intel_svm_finish_prq(iommu);
        }
+       if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
+               ioasid_unregister_allocator(&iommu->pasid_allocator);
+
 #endif
 }
 
@@ -1911,11 +1892,6 @@ static int dmar_init_reserved_ranges(void)
        return 0;
 }
 
-static void domain_reserve_special_ranges(struct dmar_domain *domain)
-{
-       copy_reserved_iova(&reserved_iova_list, &domain->iovad);
-}
-
 static inline int guestwidth_to_adjustwidth(int gaw)
 {
        int agaw;
@@ -1930,65 +1906,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
        return agaw;
 }
 
-static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
-                      int guest_width)
-{
-       int adjust_width, agaw;
-       unsigned long sagaw;
-       int ret;
-
-       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-
-       if (!intel_iommu_strict) {
-               ret = init_iova_flush_queue(&domain->iovad,
-                                           iommu_flush_iova, iova_entry_free);
-               if (ret)
-                       pr_info("iova flush queue initialization failed\n");
-       }
-
-       domain_reserve_special_ranges(domain);
-
-       /* calculate AGAW */
-       if (guest_width > cap_mgaw(iommu->cap))
-               guest_width = cap_mgaw(iommu->cap);
-       domain->gaw = guest_width;
-       adjust_width = guestwidth_to_adjustwidth(guest_width);
-       agaw = width_to_agaw(adjust_width);
-       sagaw = cap_sagaw(iommu->cap);
-       if (!test_bit(agaw, &sagaw)) {
-               /* hardware doesn't support it, choose a bigger one */
-               pr_debug("Hardware doesn't support agaw %d\n", agaw);
-               agaw = find_next_bit(&sagaw, 5, agaw);
-               if (agaw >= 5)
-                       return -ENODEV;
-       }
-       domain->agaw = agaw;
-
-       if (ecap_coherent(iommu->ecap))
-               domain->iommu_coherency = 1;
-       else
-               domain->iommu_coherency = 0;
-
-       if (ecap_sc_support(iommu->ecap))
-               domain->iommu_snooping = 1;
-       else
-               domain->iommu_snooping = 0;
-
-       if (intel_iommu_superpage)
-               domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
-       else
-               domain->iommu_superpage = 0;
-
-       domain->nid = iommu->node;
-
-       /* always allocate the top pgd */
-       domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
-       if (!domain->pgd)
-               return -ENOMEM;
-       __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
-       return 0;
-}
-
 static void domain_exit(struct dmar_domain *domain)
 {
 
@@ -1996,7 +1913,8 @@ static void domain_exit(struct dmar_domain *domain)
        domain_remove_dev_info(domain);
 
        /* destroy iovas */
-       put_iova_domain(&domain->iovad);
+       if (domain->domain.type == IOMMU_DOMAIN_DMA)
+               put_iova_domain(&domain->iovad);
 
        if (domain->pgd) {
                struct page *freelist;
@@ -2522,7 +2440,7 @@ struct dmar_domain *find_domain(struct device *dev)
                dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
 
        /* No lock here, assumes no domain exit in normal case */
-       info = dev->archdata.iommu;
+       info = get_domain_info(dev);
        if (likely(info))
                return info->domain;
 
@@ -2582,6 +2500,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
                                             flags);
 }
 
+static bool dev_is_real_dma_subdevice(struct device *dev)
+{
+       return dev && dev_is_pci(dev) &&
+              pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+}
+
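dev_is_real_dma_subdevice() captures what the subject line is about: a sub-device (for example one sitting behind VMD) performs DMA with the requester ID of the device returned by pci_real_dma_dev(), so only that real DMA device owns context entries. The __dmar_remove_one_dev_info() hunk further down uses this predicate to skip context clearing for such sub-devices. A minimal sketch mirroring that use (illustrative, not part of the patch):

	/* Sketch of the guarded teardown added later in this patch: only the
	 * real DMA device's context entries are cleared; a sub-device shares
	 * them and must leave them alone. */
	static void example_teardown_context(struct intel_iommu *iommu, struct device *dev)
	{
		if (!dev_is_real_dma_subdevice(dev))
			domain_context_clear(iommu, dev);
	}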
 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
                                                    int bus, int devfn,
                                                    struct device *dev,
@@ -2704,108 +2628,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
        return domain;
 }
 
-static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
-{
-       *(u16 *)opaque = alias;
-       return 0;
-}
-
-static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
-{
-       struct device_domain_info *info;
-       struct dmar_domain *domain = NULL;
-       struct intel_iommu *iommu;
-       u16 dma_alias;
-       unsigned long flags;
-       u8 bus, devfn;
-
-       iommu = device_to_iommu(dev, &bus, &devfn);
-       if (!iommu)
-               return NULL;
-
-       if (dev_is_pci(dev)) {
-               struct pci_dev *pdev = to_pci_dev(dev);
-
-               pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
-               spin_lock_irqsave(&device_domain_lock, flags);
-               info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
-                                                     PCI_BUS_NUM(dma_alias),
-                                                     dma_alias & 0xff);
-               if (info) {
-                       iommu = info->iommu;
-                       domain = info->domain;
-               }
-               spin_unlock_irqrestore(&device_domain_lock, flags);
-
-               /* DMA alias already has a domain, use it */
-               if (info)
-                       goto out;
-       }
-
-       /* Allocate and initialize new domain for the device */
-       domain = alloc_domain(0);
-       if (!domain)
-               return NULL;
-       if (domain_init(domain, iommu, gaw)) {
-               domain_exit(domain);
-               return NULL;
-       }
-
-out:
-       return domain;
-}
-
-static struct dmar_domain *set_domain_for_dev(struct device *dev,
-                                             struct dmar_domain *domain)
-{
-       struct intel_iommu *iommu;
-       struct dmar_domain *tmp;
-       u16 req_id, dma_alias;
-       u8 bus, devfn;
-
-       iommu = device_to_iommu(dev, &bus, &devfn);
-       if (!iommu)
-               return NULL;
-
-       req_id = ((u16)bus << 8) | devfn;
-
-       if (dev_is_pci(dev)) {
-               struct pci_dev *pdev = to_pci_dev(dev);
-
-               pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
-               /* register PCI DMA alias device */
-               if (req_id != dma_alias) {
-                       tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
-                                       dma_alias & 0xff, NULL, domain);
-
-                       if (!tmp || tmp != domain)
-                               return tmp;
-               }
-       }
-
-       tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
-       if (!tmp || tmp != domain)
-               return tmp;
-
-       return domain;
-}
-
 static int iommu_domain_identity_map(struct dmar_domain *domain,
-                                    unsigned long long start,
-                                    unsigned long long end)
+                                    unsigned long first_vpfn,
+                                    unsigned long last_vpfn)
 {
-       unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
-       unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
-
-       if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
-                         dma_to_mm_pfn(last_vpfn))) {
-               pr_err("Reserving iova failed\n");
-               return -ENOMEM;
-       }
-
-       pr_debug("Mapping reserved region %llx-%llx\n", start, end);
        /*
         * RMRR range might have overlap with physical memory range,
         * clear it first
@@ -2817,45 +2643,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
                                DMA_PTE_READ|DMA_PTE_WRITE);
 }
 
-static int domain_prepare_identity_map(struct device *dev,
-                                      struct dmar_domain *domain,
-                                      unsigned long long start,
-                                      unsigned long long end)
-{
-       /* For _hardware_ passthrough, don't bother. But for software
-          passthrough, we do it anyway -- it may indicate a memory
-          range which is reserved in E820, so which didn't get set
-          up to start with in si_domain */
-       if (domain == si_domain && hw_pass_through) {
-               dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
-                        start, end);
-               return 0;
-       }
-
-       dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
-
-       if (end < start) {
-               WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
-                       "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
-                       dmi_get_system_info(DMI_BIOS_VENDOR),
-                       dmi_get_system_info(DMI_BIOS_VERSION),
-                    dmi_get_system_info(DMI_PRODUCT_VERSION));
-               return -EIO;
-       }
-
-       if (end >> agaw_to_width(domain->agaw)) {
-               WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
-                    "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
-                    agaw_to_width(domain->agaw),
-                    dmi_get_system_info(DMI_BIOS_VENDOR),
-                    dmi_get_system_info(DMI_BIOS_VERSION),
-                    dmi_get_system_info(DMI_PRODUCT_VERSION));
-               return -EIO;
-       }
-
-       return iommu_domain_identity_map(domain, start, end);
-}
-
 static int md_domain_init(struct dmar_domain *domain, int guest_width);
 
 static int __init si_domain_init(int hw)
@@ -2882,7 +2669,8 @@ static int __init si_domain_init(int hw)
 
                for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
                        ret = iommu_domain_identity_map(si_domain,
-                                       PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+                                       mm_to_dma_pfn(start_pfn),
+                                       mm_to_dma_pfn(end_pfn));
                        if (ret)
                                return ret;
                }
@@ -2911,17 +2699,6 @@ static int __init si_domain_init(int hw)
        return 0;
 }
 
-static int identity_mapping(struct device *dev)
-{
-       struct device_domain_info *info;
-
-       info = dev->archdata.iommu;
-       if (info)
-               return (info->domain == si_domain);
-
-       return 0;
-}
-
 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 {
        struct dmar_domain *ndomain;
@@ -3048,31 +2825,6 @@ static int device_def_domain_type(struct device *dev)
 
                if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
                        return IOMMU_DOMAIN_IDENTITY;
-
-               /*
-                * We want to start off with all devices in the 1:1 domain, and
-                * take them out later if we find they can't access all of memory.
-                *
-                * However, we can't do this for PCI devices behind bridges,
-                * because all PCI devices behind the same bridge will end up
-                * with the same source-id on their transactions.
-                *
-                * Practically speaking, we can't change things around for these
-                * devices at run-time, because we can't be sure there'll be no
-                * DMA transactions in flight for any of their siblings.
-                *
-                * So PCI devices (unless they're on the root bus) as well as
-                * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
-                * the 1:1 domain, just in _case_ one of their siblings turns out
-                * not to be able to map all of memory.
-                */
-               if (!pci_is_pcie(pdev)) {
-                       if (!pci_is_root_bus(pdev->bus))
-                               return IOMMU_DOMAIN_DMA;
-                       if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
-                               return IOMMU_DOMAIN_DMA;
-               } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
-                       return IOMMU_DOMAIN_DMA;
        }
 
        return 0;
@@ -3297,6 +3049,85 @@ out_unmap:
        return ret;
 }
 
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+       struct intel_iommu *iommu = data;
+       ioasid_t ioasid;
+
+       if (!iommu)
+               return INVALID_IOASID;
+       /*
+        * VT-d virtual command interface always uses the full 20 bit
+        * PASID range. Host can partition guest PASID range based on
+        * policies but it is out of guest's control.
+        */
+       if (min < PASID_MIN || max > intel_pasid_max_id)
+               return INVALID_IOASID;
+
+       if (vcmd_alloc_pasid(iommu, &ioasid))
+               return INVALID_IOASID;
+
+       return ioasid;
+}
+
+static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
+{
+       struct intel_iommu *iommu = data;
+
+       if (!iommu)
+               return;
+       /*
+        * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
+        * We can only free the PASID when all the devices are unbound.
+        */
+       if (ioasid_find(NULL, ioasid, NULL)) {
+               pr_alert("Cannot free active IOASID %d\n", ioasid);
+               return;
+       }
+       vcmd_free_pasid(iommu, ioasid);
+}
+
+static void register_pasid_allocator(struct intel_iommu *iommu)
+{
+       /*
+        * If we are running in the host, no need for custom allocator
+        * in that PASIDs are allocated from the host system-wide.
+        */
+       if (!cap_caching_mode(iommu->cap))
+               return;
+
+       if (!sm_supported(iommu)) {
+               pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
+               return;
+       }
+
+       /*
+        * Register a custom PASID allocator if we are running in a guest,
+        * guest PASID must be obtained via virtual command interface.
+        * There can be multiple vIOMMUs in each guest but only one allocator
+        * is active. All vIOMMU allocators will eventually be calling the same
+        * host allocator.
+        */
+       if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap))
+               return;
+
+       pr_info("Register custom PASID allocator\n");
+       iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
+       iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
+       iommu->pasid_allocator.pdata = (void *)iommu;
+       if (ioasid_register_allocator(&iommu->pasid_allocator)) {
+               pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
+               /*
+                * Disable scalable mode on this IOMMU if there
+                * is no custom allocator. Mixing SM capable vIOMMU
+                * and non-SM vIOMMU are not supported.
+                */
+               intel_iommu_sm = 0;
+       }
+}
+#endif
+
 static int __init init_dmars(void)
 {
        struct dmar_drhd_unit *drhd;
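register_pasid_allocator() only takes effect when caching mode indicates we run as a guest and the virtual command capability is present; PASID allocation is then funneled to the host through vcmd_alloc_pasid()/vcmd_free_pasid(). Roughly how a consumer-side allocation flows once the allocator is registered; a sketch assuming the generic ioasid_alloc()/ioasid_free() API of this kernel generation, with private_data as a placeholder (illustrative, not part of the patch):

	/* Illustrative: callers keep using the generic IOASID API; the active
	 * custom allocator redirects the work to the host via virtual commands. */
	ioasid_t pasid = ioasid_alloc(NULL, PASID_MIN, intel_pasid_max_id - 1, private_data);

	if (pasid == INVALID_IOASID)
		return -ENOSPC;
	/* ... use the PASID ... */
	ioasid_free(pasid);	/* reaches intel_vcmd_ioasid_free() for vcmd-allocated PASIDs */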
@@ -3414,6 +3245,9 @@ static int __init init_dmars(void)
         */
        for_each_active_iommu(iommu, drhd) {
                iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+               register_pasid_allocator(iommu);
+#endif
                iommu_set_root_entry(iommu);
                iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
                iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
@@ -3531,100 +3365,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
        return iova_pfn;
 }
 
-static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
-{
-       struct dmar_domain *domain, *tmp;
-       struct dmar_rmrr_unit *rmrr;
-       struct device *i_dev;
-       int i, ret;
-
-       /* Device shouldn't be attached by any domains. */
-       domain = find_domain(dev);
-       if (domain)
-               return NULL;
-
-       domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-       if (!domain)
-               goto out;
-
-       /* We have a new domain - setup possible RMRRs for the device */
-       rcu_read_lock();
-       for_each_rmrr_units(rmrr) {
-               for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
-                                         i, i_dev) {
-                       if (i_dev != dev)
-                               continue;
-
-                       ret = domain_prepare_identity_map(dev, domain,
-                                                         rmrr->base_address,
-                                                         rmrr->end_address);
-                       if (ret)
-                               dev_err(dev, "Mapping reserved region failed\n");
-               }
-       }
-       rcu_read_unlock();
-
-       tmp = set_domain_for_dev(dev, domain);
-       if (!tmp || domain != tmp) {
-               domain_exit(domain);
-               domain = tmp;
-       }
-
-out:
-       if (!domain)
-               dev_err(dev, "Allocating domain failed\n");
-       else
-               domain->domain.type = IOMMU_DOMAIN_DMA;
-
-       return domain;
-}
-
-/* Check if the dev needs to go through non-identity map and unmap process.*/
-static bool iommu_need_mapping(struct device *dev)
-{
-       int ret;
-
-       if (iommu_dummy(dev))
-               return false;
-
-       if (unlikely(attach_deferred(dev)))
-               do_deferred_attach(dev);
-
-       ret = identity_mapping(dev);
-       if (ret) {
-               u64 dma_mask = *dev->dma_mask;
-
-               if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
-                       dma_mask = dev->coherent_dma_mask;
-
-               if (dma_mask >= dma_direct_get_required_mask(dev))
-                       return false;
-
-               /*
-                * 32 bit DMA is removed from si_domain and fall back to
-                * non-identity mapping.
-                */
-               dmar_remove_one_dev_info(dev);
-               ret = iommu_request_dma_domain_for_dev(dev);
-               if (ret) {
-                       struct iommu_domain *domain;
-                       struct dmar_domain *dmar_domain;
-
-                       domain = iommu_get_domain_for_dev(dev);
-                       if (domain) {
-                               dmar_domain = to_dmar_domain(domain);
-                               dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
-                       }
-                       dmar_remove_one_dev_info(dev);
-                       get_private_domain_for_dev(dev);
-               }
-
-               dev_info(dev, "32bit DMA uses non-identity mapping\n");
-       }
-
-       return true;
-}
-
 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
                                     size_t size, int dir, u64 dma_mask)
 {
@@ -3638,6 +3378,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
 
        BUG_ON(dir == DMA_NONE);
 
+       if (unlikely(attach_deferred(dev)))
+               do_deferred_attach(dev);
+
        domain = find_domain(dev);
        if (!domain)
                return DMA_MAPPING_ERROR;
@@ -3689,20 +3432,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
                                 enum dma_data_direction dir,
                                 unsigned long attrs)
 {
-       if (iommu_need_mapping(dev))
-               return __intel_map_single(dev, page_to_phys(page) + offset,
-                               size, dir, *dev->dma_mask);
-       return dma_direct_map_page(dev, page, offset, size, dir, attrs);
+       return __intel_map_single(dev, page_to_phys(page) + offset,
+                                 size, dir, *dev->dma_mask);
 }
 
 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
                                     size_t size, enum dma_data_direction dir,
                                     unsigned long attrs)
 {
-       if (iommu_need_mapping(dev))
-               return __intel_map_single(dev, phys_addr, size, dir,
-                               *dev->dma_mask);
-       return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+       return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
 }
 
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3753,17 +3491,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
                             size_t size, enum dma_data_direction dir,
                             unsigned long attrs)
 {
-       if (iommu_need_mapping(dev))
-               intel_unmap(dev, dev_addr, size);
-       else
-               dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+       intel_unmap(dev, dev_addr, size);
 }
 
 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-       if (iommu_need_mapping(dev))
-               intel_unmap(dev, dev_addr, size);
+       intel_unmap(dev, dev_addr, size);
 }
 
 static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3773,8 +3507,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
        struct page *page = NULL;
        int order;
 
-       if (!iommu_need_mapping(dev))
-               return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+       if (unlikely(attach_deferred(dev)))
+               do_deferred_attach(dev);
 
        size = PAGE_ALIGN(size);
        order = get_order(size);
@@ -3809,9 +3543,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
        int order;
        struct page *page = virt_to_page(vaddr);
 
-       if (!iommu_need_mapping(dev))
-               return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-
        size = PAGE_ALIGN(size);
        order = get_order(size);
 
@@ -3829,9 +3560,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
        struct scatterlist *sg;
        int i;
 
-       if (!iommu_need_mapping(dev))
-               return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
-
        for_each_sg(sglist, sg, nelems, i) {
                nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
        }
@@ -3855,8 +3583,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
        struct intel_iommu *iommu;
 
        BUG_ON(dir == DMA_NONE);
-       if (!iommu_need_mapping(dev))
-               return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
+
+       if (unlikely(attach_deferred(dev)))
+               do_deferred_attach(dev);
 
        domain = find_domain(dev);
        if (!domain)
@@ -3903,8 +3632,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
 
 static u64 intel_get_required_mask(struct device *dev)
 {
-       if (!iommu_need_mapping(dev))
-               return dma_direct_get_required_mask(dev);
        return DMA_BIT_MASK(32);
 }
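All of the iommu_need_mapping() checks above can go because DMA-ops selection moves to intel_iommu_probe_finalize() later in this patch: devices that end up identity mapped simply never get intel_dma_ops installed, and the dma-mapping core sends them to dma-direct on its own. A simplified sketch of that core-side dispatch, assumed from the generic dma-mapping code of this kernel generation (not part of this patch):

	/* Assumed, simplified core dispatch: no per-device ops => dma-direct. */
	const struct dma_map_ops *ops = get_dma_ops(dev);

	if (!ops)
		return dma_direct_map_page(dev, page, offset, size, dir, attrs);
	return ops->map_page(dev, page, offset, size, dir, attrs);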
 
@@ -4813,58 +4540,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
                                       unsigned long val, void *v)
 {
        struct memory_notify *mhp = v;
-       unsigned long long start, end;
-       unsigned long start_vpfn, last_vpfn;
+       unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+       unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+                       mhp->nr_pages - 1);
 
        switch (val) {
        case MEM_GOING_ONLINE:
-               start = mhp->start_pfn << PAGE_SHIFT;
-               end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
-               if (iommu_domain_identity_map(si_domain, start, end)) {
-                       pr_warn("Failed to build identity map for [%llx-%llx]\n",
-                               start, end);
+               if (iommu_domain_identity_map(si_domain,
+                                             start_vpfn, last_vpfn)) {
+                       pr_warn("Failed to build identity map for [%lx-%lx]\n",
+                               start_vpfn, last_vpfn);
                        return NOTIFY_BAD;
                }
                break;
 
        case MEM_OFFLINE:
        case MEM_CANCEL_ONLINE:
-               start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
-               last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
-               while (start_vpfn <= last_vpfn) {
-                       struct iova *iova;
+               {
                        struct dmar_drhd_unit *drhd;
                        struct intel_iommu *iommu;
                        struct page *freelist;
 
-                       iova = find_iova(&si_domain->iovad, start_vpfn);
-                       if (iova == NULL) {
-                               pr_debug("Failed get IOVA for PFN %lx\n",
-                                        start_vpfn);
-                               break;
-                       }
-
-                       iova = split_and_remove_iova(&si_domain->iovad, iova,
-                                                    start_vpfn, last_vpfn);
-                       if (iova == NULL) {
-                               pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
-                                       start_vpfn, last_vpfn);
-                               return NOTIFY_BAD;
-                       }
-
-                       freelist = domain_unmap(si_domain, iova->pfn_lo,
-                                              iova->pfn_hi);
+                       freelist = domain_unmap(si_domain,
+                                               start_vpfn, last_vpfn);
 
                        rcu_read_lock();
                        for_each_active_iommu(iommu, drhd)
                                iommu_flush_iotlb_psi(iommu, si_domain,
-                                       iova->pfn_lo, iova_size(iova),
+                                       start_vpfn, mhp->nr_pages,
                                        !freelist, 0);
                        rcu_read_unlock();
                        dma_free_pagelist(freelist);
-
-                       start_vpfn = iova->pfn_hi + 1;
-                       free_iova_mem(iova);
                }
                break;
        }
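The memory notifier now works in DMA PFNs (VTD_PAGE_SIZE granules) instead of byte addresses and unmaps through domain_unmap() directly, since si_domain no longer owns an iova tree. The conversion it relies on is roughly the following (sketch; on x86 with 4K pages PAGE_SHIFT equals VTD_PAGE_SHIFT, so it is an identity conversion):

	/* Sketch of the mm-PFN to DMA-PFN conversion used in this hunk:
	 * mm PFNs count PAGE_SIZE units, DMA PFNs count 4K units. */
	static inline unsigned long example_mm_to_dma_pfn(unsigned long mm_pfn)
	{
		return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
	}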
@@ -4892,8 +4598,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
                for (did = 0; did < cap_ndoms(iommu->cap); did++) {
                        domain = get_iommu_domain(iommu, (u16)did);
 
-                       if (!domain)
+                       if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
                                continue;
+
                        free_cpu_cached_iovas(cpu, &domain->iovad);
                }
        }
@@ -5186,18 +4893,6 @@ int __init intel_iommu_init(void)
        }
        up_write(&dmar_global_lock);
 
-#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
-       /*
-        * If the system has no untrusted device or the user has decided
-        * to disable the bounce page mechanisms, we don't need swiotlb.
-        * Mark this and the pre-allocated bounce pages will be released
-        * later.
-        */
-       if (!has_untrusted_dev() || intel_no_bounce)
-               swiotlb = 0;
-#endif
-       dma_ops = &intel_dma_ops;
-
        init_iommu_pm_ops();
 
        down_read(&dmar_global_lock);
@@ -5283,10 +4978,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
        if (info->dev) {
                if (dev_is_pci(info->dev) && sm_supported(iommu))
                        intel_pasid_tear_down_entry(iommu, info->dev,
-                                       PASID_RID2PASID);
+                                       PASID_RID2PASID, false);
 
                iommu_disable_dev_iotlb(info);
-               domain_context_clear(iommu, info->dev);
+               if (!dev_is_real_dma_subdevice(info->dev))
+                       domain_context_clear(iommu, info->dev);
                intel_pasid_free_table(info->dev);
        }
 
@@ -5296,12 +4992,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
        domain_detach_iommu(domain, iommu);
        spin_unlock_irqrestore(&iommu->lock, flags);
 
-       /* free the private domain */
-       if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
-           !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
-           list_empty(&domain->devices))
-               domain_exit(info->domain);
-
        free_devinfo_mem(info);
 }
 
@@ -5311,9 +5001,8 @@ static void dmar_remove_one_dev_info(struct device *dev)
        unsigned long flags;
 
        spin_lock_irqsave(&device_domain_lock, flags);
-       info = dev->archdata.iommu;
-       if (info && info != DEFER_DEVICE_DOMAIN_INFO
-           && info != DUMMY_DEVICE_DOMAIN_INFO)
+       info = get_domain_info(dev);
+       if (info)
                __dmar_remove_one_dev_info(info);
        spin_unlock_irqrestore(&device_domain_lock, flags);
 }
@@ -5322,9 +5011,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
        int adjust_width;
 
-       init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-       domain_reserve_special_ranges(domain);
-
        /* calculate AGAW */
        domain->gaw = guest_width;
        adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -5343,11 +5029,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
        return 0;
 }
 
+static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
+{
+       init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+       copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
+
+       if (!intel_iommu_strict &&
+           init_iova_flush_queue(&dmar_domain->iovad,
+                                 iommu_flush_iova, iova_entry_free))
+               pr_info("iova flush queue initialization failed\n");
+}
+
 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 {
        struct dmar_domain *dmar_domain;
        struct iommu_domain *domain;
-       int ret;
 
        switch (type) {
        case IOMMU_DOMAIN_DMA:
@@ -5364,13 +5060,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
                        return NULL;
                }
 
-               if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
-                       ret = init_iova_flush_queue(&dmar_domain->iovad,
-                                                   iommu_flush_iova,
-                                                   iova_entry_free);
-                       if (ret)
-                               pr_info("iova flush queue initialization failed\n");
-               }
+               if (type == IOMMU_DOMAIN_DMA)
+                       intel_init_iova_domain(dmar_domain);
 
                domain_update_iommu_cap(dmar_domain);
 
@@ -5403,7 +5094,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
 static inline bool
 is_aux_domain(struct device *dev, struct iommu_domain *domain)
 {
-       struct device_domain_info *info = dev->archdata.iommu;
+       struct device_domain_info *info = get_domain_info(dev);
 
        return info && info->auxd_enabled &&
                        domain->type == IOMMU_DOMAIN_UNMANAGED;
@@ -5412,7 +5103,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain)
 static void auxiliary_link_device(struct dmar_domain *domain,
                                  struct device *dev)
 {
-       struct device_domain_info *info = dev->archdata.iommu;
+       struct device_domain_info *info = get_domain_info(dev);
 
        assert_spin_locked(&device_domain_lock);
        if (WARN_ON(!info))
@@ -5425,7 +5116,7 @@ static void auxiliary_link_device(struct dmar_domain *domain,
 static void auxiliary_unlink_device(struct dmar_domain *domain,
                                    struct device *dev)
 {
-       struct device_domain_info *info = dev->archdata.iommu;
+       struct device_domain_info *info = get_domain_info(dev);
 
        assert_spin_locked(&device_domain_lock);
        if (WARN_ON(!info))
@@ -5513,13 +5204,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
                return;
 
        spin_lock_irqsave(&device_domain_lock, flags);
-       info = dev->archdata.iommu;
+       info = get_domain_info(dev);
        iommu = info->iommu;
 
        auxiliary_unlink_device(domain, dev);
 
        spin_lock(&iommu->lock);
-       intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+       intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
        domain_detach_iommu(domain, iommu);
        spin_unlock(&iommu->lock);
 
@@ -5626,6 +5317,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
        aux_domain_remove_dev(to_dmar_domain(domain), dev);
 }
 
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices guest
+ * owns the first level page tables. Invalidations of translation caches in the
+ * guest are trapped and passed down to the host.
+ *
+ * vIOMMU in the guest will only expose first level page tables, therefore
+ * we do not support IOTLB granularity for request without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by iommu cache type
+ * Y: indexed by enum iommu_inv_granularity
+ * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+       /*
+        * PASID based IOTLB invalidation: PASID selective (per PASID),
+        * page selective (address granularity)
+        */
+       {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+       /* PASID based dev TLBs */
+       {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+       /* PASID cache */
+       {-EINVAL, -EINVAL, -EINVAL}
+};
+
+static inline int to_vtd_granularity(int type, int granu)
+{
+       return inv_type_granu_table[type][granu];
+}
+
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+       u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+
+       /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+        * IOMMU cache invalidate API passes granu_size in bytes, and number of
+        * granu size in contiguous memory.
+        */
+       return order_base_2(nr_pages);
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+                          struct iommu_cache_invalidate_info *inv_info)
+{
+       struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+       struct device_domain_info *info;
+       struct intel_iommu *iommu;
+       unsigned long flags;
+       int cache_type;
+       u8 bus, devfn;
+       u16 did, sid;
+       int ret = 0;
+       u64 size = 0;
+
+       if (!inv_info || !dmar_domain ||
+           inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+               return -EINVAL;
+
+       if (!dev || !dev_is_pci(dev))
+               return -ENODEV;
+
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu)
+               return -ENODEV;
+
+       if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+               return -EINVAL;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       spin_lock(&iommu->lock);
+       info = get_domain_info(dev);
+       if (!info) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+       did = dmar_domain->iommu_did[iommu->seq_id];
+       sid = PCI_DEVID(bus, devfn);
+
+       /* Size is only valid in address selective invalidation */
+       if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
+               size = to_vtd_size(inv_info->addr_info.granule_size,
+                                  inv_info->addr_info.nb_granules);
+
+       for_each_set_bit(cache_type,
+                        (unsigned long *)&inv_info->cache,
+                        IOMMU_CACHE_INV_TYPE_NR) {
+               int granu = 0;
+               u64 pasid = 0;
+
+               granu = to_vtd_granularity(cache_type, inv_info->granularity);
+               if (granu == -EINVAL) {
+                       pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+                                          cache_type, inv_info->granularity);
+                       break;
+               }
+
+               /*
+                * PASID is stored in different locations based on the
+                * granularity.
+                */
+               if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+                   (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+                       pasid = inv_info->pasid_info.pasid;
+               else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+                        (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+                       pasid = inv_info->addr_info.pasid;
+
+               switch (BIT(cache_type)) {
+               case IOMMU_CACHE_INV_TYPE_IOTLB:
+                       if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+                           size &&
+                           (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+                               pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+                                                  inv_info->addr_info.addr, size);
+                               ret = -ERANGE;
+                               goto out_unlock;
+                       }
+
+                       /*
+                        * If granu is PASID-selective, address is ignored.
+                        * We use npages = -1 to indicate that.
+                        */
+                       qi_flush_piotlb(iommu, did, pasid,
+                                       mm_to_dma_pfn(inv_info->addr_info.addr),
+                                       (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
+                                       inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+                       /*
+                        * Always flush device IOTLB if ATS is enabled. vIOMMU
+                        * in the guest may assume IOTLB flush is inclusive,
+                        * which is more efficient.
+                        */
+                       if (info->ats_enabled)
+                               qi_flush_dev_iotlb_pasid(iommu, sid,
+                                               info->pfsid, pasid,
+                                               info->ats_qdep,
+                                               inv_info->addr_info.addr,
+                                               size, granu);
+                       break;
+               case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+                       if (info->ats_enabled)
+                               qi_flush_dev_iotlb_pasid(iommu, sid,
+                                               info->pfsid, pasid,
+                                               info->ats_qdep,
+                                               inv_info->addr_info.addr,
+                                               size, granu);
+                       else
+                               pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+                       break;
+               default:
+                       dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+                                           cache_type);
+                       ret = -EINVAL;
+               }
+       }
+out_unlock:
+       spin_unlock(&iommu->lock);
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return ret;
+}
+#endif
+
 static int intel_iommu_map(struct iommu_domain *domain,
                           unsigned long iova, phys_addr_t hpa,
                           size_t size, int iommu_prot, gfp_t gfp)
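In the invalidation path above, to_vtd_granularity() maps a (cache type, generic granularity) pair onto the queued-invalidation granularity codes, with -EINVAL marking combinations the driver rejects, and to_vtd_size() turns the request's (granule size, granule count) into VT-d's power-of-two page-count encoding. A worked example of the size conversion (plain arithmetic, for illustration):

	/* Example: 512 contiguous 4KiB granules.
	 *   nr_pages = (4096 * 512) >> VTD_PAGE_SHIFT = 512
	 *   order_base_2(512) = 9, i.e. the 2MB encoding named in the comment. */
	u64 size = to_vtd_size(SZ_4K, 512);	/* == 9 */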
@@ -5781,78 +5642,22 @@ static bool intel_iommu_capable(enum iommu_cap cap)
        return false;
 }
 
-static int intel_iommu_add_device(struct device *dev)
+static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 {
-       struct dmar_domain *dmar_domain;
-       struct iommu_domain *domain;
        struct intel_iommu *iommu;
-       struct iommu_group *group;
        u8 bus, devfn;
-       int ret;
 
        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
-               return -ENODEV;
-
-       iommu_device_link(&iommu->iommu, dev);
+               return ERR_PTR(-ENODEV);
 
        if (translation_pre_enabled(iommu))
                dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
 
-       group = iommu_group_get_for_dev(dev);
-
-       if (IS_ERR(group)) {
-               ret = PTR_ERR(group);
-               goto unlink;
-       }
-
-       iommu_group_put(group);
-
-       domain = iommu_get_domain_for_dev(dev);
-       dmar_domain = to_dmar_domain(domain);
-       if (domain->type == IOMMU_DOMAIN_DMA) {
-               if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
-                       ret = iommu_request_dm_for_dev(dev);
-                       if (ret) {
-                               dmar_remove_one_dev_info(dev);
-                               dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
-                               domain_add_dev_info(si_domain, dev);
-                               dev_info(dev,
-                                        "Device uses a private identity domain.\n");
-                       }
-               }
-       } else {
-               if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
-                       ret = iommu_request_dma_domain_for_dev(dev);
-                       if (ret) {
-                               dmar_remove_one_dev_info(dev);
-                               dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
-                               if (!get_private_domain_for_dev(dev)) {
-                                       dev_warn(dev,
-                                                "Failed to get a private domain.\n");
-                                       ret = -ENOMEM;
-                                       goto unlink;
-                               }
-
-                               dev_info(dev,
-                                        "Device uses a private dma domain.\n");
-                       }
-               }
-       }
-
-       if (device_needs_bounce(dev)) {
-               dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
-               set_dma_ops(dev, &bounce_dma_ops);
-       }
-
-       return 0;
-
-unlink:
-       iommu_device_unlink(&iommu->iommu, dev);
-       return ret;
+       return &iommu->iommu;
 }
 
-static void intel_iommu_remove_device(struct device *dev)
+static void intel_iommu_release_device(struct device *dev)
 {
        struct intel_iommu *iommu;
        u8 bus, devfn;
@@ -5863,11 +5668,19 @@ static void intel_iommu_remove_device(struct device *dev)
 
        dmar_remove_one_dev_info(dev);
 
-       iommu_group_remove_device(dev);
+       set_dma_ops(dev, NULL);
+}
 
-       iommu_device_unlink(&iommu->iommu, dev);
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+       struct iommu_domain *domain;
 
+       domain = iommu_get_domain_for_dev(dev);
        if (device_needs_bounce(dev))
+               set_dma_ops(dev, &bounce_dma_ops);
+       else if (domain && domain->type == IOMMU_DOMAIN_DMA)
+               set_dma_ops(dev, &intel_dma_ops);
+       else
                set_dma_ops(dev, NULL);
 }
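With the switch from add_device/remove_device to probe_device/release_device, group allocation, sysfs linking and default-domain selection are handled by the iommu core; the driver only hands back its struct iommu_device and then chooses DMA ops in probe_finalize() once the default domain is attached. The assumed core-side sequence, heavily simplified (for orientation only, not part of this patch):

	/* Assumed, simplified core flow around the new callbacks: */
	iommu_dev = ops->probe_device(dev);		/* intel_iommu_probe_device() */
	if (IS_ERR(iommu_dev))
		return PTR_ERR(iommu_dev);
	/* core joins/creates the iommu_group and attaches the default domain */
	if (ops->probe_finalize)
		ops->probe_finalize(dev);		/* intel_iommu_probe_finalize(): pick dma_ops */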
 
@@ -5945,7 +5758,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
        spin_lock(&iommu->lock);
 
        ret = -EINVAL;
-       info = dev->archdata.iommu;
+       info = get_domain_info(dev);
        if (!info || !info->pasid_supported)
                goto out;
 
@@ -6041,7 +5854,7 @@ static int intel_iommu_enable_auxd(struct device *dev)
                return -ENODEV;
 
        spin_lock_irqsave(&device_domain_lock, flags);
-       info = dev->archdata.iommu;
+       info = get_domain_info(dev);
        info->auxd_enabled = 1;
        spin_unlock_irqrestore(&device_domain_lock, flags);
 
@@ -6054,7 +5867,7 @@ static int intel_iommu_disable_auxd(struct device *dev)
        unsigned long flags;
 
        spin_lock_irqsave(&device_domain_lock, flags);
-       info = dev->archdata.iommu;
+       info = get_domain_info(dev);
        if (!WARN_ON(!info))
                info->auxd_enabled = 0;
        spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -6107,6 +5920,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
                return !!siov_find_pci_dvsec(to_pci_dev(dev));
        }
 
+       if (feat == IOMMU_DEV_FEAT_SVA) {
+               struct device_domain_info *info = get_domain_info(dev);
+
+               return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
+                       info->pasid_supported && info->pri_supported &&
+                       info->ats_supported;
+       }
+
        return false;
 }
 
@@ -6116,6 +5937,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
        if (feat == IOMMU_DEV_FEAT_AUX)
                return intel_iommu_enable_auxd(dev);
 
+       if (feat == IOMMU_DEV_FEAT_SVA) {
+               struct device_domain_info *info = get_domain_info(dev);
+
+               if (!info)
+                       return -EINVAL;
+
+               if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
+                       return 0;
+       }
+
        return -ENODEV;
 }
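IOMMU_DEV_FEAT_SVA is reported only when the device has PASID, PRI and ATS support and sits behind an SVM-capable IOMMU. A client driver would typically negotiate the feature through the generic API before binding an mm; a hedged usage sketch (iommu_dev_has_feature()/iommu_dev_enable_feature() are assumed from the generic IOMMU API of this kernel generation, not defined in this patch):

	/* Illustrative SVA feature negotiation from a client driver: */
	if (!iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_SVA))
		return -ENODEV;

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
	if (ret)
		return ret;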
 
@@ -6131,7 +5962,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
 static bool
 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
 {
-       struct device_domain_info *info = dev->archdata.iommu;
+       struct device_domain_info *info = get_domain_info(dev);
 
        if (feat == IOMMU_DEV_FEAT_AUX)
                return scalable_mode_support() && info && info->auxd_enabled;
@@ -6198,8 +6029,9 @@ const struct iommu_ops intel_iommu_ops = {
        .map                    = intel_iommu_map,
        .unmap                  = intel_iommu_unmap,
        .iova_to_phys           = intel_iommu_iova_to_phys,
-       .add_device             = intel_iommu_add_device,
-       .remove_device          = intel_iommu_remove_device,
+       .probe_device           = intel_iommu_probe_device,
+       .probe_finalize         = intel_iommu_probe_finalize,
+       .release_device         = intel_iommu_release_device,
        .get_resv_regions       = intel_iommu_get_resv_regions,
        .put_resv_regions       = generic_iommu_put_resv_regions,
        .apply_resv_region      = intel_iommu_apply_resv_region,
@@ -6209,7 +6041,16 @@ const struct iommu_ops intel_iommu_ops = {
        .dev_enable_feat        = intel_iommu_dev_enable_feat,
        .dev_disable_feat       = intel_iommu_dev_disable_feat,
        .is_attach_deferred     = intel_iommu_is_attach_deferred,
+       .def_domain_type        = device_def_domain_type,
        .pgsize_bitmap          = INTEL_IOMMU_PGSIZES,
+#ifdef CONFIG_INTEL_IOMMU_SVM
+       .cache_invalidate       = intel_iommu_sva_invalidate,
+       .sva_bind_gpasid        = intel_svm_bind_gpasid,
+       .sva_unbind_gpasid      = intel_svm_unbind_gpasid,
+       .sva_bind               = intel_svm_bind,
+       .sva_unbind             = intel_svm_unbind,
+       .sva_get_pasid          = intel_svm_get_pasid,
+#endif
 };
 
 static void quirk_iommu_igfx(struct pci_dev *dev)
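The sva_bind/sva_unbind/sva_get_pasid ops added above back the native SVA path, while cache_invalidate and sva_bind_gpasid/sva_unbind_gpasid serve the nested (guest SVA) path. Typical native usage from a client driver would look roughly like this; iommu_sva_bind_device() and friends are assumed from the generic SVA API of this kernel generation (illustrative only):

	/* Illustrative native-SVA usage built on the ops wired up above: */
	struct iommu_sva *handle;
	int pasid;

	handle = iommu_sva_bind_device(dev, current->mm, NULL);	/* -> intel_svm_bind() */
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	pasid = iommu_sva_get_pasid(handle);	/* program the PASID into the device */
	/* ... device issues PASID-tagged DMA against current->mm ... */
	iommu_sva_unbind_device(handle);	/* -> intel_svm_unbind() */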