Merge tag 'iommu-updates-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 17:44:35 +0000 (10:44 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 17:44:35 +0000 (10:44 -0700)
Pull iommu updates from Joerg Roedel:

 - New DART IOMMU driver for Apple Silicon M1 chips

 - Optimizations for iommu_map() and iommu_unmap() performance

 - Selective TLB flush support for the AMD IOMMU driver to make it more
   efficient on emulated IOMMUs (the generic batched-unmap flow this
   builds on is sketched after this list)

 - Rework IOVA setup and default domain type setting to move more code
   out of IOMMU drivers and to support runtime switching between certain
   types of default domains

 - VT-d Updates from Lu Baolu:
      - Update the virtual command related registers
      - Enable Intel IOMMU scalable mode by default
      - Preset A/D bits for user space DMA usage
      - Allow devices to have more than 32 outstanding PRs
      - Various cleanups

 - ARM SMMU Updates from Will Deacon:
      SMMUv3:
       - Minor optimisation to avoid zeroing struct members on CMD submission
       - Increased use of batched commands to reduce submission latency
       - Refactoring in preparation for ECMDQ support
      SMMUv2:
       - Fix races when probing devices with identical StreamIDs
       - Optimise walk cache flushing for Qualcomm implementations
       - Allow deep sleep states for some Qualcomm SoCs with shared clocks

 - Various smaller optimizations, cleanups, and fixes
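
The batched-unmap flow referenced above is the generic iommu_iotlb_gather
mechanism: callers accumulate invalidations while unmapping and issue a
single flush at the end. A minimal, hedged sketch of a consumer (the
wrapper function name is made up for illustration; domain, iova and size
are assumed to come from the caller):

	#include <linux/iommu.h>

	static size_t example_unmap_batched(struct iommu_domain *domain,
					    unsigned long iova, size_t size)
	{
		struct iommu_iotlb_gather gather;
		size_t unmapped;

		iommu_iotlb_gather_init(&gather);
		/* Accumulate invalidations instead of flushing per page */
		unmapped = iommu_unmap_fast(domain, iova, size, &gather);
		/* One TLB flush covers the whole gathered range */
		iommu_iotlb_sync(domain, &gather);

		return unmapped;
	}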

* tag 'iommu-updates-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (85 commits)
  iommu/io-pgtable: Abstract iommu_iotlb_gather access
  iommu/arm-smmu: Fix missing unlock on error in arm_smmu_device_group()
  iommu/vt-d: Add present bit check in pasid entry setup helpers
  iommu/vt-d: Use pasid_pte_is_present() helper function
  iommu/vt-d: Drop the kernel doc annotation
  iommu/vt-d: Allow devices to have more than 32 outstanding PRs
  iommu/vt-d: Preset A/D bits for user space DMA usage
  iommu/vt-d: Enable Intel IOMMU scalable mode by default
  iommu/vt-d: Refactor Kconfig a bit
  iommu/vt-d: Remove unnecessary oom message
  iommu/vt-d: Update the virtual command related registers
  iommu: Allow enabling non-strict mode dynamically
  iommu: Merge strictness and domain type configs
  iommu: Only log strictness for DMA domains
  iommu: Expose DMA domain strictness via sysfs
  iommu: Express DMA strictness via the domain type
  iommu/vt-d: Prepare for multiple DMA domain types
  iommu/arm-smmu: Prepare for multiple DMA domain types
  iommu/amd: Prepare for multiple DMA domain types
  iommu: Introduce explicit type for non-strict DMA domains
  ...

43 files changed:
Documentation/ABI/testing/sysfs-kernel-iommu_groups
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/iommu/apple,dart.yaml [new file with mode: 0644]
MAINTAINERS
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd/amd_iommu_types.h
drivers/iommu/amd/init.c
drivers/iommu/amd/io_pgtable.c
drivers/iommu/amd/iommu.c
drivers/iommu/amd/iommu_v2.c
drivers/iommu/apple-dart.c [new file with mode: 0644]
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
drivers/iommu/arm/arm-smmu/arm-smmu.c
drivers/iommu/arm/arm-smmu/arm-smmu.h
drivers/iommu/arm/arm-smmu/qcom_iommu.c
drivers/iommu/dma-iommu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel/Kconfig
drivers/iommu/intel/dmar.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/pasid.c
drivers/iommu/intel/pasid.h
drivers/iommu/intel/perf.c
drivers/iommu/intel/svm.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/io-pgtable.c
drivers/iommu/iommu.c
drivers/iommu/iova.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/iommu/rockchip-iommu.c
drivers/iommu/sprd-iommu.c
drivers/iommu/sun50i-iommu.c
drivers/iommu/virtio-iommu.c
include/linux/dma-iommu.h
include/linux/intel-iommu.h
include/linux/intel-svm.h
include/linux/io-pgtable.h
include/linux/iommu.h

diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index eae2f1c..b15af6a 100644
@@ -42,8 +42,12 @@ Description: /sys/kernel/iommu_groups/<grp_id>/type shows the type of default
                ========  ======================================================
                DMA       All the DMA transactions from the device in this group
                          are translated by the iommu.
+               DMA-FQ    As above, but using batched invalidation to lazily
+                         remove translations after use. This may offer reduced
+                         overhead at the cost of reduced memory protection.
                identity  All the DMA transactions from the device in this group
-                         are not translated by the iommu.
+                         are not translated by the iommu. Maximum performance
+                         but zero protection.
                auto      Change to the type the device was booted with.
                ========  ======================================================
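
As a rough userspace sketch of driving this attribute (the group number is
illustrative, and whether a given transition is accepted depends on the
kernel and on the devices in the group):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/kernel/iommu_groups/0/type";
		char buf[32] = { 0 };
		int fd;

		fd = open(path, O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf) - 1) > 0)
			printf("current default domain type: %s", buf);
		close(fd);

		/* Request lazy (flush-queue) invalidation for this group */
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return 1;
		if (write(fd, "DMA-FQ", strlen("DMA-FQ")) < 0)
			perror("write DMA-FQ");
		close(fd);
		return 0;
	}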
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 84dc579..828d114 100644
        amd_iommu=      [HW,X86-64]
                        Pass parameters to the AMD IOMMU driver in the system.
                        Possible values are:
-                       fullflush - enable flushing of IO/TLB entries when
-                                   they are unmapped. Otherwise they are
-                                   flushed before they will be reused, which
-                                   is a lot of faster
+                       fullflush - Deprecated, equivalent to iommu.strict=1
                        off       - do not initialize any AMD IOMMU found in
                                    the system
                        force_isolation - Force device isolation for all
                        this case, gfx device will use physical address for
                        DMA.
                strict [Default Off]
-                       With this option on every unmap_single operation will
-                       result in a hardware IOTLB flush operation as opposed
-                       to batching them for performance.
+                       Deprecated, equivalent to iommu.strict=1.
                sp_off [Default Off]
                        By default, super page will be supported if Intel IOMMU
                        has the capability. With this option, super page will
                        not be supported.
-               sm_on [Default Off]
-                       By default, scalable mode will be disabled even if the
-                       hardware advertises that it has support for the scalable
-                       mode translation. With this option set, scalable mode
-                       will be used on hardware which claims to support it.
+               sm_on
+                       Enable the Intel IOMMU scalable mode if the hardware
+                       advertises that it has support for the scalable mode
+                       translation.
+               sm_off
+                       Disallow use of the Intel IOMMU scalable mode.
                tboot_noforce [Default Off]
                        Do not force the Intel IOMMU enabled under tboot.
                        By default, tboot will force Intel IOMMU on, which
                          throughput at the cost of reduced device isolation.
                          Will fall back to strict mode if not supported by
                          the relevant IOMMU driver.
-                       1 - Strict mode (default).
+                       1 - Strict mode.
                          DMA unmap operations invalidate IOMMU hardware TLBs
                          synchronously.
-                       Note: on x86, the default behaviour depends on the
-                       equivalent driver-specific parameters, but a strict
-                       mode explicitly specified by either method takes
-                       precedence.
+                       unset - Use value of CONFIG_IOMMU_DEFAULT_DMA_{LAZY,STRICT}.
+                       Note: on x86, strict mode specified via one of the
+                       legacy driver-specific options takes precedence.
 
        iommu.passthrough=
                        [ARM64, X86] Configure DMA to bypass the IOMMU by default.
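
The deprecation notes above all funnel into the same core knob. A hedged
sketch of how a legacy, driver-specific option can be folded into it (the
option name and handler below are illustrative, not the tree's actual
parsers; iommu_set_dma_strict() is the real helper used by the AMD change
further down):

	#include <linux/init.h>
	#include <linux/iommu.h>
	#include <linux/printk.h>
	#include <linux/string.h>

	static int __init example_parse_legacy_iommu_opt(char *str)
	{
		if (str && !strncmp(str, "fullflush", 9)) {
			pr_warn("legacy fullflush option is deprecated, use iommu.strict=1\n");
			/* Force strict invalidation regardless of the build-time default */
			iommu_set_dma_strict();
		}
		return 1;
	}
	__setup("example_iommu=", example_parse_legacy_iommu_opt);
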
diff --git a/Documentation/devicetree/bindings/iommu/apple,dart.yaml b/Documentation/devicetree/bindings/iommu/apple,dart.yaml
new file mode 100644
index 0000000..94aa9e9
--- /dev/null
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/apple,dart.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Apple DART IOMMU
+
+maintainers:
+  - Sven Peter <sven@svenpeter.dev>
+
+description: |+
+  Apple SoCs may contain an implementation of their Device Address
+  Resolution Table which provides a mandatory layer of address
+  translations for various masters.
+
+  Each DART instance is capable of handling up to 16 different streams
+  with individual pagetables and page-level read/write protection flags.
+
+  This DART IOMMU also raises interrupts in response to various
+  fault conditions.
+
+properties:
+  compatible:
+    const: apple,t8103-dart
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    description:
+      Reference to the gate clock phandle if required for this IOMMU.
+      Optional since not all IOMMUs are attached to a clock gate.
+
+  '#iommu-cells':
+    const: 1
+    description:
+      Has to be one. The single cell describes the stream id emitted by
+      a master to the IOMMU.
+
+required:
+  - compatible
+  - reg
+  - '#iommu-cells'
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |+
+    dart1: iommu@82f80000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f80000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+
+    master1 {
+      iommus = <&dart1 0>;
+    };
+
+  - |+
+    dart2a: iommu@82f00000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f00000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+    dart2b: iommu@82f80000 {
+      compatible = "apple,t8103-dart";
+      reg = <0x82f80000 0x4000>;
+      interrupts = <1 781 4>;
+      #iommu-cells = <1>;
+    };
+
+    master2 {
+      iommus = <&dart2a 0>, <&dart2b 1>;
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index fb1c48c..92d10ac 100644
@@ -1268,6 +1268,13 @@ L:       linux-input@vger.kernel.org
 S:     Odd fixes
 F:     drivers/input/mouse/bcm5974.c
 
+APPLE DART IOMMU DRIVER
+M:     Sven Peter <sven@svenpeter.dev>
+L:     iommu@lists.linux-foundation.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/iommu/apple,dart.yaml
+F:     drivers/iommu/apple-dart.c
+
 APPLE SMC DRIVER
 M:     Henrik Rydberg <rydberg@bitmath.org>
 L:     linux-hwmon@vger.kernel.org
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f61516c..8ad8618 100644
@@ -79,16 +79,57 @@ config IOMMU_DEBUGFS
          debug/iommu directory, and then populate a subdirectory with
          entries as required.
 
-config IOMMU_DEFAULT_PASSTHROUGH
-       bool "IOMMU passthrough by default"
+choice
+       prompt "IOMMU default domain type"
        depends on IOMMU_API
+       default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU
+       default IOMMU_DEFAULT_DMA_STRICT
        help
-         Enable passthrough by default, removing the need to pass in
-         iommu.passthrough=on or iommu=pt through command line. If this
-         is enabled, you can still disable with iommu.passthrough=off
-         or iommu=nopt depending on the architecture.
+         Choose the type of IOMMU domain used to manage DMA API usage by
+         device drivers. The options here typically represent different
+         levels of tradeoff between robustness/security and performance,
+         depending on the IOMMU driver. Not all IOMMUs support all options.
+         This choice can be overridden at boot via the command line, and for
+         some devices also at runtime via sysfs.
 
-         If unsure, say N here.
+         If unsure, keep the default.
+
+config IOMMU_DEFAULT_DMA_STRICT
+       bool "Translated - Strict"
+       help
+         Trusted devices use translation to restrict their access to only
+         DMA-mapped pages, with strict TLB invalidation on unmap. Equivalent
+         to passing "iommu.passthrough=0 iommu.strict=1" on the command line.
+
+         Untrusted devices always use this mode, with an additional layer of
+         bounce-buffering such that they cannot gain access to any unrelated
+         data within a mapped page.
+
+config IOMMU_DEFAULT_DMA_LAZY
+       bool "Translated - Lazy"
+       help
+         Trusted devices use translation to restrict their access to only
+         DMA-mapped pages, but with "lazy" batched TLB invalidation. This
+         mode allows higher performance with some IOMMUs due to reduced TLB
+         flushing, but at the cost of reduced isolation since devices may be
+         able to access memory for some time after it has been unmapped.
+         Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the
+         command line.
+
+         If this mode is not supported by the IOMMU driver, the effective
+         runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+
+config IOMMU_DEFAULT_PASSTHROUGH
+       bool "Passthrough"
+       help
+         Trusted devices are identity-mapped, giving them unrestricted access
+         to memory with minimal performance overhead. Equivalent to passing
+         "iommu.passthrough=1" (historically "iommu=pt") on the command line.
+
+         If this mode is not supported by the IOMMU driver, the effective
+         runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+
+endchoice
 
 config OF_IOMMU
        def_bool y
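
A hedged sketch of how the core can turn the default domain type choice
above into its runtime defaults (the variable names are illustrative, not
the exact contents of drivers/iommu/iommu.c):

	#include <linux/cache.h>
	#include <linux/iommu.h>
	#include <linux/kconfig.h>

	static unsigned int example_def_domain_type __read_mostly =
		IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH) ?
			IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA;
	static bool example_dma_strict __read_mostly =
		IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
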
@@ -249,6 +290,20 @@ config SPAPR_TCE_IOMMU
          Enables bits of IOMMU API required by VFIO. The iommu_ops
          is not implemented as it is not necessary for VFIO.
 
+config APPLE_DART
+       tristate "Apple DART IOMMU Support"
+       depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64)
+       select IOMMU_API
+       select IOMMU_IO_PGTABLE_LPAE
+       default ARCH_APPLE
+       help
+         Support for Apple DART (Device Address Resolution Table) IOMMUs
+         found in Apple ARM SoCs like the M1.
+         This IOMMU is required for most peripherals using DMA to access
+         the main memory.
+
+         Say Y here if you are using an Apple SoC.
+
 # ARM IOMMU support
 config ARM_SMMU
        tristate "ARM Ltd. System MMU (SMMU) Support"
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c0fb0ba..bc7f730 100644
@@ -29,3 +29,4 @@ obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
+obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 94c1a7a..8dbe61e 100644
@@ -779,12 +779,6 @@ extern u16 amd_iommu_last_bdf;
 /* allocation bitmap for domain ids */
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
-/*
- * If true, the addresses will be flushed on unmap time, not when
- * they are reused
- */
-extern bool amd_iommu_unmap_flush;
-
 /* Smallest max PASID supported by any IOMMU in the system */
 extern u32 amd_iommu_max_pasid;
 
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 46280e6..bdcf167 100644
@@ -161,7 +161,6 @@ u16 amd_iommu_last_bdf;                     /* largest PCI device id we have
                                           to handle */
 LIST_HEAD(amd_iommu_unity_map);                /* a list of required unity mappings
                                           we find in ACPI */
-bool amd_iommu_unmap_flush;            /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);             /* list of all AMD IOMMUs in the
                                           system */
@@ -1850,8 +1849,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
        if (ret)
                return ret;
 
-       if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+       if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
+               pr_info("Using strict mode due to virtualization\n");
+               iommu_set_dma_strict();
                amd_iommu_np_cache = true;
+       }
 
        init_iommu_perf_ctr(iommu);
 
@@ -3098,8 +3100,10 @@ static int __init parse_amd_iommu_intr(char *str)
 static int __init parse_amd_iommu_options(char *str)
 {
        for (; *str; ++str) {
-               if (strncmp(str, "fullflush", 9) == 0)
-                       amd_iommu_unmap_flush = true;
+               if (strncmp(str, "fullflush", 9) == 0) {
+                       pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
+                       iommu_set_dma_strict();
+               }
                if (strncmp(str, "force_enable", 12) == 0)
                        amd_iommu_force_enable = true;
                if (strncmp(str, "off", 3) == 0)
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index bb0ee5c..182c93a 100644
@@ -493,9 +493,6 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
        unsigned long offset_mask, pte_pgsize;
        u64 *pte, __pte;
 
-       if (pgtable->mode == PAGE_MODE_NONE)
-               return iova;
-
        pte = fetch_pte(pgtable, iova, &pte_pgsize);
 
        if (!pte || !IOMMU_PTE_PRESENT(*pte))
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 811a49a..1722bb1 100644
@@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
        if (pdev)
                dev_data = dev_iommu_priv_get(&pdev->dev);
 
-       if (dev_data && __ratelimit(&dev_data->rs)) {
-               pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
-                       vmg_tag, spa, flags);
+       if (dev_data) {
+               if (__ratelimit(&dev_data->rs)) {
+                       pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+                               vmg_tag, spa, flags);
+               }
        } else {
                pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
                        PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
        if (pdev)
                dev_data = dev_iommu_priv_get(&pdev->dev);
 
-       if (dev_data && __ratelimit(&dev_data->rs)) {
-               pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
-                       vmg_tag, gpa, flags_rmp, flags);
+       if (dev_data) {
+               if (__ratelimit(&dev_data->rs)) {
+                       pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+                               vmg_tag, gpa, flags_rmp, flags);
+               }
        } else {
                pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
                        PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
        if (pdev)
                dev_data = dev_iommu_priv_get(&pdev->dev);
 
-       if (dev_data && __ratelimit(&dev_data->rs)) {
-               pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
-                       domain_id, address, flags);
-       } else if (printk_ratelimit()) {
-               pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+       if (dev_data) {
+               if (__ratelimit(&dev_data->rs)) {
+                       pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
+                               domain_id, address, flags);
+               }
+       } else {
+               pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
                        PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
                        domain_id, address, flags);
        }
@@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
 }
 
 static void domain_flush_pages(struct protection_domain *domain,
-                              u64 address, size_t size)
+                              u64 address, size_t size, int pde)
 {
-       __domain_flush_pages(domain, address, size, 0);
+       if (likely(!amd_iommu_np_cache)) {
+               __domain_flush_pages(domain, address, size, pde);
+               return;
+       }
+
+       /*
+        * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+        * In such setups it is best to avoid flushes of ranges which are not
+        * naturally aligned, since it would lead to flushes of unmodified
+        * PTEs. Such flushes would require the hypervisor to do more work than
+        * necessary. Therefore, perform repeated flushes of aligned ranges
+        * until you cover the range. Each iteration flushes the smaller
+        * between the natural alignment of the address that we flush and the
+        * greatest naturally aligned region that fits in the range.
+        */
+       while (size != 0) {
+               int addr_alignment = __ffs(address);
+               int size_alignment = __fls(size);
+               int min_alignment;
+               size_t flush_size;
+
+               /*
+                * size is always non-zero, but address might be zero, causing
+                * addr_alignment to be negative. As the casting of the
+                * argument in __ffs(address) to long might trim the high bits
+                * of the address on x86-32, cast to long when doing the check.
+                */
+               if (likely((unsigned long)address != 0))
+                       min_alignment = min(addr_alignment, size_alignment);
+               else
+                       min_alignment = size_alignment;
+
+               flush_size = 1ul << min_alignment;
+
+               __domain_flush_pages(domain, address, flush_size, pde);
+               address += flush_size;
+               size -= flush_size;
+       }
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
 void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
 {
-       __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+       domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
 }
 
 void amd_iommu_domain_flush_complete(struct protection_domain *domain)
@@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
                unsigned long flags;
 
                spin_lock_irqsave(&domain->lock, flags);
-               domain_flush_pages(domain, iova, size);
+               domain_flush_pages(domain, iova, size, 1);
                amd_iommu_domain_flush_complete(domain);
                spin_unlock_irqrestore(&domain->lock, flags);
        }
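
To make the alignment-splitting loop in domain_flush_pages() above concrete,
here is a small standalone model of the same arithmetic (a sketch using
compiler builtins in place of the kernel's __ffs()/__fls(); it is not driver
code). For address 0x3000 and size 0x5000 it prints two flushes,
0x3000/0x1000 and 0x4000/0x4000:

	#include <stdio.h>

	/* Userspace stand-ins for the kernel's __ffs()/__fls(), 64-bit long assumed */
	static int lowest_set_bit(unsigned long x)  { return __builtin_ctzl(x); }
	static int highest_set_bit(unsigned long x) { return 63 - __builtin_clzl(x); }

	int main(void)
	{
		unsigned long address = 0x3000, size = 0x5000;

		while (size != 0) {
			int size_align = highest_set_bit(size);
			int align = address ? lowest_set_bit(address) : size_align;
			unsigned long flush_size =
				1UL << (align < size_align ? align : size_align);

			printf("flush iova 0x%lx size 0x%lx\n", address, flush_size);
			address += flush_size;
			size -= flush_size;
		}
		return 0;
	}
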
@@ -1707,14 +1750,9 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 
 static void amd_iommu_probe_finalize(struct device *dev)
 {
-       struct iommu_domain *domain;
-
        /* Domains are initialized for this device - have a look what we ended up with */
-       domain = iommu_get_domain_for_dev(dev);
-       if (domain->type == IOMMU_DOMAIN_DMA)
-               iommu_setup_dma_ops(dev, 0, U64_MAX);
-       else
-               set_dma_ops(dev, NULL);
+       set_dma_ops(dev, NULL);
+       iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void amd_iommu_release_device(struct device *dev)
@@ -1775,12 +1813,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
 static void __init amd_iommu_init_dma_ops(void)
 {
        swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
-       if (amd_iommu_unmap_flush)
-               pr_info("IO/TLB flush on unmap enabled\n");
-       else
-               pr_info("Lazy IO/TLB flushing enabled\n");
-       iommu_set_dma_strict(amd_iommu_unmap_flush);
 }
 
 int __init amd_iommu_init_api(void)
@@ -1924,16 +1956,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
        domain->domain.geometry.aperture_end   = ~0ULL;
        domain->domain.geometry.force_aperture = true;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
-               goto free_domain;
-
        return &domain->domain;
-
-free_domain:
-       protection_domain_free(domain);
-
-       return NULL;
 }
 
 static void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -1950,9 +1973,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
        if (!dom)
                return;
 
-       if (dom->type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&domain->domain);
-
        if (domain->flags & PD_IOMMUV2_MASK)
                free_gcr3_table(domain);
 
@@ -2022,6 +2042,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
        return ret;
 }
 
+static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+                                    unsigned long iova, size_t size)
+{
+       struct protection_domain *domain = to_pdomain(dom);
+       struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+
+       if (ops->map)
+               domain_flush_np_cache(domain, iova, size);
+}
+
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
                         phys_addr_t paddr, size_t page_size, int iommu_prot,
                         gfp_t gfp)
@@ -2040,26 +2070,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
        if (iommu_prot & IOMMU_WRITE)
                prot |= IOMMU_PROT_IW;
 
-       if (ops->map) {
+       if (ops->map)
                ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
-               domain_flush_np_cache(domain, iova, page_size);
-       }
 
        return ret;
 }
 
+static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+                                           struct iommu_iotlb_gather *gather,
+                                           unsigned long iova, size_t size)
+{
+       /*
+        * AMD's IOMMU can flush as many pages as necessary in a single flush.
+        * Unless we run in a virtual machine, which can be inferred according
+        * to whether "non-present cache" is on, it is probably best to prefer
+        * (potentially) too extensive TLB flushing (i.e., more misses) over
+        * multiple TLB flushes (i.e., more flushes). For virtual machines the
+        * hypervisor needs to synchronize the host IOMMU PTEs with those of
+        * the guest, and the trade-off is different: unnecessary TLB flushes
+        * should be avoided.
+        */
+       if (amd_iommu_np_cache &&
+           iommu_iotlb_gather_is_disjoint(gather, iova, size))
+               iommu_iotlb_sync(domain, gather);
+
+       iommu_iotlb_gather_add_range(gather, iova, size);
+}
+
 static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
                              size_t page_size,
                              struct iommu_iotlb_gather *gather)
 {
        struct protection_domain *domain = to_pdomain(dom);
        struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+       size_t r;
 
        if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
            (domain->iop.mode == PAGE_MODE_NONE))
                return 0;
 
-       return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+       r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+
+       amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+       return r;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
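
The "disjoint" test used in amd_iommu_iotlb_gather_add_page() above exists
because a gather only tracks a single contiguous window; a new range that
neither overlaps nor abuts it would force the window to grow over untouched
IOVAs, which is exactly what a vIOMMU wants to avoid. A simplified
standalone model of that check (not the kernel's inline helpers):

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified model of struct iommu_iotlb_gather: one [start, end] window */
	struct gather { unsigned long start, end; };

	static bool is_disjoint(const struct gather *g, unsigned long iova,
				unsigned long size)
	{
		unsigned long end = iova + size - 1;

		/* Non-empty gather, and the new range neither overlaps nor abuts it */
		return g->end != 0 && (end + 1 < g->start || iova > g->end + 1);
	}

	static void add_range(struct gather *g, unsigned long iova,
			      unsigned long size)
	{
		unsigned long end = iova + size - 1;

		if (iova < g->start)
			g->start = iova;
		if (end > g->end)
			g->end = end;
	}

	int main(void)
	{
		struct gather g = { .start = ~0UL, .end = 0 };

		add_range(&g, 0x1000, 0x1000);	/* window is now [0x1000, 0x1fff] */
		printf("0x3000: %d\n", is_disjoint(&g, 0x3000, 0x1000)); /* 1, gap at 0x2000 */
		printf("0x2000: %d\n", is_disjoint(&g, 0x2000, 0x1000)); /* 0, abuts window */
		return 0;
	}
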
@@ -2162,7 +2216,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
                                 struct iommu_iotlb_gather *gather)
 {
-       amd_iommu_flush_iotlb_all(domain);
+       struct protection_domain *dom = to_pdomain(domain);
+       unsigned long flags;
+
+       spin_lock_irqsave(&dom->lock, flags);
+       domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+       amd_iommu_domain_flush_complete(dom);
+       spin_unlock_irqrestore(&dom->lock, flags);
 }
 
 static int amd_iommu_def_domain_type(struct device *dev)
@@ -2191,6 +2251,7 @@ const struct iommu_ops amd_iommu_ops = {
        .attach_dev = amd_iommu_attach_device,
        .detach_dev = amd_iommu_detach_device,
        .map = amd_iommu_map,
+       .iotlb_sync_map = amd_iommu_iotlb_sync_map,
        .unmap = amd_iommu_unmap,
        .iova_to_phys = amd_iommu_iova_to_phys,
        .probe_device = amd_iommu_probe_device,
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index f8d4ad4..a9e5682 100644
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt)     "AMD-Vi: " fmt
 
+#include <linux/refcount.h>
 #include <linux/mmu_notifier.h>
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
@@ -33,7 +34,7 @@ struct pri_queue {
 
 struct pasid_state {
        struct list_head list;                  /* For global state-list */
-       atomic_t count;                         /* Reference count */
+       refcount_t count;                               /* Reference count */
        unsigned mmu_notifier_count;            /* Counting nested mmu_notifier
                                                   calls */
        struct mm_struct *mm;                   /* mm_struct for the faults */
@@ -242,7 +243,7 @@ static struct pasid_state *get_pasid_state(struct device_state *dev_state,
 
        ret = *ptr;
        if (ret)
-               atomic_inc(&ret->count);
+               refcount_inc(&ret->count);
 
 out_unlock:
        spin_unlock_irqrestore(&dev_state->lock, flags);
@@ -257,14 +258,14 @@ static void free_pasid_state(struct pasid_state *pasid_state)
 
 static void put_pasid_state(struct pasid_state *pasid_state)
 {
-       if (atomic_dec_and_test(&pasid_state->count))
+       if (refcount_dec_and_test(&pasid_state->count))
                wake_up(&pasid_state->wq);
 }
 
 static void put_pasid_state_wait(struct pasid_state *pasid_state)
 {
-       atomic_dec(&pasid_state->count);
-       wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
+       refcount_dec(&pasid_state->count);
+       wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
        free_pasid_state(pasid_state);
 }
 
@@ -624,7 +625,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
                goto out;
 
 
-       atomic_set(&pasid_state->count, 1);
+       refcount_set(&pasid_state->count, 1);
        init_waitqueue_head(&pasid_state->wq);
        spin_lock_init(&pasid_state->lock);
 
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
new file mode 100644
index 0000000..559db92
--- /dev/null
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Apple DART (Device Address Resolution Table) IOMMU driver
+ *
+ * Copyright (C) 2021 The Asahi Linux Contributors
+ *
+ * Based on arm/arm-smmu/arm-smmu.c and arm/arm-smmu-v3/arm-smmu-v3.c
+ *  Copyright (C) 2013 ARM Limited
+ *  Copyright (C) 2015 ARM Limited
+ * and on exynos-iommu.c
+ *  Copyright (c) 2011,2016 Samsung Electronics Co., Ltd.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/swab.h>
+#include <linux/types.h>
+
+#define DART_MAX_STREAMS 16
+#define DART_MAX_TTBR 4
+#define MAX_DARTS_PER_DEVICE 2
+
+#define DART_STREAM_ALL 0xffff
+
+#define DART_PARAMS1 0x00
+#define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24)
+
+#define DART_PARAMS2 0x04
+#define DART_PARAMS_BYPASS_SUPPORT BIT(0)
+
+#define DART_STREAM_COMMAND 0x20
+#define DART_STREAM_COMMAND_BUSY BIT(2)
+#define DART_STREAM_COMMAND_INVALIDATE BIT(20)
+
+#define DART_STREAM_SELECT 0x34
+
+#define DART_ERROR 0x40
+#define DART_ERROR_STREAM GENMASK(27, 24)
+#define DART_ERROR_CODE GENMASK(11, 0)
+#define DART_ERROR_FLAG BIT(31)
+
+#define DART_ERROR_READ_FAULT BIT(4)
+#define DART_ERROR_WRITE_FAULT BIT(3)
+#define DART_ERROR_NO_PTE BIT(2)
+#define DART_ERROR_NO_PMD BIT(1)
+#define DART_ERROR_NO_TTBR BIT(0)
+
+#define DART_CONFIG 0x60
+#define DART_CONFIG_LOCK BIT(15)
+
+#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100
+
+#define DART_ERROR_ADDR_HI 0x54
+#define DART_ERROR_ADDR_LO 0x50
+
+#define DART_TCR(sid) (0x100 + 4 * (sid))
+#define DART_TCR_TRANSLATE_ENABLE BIT(7)
+#define DART_TCR_BYPASS0_ENABLE BIT(8)
+#define DART_TCR_BYPASS1_ENABLE BIT(12)
+
+#define DART_TTBR(sid, idx) (0x200 + 16 * (sid) + 4 * (idx))
+#define DART_TTBR_VALID BIT(31)
+#define DART_TTBR_SHIFT 12
+
+/*
+ * Private structure associated with each DART device.
+ *
+ * @dev: device struct
+ * @regs: mapped MMIO region
+ * @irq: interrupt number, can be shared with other DARTs
+ * @clks: clocks associated with this DART
+ * @num_clks: number of @clks
+ * @lock: lock for hardware operations involving this dart
+ * @pgsize: pagesize supported by this DART
+ * @supports_bypass: indicates if this DART supports bypass mode
+ * @force_bypass: force bypass mode due to pagesize mismatch?
+ * @sid2group: maps stream ids to iommu_groups
+ * @iommu: iommu core device
+ */
+struct apple_dart {
+       struct device *dev;
+
+       void __iomem *regs;
+
+       int irq;
+       struct clk_bulk_data *clks;
+       int num_clks;
+
+       spinlock_t lock;
+
+       u32 pgsize;
+       u32 supports_bypass : 1;
+       u32 force_bypass : 1;
+
+       struct iommu_group *sid2group[DART_MAX_STREAMS];
+       struct iommu_device iommu;
+};
+
+/*
+ * Convenience struct to identify streams.
+ *
+ * The normal variant is used inside apple_dart_master_cfg which isn't written
+ * to concurrently.
+ * The atomic variant is used inside apple_dart_domain where we have to guard
+ * against races from potential parallel calls to attach/detach_device.
+ * Note that even inside the atomic variant the apple_dart pointer is not
+ * protected: This pointer is initialized once under the domain init mutex
+ * and never changed again afterwards. Devices with different dart pointers
+ * cannot be attached to the same domain.
+ *
+ * @dart dart pointer
+ * @sidmap stream id bitmap
+ */
+struct apple_dart_stream_map {
+       struct apple_dart *dart;
+       unsigned long sidmap;
+};
+struct apple_dart_atomic_stream_map {
+       struct apple_dart *dart;
+       atomic64_t sidmap;
+};
+
+/*
+ * This structure is attached to each iommu domain handled by a DART.
+ *
+ * @pgtbl_ops: pagetable ops allocated by io-pgtable
+ * @finalized: true if the domain has been completely initialized
+ * @init_lock: protects domain initialization
+ * @stream_maps: streams attached to this domain (valid for DMA/UNMANAGED only)
+ * @domain: core iommu domain pointer
+ */
+struct apple_dart_domain {
+       struct io_pgtable_ops *pgtbl_ops;
+
+       bool finalized;
+       struct mutex init_lock;
+       struct apple_dart_atomic_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
+
+       struct iommu_domain domain;
+};
+
+/*
+ * This structure is attached to devices with dev_iommu_priv_set() on of_xlate
+ * and contains a list of streams bound to this device.
+ * So far the worst case seen is a single device with two streams
+ * from different darts, so this simple static array is enough.
+ *
+ * @streams: streams for this device
+ */
+struct apple_dart_master_cfg {
+       struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
+};
+
+/*
+ * Helper macro to iterate over apple_dart_master_cfg.stream_maps and
+ * apple_dart_domain.stream_maps
+ *
+ * @i int used as loop variable
+ * @base pointer to base struct (apple_dart_master_cfg or apple_dart_domain)
+ * @stream pointer to the apple_dart_streams struct for each loop iteration
+ */
+#define for_each_stream_map(i, base, stream_map)                               \
+       for (i = 0, stream_map = &(base)->stream_maps[0];                      \
+            i < MAX_DARTS_PER_DEVICE && stream_map->dart;                     \
+            stream_map = &(base)->stream_maps[++i])
+
+static struct platform_driver apple_dart_driver;
+static const struct iommu_ops apple_dart_iommu_ops;
+static const struct iommu_flush_ops apple_dart_tlb_ops;
+
+static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom)
+{
+       return container_of(dom, struct apple_dart_domain, domain);
+}
+
+static void
+apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map)
+{
+       int sid;
+
+       for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+               writel(DART_TCR_TRANSLATE_ENABLE,
+                      stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map)
+{
+       int sid;
+
+       for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+               writel(0, stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void
+apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map)
+{
+       int sid;
+
+       WARN_ON(!stream_map->dart->supports_bypass);
+       for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+               writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE,
+                      stream_map->dart->regs + DART_TCR(sid));
+}
+
+static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map,
+                                  u8 idx, phys_addr_t paddr)
+{
+       int sid;
+
+       WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1));
+       for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+               writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT),
+                      stream_map->dart->regs + DART_TTBR(sid, idx));
+}
+
+static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map,
+                                    u8 idx)
+{
+       int sid;
+
+       for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+               writel(0, stream_map->dart->regs + DART_TTBR(sid, idx));
+}
+
+static void
+apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map)
+{
+       int i;
+
+       for (i = 0; i < DART_MAX_TTBR; ++i)
+               apple_dart_hw_clear_ttbr(stream_map, i);
+}
+
+static int
+apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map,
+                            u32 command)
+{
+       unsigned long flags;
+       int ret;
+       u32 command_reg;
+
+       spin_lock_irqsave(&stream_map->dart->lock, flags);
+
+       writel(stream_map->sidmap, stream_map->dart->regs + DART_STREAM_SELECT);
+       writel(command, stream_map->dart->regs + DART_STREAM_COMMAND);
+
+       ret = readl_poll_timeout_atomic(
+               stream_map->dart->regs + DART_STREAM_COMMAND, command_reg,
+               !(command_reg & DART_STREAM_COMMAND_BUSY), 1,
+               DART_STREAM_COMMAND_BUSY_TIMEOUT);
+
+       spin_unlock_irqrestore(&stream_map->dart->lock, flags);
+
+       if (ret) {
+               dev_err(stream_map->dart->dev,
+                       "busy bit did not clear after command %x for streams %lx\n",
+                       command, stream_map->sidmap);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int
+apple_dart_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map)
+{
+       return apple_dart_hw_stream_command(stream_map,
+                                           DART_STREAM_COMMAND_INVALIDATE);
+}
+
+static int apple_dart_hw_reset(struct apple_dart *dart)
+{
+       u32 config;
+       struct apple_dart_stream_map stream_map;
+
+       config = readl(dart->regs + DART_CONFIG);
+       if (config & DART_CONFIG_LOCK) {
+               dev_err(dart->dev, "DART is locked down until reboot: %08x\n",
+                       config);
+               return -EINVAL;
+       }
+
+       stream_map.dart = dart;
+       stream_map.sidmap = DART_STREAM_ALL;
+       apple_dart_hw_disable_dma(&stream_map);
+       apple_dart_hw_clear_all_ttbrs(&stream_map);
+
+       /* clear any pending errors before the interrupt is unmasked */
+       writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR);
+
+       return apple_dart_hw_invalidate_tlb(&stream_map);
+}
+
+static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain)
+{
+       int i;
+       struct apple_dart_atomic_stream_map *domain_stream_map;
+       struct apple_dart_stream_map stream_map;
+
+       for_each_stream_map(i, domain, domain_stream_map) {
+               stream_map.dart = domain_stream_map->dart;
+               stream_map.sidmap = atomic64_read(&domain_stream_map->sidmap);
+               apple_dart_hw_invalidate_tlb(&stream_map);
+       }
+}
+
+static void apple_dart_flush_iotlb_all(struct iommu_domain *domain)
+{
+       apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_iotlb_sync(struct iommu_domain *domain,
+                                 struct iommu_iotlb_gather *gather)
+{
+       apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
+                                     unsigned long iova, size_t size)
+{
+       apple_dart_domain_flush_tlb(to_dart_domain(domain));
+}
+
+static void apple_dart_tlb_flush_all(void *cookie)
+{
+       apple_dart_domain_flush_tlb(cookie);
+}
+
+static void apple_dart_tlb_flush_walk(unsigned long iova, size_t size,
+                                     size_t granule, void *cookie)
+{
+       apple_dart_domain_flush_tlb(cookie);
+}
+
+static const struct iommu_flush_ops apple_dart_tlb_ops = {
+       .tlb_flush_all = apple_dart_tlb_flush_all,
+       .tlb_flush_walk = apple_dart_tlb_flush_walk,
+};
+
+static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
+                                          dma_addr_t iova)
+{
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+       struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+       if (!ops)
+               return 0;
+
+       return ops->iova_to_phys(ops, iova);
+}
+
+static int apple_dart_map_pages(struct iommu_domain *domain, unsigned long iova,
+                               phys_addr_t paddr, size_t pgsize,
+                               size_t pgcount, int prot, gfp_t gfp,
+                               size_t *mapped)
+{
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+       struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+       if (!ops)
+               return -ENODEV;
+
+       return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp,
+                             mapped);
+}
+
+static size_t apple_dart_unmap_pages(struct iommu_domain *domain,
+                                    unsigned long iova, size_t pgsize,
+                                    size_t pgcount,
+                                    struct iommu_iotlb_gather *gather)
+{
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+       struct io_pgtable_ops *ops = dart_domain->pgtbl_ops;
+
+       return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
+}
+
+static void
+apple_dart_setup_translation(struct apple_dart_domain *domain,
+                            struct apple_dart_stream_map *stream_map)
+{
+       int i;
+       struct io_pgtable_cfg *pgtbl_cfg =
+               &io_pgtable_ops_to_pgtable(domain->pgtbl_ops)->cfg;
+
+       for (i = 0; i < pgtbl_cfg->apple_dart_cfg.n_ttbrs; ++i)
+               apple_dart_hw_set_ttbr(stream_map, i,
+                                      pgtbl_cfg->apple_dart_cfg.ttbr[i]);
+       for (; i < DART_MAX_TTBR; ++i)
+               apple_dart_hw_clear_ttbr(stream_map, i);
+
+       apple_dart_hw_enable_translation(stream_map);
+       apple_dart_hw_invalidate_tlb(stream_map);
+}
+
+static int apple_dart_finalize_domain(struct iommu_domain *domain,
+                                     struct apple_dart_master_cfg *cfg)
+{
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+       struct apple_dart *dart = cfg->stream_maps[0].dart;
+       struct io_pgtable_cfg pgtbl_cfg;
+       int ret = 0;
+       int i;
+
+       mutex_lock(&dart_domain->init_lock);
+
+       if (dart_domain->finalized)
+               goto done;
+
+       for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+               dart_domain->stream_maps[i].dart = cfg->stream_maps[i].dart;
+               atomic64_set(&dart_domain->stream_maps[i].sidmap,
+                            cfg->stream_maps[i].sidmap);
+       }
+
+       pgtbl_cfg = (struct io_pgtable_cfg){
+               .pgsize_bitmap = dart->pgsize,
+               .ias = 32,
+               .oas = 36,
+               .coherent_walk = 1,
+               .tlb = &apple_dart_tlb_ops,
+               .iommu_dev = dart->dev,
+       };
+
+       dart_domain->pgtbl_ops =
+               alloc_io_pgtable_ops(APPLE_DART, &pgtbl_cfg, domain);
+       if (!dart_domain->pgtbl_ops) {
+               ret = -ENOMEM;
+               goto done;
+       }
+
+       domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+       domain->geometry.aperture_start = 0;
+       domain->geometry.aperture_end = DMA_BIT_MASK(32);
+       domain->geometry.force_aperture = true;
+
+       dart_domain->finalized = true;
+
+done:
+       mutex_unlock(&dart_domain->init_lock);
+       return ret;
+}
+
+static int
+apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps,
+                      struct apple_dart_stream_map *master_maps,
+                      bool add_streams)
+{
+       int i;
+
+       for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+               if (domain_maps[i].dart != master_maps[i].dart)
+                       return -EINVAL;
+       }
+
+       for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+               if (!domain_maps[i].dart)
+                       break;
+               if (add_streams)
+                       atomic64_or(master_maps[i].sidmap,
+                                   &domain_maps[i].sidmap);
+               else
+                       atomic64_and(~master_maps[i].sidmap,
+                                    &domain_maps[i].sidmap);
+       }
+
+       return 0;
+}
+
+static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
+                                        struct apple_dart_master_cfg *cfg)
+{
+       return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps,
+                                     true);
+}
+
+static int apple_dart_domain_remove_streams(struct apple_dart_domain *domain,
+                                           struct apple_dart_master_cfg *cfg)
+{
+       return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps,
+                                     false);
+}
+
+static int apple_dart_attach_dev(struct iommu_domain *domain,
+                                struct device *dev)
+{
+       int ret, i;
+       struct apple_dart_stream_map *stream_map;
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+       if (cfg->stream_maps[0].dart->force_bypass &&
+           domain->type != IOMMU_DOMAIN_IDENTITY)
+               return -EINVAL;
+       if (!cfg->stream_maps[0].dart->supports_bypass &&
+           domain->type == IOMMU_DOMAIN_IDENTITY)
+               return -EINVAL;
+
+       ret = apple_dart_finalize_domain(domain, cfg);
+       if (ret)
+               return ret;
+
+       switch (domain->type) {
+       case IOMMU_DOMAIN_DMA:
+       case IOMMU_DOMAIN_UNMANAGED:
+               ret = apple_dart_domain_add_streams(dart_domain, cfg);
+               if (ret)
+                       return ret;
+
+               for_each_stream_map(i, cfg, stream_map)
+                       apple_dart_setup_translation(dart_domain, stream_map);
+               break;
+       case IOMMU_DOMAIN_BLOCKED:
+               for_each_stream_map(i, cfg, stream_map)
+                       apple_dart_hw_disable_dma(stream_map);
+               break;
+       case IOMMU_DOMAIN_IDENTITY:
+               for_each_stream_map(i, cfg, stream_map)
+                       apple_dart_hw_enable_bypass(stream_map);
+               break;
+       }
+
+       return ret;
+}
+
+static void apple_dart_detach_dev(struct iommu_domain *domain,
+                                 struct device *dev)
+{
+       int i;
+       struct apple_dart_stream_map *stream_map;
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+       for_each_stream_map(i, cfg, stream_map)
+               apple_dart_hw_disable_dma(stream_map);
+
+       if (domain->type == IOMMU_DOMAIN_DMA ||
+           domain->type == IOMMU_DOMAIN_UNMANAGED)
+               apple_dart_domain_remove_streams(dart_domain, cfg);
+}
+
+static struct iommu_device *apple_dart_probe_device(struct device *dev)
+{
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+       struct apple_dart_stream_map *stream_map;
+       int i;
+
+       if (!cfg)
+               return ERR_PTR(-ENODEV);
+
+       for_each_stream_map(i, cfg, stream_map)
+               device_link_add(
+                       dev, stream_map->dart->dev,
+                       DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
+       return &cfg->stream_maps[0].dart->iommu;
+}
+
+static void apple_dart_release_device(struct device *dev)
+{
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+
+       if (!cfg)
+               return;
+
+       dev_iommu_priv_set(dev, NULL);
+       kfree(cfg);
+}
+
+static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
+{
+       struct apple_dart_domain *dart_domain;
+
+       if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED &&
+           type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED)
+               return NULL;
+
+       dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL);
+       if (!dart_domain)
+               return NULL;
+
+       iommu_get_dma_cookie(&dart_domain->domain);
+       mutex_init(&dart_domain->init_lock);
+
+       /* no need to allocate pgtbl_ops or do any other finalization steps */
+       if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED)
+               dart_domain->finalized = true;
+
+       return &dart_domain->domain;
+}
+
+static void apple_dart_domain_free(struct iommu_domain *domain)
+{
+       struct apple_dart_domain *dart_domain = to_dart_domain(domain);
+
+       if (dart_domain->pgtbl_ops)
+               free_io_pgtable_ops(dart_domain->pgtbl_ops);
+
+       kfree(dart_domain);
+}
+
+static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+       struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+       struct apple_dart *dart = platform_get_drvdata(iommu_pdev);
+       struct apple_dart *cfg_dart;
+       int i, sid;
+
+       if (args->args_count != 1)
+               return -EINVAL;
+       sid = args->args[0];
+
+       if (!cfg)
+               cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+       if (!cfg)
+               return -ENOMEM;
+       dev_iommu_priv_set(dev, cfg);
+
+       cfg_dart = cfg->stream_maps[0].dart;
+       if (cfg_dart) {
+               if (cfg_dart->supports_bypass != dart->supports_bypass)
+                       return -EINVAL;
+               if (cfg_dart->force_bypass != dart->force_bypass)
+                       return -EINVAL;
+               if (cfg_dart->pgsize != dart->pgsize)
+                       return -EINVAL;
+       }
+
+       for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+               if (cfg->stream_maps[i].dart == dart) {
+                       cfg->stream_maps[i].sidmap |= 1 << sid;
+                       return 0;
+               }
+       }
+       for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
+               if (!cfg->stream_maps[i].dart) {
+                       cfg->stream_maps[i].dart = dart;
+                       cfg->stream_maps[i].sidmap = 1 << sid;
+                       return 0;
+               }
+       }
+
+       return -EINVAL;
+}
+
+static struct iommu_group *apple_dart_device_group(struct device *dev)
+{
+       static DEFINE_MUTEX(lock);
+       int i, sid;
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+       struct apple_dart_stream_map *stream_map;
+       struct iommu_group *group = NULL;
+       struct iommu_group *res = ERR_PTR(-EINVAL);
+
+       mutex_lock(&lock);
+
+       for_each_stream_map(i, cfg, stream_map) {
+               for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) {
+                       struct iommu_group *stream_group =
+                               stream_map->dart->sid2group[sid];
+
+                       if (group && group != stream_group) {
+                               res = ERR_PTR(-EINVAL);
+                               goto out;
+                       }
+
+                       group = stream_group;
+               }
+       }
+
+       if (group) {
+               res = iommu_group_ref_get(group);
+               goto out;
+       }
+
+#ifdef CONFIG_PCI
+       if (dev_is_pci(dev))
+               group = pci_device_group(dev);
+       else
+#endif
+               group = generic_device_group(dev);
+
+       for_each_stream_map(i, cfg, stream_map)
+               for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS)
+                       stream_map->dart->sid2group[sid] = group;
+
+       res = group;
+
+out:
+       mutex_unlock(&lock);
+       return res;
+}
+
+static int apple_dart_def_domain_type(struct device *dev)
+{
+       struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+
+       if (cfg->stream_maps[0].dart->force_bypass)
+               return IOMMU_DOMAIN_IDENTITY;
+       if (!cfg->stream_maps[0].dart->supports_bypass)
+               return IOMMU_DOMAIN_DMA;
+
+       return 0;
+}
+
+static const struct iommu_ops apple_dart_iommu_ops = {
+       .domain_alloc = apple_dart_domain_alloc,
+       .domain_free = apple_dart_domain_free,
+       .attach_dev = apple_dart_attach_dev,
+       .detach_dev = apple_dart_detach_dev,
+       .map_pages = apple_dart_map_pages,
+       .unmap_pages = apple_dart_unmap_pages,
+       .flush_iotlb_all = apple_dart_flush_iotlb_all,
+       .iotlb_sync = apple_dart_iotlb_sync,
+       .iotlb_sync_map = apple_dart_iotlb_sync_map,
+       .iova_to_phys = apple_dart_iova_to_phys,
+       .probe_device = apple_dart_probe_device,
+       .release_device = apple_dart_release_device,
+       .device_group = apple_dart_device_group,
+       .of_xlate = apple_dart_of_xlate,
+       .def_domain_type = apple_dart_def_domain_type,
+       .pgsize_bitmap = -1UL, /* Restricted during dart probe */
+};
+
+static irqreturn_t apple_dart_irq(int irq, void *dev)
+{
+       struct apple_dart *dart = dev;
+       const char *fault_name = NULL;
+       u32 error = readl(dart->regs + DART_ERROR);
+       u32 error_code = FIELD_GET(DART_ERROR_CODE, error);
+       u32 addr_lo = readl(dart->regs + DART_ERROR_ADDR_LO);
+       u32 addr_hi = readl(dart->regs + DART_ERROR_ADDR_HI);
+       u64 addr = addr_lo | (((u64)addr_hi) << 32);
+       u8 stream_idx = FIELD_GET(DART_ERROR_STREAM, error);
+
+       if (!(error & DART_ERROR_FLAG))
+               return IRQ_NONE;
+
+       /* there should only be a single bit set but let's use == to be sure */
+       if (error_code == DART_ERROR_READ_FAULT)
+               fault_name = "READ FAULT";
+       else if (error_code == DART_ERROR_WRITE_FAULT)
+               fault_name = "WRITE FAULT";
+       else if (error_code == DART_ERROR_NO_PTE)
+               fault_name = "NO PTE FOR IOVA";
+       else if (error_code == DART_ERROR_NO_PMD)
+               fault_name = "NO PMD FOR IOVA";
+       else if (error_code == DART_ERROR_NO_TTBR)
+               fault_name = "NO TTBR FOR IOVA";
+       else
+               fault_name = "unknown";
+
+       dev_err_ratelimited(
+               dart->dev,
+               "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx",
+               error, stream_idx, error_code, fault_name, addr);
+
+       writel(error, dart->regs + DART_ERROR);
+       return IRQ_HANDLED;
+}
+
+static int apple_dart_set_bus_ops(const struct iommu_ops *ops)
+{
+       int ret;
+
+       if (!iommu_present(&platform_bus_type)) {
+               ret = bus_set_iommu(&platform_bus_type, ops);
+               if (ret)
+                       return ret;
+       }
+#ifdef CONFIG_PCI
+       if (!iommu_present(&pci_bus_type)) {
+               ret = bus_set_iommu(&pci_bus_type, ops);
+               if (ret) {
+                       bus_set_iommu(&platform_bus_type, NULL);
+                       return ret;
+               }
+       }
+#endif
+       return 0;
+}
+
+static int apple_dart_probe(struct platform_device *pdev)
+{
+       int ret;
+       u32 dart_params[2];
+       struct resource *res;
+       struct apple_dart *dart;
+       struct device *dev = &pdev->dev;
+
+       dart = devm_kzalloc(dev, sizeof(*dart), GFP_KERNEL);
+       if (!dart)
+               return -ENOMEM;
+
+       dart->dev = dev;
+       spin_lock_init(&dart->lock);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (resource_size(res) < 0x4000) {
+               dev_err(dev, "MMIO region too small (%pr)\n", res);
+               return -EINVAL;
+       }
+
+       dart->regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(dart->regs))
+               return PTR_ERR(dart->regs);
+
+       dart->irq = platform_get_irq(pdev, 0);
+       if (dart->irq < 0)
+               return -ENODEV;
+
+       ret = devm_clk_bulk_get_all(dev, &dart->clks);
+       if (ret < 0)
+               return ret;
+       dart->num_clks = ret;
+
+       ret = clk_bulk_prepare_enable(dart->num_clks, dart->clks);
+       if (ret)
+               return ret;
+
+       ret = apple_dart_hw_reset(dart);
+       if (ret)
+               goto err_clk_disable;
+
+       dart_params[0] = readl(dart->regs + DART_PARAMS1);
+       dart_params[1] = readl(dart->regs + DART_PARAMS2);
+       dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]);
+       dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT;
+       dart->force_bypass = dart->pgsize > PAGE_SIZE;
+
+       ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED,
+                         "apple-dart fault handler", dart);
+       if (ret)
+               goto err_clk_disable;
+
+       platform_set_drvdata(pdev, dart);
+
+       ret = apple_dart_set_bus_ops(&apple_dart_iommu_ops);
+       if (ret)
+               goto err_free_irq;
+
+       ret = iommu_device_sysfs_add(&dart->iommu, dev, NULL, "apple-dart.%s",
+                                    dev_name(&pdev->dev));
+       if (ret)
+               goto err_remove_bus_ops;
+
+       ret = iommu_device_register(&dart->iommu, &apple_dart_iommu_ops, dev);
+       if (ret)
+               goto err_sysfs_remove;
+
+       dev_info(
+               &pdev->dev,
+               "DART [pagesize %x, bypass support: %d, bypass forced: %d] initialized\n",
+               dart->pgsize, dart->supports_bypass, dart->force_bypass);
+       return 0;
+
+err_sysfs_remove:
+       iommu_device_sysfs_remove(&dart->iommu);
+err_remove_bus_ops:
+       apple_dart_set_bus_ops(NULL);
+err_free_irq:
+       free_irq(dart->irq, dart);
+err_clk_disable:
+       clk_bulk_disable_unprepare(dart->num_clks, dart->clks);
+
+       return ret;
+}
+
+static int apple_dart_remove(struct platform_device *pdev)
+{
+       struct apple_dart *dart = platform_get_drvdata(pdev);
+
+       apple_dart_hw_reset(dart);
+       free_irq(dart->irq, dart);
+       apple_dart_set_bus_ops(NULL);
+
+       iommu_device_unregister(&dart->iommu);
+       iommu_device_sysfs_remove(&dart->iommu);
+
+       clk_bulk_disable_unprepare(dart->num_clks, dart->clks);
+
+       return 0;
+}
+
+static const struct of_device_id apple_dart_of_match[] = {
+       { .compatible = "apple,t8103-dart", .data = NULL },
+       {},
+};
+MODULE_DEVICE_TABLE(of, apple_dart_of_match);
+
+static struct platform_driver apple_dart_driver = {
+       .driver = {
+               .name                   = "apple-dart",
+               .of_match_table         = apple_dart_of_match,
+               .suppress_bind_attrs    = true,
+       },
+       .probe  = apple_dart_probe,
+       .remove = apple_dart_remove,
+};
+
+module_platform_driver(apple_dart_driver);
+
+MODULE_DESCRIPTION("IOMMU API for Apple's DART");
+MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>");
+MODULE_LICENSE("GPL v2");
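
Not part of the patch: a minimal standalone sketch of the bypass decision made in apple_dart_probe() above, where the DART page size is derived from a PARAMS1 field and bypass is forced whenever that size exceeds the kernel page size. The register value and the field position below are invented for illustration; only the pgsize/force_bypass arithmetic mirrors the driver.

/* Illustrative sketch only -- not part of the apple-dart patch above.
 * Mirrors the pgsize/force_bypass derivation in apple_dart_probe();
 * the register value and field position are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE       4096UL          /* pretend the kernel runs with 4K pages */
#define SKETCH_PAGE_SHIFT_MASK 0x0f000000U     /* hypothetical PARAMS1 page-shift field */

static unsigned int field_get(uint32_t mask, uint32_t val)
{
	/* same idea as FIELD_GET(): shift the masked field down to bit 0 */
	return (val & mask) >> __builtin_ctz(mask);
}

int main(void)
{
	uint32_t params1 = 0x0e000000;  /* hypothetical readout: page shift = 14 */
	unsigned long pgsize = 1UL << field_get(SKETCH_PAGE_SHIFT_MASK, params1);
	int force_bypass = pgsize > SKETCH_PAGE_SIZE;

	/* a 16K DART on a 4K kernel cannot offer usable DMA domains, so the
	 * driver reports IOMMU_DOMAIN_IDENTITY as the default domain type */
	printf("pgsize=%lu force_bypass=%d\n", pgsize, force_bypass);
	return 0;
}
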
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 235f9bd..a388e31 100644
@@ -335,10 +335,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
        return 0;
 }
 
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
+{
+       return &smmu->cmdq;
+}
+
 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
-                                        u32 prod)
+                                        struct arm_smmu_queue *q, u32 prod)
 {
-       struct arm_smmu_queue *q = &smmu->cmdq.q;
        struct arm_smmu_cmdq_ent ent = {
                .opcode = CMDQ_OP_CMD_SYNC,
        };
@@ -355,7 +359,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
        arm_smmu_cmdq_build_cmd(cmd, &ent);
 }
 
-static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
+                                    struct arm_smmu_queue *q)
 {
        static const char * const cerror_str[] = {
                [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
@@ -366,7 +371,6 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 
        int i;
        u64 cmd[CMDQ_ENT_DWORDS];
-       struct arm_smmu_queue *q = &smmu->cmdq.q;
        u32 cons = readl_relaxed(q->cons_reg);
        u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
        struct arm_smmu_cmdq_ent cmd_sync = {
@@ -413,6 +417,11 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
+static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
+{
+       __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
+}
+
 /*
  * Command queue locking.
  * This is a form of bastardised rwlock with the following major changes:
@@ -579,7 +588,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 {
        unsigned long flags;
        struct arm_smmu_queue_poll qp;
-       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
        int ret = 0;
 
        /*
@@ -595,7 +604,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
 
        queue_poll_init(smmu, &qp);
        do {
-               llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+               llq->val = READ_ONCE(cmdq->q.llq.val);
                if (!queue_full(llq))
                        break;
 
@@ -614,7 +623,7 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
 {
        int ret = 0;
        struct arm_smmu_queue_poll qp;
-       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
        u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 
        queue_poll_init(smmu, &qp);
@@ -637,12 +646,12 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
                                               struct arm_smmu_ll_queue *llq)
 {
        struct arm_smmu_queue_poll qp;
-       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
        u32 prod = llq->prod;
        int ret = 0;
 
        queue_poll_init(smmu, &qp);
-       llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+       llq->val = READ_ONCE(cmdq->q.llq.val);
        do {
                if (queue_consumed(llq, prod))
                        break;
@@ -732,12 +741,12 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
        u32 prod;
        unsigned long flags;
        bool owner;
-       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
-       struct arm_smmu_ll_queue llq = {
-               .max_n_shift = cmdq->q.llq.max_n_shift,
-       }, head = llq;
+       struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
+       struct arm_smmu_ll_queue llq, head;
        int ret = 0;
 
+       llq.max_n_shift = cmdq->q.llq.max_n_shift;
+
        /* 1. Allocate some space in the queue */
        local_irq_save(flags);
        llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -772,7 +781,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
        arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
        if (sync) {
                prod = queue_inc_prod_n(&llq, n);
-               arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
+               arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
                queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
 
                /*
@@ -845,8 +854,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
        return ret;
 }
 
-static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
-                                  struct arm_smmu_cmdq_ent *ent)
+static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+                                    struct arm_smmu_cmdq_ent *ent,
+                                    bool sync)
 {
        u64 cmd[CMDQ_ENT_DWORDS];
 
@@ -856,12 +866,19 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                return -EINVAL;
        }
 
-       return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
+       return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+}
+
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+                                  struct arm_smmu_cmdq_ent *ent)
+{
+       return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
 }
 
-static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
+                                            struct arm_smmu_cmdq_ent *ent)
 {
-       return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
+       return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
 }
 
 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
@@ -929,8 +946,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
                .tlbi.asid = asid,
        };
 
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
@@ -939,7 +955,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
        size_t i;
        unsigned long flags;
        struct arm_smmu_master *master;
-       struct arm_smmu_cmdq_batch cmds = {};
+       struct arm_smmu_cmdq_batch cmds;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cmdq_ent cmd = {
                .opcode = CMDQ_OP_CFGI_CD,
@@ -949,6 +965,8 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
                },
        };
 
+       cmds.num = 0;
+
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
                for (i = 0; i < master->num_streams; i++) {
@@ -1211,8 +1229,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
                },
        };
 
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
@@ -1747,15 +1764,16 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
 {
        int i;
        struct arm_smmu_cmdq_ent cmd;
+       struct arm_smmu_cmdq_batch cmds = {};
 
        arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
 
        for (i = 0; i < master->num_streams; i++) {
                cmd.atc.sid = master->streams[i].id;
-               arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+               arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
        }
 
-       return arm_smmu_cmdq_issue_sync(master->smmu);
+       return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
 }
 
 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
@@ -1765,7 +1783,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
        unsigned long flags;
        struct arm_smmu_cmdq_ent cmd;
        struct arm_smmu_master *master;
-       struct arm_smmu_cmdq_batch cmds = {};
+       struct arm_smmu_cmdq_batch cmds;
 
        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
                return 0;
@@ -1789,6 +1807,8 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 
        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
 
+       cmds.num = 0;
+
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
        list_for_each_entry(master, &smmu_domain->devices, domain_head) {
                if (!master->ats_enabled)
@@ -1823,8 +1843,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
        } else {
                cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
-               arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-               arm_smmu_cmdq_issue_sync(smmu);
+               arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
        }
        arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
 }
@@ -1837,7 +1856,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        unsigned long end = iova + size, num_pages = 0, tg = 0;
        size_t inv_range = granule;
-       struct arm_smmu_cmdq_batch cmds = {};
+       struct arm_smmu_cmdq_batch cmds;
 
        if (!size)
                return;
@@ -1855,6 +1874,8 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
                num_pages = size >> tg;
        }
 
+       cmds.num = 0;
+
        while (iova < end) {
                if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
                        /*
@@ -1972,6 +1993,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 
        if (type != IOMMU_DOMAIN_UNMANAGED &&
            type != IOMMU_DOMAIN_DMA &&
+           type != IOMMU_DOMAIN_DMA_FQ &&
            type != IOMMU_DOMAIN_IDENTITY)
                return NULL;
 
@@ -1984,12 +2006,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
        if (!smmu_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&smmu_domain->domain)) {
-               kfree(smmu_domain);
-               return NULL;
-       }
-
        mutex_init(&smmu_domain->init_mutex);
        INIT_LIST_HEAD(&smmu_domain->devices);
        spin_lock_init(&smmu_domain->devices_lock);
@@ -2021,7 +2037,6 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-       iommu_put_dma_cookie(domain);
        free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
        /* Free the CD and ASID, if we allocated them */
@@ -2181,9 +2196,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
                .iommu_dev      = smmu->dev,
        };
 
-       if (!iommu_get_dma_strict(domain))
-               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops)
                return -ENOMEM;
@@ -2439,19 +2451,21 @@ out_unlock:
        return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-                       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+                             phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                             int prot, gfp_t gfp, size_t *mapped)
 {
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
        if (!ops)
                return -ENODEV;
 
-       return ops->map(ops, iova, paddr, size, prot, gfp);
+       return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-                            size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+                                  size_t pgsize, size_t pgcount,
+                                  struct iommu_iotlb_gather *gather)
 {
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
@@ -2459,7 +2473,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
        if (!ops)
                return 0;
 
-       return ops->unmap(ops, iova, size, gather);
+       return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
 }
 
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
@@ -2488,9 +2502,6 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
-       if (domain->type == IOMMU_DOMAIN_IDENTITY)
-               return iova;
-
        if (!ops)
                return 0;
 
@@ -2825,8 +2836,8 @@ static struct iommu_ops arm_smmu_ops = {
        .domain_alloc           = arm_smmu_domain_alloc,
        .domain_free            = arm_smmu_domain_free,
        .attach_dev             = arm_smmu_attach_dev,
-       .map                    = arm_smmu_map,
-       .unmap                  = arm_smmu_unmap,
+       .map_pages              = arm_smmu_map_pages,
+       .unmap_pages            = arm_smmu_unmap_pages,
        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
        .iotlb_sync             = arm_smmu_iotlb_sync,
        .iova_to_phys           = arm_smmu_iova_to_phys,
@@ -3338,18 +3349,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
        /* Invalidate any cached configuration */
        cmd.opcode = CMDQ_OP_CFGI_ALL;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 
        /* Invalidate any stale TLB entries */
        if (smmu->features & ARM_SMMU_FEAT_HYP) {
                cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
-               arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+               arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
        }
 
        cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
-       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 
        /* Event queue */
        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
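
The hunks above replace paired arm_smmu_cmdq_issue_cmd() + arm_smmu_cmdq_issue_sync() calls with a single arm_smmu_cmdq_issue_cmd_with_sync(), and switch per-stream ATC invalidation to the batch helpers so several commands reach the queue in one submission. Below is a simplified, self-contained model of that batch-then-submit idea; the structure layout, batch size, and command encoding are invented and do not reflect the real SMMUv3 queue format.

/* Simplified model of the "batch, then submit once" pattern used above
 * (arm_smmu_cmdq_batch_add / arm_smmu_cmdq_batch_submit). All names and
 * sizes here are invented for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_BATCH_MAX 64

struct sketch_batch {
	int num;
	uint64_t cmds[SKETCH_BATCH_MAX];
};

static int submits;    /* counts how many times the "queue" is kicked */

static void sketch_submit(struct sketch_batch *b, int sync)
{
	if (!b->num && !sync)
		return;
	submits++;                     /* one submission for the whole batch */
	b->num = 0;
}

static void sketch_add(struct sketch_batch *b, uint64_t cmd)
{
	if (b->num == SKETCH_BATCH_MAX)
		sketch_submit(b, 0);   /* flush early when the batch is full */
	b->cmds[b->num++] = cmd;
}

int main(void)
{
	struct sketch_batch b = { .num = 0 };
	int sid;

	/* e.g. invalidate the ATC for every stream of a master */
	for (sid = 0; sid < 8; sid++)
		sketch_add(&b, 0x40 | ((uint64_t)sid << 32)); /* arbitrary encoding */
	sketch_submit(&b, 1);          /* one submission + sync instead of 8 + 1 */

	printf("queue submissions: %d\n", submits);
	return 0;
}
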
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 9b9d13e..55690af 100644
@@ -193,6 +193,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 {
        struct adreno_smmu_priv *priv;
 
+       smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+
        /* Only enable split pagetables for the GPU device (SID 0) */
        if (!qcom_adreno_smmu_is_gpu_device(dev))
                return 0;
@@ -235,6 +237,14 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
        { }
 };
 
+static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+               struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
+{
+       smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+
+       return 0;
+}
+
 static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu)
 {
        unsigned int last_s2cr = ARM_SMMU_GR0_S2CR(smmu->num_mapping_groups - 1);
@@ -358,6 +368,7 @@ static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
 }
 
 static const struct arm_smmu_impl qcom_smmu_impl = {
+       .init_context = qcom_smmu_init_context,
        .cfg_probe = qcom_smmu_cfg_probe,
        .def_domain_type = qcom_smmu_def_domain_type,
        .reset = qcom_smmu500_reset,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index f22dbeb..4bc75c4 100644
@@ -327,9 +327,16 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
                                     size_t granule, void *cookie)
 {
-       arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
-                                 ARM_SMMU_CB_S1_TLBIVA);
-       arm_smmu_tlb_sync_context(cookie);
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+
+       if (cfg->flush_walk_prefer_tlbiasid) {
+               arm_smmu_tlb_inv_context_s1(cookie);
+       } else {
+               arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+                                         ARM_SMMU_CB_S1_TLBIVA);
+               arm_smmu_tlb_sync_context(cookie);
+       }
 }
 
 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
@@ -765,9 +772,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .iommu_dev      = smmu->dev,
        };
 
-       if (!iommu_get_dma_strict(domain))
-               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
-
        if (smmu->impl && smmu->impl->init_context) {
                ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
                if (ret)
@@ -868,10 +872,11 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
        struct arm_smmu_domain *smmu_domain;
 
-       if (type != IOMMU_DOMAIN_UNMANAGED &&
-           type != IOMMU_DOMAIN_DMA &&
-           type != IOMMU_DOMAIN_IDENTITY)
-               return NULL;
+       if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
+               if (using_legacy_binding ||
+                   (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
+                       return NULL;
+       }
        /*
         * Allocate the domain and initialise some of its data structures.
         * We can't really do anything meaningful until we've added a
@@ -881,12 +886,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
        if (!smmu_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
-           iommu_get_dma_cookie(&smmu_domain->domain))) {
-               kfree(smmu_domain);
-               return NULL;
-       }
-
        mutex_init(&smmu_domain->init_mutex);
        spin_lock_init(&smmu_domain->cb_lock);
 
@@ -901,7 +900,6 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
         * Free the domain resources. We assume that all devices have
         * already been detached.
         */
-       iommu_put_dma_cookie(domain);
        arm_smmu_destroy_domain_context(domain);
        kfree(smmu_domain);
 }
@@ -1198,8 +1196,9 @@ rpm_put:
        return ret;
 }
 
-static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
-                       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
+                             phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                             int prot, gfp_t gfp, size_t *mapped)
 {
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
        struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1209,14 +1208,15 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
                return -ENODEV;
 
        arm_smmu_rpm_get(smmu);
-       ret = ops->map(ops, iova, paddr, size, prot, gfp);
+       ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
        arm_smmu_rpm_put(smmu);
 
        return ret;
 }
 
-static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-                            size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
+                                  size_t pgsize, size_t pgcount,
+                                  struct iommu_iotlb_gather *iotlb_gather)
 {
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
        struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1226,7 +1226,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                return 0;
 
        arm_smmu_rpm_get(smmu);
-       ret = ops->unmap(ops, iova, size, gather);
+       ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
        arm_smmu_rpm_put(smmu);
 
        return ret;
@@ -1320,9 +1320,6 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
-       if (domain->type == IOMMU_DOMAIN_IDENTITY)
-               return iova;
-
        if (!ops)
                return 0;
 
@@ -1478,16 +1475,21 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
        struct iommu_group *group = NULL;
        int i, idx;
 
+       mutex_lock(&smmu->stream_map_mutex);
        for_each_cfg_sme(cfg, fwspec, i, idx) {
                if (group && smmu->s2crs[idx].group &&
-                   group != smmu->s2crs[idx].group)
+                   group != smmu->s2crs[idx].group) {
+                       mutex_unlock(&smmu->stream_map_mutex);
                        return ERR_PTR(-EINVAL);
+               }
 
                group = smmu->s2crs[idx].group;
        }
 
-       if (group)
+       if (group) {
+               mutex_unlock(&smmu->stream_map_mutex);
                return iommu_group_ref_get(group);
+       }
 
        if (dev_is_pci(dev))
                group = pci_device_group(dev);
@@ -1501,6 +1503,7 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
                for_each_cfg_sme(cfg, fwspec, i, idx)
                        smmu->s2crs[idx].group = group;
 
+       mutex_unlock(&smmu->stream_map_mutex);
        return group;
 }
 
@@ -1582,8 +1585,8 @@ static struct iommu_ops arm_smmu_ops = {
        .domain_alloc           = arm_smmu_domain_alloc,
        .domain_free            = arm_smmu_domain_free,
        .attach_dev             = arm_smmu_attach_dev,
-       .map                    = arm_smmu_map,
-       .unmap                  = arm_smmu_unmap,
+       .map_pages              = arm_smmu_map_pages,
+       .unmap_pages            = arm_smmu_unmap_pages,
        .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
        .iotlb_sync             = arm_smmu_iotlb_sync,
        .iova_to_phys           = arm_smmu_iova_to_phys,
@@ -2281,18 +2284,38 @@ static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
 
 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
 {
+       int ret;
+       struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+       ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
+       if (ret)
+               return ret;
+
        if (pm_runtime_suspended(dev))
                return 0;
 
-       return arm_smmu_runtime_resume(dev);
+       ret = arm_smmu_runtime_resume(dev);
+       if (ret)
+               clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+
+       return ret;
 }
 
 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
 {
+       int ret = 0;
+       struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
        if (pm_runtime_suspended(dev))
-               return 0;
+               goto clk_unprepare;
+
+       ret = arm_smmu_runtime_suspend(dev);
+       if (ret)
+               return ret;
 
-       return arm_smmu_runtime_suspend(dev);
+clk_unprepare:
+       clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+       return ret;
 }
 
 static const struct dev_pm_ops arm_smmu_pm_ops = {
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index a502715..432de2f 100644
@@ -346,6 +346,7 @@ struct arm_smmu_cfg {
        };
        enum arm_smmu_cbar_type         cbar;
        enum arm_smmu_context_fmt       fmt;
+       bool                            flush_walk_prefer_tlbiasid;
 };
 #define ARM_SMMU_INVALID_IRPTNDX       0xff
 
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 021cf8f..b91874c 100644
@@ -10,7 +10,6 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -335,12 +334,6 @@ static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type)
        if (!qcom_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&qcom_domain->domain)) {
-               kfree(qcom_domain);
-               return NULL;
-       }
-
        mutex_init(&qcom_domain->init_mutex);
        spin_lock_init(&qcom_domain->pgtbl_lock);
 
@@ -351,8 +344,6 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain)
 {
        struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
 
-       iommu_put_dma_cookie(domain);
-
        if (qcom_domain->iommu) {
                /*
                 * NOTE: unmap can be called after client device is powered
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 174b2b3..896bea0 100644
@@ -317,6 +317,30 @@ static bool dev_is_untrusted(struct device *dev)
        return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
 }
 
+/* sysfs updates are serialised by the mutex of the group owning @domain */
+int iommu_dma_init_fq(struct iommu_domain *domain)
+{
+       struct iommu_dma_cookie *cookie = domain->iova_cookie;
+       int ret;
+
+       if (cookie->fq_domain)
+               return 0;
+
+       ret = init_iova_flush_queue(&cookie->iovad, iommu_dma_flush_iotlb_all,
+                                   iommu_dma_entry_dtor);
+       if (ret) {
+               pr_warn("iova flush queue initialization failed\n");
+               return ret;
+       }
+       /*
+        * Prevent incomplete iovad->fq being observable. Pairs with path from
+        * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
+        */
+       smp_wmb();
+       WRITE_ONCE(cookie->fq_domain, domain);
+       return 0;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -370,17 +394,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 
        init_iova_domain(iovad, 1UL << order, base_pfn);
 
-       if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
-           domain->ops->flush_iotlb_all && !iommu_get_dma_strict(domain)) {
-               if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
-                                         iommu_dma_entry_dtor))
-                       pr_warn("iova flush queue initialization failed\n");
-               else
-                       cookie->fq_domain = domain;
-       }
-
-       if (!dev)
-               return 0;
+       /* If the FQ fails we can simply fall back to strict mode */
+       if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
+               domain->type = IOMMU_DOMAIN_DMA;
 
        return iova_reserve_iommu_regions(dev, domain);
 }
@@ -455,17 +471,17 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 }
 
 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
-               dma_addr_t iova, size_t size, struct page *freelist)
+               dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
 {
        struct iova_domain *iovad = &cookie->iovad;
 
        /* The MSI case is only ever cleaning up its most recent allocation */
        if (cookie->type == IOMMU_DMA_MSI_COOKIE)
                cookie->msi_iova -= size;
-       else if (cookie->fq_domain)     /* non-strict mode */
+       else if (gather && gather->queued)
                queue_iova(iovad, iova_pfn(iovad, iova),
                                size >> iova_shift(iovad),
-                               (unsigned long)freelist);
+                               (unsigned long)gather->freelist);
        else
                free_iova_fast(iovad, iova_pfn(iovad, iova),
                                size >> iova_shift(iovad));
@@ -484,13 +500,14 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
        dma_addr -= iova_off;
        size = iova_align(iovad, size + iova_off);
        iommu_iotlb_gather_init(&iotlb_gather);
+       iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
 
        unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
        WARN_ON(unmapped != size);
 
-       if (!cookie->fq_domain)
+       if (!iotlb_gather.queued)
                iommu_iotlb_sync(domain, &iotlb_gather);
-       iommu_dma_free_iova(cookie, dma_addr, size, iotlb_gather.freelist);
+       iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
 }
 
 static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
@@ -1330,7 +1347,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
         * The IOMMU core code allocates the default DMA domain, which the
         * underlying IOMMU driver needs to support via the dma-iommu layer.
         */
-       if (domain->type == IOMMU_DOMAIN_DMA) {
+       if (iommu_is_dma_domain(domain)) {
                if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
                        goto out_err;
                dev->dma_ops = &iommu_dma_ops;
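
iommu_dma_init_fq() above publishes cookie->fq_domain only after the flush queue is fully initialised, using smp_wmb() so the lock-free READ_ONCE() readers in the unmap path never observe a half-built queue. The sketch below models the same publish/acquire idea with C11 atomics; the types, names, and sizes are invented and only the ordering pattern matches the patch.

/* Sketch of the publish pattern used by iommu_dma_init_fq() above:
 * fully initialise the flush-queue state, then make the pointer visible
 * with release semantics. All names here are invented.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sketch_fq {
	int depth;
	int *entries;
};

static _Atomic(struct sketch_fq *) published_fq;

static int sketch_init_fq(void)
{
	struct sketch_fq *fq = malloc(sizeof(*fq));

	if (!fq)
		return -1;
	fq->depth = 256;
	fq->entries = calloc(fq->depth, sizeof(*fq->entries));
	if (!fq->entries) {
		free(fq);
		return -1;
	}
	/* "smp_wmb(); WRITE_ONCE(cookie->fq_domain, domain);" becomes a
	 * release store: everything written above is visible before the
	 * pointer itself. */
	atomic_store_explicit(&published_fq, fq, memory_order_release);
	return 0;
}

static int sketch_unmap_path(void)
{
	/* the READ_ONCE(cookie->fq_domain) side: acquire pairs with release */
	struct sketch_fq *fq = atomic_load_explicit(&published_fq,
						    memory_order_acquire);

	return fq ? fq->depth : 0;   /* queue the IOVA vs. free it directly */
}

int main(void)
{
	if (sketch_init_fq())
		return 1;
	printf("fq depth seen by unmap path: %d\n", sketch_unmap_path());
	return 0;
}
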
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index d0fbf1d..939ffa7 100644
@@ -21,7 +21,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
-#include <linux/dma-iommu.h>
 
 typedef u32 sysmmu_iova_t;
 typedef u32 sysmmu_pte_t;
@@ -735,20 +734,16 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
        /* Check if correct PTE offsets are initialized */
        BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev);
 
+       if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
+               return NULL;
+
        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA) {
-               if (iommu_get_dma_cookie(&domain->domain) != 0)
-                       goto err_pgtable;
-       } else if (type != IOMMU_DOMAIN_UNMANAGED) {
-               goto err_pgtable;
-       }
-
        domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);
        if (!domain->pgtable)
-               goto err_dma_cookie;
+               goto err_pgtable;
 
        domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
        if (!domain->lv2entcnt)
@@ -779,9 +774,6 @@ err_lv2ent:
        free_pages((unsigned long)domain->lv2entcnt, 1);
 err_counter:
        free_pages((unsigned long)domain->pgtable, 2);
-err_dma_cookie:
-       if (type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&domain->domain);
 err_pgtable:
        kfree(domain);
        return NULL;
@@ -809,9 +801,6 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
 
        spin_unlock_irqrestore(&domain->lock, flags);
 
-       if (iommu_domain->type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(iommu_domain);
-
        dma_unmap_single(dma_dev, virt_to_phys(domain->pgtable), LV1TABLE_SIZE,
                         DMA_TO_DEVICE);
 
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 43ebd8a..0ddb771 100644
@@ -25,9 +25,11 @@ config INTEL_IOMMU
          and include PCI device scope covered by these DMA
          remapping devices.
 
+if INTEL_IOMMU
+
 config INTEL_IOMMU_DEBUGFS
        bool "Export Intel IOMMU internals in Debugfs"
-       depends on INTEL_IOMMU && IOMMU_DEBUGFS
+       depends on IOMMU_DEBUGFS
        select DMAR_PERF
        help
          !!!WARNING!!!
@@ -41,7 +43,7 @@ config INTEL_IOMMU_DEBUGFS
 
 config INTEL_IOMMU_SVM
        bool "Support for Shared Virtual Memory with Intel IOMMU"
-       depends on INTEL_IOMMU && X86_64
+       depends on X86_64
        select PCI_PASID
        select PCI_PRI
        select MMU_NOTIFIER
@@ -53,9 +55,8 @@ config INTEL_IOMMU_SVM
          means of a Process Address Space ID (PASID).
 
 config INTEL_IOMMU_DEFAULT_ON
-       def_bool y
-       prompt "Enable Intel DMA Remapping Devices by default"
-       depends on INTEL_IOMMU
+       bool "Enable Intel DMA Remapping Devices by default"
+       default y
        help
          Selecting this option will enable a DMAR device at boot time if
          one is found. If this option is not selected, DMAR support can
@@ -63,7 +64,7 @@ config INTEL_IOMMU_DEFAULT_ON
 
 config INTEL_IOMMU_BROKEN_GFX_WA
        bool "Workaround broken graphics drivers (going away soon)"
-       depends on INTEL_IOMMU && BROKEN && X86
+       depends on BROKEN && X86
        help
          Current Graphics drivers tend to use physical address
          for DMA and avoid using DMA APIs. Setting this config
@@ -74,7 +75,7 @@ config INTEL_IOMMU_BROKEN_GFX_WA
 
 config INTEL_IOMMU_FLOPPY_WA
        def_bool y
-       depends on INTEL_IOMMU && X86
+       depends on X86
        help
          Floppy disk drivers are known to bypass DMA API calls
          thereby failing to work when IOMMU is enabled. This
@@ -83,7 +84,7 @@ config INTEL_IOMMU_FLOPPY_WA
 
 config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
        bool "Enable Intel IOMMU scalable mode by default"
-       depends on INTEL_IOMMU
+       default y
        help
          Selecting this option will enable by default the scalable mode if
          hardware presents the capability. The scalable mode is defined in
@@ -92,3 +93,5 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
          is not selected, scalable mode support could also be enabled by
          passing intel_iommu=sm_on to the kernel. If not sure, please use
          the default value.
+
+endif # INTEL_IOMMU
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index d66f79a..0ec5514 100644
@@ -149,8 +149,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
        } else {
                info = kzalloc(size, GFP_KERNEL);
                if (!info) {
-                       pr_warn("Out of memory when allocating notify_info "
-                               "for %s.\n", pci_name(dev));
                        if (dmar_dev_scope_status == 0)
                                dmar_dev_scope_status = -ENOMEM;
                        return NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dd22fc7..d75f59a 100644
@@ -33,6 +33,7 @@
 #include <linux/iommu.h>
 #include <linux/dma-iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
 #define LEVEL_STRIDE           (9)
 #define LEVEL_MASK             (((u64)1 << LEVEL_STRIDE) - 1)
 
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * Traditionally the IOMMU core just handed us the mappings directly,
- * after making sure the size is an order of a 4KiB page and that the
- * mapping has natural alignment.
- *
- * To retain this behavior, we currently advertise that we support
- * all page sizes that are an order of 4KiB.
- *
- * If at some point we'd like to utilize the IOMMU core's new behavior,
- * we could change this to advertise the real page sizes we support.
- */
-#define INTEL_IOMMU_PGSIZES    (~0xFFFUL)
-
 static inline int agaw_to_level(int agaw)
 {
        return agaw + 2;
@@ -345,23 +328,13 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
                                            dma_addr_t iova);
 
-#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
-int dmar_disabled = 0;
-#else
-int dmar_disabled = 1;
-#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
-
-#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
-int intel_iommu_sm = 1;
-#else
-int intel_iommu_sm;
-#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
+int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
 
 int intel_iommu_enabled = 0;
 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 
 static int dmar_map_gfx = 1;
-static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
 static int iommu_skip_te_disable;
@@ -454,14 +427,17 @@ static int __init intel_iommu_setup(char *str)
                        pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
                        iommu_dma_forcedac = true;
                } else if (!strncmp(str, "strict", 6)) {
-                       pr_info("Disable batched IOTLB flush\n");
-                       intel_iommu_strict = 1;
+                       pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
+                       iommu_set_dma_strict();
                } else if (!strncmp(str, "sp_off", 6)) {
                        pr_info("Disable supported super page\n");
                        intel_iommu_superpage = 0;
                } else if (!strncmp(str, "sm_on", 5)) {
-                       pr_info("Intel-IOMMU: scalable mode supported\n");
+                       pr_info("Enable scalable mode if hardware supports\n");
                        intel_iommu_sm = 1;
+               } else if (!strncmp(str, "sm_off", 6)) {
+                       pr_info("Scalable mode is disallowed\n");
+                       intel_iommu_sm = 0;
                } else if (!strncmp(str, "tboot_noforce", 13)) {
                        pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
                        intel_iommu_tboot_noforce = 1;
@@ -601,7 +577,7 @@ struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
        int iommu_id;
 
        /* si_domain and vm domain should not get here. */
-       if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
+       if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
                return NULL;
 
        for_each_domain_iommu(iommu_id, domain)
@@ -736,6 +712,23 @@ static int domain_update_device_node(struct dmar_domain *domain)
 
 static void domain_update_iotlb(struct dmar_domain *domain);
 
+/* Return the super pagesize bitmap if supported. */
+static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
+{
+       unsigned long bitmap = 0;
+
+       /*
+        * 1-level super page supports page size of 2MiB, 2-level super page
+        * supports page size of both 2MiB and 1GiB.
+        */
+       if (domain->iommu_superpage == 1)
+               bitmap |= SZ_2M;
+       else if (domain->iommu_superpage == 2)
+               bitmap |= SZ_2M | SZ_1G;
+
+       return bitmap;
+}
+
 /* Some capabilities may be different across iommus */
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
@@ -762,6 +755,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
        else
                domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
 
+       domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
        domain_update_iotlb(domain);
 }
 
@@ -1035,7 +1029,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
                        pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
                        if (domain_use_first_level(domain)) {
                                pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
-                               if (domain->domain.type == IOMMU_DOMAIN_DMA)
+                               if (iommu_is_dma_domain(&domain->domain))
                                        pteval |= DMA_FL_PTE_ACCESS;
                        }
                        if (cmpxchg64(&pte->val, 0ULL, pteval))
@@ -1548,7 +1542,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 
        if (info->pri_supported &&
            (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
-           !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
+           !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
                info->pri_enabled = 1;
 #endif
        if (info->ats_supported && pci_ats_page_aligned(pdev) &&
@@ -1780,11 +1774,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
        spin_lock_init(&iommu->lock);
 
        iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
-       if (!iommu->domain_ids) {
-               pr_err("%s: Allocating domain id array failed\n",
-                      iommu->name);
+       if (!iommu->domain_ids)
                return -ENOMEM;
-       }
 
        size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
        iommu->domains = kzalloc(size, GFP_KERNEL);
@@ -1980,10 +1971,6 @@ static void domain_exit(struct dmar_domain *domain)
        /* Remove associated devices and clear attached or cached domains */
        domain_remove_dev_info(domain);
 
-       /* destroy iovas */
-       if (domain->domain.type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&domain->domain);
-
        if (domain->pgd) {
                struct page *freelist;
 
@@ -2334,9 +2321,9 @@ static int
 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                 unsigned long phys_pfn, unsigned long nr_pages, int prot)
 {
+       struct dma_pte *first_pte = NULL, *pte = NULL;
        unsigned int largepage_lvl = 0;
        unsigned long lvl_pages = 0;
-       struct dma_pte *pte = NULL;
        phys_addr_t pteval;
        u64 attr;
 
@@ -2348,13 +2335,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
        attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
        attr |= DMA_FL_PTE_PRESENT;
        if (domain_use_first_level(domain)) {
-               attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
-
-               if (domain->domain.type == IOMMU_DOMAIN_DMA) {
-                       attr |= DMA_FL_PTE_ACCESS;
-                       if (prot & DMA_PTE_WRITE)
-                               attr |= DMA_FL_PTE_DIRTY;
-               }
+               attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+               if (prot & DMA_PTE_WRITE)
+                       attr |= DMA_FL_PTE_DIRTY;
        }
 
        pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
@@ -2369,6 +2352,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                        pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
                        if (!pte)
                                return -ENOMEM;
+                       first_pte = pte;
+
                        /* It is large page*/
                        if (largepage_lvl > 1) {
                                unsigned long end_pfn;
@@ -2416,14 +2401,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                 * recalculate 'pte' and switch back to smaller pages for the
                 * end of the mapping, if the trailing size is not enough to
                 * use another superpage (i.e. nr_pages < lvl_pages).
-                *
-                * We leave clflush for the leaf pte changes to iotlb_sync_map()
-                * callback.
                 */
                pte++;
                if (!nr_pages || first_pte_in_page(pte) ||
-                   (largepage_lvl > 1 && nr_pages < lvl_pages))
+                   (largepage_lvl > 1 && nr_pages < lvl_pages)) {
+                       domain_flush_cache(domain, first_pte,
+                                          (void *)pte - (void *)first_pte);
                        pte = NULL;
+               }
        }
 
        return 0;
@@ -3227,7 +3212,6 @@ static int __init init_dmars(void)
        g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
                        GFP_KERNEL);
        if (!g_iommus) {
-               pr_err("Allocating global iommu array failed\n");
                ret = -ENOMEM;
                goto error;
        }
@@ -4393,9 +4377,9 @@ int __init intel_iommu_init(void)
                 * is likely to be much lower than the overhead of synchronizing
                 * the virtual and physical IOMMU page-tables.
                 */
-               if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
-                       pr_warn("IOMMU batching is disabled due to virtualization");
-                       intel_iommu_strict = 1;
+               if (cap_caching_mode(iommu->cap)) {
+                       pr_info_once("IOMMU batching disallowed due to virtualization\n");
+                       iommu_set_dma_strict();
                }
                iommu_device_sysfs_add(&iommu->iommu, NULL,
                                       intel_iommu_groups,
@@ -4404,7 +4388,6 @@ int __init intel_iommu_init(void)
        }
        up_read(&dmar_global_lock);
 
-       iommu_set_dma_strict(intel_iommu_strict);
        bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
        if (si_domain && !hw_pass_through)
                register_memory_notifier(&intel_iommu_memory_nb);
@@ -4532,6 +4515,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 
        switch (type) {
        case IOMMU_DOMAIN_DMA:
+       case IOMMU_DOMAIN_DMA_FQ:
        case IOMMU_DOMAIN_UNMANAGED:
                dmar_domain = alloc_domain(0);
                if (!dmar_domain) {
@@ -4544,10 +4528,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
                        return NULL;
                }
 
-               if (type == IOMMU_DOMAIN_DMA &&
-                   iommu_get_dma_cookie(&dmar_domain->domain))
-                       return NULL;
-
                domain = &dmar_domain->domain;
                domain->geometry.aperture_start = 0;
                domain->geometry.aperture_end   =
@@ -5067,6 +5047,28 @@ static int intel_iommu_map(struct iommu_domain *domain,
                                hpa >> VTD_PAGE_SHIFT, size, prot);
 }
 
+static int intel_iommu_map_pages(struct iommu_domain *domain,
+                                unsigned long iova, phys_addr_t paddr,
+                                size_t pgsize, size_t pgcount,
+                                int prot, gfp_t gfp, size_t *mapped)
+{
+       unsigned long pgshift = __ffs(pgsize);
+       size_t size = pgcount << pgshift;
+       int ret;
+
+       if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
+               return -EINVAL;
+
+       if (!IS_ALIGNED(iova | paddr, pgsize))
+               return -EINVAL;
+
+       ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
+       if (!ret && mapped)
+               *mapped = size;
+
+       return ret;
+}
+
 static size_t intel_iommu_unmap(struct iommu_domain *domain,
                                unsigned long iova, size_t size,
                                struct iommu_iotlb_gather *gather)
@@ -5096,6 +5098,17 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
        return size;
 }
 
+static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
+                                     unsigned long iova,
+                                     size_t pgsize, size_t pgcount,
+                                     struct iommu_iotlb_gather *gather)
+{
+       unsigned long pgshift = __ffs(pgsize);
+       size_t size = pgcount << pgshift;
+
+       return intel_iommu_unmap(domain, iova, size, gather);
+}
+
 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
                                 struct iommu_iotlb_gather *gather)
 {
@@ -5172,12 +5185,8 @@ static void intel_iommu_release_device(struct device *dev)
 
 static void intel_iommu_probe_finalize(struct device *dev)
 {
-       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-
-       if (domain && domain->type == IOMMU_DOMAIN_DMA)
-               iommu_setup_dma_ops(dev, 0, U64_MAX);
-       else
-               set_dma_ops(dev, NULL);
+       set_dma_ops(dev, NULL);
+       iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void intel_iommu_get_resv_regions(struct device *device,
@@ -5532,39 +5541,6 @@ static bool risky_device(struct pci_dev *pdev)
        return false;
 }
 
-static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn,
-                            unsigned long clf_pages)
-{
-       struct dma_pte *first_pte = NULL, *pte = NULL;
-       unsigned long lvl_pages = 0;
-       int level = 0;
-
-       while (clf_pages > 0) {
-               if (!pte) {
-                       level = 0;
-                       pte = pfn_to_dma_pte(domain, clf_pfn, &level);
-                       if (WARN_ON(!pte))
-                               return;
-                       first_pte = pte;
-                       lvl_pages = lvl_to_nr_pages(level);
-               }
-
-               if (WARN_ON(!lvl_pages || clf_pages < lvl_pages))
-                       return;
-
-               clf_pages -= lvl_pages;
-               clf_pfn += lvl_pages;
-               pte++;
-
-               if (!clf_pages || first_pte_in_page(pte) ||
-                   (level > 1 && clf_pages < lvl_pages)) {
-                       domain_flush_cache(domain, first_pte,
-                                          (void *)pte - (void *)first_pte);
-                       pte = NULL;
-               }
-       }
-}
-
 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
                                       unsigned long iova, size_t size)
 {
@@ -5574,9 +5550,6 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
        struct intel_iommu *iommu;
        int iommu_id;
 
-       if (!dmar_domain->iommu_coherency)
-               clflush_sync_map(dmar_domain, pfn, pages);
-
        for_each_domain_iommu(iommu_id, dmar_domain) {
                iommu = g_iommus[iommu_id];
                __mapping_notify_one(iommu, dmar_domain, pfn, pages);
@@ -5593,9 +5566,9 @@ const struct iommu_ops intel_iommu_ops = {
        .aux_attach_dev         = intel_iommu_aux_attach_device,
        .aux_detach_dev         = intel_iommu_aux_detach_device,
        .aux_get_pasid          = intel_iommu_aux_get_pasid,
-       .map                    = intel_iommu_map,
+       .map_pages              = intel_iommu_map_pages,
+       .unmap_pages            = intel_iommu_unmap_pages,
        .iotlb_sync_map         = intel_iommu_iotlb_sync_map,
-       .unmap                  = intel_iommu_unmap,
        .flush_iotlb_all        = intel_flush_iotlb_all,
        .iotlb_sync             = intel_iommu_tlb_sync,
        .iova_to_phys           = intel_iommu_iova_to_phys,
@@ -5611,7 +5584,7 @@ const struct iommu_ops intel_iommu_ops = {
        .dev_disable_feat       = intel_iommu_dev_disable_feat,
        .is_attach_deferred     = intel_iommu_is_attach_deferred,
        .def_domain_type        = device_def_domain_type,
-       .pgsize_bitmap          = INTEL_IOMMU_PGSIZES,
+       .pgsize_bitmap          = SZ_4K,
 #ifdef CONFIG_INTEL_IOMMU_SVM
        .cache_invalidate       = intel_iommu_sva_invalidate,
        .sva_bind_gpasid        = intel_svm_bind_gpasid,
@@ -5714,8 +5687,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
        } else if (dmar_map_gfx) {
                /* we have to ensure the gfx device is idle before we flush */
                pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
-               intel_iommu_strict = 1;
-       }
+               iommu_set_dma_strict();
+       }
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
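
The new intel_iommu_map_pages()/unmap_pages() callbacks above fold a (pgsize, pgcount) pair into one contiguous range via pgcount << __ffs(pgsize), and the advertised page-size bitmap becomes SZ_4K extended at runtime by domain_super_pgsize_bitmap(). A standalone sketch of that arithmetic follows; the superpage level and page counts are example values, not hardware readouts.

/* Sketch of the (pgsize, pgcount) -> byte-size folding and the dynamic
 * pgsize_bitmap composition used by the VT-d changes above. Example
 * values only.
 */
#include <stdio.h>
#include <stddef.h>

#define SZ_4K 0x00001000UL
#define SZ_2M 0x00200000UL
#define SZ_1G 0x40000000UL

static unsigned long super_pgsize_bitmap(int superpage_level)
{
	/* mirrors domain_super_pgsize_bitmap(): level 1 => 2M, level 2 => 2M+1G */
	if (superpage_level == 1)
		return SZ_2M;
	if (superpage_level == 2)
		return SZ_2M | SZ_1G;
	return 0;
}

int main(void)
{
	unsigned long bitmap = SZ_4K | super_pgsize_bitmap(2);
	size_t pgsize = SZ_2M, pgcount = 16;
	unsigned long pgshift = __builtin_ctzl(pgsize);   /* __ffs() equivalent */
	size_t size = pgcount << pgshift;

	printf("pgsize_bitmap=0x%lx mapping=%zu bytes\n", bitmap, size);
	return 0;
}
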
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 9ec374e..07c390a 100644
@@ -517,7 +517,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
        if (WARN_ON(!pte))
                return;
 
-       if (!(pte->val[0] & PASID_PTE_PRESENT))
+       if (!pasid_pte_is_present(pte))
                return;
 
        did = pasid_get_domain_id(pte);
@@ -540,6 +540,10 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
                devtlb_invalidation_with_pasid(iommu, dev, pasid);
 }
 
+/*
+ * This function flushes the caches for a newly set up PASID table entry.
+ * Callers must not use it to update PASID table entries that are already in use.
+ */
 static void pasid_flush_caches(struct intel_iommu *iommu,
                                struct pasid_entry *pte,
                               u32 pasid, u16 did)
@@ -591,6 +595,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
        if (WARN_ON(!pte))
                return -EINVAL;
 
+       /* Caller must ensure PASID entry is not in use. */
+       if (pasid_pte_is_present(pte))
+               return -EBUSY;
+
        pasid_clear_entry(pte);
 
        /* Setup the first level page table pointer: */
@@ -690,6 +698,10 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
                return -ENODEV;
        }
 
+       /* Caller must ensure PASID entry is not in use. */
+       if (pasid_pte_is_present(pte))
+               return -EBUSY;
+
        pasid_clear_entry(pte);
        pasid_set_domain_id(pte, did);
        pasid_set_slptr(pte, pgd_val);
@@ -729,6 +741,10 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
                return -ENODEV;
        }
 
+       /* Caller must ensure PASID entry is not in use. */
+       if (pasid_pte_is_present(pte))
+               return -EBUSY;
+
        pasid_clear_entry(pte);
        pasid_set_domain_id(pte, did);
        pasid_set_address_width(pte, iommu->agaw);
index c11bc8b..d5552e2 100644 (file)
 #define VCMD_CMD_ALLOC                 0x1
 #define VCMD_CMD_FREE                  0x2
 #define VCMD_VRSP_IP                   0x1
-#define VCMD_VRSP_SC(e)                        (((e) >> 1) & 0x3)
+#define VCMD_VRSP_SC(e)                        (((e) & 0xff) >> 1)
 #define VCMD_VRSP_SC_SUCCESS           0
-#define VCMD_VRSP_SC_NO_PASID_AVAIL    2
-#define VCMD_VRSP_SC_INVALID_PASID     2
-#define VCMD_VRSP_RESULT_PASID(e)      (((e) >> 8) & 0xfffff)
-#define VCMD_CMD_OPERAND(e)            ((e) << 8)
+#define VCMD_VRSP_SC_NO_PASID_AVAIL    16
+#define VCMD_VRSP_SC_INVALID_PASID     16
+#define VCMD_VRSP_RESULT_PASID(e)      (((e) >> 16) & 0xfffff)
+#define VCMD_CMD_OPERAND(e)            ((e) << 16)
 /*
  * Domain ID reserved for pasid entries programmed for first-level
  * only and pass-through transfer modes.
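The updated macros follow the current virtual command layout: the in-progress bit stays at bit 0, the status code now sits in bits 7:1, and both the result PASID and the command operand move up to bit 16. Below is a minimal userspace sketch of the decoding those macros imply; the macro bodies are copied from the hunk above, while the sample response value and the main() harness are purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Field layout from the updated pasid.h macros */
#define VCMD_VRSP_IP                    0x1
#define VCMD_VRSP_SC(e)                 (((e) & 0xff) >> 1)
#define VCMD_VRSP_SC_SUCCESS            0
#define VCMD_VRSP_SC_NO_PASID_AVAIL     16
#define VCMD_VRSP_RESULT_PASID(e)       (((e) >> 16) & 0xfffff)
#define VCMD_CMD_OPERAND(e)             ((e) << 16)

int main(void)
{
        /* Hypothetical response: IP clear, SC == success, PASID 0x42 allocated */
        uint64_t vrsp = ((uint64_t)0x42 << 16) | (VCMD_VRSP_SC_SUCCESS << 1);

        if (vrsp & VCMD_VRSP_IP)
                printf("response still in progress\n");
        else if (VCMD_VRSP_SC(vrsp) == VCMD_VRSP_SC_SUCCESS)
                printf("allocated PASID %#llx\n",
                       (unsigned long long)VCMD_VRSP_RESULT_PASID(vrsp));
        else
                printf("allocation failed, SC=%llu\n",
                       (unsigned long long)VCMD_VRSP_SC(vrsp));

        /* A free command would encode the PASID as its operand at bit 16 */
        printf("free command operand: %#llx\n",
               (unsigned long long)VCMD_CMD_OPERAND(0x42ULL));
        return 0;
}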
index 73b7ec7..0e8e032 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * perf.c - performance monitor
  *
  * Copyright (C) 2021 Intel Corporation
index 4b9b3f3..2014fe8 100644 (file)
@@ -31,8 +31,6 @@ static irqreturn_t prq_event_thread(int irq, void *d);
 static void intel_svm_drain_prq(struct device *dev, u32 pasid);
 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
 
-#define PRQ_ORDER 0
-
 static DEFINE_XARRAY_ALLOC(pasid_private_array);
 static int pasid_private_add(ioasid_t pasid, void *priv)
 {
@@ -725,8 +723,6 @@ struct page_req_dsc {
        u64 priv_data[2];
 };
 
-#define PRQ_RING_MASK  ((0x1000 << PRQ_ORDER) - 0x20)
-
 static bool is_canonical_address(u64 addr)
 {
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
index d4004bc..bfb6acb 100644 (file)
@@ -519,11 +519,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
        return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
 }
 
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
-                       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+                            phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                            int prot, gfp_t gfp, size_t *mapped)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
-       int ret;
+       int ret = -EINVAL;
 
        if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
                    paddr >= (1ULL << data->iop.cfg.oas)))
@@ -533,7 +534,17 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
        if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;
 
-       ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd, gfp);
+       while (pgcount--) {
+               ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1, data->pgd,
+                                   gfp);
+               if (ret)
+                       break;
+
+               iova += pgsize;
+               paddr += pgsize;
+               if (mapped)
+                       *mapped += pgsize;
+       }
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
@@ -543,6 +554,12 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
        return ret;
 }
 
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+                      phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+       return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
+}
+
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -683,14 +700,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                                                ARM_V7S_BLOCK_SIZE(lvl + 1));
                                ptep = iopte_deref(pte[i], lvl, data);
                                __arm_v7s_free_table(ptep, lvl + 1, data);
-                       } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
-                               /*
-                                * Order the PTE update against queueing the IOVA, to
-                                * guarantee that a flush callback from a different CPU
-                                * has observed it before the TLBIALL can be issued.
-                                */
-                               smp_wmb();
-                       } else {
+                       } else if (!iommu_iotlb_gather_queued(gather)) {
                                io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
                        }
                        iova += blk_size;
@@ -710,15 +720,32 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
        return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                           size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+                                 size_t pgsize, size_t pgcount,
+                                 struct iommu_iotlb_gather *gather)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+       size_t unmapped = 0, ret;
 
        if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
                return 0;
 
-       return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
+       while (pgcount--) {
+               ret = __arm_v7s_unmap(data, gather, iova, pgsize, 1, data->pgd);
+               if (!ret)
+                       break;
+
+               unmapped += pgsize;
+               iova += pgsize;
+       }
+
+       return unmapped;
+}
+
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+                           size_t size, struct iommu_iotlb_gather *gather)
+{
+       return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -757,8 +784,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NO_PERMS |
-                           IO_PGTABLE_QUIRK_ARM_MTK_EXT |
-                           IO_PGTABLE_QUIRK_NON_STRICT))
+                           IO_PGTABLE_QUIRK_ARM_MTK_EXT))
                return NULL;
 
        /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -780,7 +806,9 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 
        data->iop.ops = (struct io_pgtable_ops) {
                .map            = arm_v7s_map,
+               .map_pages      = arm_v7s_map_pages,
                .unmap          = arm_v7s_unmap,
+               .unmap_pages    = arm_v7s_unmap_pages,
                .iova_to_phys   = arm_v7s_iova_to_phys,
        };
 
index 87def58..dd9e471 100644 (file)
@@ -46,6 +46,9 @@
 #define ARM_LPAE_PGD_SIZE(d)                                           \
        (sizeof(arm_lpae_iopte) << (d)->pgd_bits)
 
+#define ARM_LPAE_PTES_PER_TABLE(d)                                     \
+       (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
+
 /*
  * Calculate the index at level l used to map virtual address a using the
  * pagetable in d.
 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF  0x88ULL
 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
 
+#define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)
+#define APPLE_DART_PTE_PROT_NO_READ (1<<8)
+
 /* IOPTE accessors */
 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
@@ -232,70 +238,77 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
        free_pages((unsigned long)pages, get_order(size));
 }
 
-static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
                                struct io_pgtable_cfg *cfg)
 {
        dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
-                                  sizeof(*ptep), DMA_TO_DEVICE);
+                                  sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
 }
 
-static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
-                              struct io_pgtable_cfg *cfg)
+static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
 {
-       *ptep = pte;
+
+       *ptep = 0;
 
        if (!cfg->coherent_walk)
-               __arm_lpae_sync_pte(ptep, cfg);
+               __arm_lpae_sync_pte(ptep, 1, cfg);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
-                              unsigned long iova, size_t size, int lvl,
-                              arm_lpae_iopte *ptep);
+                              unsigned long iova, size_t size, size_t pgcount,
+                              int lvl, arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                                phys_addr_t paddr, arm_lpae_iopte prot,
-                               int lvl, arm_lpae_iopte *ptep)
+                               int lvl, int num_entries, arm_lpae_iopte *ptep)
 {
        arm_lpae_iopte pte = prot;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+       int i;
 
        if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
                pte |= ARM_LPAE_PTE_TYPE_PAGE;
        else
                pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
-       pte |= paddr_to_iopte(paddr, data);
+       for (i = 0; i < num_entries; i++)
+               ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
 
-       __arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+       if (!cfg->coherent_walk)
+               __arm_lpae_sync_pte(ptep, num_entries, cfg);
 }
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
-                            arm_lpae_iopte prot, int lvl,
+                            arm_lpae_iopte prot, int lvl, int num_entries,
                             arm_lpae_iopte *ptep)
 {
-       arm_lpae_iopte pte = *ptep;
-
-       if (iopte_leaf(pte, lvl, data->iop.fmt)) {
-               /* We require an unmap first */
-               WARN_ON(!selftest_running);
-               return -EEXIST;
-       } else if (iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE) {
-               /*
-                * We need to unmap and free the old table before
-                * overwriting it with a block entry.
-                */
-               arm_lpae_iopte *tblp;
-               size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-
-               tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-               if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
-                       WARN_ON(1);
-                       return -EINVAL;
+       int i;
+
+       for (i = 0; i < num_entries; i++)
+               if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
+                       /* We require an unmap first */
+                       WARN_ON(!selftest_running);
+                       return -EEXIST;
+               } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
+                       /*
+                        * We need to unmap and free the old table before
+                        * overwriting it with a block entry.
+                        */
+                       arm_lpae_iopte *tblp;
+                       size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+
+                       tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
+                       if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
+                                            lvl, tblp) != sz) {
+                               WARN_ON(1);
+                               return -EINVAL;
+                       }
                }
-       }
 
-       __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
+       __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
        return 0;
 }
 
@@ -323,7 +336,7 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
                return old;
 
        /* Even if it's not ours, there's no point waiting; just kick it */
-       __arm_lpae_sync_pte(ptep, cfg);
+       __arm_lpae_sync_pte(ptep, 1, cfg);
        if (old == curr)
                WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
 
@@ -331,20 +344,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
-                         phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
-                         int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
+                         phys_addr_t paddr, size_t size, size_t pgcount,
+                         arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
+                         gfp_t gfp, size_t *mapped)
 {
        arm_lpae_iopte *cptep, pte;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
        size_t tblsz = ARM_LPAE_GRANULE(data);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       int ret = 0, num_entries, max_entries, map_idx_start;
 
        /* Find our entry at the current level */
-       ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+       map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+       ptep += map_idx_start;
 
        /* If we can install a leaf entry at this level, then do so */
-       if (size == block_size)
-               return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+       if (size == block_size) {
+               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
+               num_entries = min_t(int, pgcount, max_entries);
+               ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
+               if (!ret && mapped)
+                       *mapped += num_entries * size;
+
+               return ret;
+       }
 
        /* We can't allocate tables at the final level */
        if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -361,7 +384,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                if (pte)
                        __arm_lpae_free_pages(cptep, tblsz, cfg);
        } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
-               __arm_lpae_sync_pte(ptep, cfg);
+               __arm_lpae_sync_pte(ptep, 1, cfg);
        }
 
        if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
@@ -373,7 +396,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
        }
 
        /* Rinse, repeat */
-       return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
+       return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
+                             cptep, gfp, mapped);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@@ -381,6 +405,15 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 {
        arm_lpae_iopte pte;
 
+       if (data->iop.fmt == APPLE_DART) {
+               pte = 0;
+               if (!(prot & IOMMU_WRITE))
+                       pte |= APPLE_DART_PTE_PROT_NO_WRITE;
+               if (!(prot & IOMMU_READ))
+                       pte |= APPLE_DART_PTE_PROT_NO_READ;
+               return pte;
+       }
+
        if (data->iop.fmt == ARM_64_LPAE_S1 ||
            data->iop.fmt == ARM_32_LPAE_S1) {
                pte = ARM_LPAE_PTE_nG;
@@ -440,8 +473,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
        return pte;
 }
 
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
-                       phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+                             phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                             int iommu_prot, gfp_t gfp, size_t *mapped)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
@@ -450,7 +484,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
        arm_lpae_iopte prot;
        long iaext = (s64)iova >> cfg->ias;
 
-       if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+       if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
                return -EINVAL;
 
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -463,7 +497,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                return 0;
 
        prot = arm_lpae_prot_to_pte(data, iommu_prot);
-       ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
+       ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
+                            ptep, gfp, mapped);
        /*
         * Synchronise all PTE updates for the new mapping before there's
         * a chance for anything to kick off a table walk for the new iova.
@@ -473,6 +508,13 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
        return ret;
 }
 
+static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
+                       phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
+{
+       return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
+                                 NULL);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                                    arm_lpae_iopte *ptep)
 {
@@ -516,14 +558,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                       struct iommu_iotlb_gather *gather,
                                       unsigned long iova, size_t size,
                                       arm_lpae_iopte blk_pte, int lvl,
-                                      arm_lpae_iopte *ptep)
+                                      arm_lpae_iopte *ptep, size_t pgcount)
 {
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte pte, *tablep;
        phys_addr_t blk_paddr;
        size_t tablesz = ARM_LPAE_GRANULE(data);
        size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
-       int i, unmap_idx = -1;
+       int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
+       int i, unmap_idx_start = -1, num_entries = 0, max_entries;
 
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;
@@ -532,18 +575,21 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
        if (!tablep)
                return 0; /* Bytes unmapped */
 
-       if (size == split_sz)
-               unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+       if (size == split_sz) {
+               unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+               max_entries = ptes_per_table - unmap_idx_start;
+               num_entries = min_t(int, pgcount, max_entries);
+       }
 
        blk_paddr = iopte_to_paddr(blk_pte, data);
        pte = iopte_prot(blk_pte);
 
-       for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
+       for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
                /* Unmap! */
-               if (i == unmap_idx)
+               if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
                        continue;
 
-               __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
+               __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
        }
 
        pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
@@ -558,76 +604,85 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                        return 0;
 
                tablep = iopte_deref(pte, data);
-       } else if (unmap_idx >= 0) {
-               io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
-               return size;
+       } else if (unmap_idx_start >= 0) {
+               for (i = 0; i < num_entries; i++)
+                       io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
+
+               return num_entries * size;
        }
 
-       return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
+       return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                               struct iommu_iotlb_gather *gather,
-                              unsigned long iova, size_t size, int lvl,
-                              arm_lpae_iopte *ptep)
+                              unsigned long iova, size_t size, size_t pgcount,
+                              int lvl, arm_lpae_iopte *ptep)
 {
        arm_lpae_iopte pte;
        struct io_pgtable *iop = &data->iop;
+       int i = 0, num_entries, max_entries, unmap_idx_start;
 
        /* Something went horribly wrong and we ran out of page table */
        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
                return 0;
 
-       ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+       unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
+       ptep += unmap_idx_start;
        pte = READ_ONCE(*ptep);
        if (WARN_ON(!pte))
                return 0;
 
        /* If the size matches this level, we're in the right place */
        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
-               __arm_lpae_set_pte(ptep, 0, &iop->cfg);
-
-               if (!iopte_leaf(pte, lvl, iop->fmt)) {
-                       /* Also flush any partial walks */
-                       io_pgtable_tlb_flush_walk(iop, iova, size,
-                                                 ARM_LPAE_GRANULE(data));
-                       ptep = iopte_deref(pte, data);
-                       __arm_lpae_free_pgtable(data, lvl + 1, ptep);
-               } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
-                       /*
-                        * Order the PTE update against queueing the IOVA, to
-                        * guarantee that a flush callback from a different CPU
-                        * has observed it before the TLBIALL can be issued.
-                        */
-                       smp_wmb();
-               } else {
-                       io_pgtable_tlb_add_page(iop, gather, iova, size);
+               max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+               num_entries = min_t(int, pgcount, max_entries);
+
+               while (i < num_entries) {
+                       pte = READ_ONCE(*ptep);
+                       if (WARN_ON(!pte))
+                               break;
+
+                       __arm_lpae_clear_pte(ptep, &iop->cfg);
+
+                       if (!iopte_leaf(pte, lvl, iop->fmt)) {
+                               /* Also flush any partial walks */
+                               io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
+                                                         ARM_LPAE_GRANULE(data));
+                               __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
+                       } else if (!iommu_iotlb_gather_queued(gather)) {
+                               io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
+                       }
+
+                       ptep++;
+                       i++;
                }
 
-               return size;
+               return i * size;
        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
                /*
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
                return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
-                                               lvl + 1, ptep);
+                                               lvl + 1, ptep, pgcount);
        }
 
        /* Keep on walkin' */
        ptep = iopte_deref(pte, data);
-       return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
+       return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
 }
 
-static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                            size_t size, struct iommu_iotlb_gather *gather)
+static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+                                  size_t pgsize, size_t pgcount,
+                                  struct iommu_iotlb_gather *gather)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        struct io_pgtable_cfg *cfg = &data->iop.cfg;
        arm_lpae_iopte *ptep = data->pgd;
        long iaext = (s64)iova >> cfg->ias;
 
-       if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+       if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
                return 0;
 
        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
@@ -635,7 +690,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
        if (WARN_ON(iaext))
                return 0;
 
-       return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
+       return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
+                               data->start_level, ptep);
+}
+
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+                            size_t size, struct iommu_iotlb_gather *gather)
+{
+       return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -750,7 +812,9 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 
        data->iop.ops = (struct io_pgtable_ops) {
                .map            = arm_lpae_map,
+               .map_pages      = arm_lpae_map_pages,
                .unmap          = arm_lpae_unmap,
+               .unmap_pages    = arm_lpae_unmap_pages,
                .iova_to_phys   = arm_lpae_iova_to_phys,
        };
 
@@ -766,7 +830,6 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        bool tg1;
 
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
-                           IO_PGTABLE_QUIRK_NON_STRICT |
                            IO_PGTABLE_QUIRK_ARM_TTBR1 |
                            IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
                return NULL;
@@ -870,7 +933,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
        typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
 
        /* The NS quirk doesn't apply at stage 2 */
-       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT))
+       if (cfg->quirks)
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
@@ -1043,6 +1106,52 @@ out_free_data:
        return NULL;
 }
 
+static struct io_pgtable *
+apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+       struct arm_lpae_io_pgtable *data;
+       int i;
+
+       if (cfg->oas > 36)
+               return NULL;
+
+       data = arm_lpae_alloc_pgtable(cfg);
+       if (!data)
+               return NULL;
+
+       /*
+        * The table format itself always uses two levels, but the total VA
+        * space is mapped by four separate tables, making the MMIO registers
+        * an effective "level 1". For simplicity, though, we treat this
+        * equivalently to LPAE stage 2 concatenation at level 2, with the
+        * additional TTBRs each just pointing at consecutive pages.
+        */
+       if (data->start_level < 1)
+               goto out_free_data;
+       if (data->start_level == 1 && data->pgd_bits > 2)
+               goto out_free_data;
+       if (data->start_level > 1)
+               data->pgd_bits = 0;
+       data->start_level = 2;
+       cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits;
+       data->pgd_bits += data->bits_per_level;
+
+       data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+                                          cfg);
+       if (!data->pgd)
+               goto out_free_data;
+
+       for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i)
+               cfg->apple_dart_cfg.ttbr[i] =
+                       virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data));
+
+       return &data->iop;
+
+out_free_data:
+       kfree(data);
+       return NULL;
+}
+
 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
        .alloc  = arm_64_lpae_alloc_pgtable_s1,
        .free   = arm_lpae_free_pgtable,
@@ -1068,6 +1177,11 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
        .free   = arm_lpae_free_pgtable,
 };
 
+struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
+       .alloc  = apple_dart_alloc_pgtable,
+       .free   = arm_lpae_free_pgtable,
+};
+
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
 
 static struct io_pgtable_cfg *cfg_cookie __initdata;
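With ARM_LPAE_PTES_PER_TABLE in place, a single map_pages/unmap_pages call is clamped so it never walks past the end of the current table: num_entries = min(pgcount, PTES_PER_TABLE - start index), and the byte count actually handled is reported back so the caller can continue from there. The following standalone toy model shows that clamping for a 4KiB granule with 8-byte PTEs; the constants, the map_batch() helper and the main() loop are illustrative stand-ins, not the io-pgtable API.

#include <stddef.h>
#include <stdio.h>

/* Mirrors ARM_LPAE_PTES_PER_TABLE for a 4KiB granule and 8-byte PTEs */
#define GRANULE         4096u
#define PTE_SIZE        8u
#define PTES_PER_TABLE  (GRANULE / PTE_SIZE)    /* 512 */

/*
 * Toy model of the leaf-level clamping in __arm_lpae_map(): one call writes
 * at most to the end of the current last-level table, reports how much it
 * mapped, and leaves the caller to continue with the remainder.
 */
static size_t map_batch(unsigned long iova, size_t pgsize, size_t pgcount)
{
        size_t idx = (iova / pgsize) % PTES_PER_TABLE;
        size_t max_entries = PTES_PER_TABLE - idx;
        size_t num_entries = pgcount < max_entries ? pgcount : max_entries;

        return num_entries * pgsize;
}

int main(void)
{
        unsigned long iova = 0x1000 * 510;      /* two slots before a table boundary */
        size_t pgsize = 0x1000, pgcount = 8, total = 0;

        while (pgcount) {
                size_t mapped = map_batch(iova, pgsize, pgcount);

                printf("mapped %zu bytes at iova %#lx\n", mapped, iova);
                iova += mapped;
                total += mapped;
                pgcount -= mapped / pgsize;
        }
        printf("total %zu bytes in all\n", total);
        return 0;
}

Running it shows the first call stopping two pages short of the table boundary and a second call covering the remaining six pages.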
index 6e9917c..f4bfcef 100644 (file)
@@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
        [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
        [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
        [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
+       [APPLE_DART] = &io_pgtable_apple_dart_init_fns,
 #endif
 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
        [ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
index 5a570d4..3303d70 100644 (file)
@@ -7,7 +7,9 @@
 #define pr_fmt(fmt)    "iommu: " fmt
 
 #include <linux/device.h>
+#include <linux/dma-iommu.h>
 #include <linux/kernel.h>
+#include <linux/bits.h>
 #include <linux/bug.h>
 #include <linux/types.h>
 #include <linux/init.h>
@@ -29,7 +31,7 @@ static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 
 static unsigned int iommu_def_domain_type __read_mostly;
-static bool iommu_dma_strict __read_mostly = true;
+static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
 static u32 iommu_cmd_line __read_mostly;
 
 struct iommu_group {
@@ -113,6 +115,7 @@ static const char *iommu_domain_type_str(unsigned int t)
        case IOMMU_DOMAIN_UNMANAGED:
                return "Unmanaged";
        case IOMMU_DOMAIN_DMA:
+       case IOMMU_DOMAIN_DMA_FQ:
                return "Translated";
        default:
                return "Unknown";
@@ -133,11 +136,20 @@ static int __init iommu_subsys_init(void)
                }
        }
 
+       if (!iommu_default_passthrough() && !iommu_dma_strict)
+               iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
+
        pr_info("Default domain type: %s %s\n",
                iommu_domain_type_str(iommu_def_domain_type),
                (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
                        "(set via kernel command line)" : "");
 
+       if (!iommu_default_passthrough())
+               pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+                       iommu_dma_strict ? "strict" : "lazy",
+                       (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
+                               "(set via kernel command line)" : "");
+
        return 0;
 }
 subsys_initcall(iommu_subsys_init);
@@ -273,7 +285,9 @@ int iommu_probe_device(struct device *dev)
         * support default domains, so the return value is not yet
         * checked.
         */
+       mutex_lock(&group->mutex);
        iommu_alloc_default_domain(group, dev);
+       mutex_unlock(&group->mutex);
 
        if (group->default_domain) {
                ret = __iommu_attach_device(group->default_domain, dev);
@@ -344,21 +358,13 @@ static int __init iommu_dma_setup(char *str)
 }
 early_param("iommu.strict", iommu_dma_setup);
 
-void iommu_set_dma_strict(bool strict)
+void iommu_set_dma_strict(void)
 {
-       if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
-               iommu_dma_strict = strict;
+       iommu_dma_strict = true;
+       if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
+               iommu_def_domain_type = IOMMU_DOMAIN_DMA;
 }
 
-bool iommu_get_dma_strict(struct iommu_domain *domain)
-{
-       /* only allow lazy flushing for DMA domains */
-       if (domain->type == IOMMU_DOMAIN_DMA)
-               return iommu_dma_strict;
-       return true;
-}
-EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
-
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
                                     struct attribute *__attr, char *buf)
 {
@@ -546,6 +552,9 @@ static ssize_t iommu_group_show_type(struct iommu_group *group,
                case IOMMU_DOMAIN_DMA:
                        type = "DMA\n";
                        break;
+               case IOMMU_DOMAIN_DMA_FQ:
+                       type = "DMA-FQ\n";
+                       break;
                }
        }
        mutex_unlock(&group->mutex);
@@ -759,7 +768,7 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group,
        unsigned long pg_size;
        int ret = 0;
 
-       if (!domain || domain->type != IOMMU_DOMAIN_DMA)
+       if (!domain || !iommu_is_dma_domain(domain))
                return 0;
 
        BUG_ON(!domain->pgsize_bitmap);
@@ -1944,6 +1953,11 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
        /* Assume all sizes by default; the driver may override this later */
        domain->pgsize_bitmap  = bus->iommu_ops->pgsize_bitmap;
 
+       /* Temporarily avoid -EEXIST while drivers still get their own cookies */
+       if (iommu_is_dma_domain(domain) && !domain->iova_cookie && iommu_get_dma_cookie(domain)) {
+               iommu_domain_free(domain);
+               domain = NULL;
+       }
        return domain;
 }
 
@@ -1955,6 +1969,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
 
 void iommu_domain_free(struct iommu_domain *domain)
 {
+       iommu_put_dma_cookie(domain);
        domain->ops->domain_free(domain);
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
@@ -2370,45 +2385,94 @@ EXPORT_SYMBOL_GPL(iommu_detach_group);
 
 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-       if (unlikely(domain->ops->iova_to_phys == NULL))
+       if (domain->type == IOMMU_DOMAIN_IDENTITY)
+               return iova;
+
+       if (domain->type == IOMMU_DOMAIN_BLOCKED)
                return 0;
 
        return domain->ops->iova_to_phys(domain, iova);
 }
 EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
 
-static size_t iommu_pgsize(struct iommu_domain *domain,
-                          unsigned long addr_merge, size_t size)
+static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+                          phys_addr_t paddr, size_t size, size_t *count)
 {
-       unsigned int pgsize_idx;
-       size_t pgsize;
+       unsigned int pgsize_idx, pgsize_idx_next;
+       unsigned long pgsizes;
+       size_t offset, pgsize, pgsize_next;
+       unsigned long addr_merge = paddr | iova;
 
-       /* Max page size that still fits into 'size' */
-       pgsize_idx = __fls(size);
+       /* Page sizes supported by the hardware and small enough for @size */
+       pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
 
-       /* need to consider alignment requirements ? */
-       if (likely(addr_merge)) {
-               /* Max page size allowed by address */
-               unsigned int align_pgsize_idx = __ffs(addr_merge);
-               pgsize_idx = min(pgsize_idx, align_pgsize_idx);
-       }
+       /* Constrain the page sizes further based on the maximum alignment */
+       if (likely(addr_merge))
+               pgsizes &= GENMASK(__ffs(addr_merge), 0);
 
-       /* build a mask of acceptable page sizes */
-       pgsize = (1UL << (pgsize_idx + 1)) - 1;
+       /* Make sure we have at least one suitable page size */
+       BUG_ON(!pgsizes);
 
-       /* throw away page sizes not supported by the hardware */
-       pgsize &= domain->pgsize_bitmap;
+       /* Pick the biggest page size remaining */
+       pgsize_idx = __fls(pgsizes);
+       pgsize = BIT(pgsize_idx);
+       if (!count)
+               return pgsize;
 
-       /* make sure we're still sane */
-       BUG_ON(!pgsize);
+       /* Find the next biggest supported page size, if it exists */
+       pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+       if (!pgsizes)
+               goto out_set_count;
 
-       /* pick the biggest page */
-       pgsize_idx = __fls(pgsize);
-       pgsize = 1UL << pgsize_idx;
+       pgsize_idx_next = __ffs(pgsizes);
+       pgsize_next = BIT(pgsize_idx_next);
 
+       /*
+        * There's no point trying a bigger page size unless the virtual
+        * and physical addresses are similarly offset within the larger page.
+        */
+       if ((iova ^ paddr) & (pgsize_next - 1))
+               goto out_set_count;
+
+       /* Calculate the offset to the next page size alignment boundary */
+       offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+       /*
+        * If size is big enough to accommodate the larger page, reduce
+        * the number of smaller pages.
+        */
+       if (offset + pgsize_next <= size)
+               size = offset;
+
+out_set_count:
+       *count = size >> pgsize_idx;
        return pgsize;
 }
 
+static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
+                            phys_addr_t paddr, size_t size, int prot,
+                            gfp_t gfp, size_t *mapped)
+{
+       const struct iommu_ops *ops = domain->ops;
+       size_t pgsize, count;
+       int ret;
+
+       pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+
+       pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+                iova, &paddr, pgsize, count);
+
+       if (ops->map_pages) {
+               ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+                                    gfp, mapped);
+       } else {
+               ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+               *mapped = ret ? 0 : pgsize;
+       }
+
+       return ret;
+}
+
 static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
                       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
@@ -2419,7 +2483,7 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
        phys_addr_t orig_paddr = paddr;
        int ret = 0;
 
-       if (unlikely(ops->map == NULL ||
+       if (unlikely(!(ops->map || ops->map_pages) ||
                     domain->pgsize_bitmap == 0UL))
                return -ENODEV;
 
@@ -2443,18 +2507,21 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
        pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
 
        while (size) {
-               size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+               size_t mapped = 0;
 
-               pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
-                        iova, &paddr, pgsize);
-               ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
+               ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
+                                       &mapped);
+               /*
+                * Some pages may have been mapped, even if an error occurred,
+                * so we should account for those so they can be unmapped.
+                */
+               size -= mapped;
 
                if (ret)
                        break;
 
-               iova += pgsize;
-               paddr += pgsize;
-               size -= pgsize;
+               iova += mapped;
+               paddr += mapped;
        }
 
        /* unroll mapping in case something went wrong */
@@ -2494,6 +2561,19 @@ int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
 }
 EXPORT_SYMBOL_GPL(iommu_map_atomic);
 
+static size_t __iommu_unmap_pages(struct iommu_domain *domain,
+                                 unsigned long iova, size_t size,
+                                 struct iommu_iotlb_gather *iotlb_gather)
+{
+       const struct iommu_ops *ops = domain->ops;
+       size_t pgsize, count;
+
+       pgsize = iommu_pgsize(domain, iova, iova, size, &count);
+       return ops->unmap_pages ?
+              ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
+              ops->unmap(domain, iova, pgsize, iotlb_gather);
+}
+
 static size_t __iommu_unmap(struct iommu_domain *domain,
                            unsigned long iova, size_t size,
                            struct iommu_iotlb_gather *iotlb_gather)
@@ -2503,7 +2583,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
        unsigned long orig_iova = iova;
        unsigned int min_pagesz;
 
-       if (unlikely(ops->unmap == NULL ||
+       if (unlikely(!(ops->unmap || ops->unmap_pages) ||
                     domain->pgsize_bitmap == 0UL))
                return 0;
 
@@ -2531,9 +2611,9 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
         * or we hit an area that isn't mapped.
         */
        while (unmapped < size) {
-               size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
-
-               unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
+               unmapped_page = __iommu_unmap_pages(domain, iova,
+                                                   size - unmapped,
+                                                   iotlb_gather);
                if (!unmapped_page)
                        break;
 
@@ -3128,6 +3208,14 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
                goto out;
        }
 
+       /* We can bring up a flush queue without tearing down the domain */
+       if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
+               ret = iommu_dma_init_fq(prev_dom);
+               if (!ret)
+                       prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
+               goto out;
+       }
+
        /* Sets group->default_domain to the newly allocated domain */
        ret = iommu_group_alloc_default_domain(dev->bus, group, type);
        if (ret)
@@ -3168,9 +3256,9 @@ out:
 }
 
 /*
- * Changing the default domain through sysfs requires the users to ubind the
- * drivers from the devices in the iommu group. Return failure if this doesn't
- * meet.
+ * Changing the default domain through sysfs requires the users to unbind the
+ * drivers from the devices in the iommu group, except for a DMA -> DMA-FQ
+ * transition. Return failure if this isn't met.
  *
  * We need to consider the race between this and the device release path.
  * device_lock(dev) is used here to guarantee that the device release path
@@ -3193,6 +3281,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
                req_type = IOMMU_DOMAIN_IDENTITY;
        else if (sysfs_streq(buf, "DMA"))
                req_type = IOMMU_DOMAIN_DMA;
+       else if (sysfs_streq(buf, "DMA-FQ"))
+               req_type = IOMMU_DOMAIN_DMA_FQ;
        else if (sysfs_streq(buf, "auto"))
                req_type = 0;
        else
@@ -3244,7 +3334,8 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 
        /* Check if the device in the group still has a driver bound to it */
        device_lock(dev);
-       if (device_is_bound(dev)) {
+       if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
+           group->default_domain->type == IOMMU_DOMAIN_DMA)) {
                pr_err_ratelimited("Device is still bound to driver\n");
                ret = -EBUSY;
                goto out;
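The reworked iommu_pgsize() now returns both a page size and how many pages of that size can be used before a larger supported size becomes reachable, which is what lets __iommu_map_pages() and __iommu_unmap_pages() hand whole batches to the driver. Below is a self-contained sketch of that selection logic with userspace stand-ins for __fls(), __ffs() and GENMASK(); the helper implementations, the example page-size bitmap and the main() harness are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

#define BIT(n)          (1UL << (n))
#define GENMASK(h, l)   (((~0UL) << (l)) & (~0UL >> (8 * sizeof(long) - 1 - (h))))

static unsigned int fls_long(unsigned long x)   /* __fls(): index of top set bit */
{
        unsigned int r = 0;

        while (x >>= 1)
                r++;
        return r;
}

static unsigned int ffs_long(unsigned long x)   /* __ffs(): index of lowest set bit */
{
        unsigned int r = 0;

        while (!(x & 1)) {
                x >>= 1;
                r++;
        }
        return r;
}

/* Same selection logic as the reworked iommu_pgsize(), minus the BUG_ON */
static size_t pick_pgsize(unsigned long pgsize_bitmap, unsigned long iova,
                          uint64_t paddr, size_t size, size_t *count)
{
        unsigned long addr_merge = (unsigned long)paddr | iova;
        unsigned long pgsizes = pgsize_bitmap & GENMASK(fls_long(size), 0);
        unsigned int pgsize_idx, pgsize_idx_next;
        size_t pgsize, pgsize_next, offset;

        if (addr_merge)
                pgsizes &= GENMASK(ffs_long(addr_merge), 0);

        pgsize_idx = fls_long(pgsizes);
        pgsize = BIT(pgsize_idx);

        pgsizes = pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
        if (!pgsizes)
                goto out;

        pgsize_idx_next = ffs_long(pgsizes);
        pgsize_next = BIT(pgsize_idx_next);

        /* Only worth trying the bigger size if iova and paddr line up for it */
        if ((iova ^ paddr) & (pgsize_next - 1))
                goto out;

        offset = pgsize_next - (addr_merge & (pgsize_next - 1));
        if (offset + pgsize_next <= size)
                size = offset;
out:
        *count = size >> pgsize_idx;
        return pgsize;
}

int main(void)
{
        /* Hypothetical SMMU-style bitmap: 4K, 2M and 1G pages */
        unsigned long bitmap = BIT(12) | BIT(21) | BIT(30);
        size_t count;
        size_t pgsize = pick_pgsize(bitmap, 0x1ff000, 0x1ff000, 0x202000, &count);

        /* Expect 4K pages only up to the 2M boundary: pgsize=4K, count=1 */
        printf("pgsize %#zx, count %zu\n", pgsize, count);
        return 0;
}

For this 4K/2M/1G bitmap with iova = paddr = 0x1ff000 and size = 0x202000, the selection deliberately stops at the 2M boundary with a single 4K page so that the next iteration of the mapping loop can use a 2M block.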
index b6cf5f1..0af42fb 100644 (file)
@@ -121,8 +121,6 @@ int init_iova_flush_queue(struct iova_domain *iovad,
                spin_lock_init(&fq->lock);
        }
 
-       smp_wmb();
-
        iovad->fq = queue;
 
        timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
@@ -633,10 +631,20 @@ void queue_iova(struct iova_domain *iovad,
                unsigned long pfn, unsigned long pages,
                unsigned long data)
 {
-       struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
+       struct iova_fq *fq;
        unsigned long flags;
        unsigned idx;
 
+       /*
+        * Order against the IOMMU driver's pagetable update from unmapping
+        * @pte, to guarantee that iova_domain_flush() observes that if called
+        * from a different CPU before we release the lock below. Full barrier
+        * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
+        * written fq state here.
+        */
+       smp_mb();
+
+       fq = raw_cpu_ptr(iovad->fq);
        spin_lock_irqsave(&fq->lock, flags);
 
        /*
index 51ea6f0..d38ff29 100644 (file)
@@ -8,7 +8,6 @@
 
 #include <linux/bitmap.h>
 #include <linux/delay.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -564,10 +563,13 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
  * IOMMU Operations
  */
 
-static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
 {
        struct ipmmu_vmsa_domain *domain;
 
+       if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+               return NULL;
+
        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
@@ -577,27 +579,6 @@ static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
        return &domain->io_domain;
 }
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
-{
-       struct iommu_domain *io_domain = NULL;
-
-       switch (type) {
-       case IOMMU_DOMAIN_UNMANAGED:
-               io_domain = __ipmmu_domain_alloc(type);
-               break;
-
-       case IOMMU_DOMAIN_DMA:
-               io_domain = __ipmmu_domain_alloc(type);
-               if (io_domain && iommu_get_dma_cookie(io_domain)) {
-                       kfree(io_domain);
-                       io_domain = NULL;
-               }
-               break;
-       }
-
-       return io_domain;
-}
-
 static void ipmmu_domain_free(struct iommu_domain *io_domain)
 {
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
@@ -606,7 +587,6 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
         * Free the domain resources. We assume that all devices have already
         * been detached.
         */
-       iommu_put_dma_cookie(io_domain);
        ipmmu_domain_destroy_context(domain);
        free_io_pgtable_ops(domain->iop);
        kfree(domain);
index 6f7c696..d837adf 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/component.h>
 #include <linux/device.h>
 #include <linux/dma-direct.h>
-#include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -441,17 +440,11 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
        if (!dom)
                return NULL;
 
-       if (iommu_get_dma_cookie(&dom->domain)) {
-               kfree(dom);
-               return NULL;
-       }
-
        return &dom->domain;
 }
 
 static void mtk_iommu_domain_free(struct iommu_domain *domain)
 {
-       iommu_put_dma_cookie(domain);
        kfree(to_mtk_domain(domain));
 }
 
@@ -520,12 +513,8 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
                              struct iommu_iotlb_gather *gather)
 {
        struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-       unsigned long end = iova + size - 1;
 
-       if (gather->start > iova)
-               gather->start = iova;
-       if (gather->end < end)
-               gather->end = end;
+       iommu_iotlb_gather_add_range(gather, iova, size);
        return dom->iop->unmap(dom->iop, iova, size, gather);
 }
 
index 778e66f..be22fcf 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/component.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
-#include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
index 9febfb7..5cb2608 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -1074,10 +1073,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
        if (!rk_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&rk_domain->domain))
-               goto err_free_domain;
-
        /*
         * rk32xx iommus use a 2 level pagetable.
         * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
@@ -1085,7 +1080,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
         */
        rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
        if (!rk_domain->dt)
-               goto err_put_cookie;
+               goto err_free_domain;
 
        rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
                                           SPAGE_SIZE, DMA_TO_DEVICE);
@@ -1106,9 +1101,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 
 err_free_dt:
        free_page((unsigned long)rk_domain->dt);
-err_put_cookie:
-       if (type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&rk_domain->domain);
 err_free_domain:
        kfree(rk_domain);
 
@@ -1137,8 +1129,6 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
                         SPAGE_SIZE, DMA_TO_DEVICE);
        free_page((unsigned long)rk_domain->dt);
 
-       if (domain->type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&rk_domain->domain);
        kfree(rk_domain);
 }
 
index 73dfd99..27ac818 100644 (file)
@@ -8,7 +8,6 @@
 
 #include <linux/clk.h>
 #include <linux/device.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/iommu.h>
@@ -144,11 +143,6 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
        if (!dom)
                return NULL;
 
-       if (iommu_get_dma_cookie(&dom->domain)) {
-               kfree(dom);
-               return NULL;
-       }
-
        spin_lock_init(&dom->pgtlock);
 
        dom->domain.geometry.aperture_start = 0;
@@ -161,7 +155,6 @@ static void sprd_iommu_domain_free(struct iommu_domain *domain)
 {
        struct sprd_iommu_domain *dom = to_sprd_domain(domain);
 
-       iommu_put_dma_cookie(domain);
        kfree(dom);
 }
 
index 181bb1c..9299702 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/dma-direction.h>
-#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -610,14 +609,10 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
        if (!sun50i_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&sun50i_domain->domain))
-               goto err_free_domain;
-
        sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                    get_order(DT_SIZE));
        if (!sun50i_domain->dt)
-               goto err_put_cookie;
+               goto err_free_domain;
 
        refcount_set(&sun50i_domain->refcnt, 1);
 
@@ -627,10 +622,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
 
        return &sun50i_domain->domain;
 
-err_put_cookie:
-       if (type == IOMMU_DOMAIN_DMA)
-               iommu_put_dma_cookie(&sun50i_domain->domain);
-
 err_free_domain:
        kfree(sun50i_domain);
 
@@ -644,8 +635,6 @@ static void sun50i_iommu_domain_free(struct iommu_domain *domain)
        free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE));
        sun50i_domain->dt = NULL;
 
-       iommu_put_dma_cookie(domain);
-
        kfree(sun50i_domain);
 }
 
index 6abdcab..80930ce 100644 (file)
@@ -598,12 +598,6 @@ static struct iommu_domain *viommu_domain_alloc(unsigned type)
        spin_lock_init(&vdomain->mappings_lock);
        vdomain->mappings = RB_ROOT_CACHED;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&vdomain->domain)) {
-               kfree(vdomain);
-               return NULL;
-       }
-
        return &vdomain->domain;
 }
 
@@ -643,8 +637,6 @@ static void viommu_domain_free(struct iommu_domain *domain)
 {
        struct viommu_domain *vdomain = to_viommu_domain(domain);
 
-       iommu_put_dma_cookie(domain);
-
        /* Free all remaining mappings (size 2^64) */
        viommu_del_mappings(vdomain, 0, 0);
 
index 758ca46..24607dc 100644 (file)
@@ -20,6 +20,7 @@ void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
+int iommu_dma_init_fq(struct iommu_domain *domain);
 
 /* The DMA API isn't _quite_ the whole story, though... */
 /*
@@ -54,6 +55,11 @@ static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
 {
 }
 
+static inline int iommu_dma_init_fq(struct iommu_domain *domain)
+{
+       return -EINVAL;
+}
+
 static inline int iommu_get_dma_cookie(struct iommu_domain *domain)
 {
        return -ENODEV;
index d0fa0b3..05a65eb 100644 (file)
 #define DMAR_MTRR_PHYSMASK8_REG 0x208
 #define DMAR_MTRR_PHYSBASE9_REG 0x210
 #define DMAR_MTRR_PHYSMASK9_REG 0x218
-#define DMAR_VCCAP_REG         0xe00 /* Virtual command capability register */
-#define DMAR_VCMD_REG          0xe10 /* Virtual command register */
-#define DMAR_VCRSP_REG         0xe20 /* Virtual command response register */
+#define DMAR_VCCAP_REG         0xe30 /* Virtual command capability register */
+#define DMAR_VCMD_REG          0xe00 /* Virtual command register */
+#define DMAR_VCRSP_REG         0xe10 /* Virtual command response register */
 
 #define DMAR_IQER_REG_IQEI(reg)                FIELD_GET(GENMASK_ULL(3, 0), reg)
 #define DMAR_IQER_REG_ITESID(reg)      FIELD_GET(GENMASK_ULL(47, 32), reg)
index 10fa80e..57cceec 100644 (file)
 #define SVM_REQ_EXEC   (1<<1)
 #define SVM_REQ_PRIV   (1<<0)
 
+/* Page Request Queue depth */
+#define PRQ_ORDER      2
+#define PRQ_RING_MASK  ((0x1000 << PRQ_ORDER) - 0x20)
+#define PRQ_DEPTH      ((0x1000 << PRQ_ORDER) >> 5)
+
 /*
  * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
  * for access to kernel addresses. No IOTLB flushes are automatically done
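Raising PRQ_ORDER from 0 to 2 grows the page request queue from one page to four, i.e. from 128 to 512 outstanding descriptors, assuming the 32-byte descriptor size that the 0x20 in PRQ_RING_MASK and the >> 5 in PRQ_DEPTH imply. A quick standalone arithmetic check, for illustration only:

#include <stdio.h>

/* Values from the updated intel-svm.h */
#define PRQ_ORDER       2
#define PRQ_RING_MASK   ((0x1000 << PRQ_ORDER) - 0x20)
#define PRQ_DEPTH       ((0x1000 << PRQ_ORDER) >> 5)

int main(void)
{
        /* Each page request descriptor is assumed to be 32 (0x20) bytes */
        printf("queue size : %d bytes (%d pages)\n",
               0x1000 << PRQ_ORDER, 1 << PRQ_ORDER);
        printf("ring mask  : %#x\n", PRQ_RING_MASK);
        printf("queue depth: %d descriptors\n", PRQ_DEPTH);
        return 0;
}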
index 4d40dfa..86af6f0 100644 (file)
@@ -16,6 +16,7 @@ enum io_pgtable_fmt {
        ARM_V7S,
        ARM_MALI_LPAE,
        AMD_IOMMU_V1,
+       APPLE_DART,
        IO_PGTABLE_NUM_FMTS,
 };
 
@@ -73,10 +74,6 @@ struct io_pgtable_cfg {
         *      to support up to 35 bits PA where the bit32, bit33 and bit34 are
         *      encoded in the bit9, bit4 and bit5 of the PTE respectively.
         *
-        * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
-        *      on unmap, for DMA domains using the flush queue mechanism for
-        *      delayed invalidation.
-        *
         * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table
         *      for use in the upper half of a split address space.
         *
@@ -86,7 +83,6 @@ struct io_pgtable_cfg {
        #define IO_PGTABLE_QUIRK_ARM_NS         BIT(0)
        #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
        #define IO_PGTABLE_QUIRK_ARM_MTK_EXT    BIT(3)
-       #define IO_PGTABLE_QUIRK_NON_STRICT     BIT(4)
        #define IO_PGTABLE_QUIRK_ARM_TTBR1      BIT(5)
        #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
        unsigned long                   quirks;
@@ -136,6 +132,11 @@ struct io_pgtable_cfg {
                        u64     transtab;
                        u64     memattr;
                } arm_mali_lpae_cfg;
+
+               struct {
+                       u64 ttbr[4];
+                       u32 n_ttbrs;
+               } apple_dart_cfg;
        };
 };
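
Formats that need more than one translation table base can now hand them back through the union: APPLE_DART fills apple_dart_cfg with up to four TTBRs for the driver to program into the hardware. A hedged sketch of how a driver might consume that; alloc_io_pgtable_ops() is the existing allocator, example_write_ttbr() is hypothetical:

    static void example_write_ttbr(void *cookie, int idx, u64 ttbr); /* hypothetical */

    static struct io_pgtable_ops *example_dart_alloc(struct io_pgtable_cfg *cfg,
                                                     void *cookie)
    {
            struct io_pgtable_ops *ops;
            int i;

            ops = alloc_io_pgtable_ops(APPLE_DART, cfg, cookie);
            if (!ops)
                    return NULL;

            for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; i++)
                    example_write_ttbr(cookie, i, cfg->apple_dart_cfg.ttbr[i]);

            return ops;
    }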
 
@@ -143,7 +144,9 @@ struct io_pgtable_cfg {
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
  * @map:          Map a physically contiguous memory region.
+ * @map_pages:    Map a physically contiguous range of pages of the same size.
  * @unmap:        Unmap a physically contiguous memory region.
+ * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
  * These functions map directly onto the iommu_ops member functions with
@@ -152,8 +155,14 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
        int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
                   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+       int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+                        phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                        int prot, gfp_t gfp, size_t *mapped);
        size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
                        size_t size, struct iommu_iotlb_gather *gather);
+       size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
+                             size_t pgsize, size_t pgcount,
+                             struct iommu_iotlb_gather *gather);
        phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
                                    unsigned long iova);
 };
@@ -246,5 +255,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 
 #endif /* __IO_PGTABLE_H */
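
The new ->map_pages()/->unmap_pages() callbacks let a page-table format handle a run of equally sized pages in one call, reporting partial progress through *mapped. Purely as an illustration of that contract (the in-tree formats implement it natively rather than like this), a format could fall back to its single-page ->map():

    static int example_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
                                 phys_addr_t paddr, size_t pgsize, size_t pgcount,
                                 int prot, gfp_t gfp, size_t *mapped)
    {
            size_t i;
            int ret;

            *mapped = 0;
            for (i = 0; i < pgcount; i++) {
                    ret = ops->map(ops, iova + i * pgsize, paddr + i * pgsize,
                                   pgsize, prot, gfp);
                    if (ret)
                            return ret;     /* caller unwinds using *mapped */
                    *mapped += pgsize;
            }
            return 0;
    }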
index 9369458..d2f3435 100644 (file)
@@ -40,6 +40,7 @@ struct iommu_domain;
 struct notifier_block;
 struct iommu_sva;
 struct iommu_fault_event;
+struct iommu_dma_cookie;
 
 /* iommu fault flags */
 #define IOMMU_FAULT_READ       0x0
@@ -60,6 +61,7 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_DMA_API (1U << 1)  /* Domain for use in DMA-API
                                              implementation              */
 #define __IOMMU_DOMAIN_PT      (1U << 2)  /* Domain is identity mapped   */
+#define __IOMMU_DOMAIN_DMA_FQ  (1U << 3)  /* DMA-API uses flush queue    */
 
 /*
  * These are the possible domain-types
@@ -72,12 +74,17 @@ struct iommu_domain_geometry {
  *     IOMMU_DOMAIN_DMA        - Internally used for DMA-API implementations.
  *                               This flag allows IOMMU drivers to implement
  *                               certain optimizations for these domains
+ *     IOMMU_DOMAIN_DMA_FQ     - As above, but definitely using batched TLB
+ *                               invalidation.
  */
 #define IOMMU_DOMAIN_BLOCKED   (0U)
 #define IOMMU_DOMAIN_IDENTITY  (__IOMMU_DOMAIN_PT)
 #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING)
 #define IOMMU_DOMAIN_DMA       (__IOMMU_DOMAIN_PAGING |        \
                                 __IOMMU_DOMAIN_DMA_API)
+#define IOMMU_DOMAIN_DMA_FQ    (__IOMMU_DOMAIN_PAGING |        \
+                                __IOMMU_DOMAIN_DMA_API |       \
+                                __IOMMU_DOMAIN_DMA_FQ)
 
 struct iommu_domain {
        unsigned type;
@@ -86,9 +93,14 @@ struct iommu_domain {
        iommu_fault_handler_t handler;
        void *handler_token;
        struct iommu_domain_geometry geometry;
-       void *iova_cookie;
+       struct iommu_dma_cookie *iova_cookie;
 };
 
+static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
+{
+       return domain->type & __IOMMU_DOMAIN_DMA_API;
+}
+
 enum iommu_cap {
        IOMMU_CAP_CACHE_COHERENCY,      /* IOMMU can enforce cache coherent DMA
                                           transactions */
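
Because IOMMU_DOMAIN_DMA_FQ also carries __IOMMU_DOMAIN_DMA_API, code that used to compare against IOMMU_DOMAIN_DMA exactly should switch to the helper so flush-queue domains take the same paths; a trivial sketch:

    /* Sketch only: true for IOMMU_DOMAIN_DMA and IOMMU_DOMAIN_DMA_FQ,
     * false for identity, blocked and unmanaged domains. */
    static bool example_needs_iova_cookie(struct iommu_domain *domain)
    {
            return iommu_is_dma_domain(domain);
    }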
@@ -160,16 +172,22 @@ enum iommu_dev_features {
  * @start: IOVA representing the start of the range to be flushed
  * @end: IOVA representing the end of the range to be flushed (inclusive)
  * @pgsize: The interval at which to perform the flush
+ * @freelist: Removed pages to free after sync
+ * @queued: Indicates that the flush will be queued
  *
  * This structure is intended to be updated by multiple calls to the
  * ->unmap() function in struct iommu_ops before eventually being passed
- * into ->iotlb_sync().
+ * into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after
+ * ->iotlb_sync() or ->iotlb_flush_all() have cleared all cached references to
+ * them. @queued is set to indicate when ->iotlb_flush_all() will be called
+ * later instead of ->iotlb_sync(), so drivers may optimise accordingly.
  */
 struct iommu_iotlb_gather {
        unsigned long           start;
        unsigned long           end;
        size_t                  pgsize;
        struct page             *freelist;
+       bool                    queued;
 };
 
 /**
@@ -180,7 +198,10 @@ struct iommu_iotlb_gather {
  * @attach_dev: attach device to an iommu domain
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
+ * @map_pages: map a physically contiguous set of pages of the same size to
+ *             an iommu domain.
  * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @unmap_pages: unmap a number of pages of the same size from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -229,8 +250,14 @@ struct iommu_ops {
        void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
        int (*map)(struct iommu_domain *domain, unsigned long iova,
                   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+       int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
+                        phys_addr_t paddr, size_t pgsize, size_t pgcount,
+                        int prot, gfp_t gfp, size_t *mapped);
        size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
                     size_t size, struct iommu_iotlb_gather *iotlb_gather);
+       size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
+                             size_t pgsize, size_t pgcount,
+                             struct iommu_iotlb_gather *iotlb_gather);
        void (*flush_iotlb_all)(struct iommu_domain *domain);
        void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
                               size_t size);
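
Drivers whose io-pgtable formats implement the matching callbacks can forward the new iommu_ops hooks straight through, so a batched map or unmap costs one indirection instead of one per page. A hedged sketch, where example_domain_pgtbl_ops() stands in for the driver's own accessor:

    static struct io_pgtable_ops *example_domain_pgtbl_ops(struct iommu_domain *domain); /* hypothetical */

    static int example_iommu_map_pages(struct iommu_domain *domain,
                                       unsigned long iova, phys_addr_t paddr,
                                       size_t pgsize, size_t pgcount, int prot,
                                       gfp_t gfp, size_t *mapped)
    {
            struct io_pgtable_ops *ops = example_domain_pgtbl_ops(domain);

            if (!ops || !ops->map_pages)
                    return -ENODEV;

            return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
                                  gfp, mapped);
    }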
@@ -476,8 +503,7 @@ int iommu_enable_nesting(struct iommu_domain *domain);
 int iommu_set_pgtable_quirks(struct iommu_domain *domain,
                unsigned long quirks);
 
-void iommu_set_dma_strict(bool val);
-bool iommu_get_dma_strict(struct iommu_domain *domain);
+void iommu_set_dma_strict(void);
 
 extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
                              unsigned long iova, int flags);
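
iommu_set_dma_strict() loses its bool argument and iommu_get_dma_strict() goes away entirely: strictness is now carried by the domain type, and code can only force strict invalidation on, never off. A hedged sketch of the kind of quirk that uses the new form:

    static void example_apply_strict_quirk(void)
    {
            pr_warn("IOTLB batching unsafe on this hardware, forcing strict mode\n");
            iommu_set_dma_strict();
    }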
@@ -497,29 +523,80 @@ static inline void iommu_iotlb_sync(struct iommu_domain *domain,
        iommu_iotlb_gather_init(iotlb_gather);
 }
 
+/**
+ * iommu_iotlb_gather_is_disjoint - Checks whether a new range is disjoint
+ *
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to check whether a new range and the gathered range
+ * are disjoint. For many IOMMUs, flushing the IOMMU in this case is better
+ * than merging the two, which might lead to unnecessary invalidations.
+ */
+static inline
+bool iommu_iotlb_gather_is_disjoint(struct iommu_iotlb_gather *gather,
+                                   unsigned long iova, size_t size)
+{
+       unsigned long start = iova, end = start + size - 1;
+
+       return gather->end != 0 &&
+               (end + 1 < gather->start || start > gather->end + 1);
+}
+
+
+/**
+ * iommu_iotlb_gather_add_range - Gather for address-based TLB invalidation
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to build arbitrarily-sized invalidation commands
+ * where only the address range matters, and simply minimising intermediate
+ * syncs is preferred.
+ */
+static inline void iommu_iotlb_gather_add_range(struct iommu_iotlb_gather *gather,
+                                               unsigned long iova, size_t size)
+{
+       unsigned long end = iova + size - 1;
+
+       if (gather->start > iova)
+               gather->start = iova;
+       if (gather->end < end)
+               gather->end = end;
+}
+
+/**
+ * iommu_iotlb_gather_add_page - Gather for page-based TLB invalidation
+ * @domain: IOMMU domain to be invalidated
+ * @gather: TLB gather data
+ * @iova: start of page to invalidate
+ * @size: size of page to invalidate
+ *
+ * Helper for IOMMU drivers to build invalidation commands based on individual
+ * pages, or with page size/table level hints which cannot be gathered if they
+ * differ.
+ */
 static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
                                               struct iommu_iotlb_gather *gather,
                                               unsigned long iova, size_t size)
 {
-       unsigned long start = iova, end = start + size - 1;
-
        /*
         * If the new page is disjoint from the current range or is mapped at
         * a different granularity, then sync the TLB so that the gather
         * structure can be rewritten.
         */
-       if (gather->pgsize != size ||
-           end + 1 < gather->start || start > gather->end + 1) {
-               if (gather->pgsize)
-                       iommu_iotlb_sync(domain, gather);
-               gather->pgsize = size;
-       }
+       if ((gather->pgsize && gather->pgsize != size) ||
+           iommu_iotlb_gather_is_disjoint(gather, iova, size))
+               iommu_iotlb_sync(domain, gather);
 
-       if (gather->end < end)
-               gather->end = end;
+       gather->pgsize = size;
+       iommu_iotlb_gather_add_range(gather, iova, size);
+}
 
-       if (gather->start > start)
-               gather->start = start;
+static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
+{
+       return gather && gather->queued;
 }
 
 /* PCI device grouping function */
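
The @queued flag, surfaced through iommu_iotlb_gather_queued(), tells unmap paths that a flush-queue drain will invalidate everything later, so gathering per-page invalidations is wasted work. A rough sketch of a driver ->unmap() using the helpers, with example_clear_ptes() as a hypothetical stand-in for the driver's page-table walker:

    static size_t example_clear_ptes(struct iommu_domain *domain,
                                     unsigned long iova, size_t size); /* hypothetical */

    static size_t example_unmap(struct iommu_domain *domain, unsigned long iova,
                                size_t size, struct iommu_iotlb_gather *gather)
    {
            size_t unmapped = example_clear_ptes(domain, iova, size);

            /*
             * For IOMMU_DOMAIN_DMA_FQ the DMA layer sets gather->queued and a
             * full flush follows later, so skip building up the gather range.
             */
            if (!iommu_iotlb_gather_queued(gather))
                    iommu_iotlb_gather_add_page(domain, gather, iova, size);

            return unmapped;
    }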
@@ -870,6 +947,11 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
 {
 }
 
+static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
+{
+       return false;
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }