debug/iommu directory, and then populate a subdirectory with
entries as required.
----- config IOMMU_DEFAULT_PASSTHROUGH
----- bool "IOMMU passthrough by default"
+++++ choice
+++++ prompt "IOMMU default domain type"
depends on IOMMU_API
+++++ default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU
+++++ default IOMMU_DEFAULT_DMA_STRICT
help
----- Enable passthrough by default, removing the need to pass in
----- iommu.passthrough=on or iommu=pt through command line. If this
----- is enabled, you can still disable with iommu.passthrough=off
----- or iommu=nopt depending on the architecture.
-
- If unsure, say N here.
+++++ Choose the type of IOMMU domain used to manage DMA API usage by
+++++ device drivers. The options here typically represent different
+++++ levels of tradeoff between robustness/security and performance,
+++++ depending on the IOMMU driver. Not all IOMMUs support all options.
+++++ This choice can be overridden at boot via the command line, and for
+++++ some devices also at runtime via sysfs.
-
- - choice
- - prompt "IOMMU default DMA IOTLB invalidation mode"
- - depends on IOMMU_DMA
+++++ If unsure, keep the default.
+ ++
- - default IOMMU_DEFAULT_LAZY if (AMD_IOMMU || INTEL_IOMMU)
- - default IOMMU_DEFAULT_STRICT
+++++ config IOMMU_DEFAULT_DMA_STRICT
+++++ bool "Translated - Strict"
+ ++ help
- - This option allows an IOMMU DMA IOTLB invalidation mode to be
- - chosen at build time, to override the default mode of each ARCH,
- - removing the need to pass in kernel parameters through command line.
- - It is still possible to provide common boot params to override this
- - config.
+++++ Trusted devices use translation to restrict their access to only
+++++ DMA-mapped pages, with strict TLB invalidation on unmap. Equivalent
+++++ to passing "iommu.passthrough=0 iommu.strict=1" on the command line.
+ ++
- - If unsure, keep the default.
+++++ Untrusted devices always use this mode, with an additional layer of
+++++ bounce-buffering such that they cannot gain access to any unrelated
+++++ data within a mapped page.
+ ++
- - config IOMMU_DEFAULT_STRICT
- - bool "strict"
+++++ config IOMMU_DEFAULT_DMA_LAZY
+++++ bool "Translated - Lazy"
+ ++ help
- - For every IOMMU DMA unmap operation, the flush operation of IOTLB and
- - the free operation of IOVA are guaranteed to be done in the unmap
- - function.
+++++ Trusted devices use translation to restrict their access to only
+++++ DMA-mapped pages, but with "lazy" batched TLB invalidation. This
+++++ mode allows higher performance with some IOMMUs due to reduced TLB
+++++ flushing, but at the cost of reduced isolation since devices may be
+++++ able to access memory for some time after it has been unmapped.
+++++ Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the
+++++ command line.
+++++
+++++ If this mode is not supported by the IOMMU driver, the effective
+++++ runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+ ++
- - config IOMMU_DEFAULT_LAZY
- - bool "lazy"
+++++ config IOMMU_DEFAULT_PASSTHROUGH
+++++ bool "Passthrough"
+ ++ help
- - Support lazy mode, where for every IOMMU DMA unmap operation, the
- - flush operation of IOTLB and the free operation of IOVA are deferred.
- - They are only guaranteed to be done before the related IOVA will be
- - reused.
- -
- - The isolation provided in this mode is not as secure as STRICT mode,
- - such that a vulnerable time window may be created between the DMA
- - unmap and the mappings cached in the IOMMU IOTLB or device TLB
- - finally being invalidated, where the device could still access the
- - memory which has already been unmapped by the device driver.
- - However this mode may provide better performance in high throughput
- - scenarios, and is still considerably more secure than passthrough
- - mode or no IOMMU.
+++++ Trusted devices are identity-mapped, giving them unrestricted access
+++++ to memory with minimal performance overhead. Equivalent to passing
+++++ "iommu.passthrough=1" (historically "iommu=pt") on the command line.
+++++
+++++ If this mode is not supported by the IOMMU driver, the effective
+++++ runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
+ ++
+ ++ endchoice
config OF_IOMMU
def_bool y
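For orientation, a sketch of how the build-time choice could translate into an initial default domain type. The real selection happens in iommu_subsys_init(), outside this hunk; IOMMU_DOMAIN_DMA_FQ is assumed here to be the lazy, flush-queue variant of the DMA domain type referenced in the iommu_set_dma_strict() hunk further down:

	/* Sketch only: map the Kconfig choice onto an initial default. */
	static int iommu_def_domain_type_from_config(void)
	{
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
			return IOMMU_DOMAIN_IDENTITY;
		if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_LAZY))
			return IOMMU_DOMAIN_DMA_FQ;	/* lazy invalidation */
		return IOMMU_DOMAIN_DMA;		/* strict invalidation */
	}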
return -EINVAL;
}
-- ---- return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
++ ++++ return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
+ + ++}
+ + ++
++ ++++static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
++ ++++ struct arm_smmu_cmdq_ent *ent)
+ + ++{
++ ++++ return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
+ + }
+ +
- - --static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
++ ++++static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
++ ++++ struct arm_smmu_cmdq_ent *ent)
+ + {
- - -- return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
++ ++++ return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
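The new arm_smmu_cmdq_issue_cmd_with_sync() helper lets callers that used to issue a command and then a separate CMD_SYNC do both in one call. A caller-side sketch (the opcode and tlbi field exist in the arm-smmu-v3 driver, but this particular caller is illustrative rather than part of the patch):

	struct arm_smmu_cmdq_ent cmd = {
		.opcode    = CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	/* Issues the TLBI and its trailing CMD_SYNC together */
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);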
static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
++ ++++ int ret = 0;
++ ++++ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
++ ++++
if (pm_runtime_suspended(dev))
-- ---- return 0;
++ ++++ goto clk_unprepare;
++ ++
-- return arm_smmu_runtime_suspend(dev);
++ ++++ ret = arm_smmu_runtime_suspend(dev);
++ ++++ if (ret)
++ ++++ return ret;
++
++ ++++clk_unprepare:
++ ++++ clk_bulk_unprepare(smmu->num_clks, smmu->clks);
++ ++++ return ret;
}
static const struct dev_pm_ops arm_smmu_pm_ops = {
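Since suspend now unprepares the bulk clocks, the resume callback is expected to prepare them again before touching the hardware. A sketch of the matching arm_smmu_pm_resume(), assuming the same smmu->clks/num_clks fields (the function itself is outside this excerpt):

	static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
	{
		int ret;
		struct arm_smmu_device *smmu = dev_get_drvdata(dev);

		ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
		if (ret)
			return ret;

		if (pm_runtime_suspended(dev))
			return 0;

		ret = arm_smmu_runtime_resume(dev);
		if (ret)
			clk_bulk_unprepare(smmu->num_clks, smmu->clks);

		return ret;
	}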
/* If the size matches this level, we're in the right place */
if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
- -- __arm_lpae_set_pte(ptep, 0, &iop->cfg);
- --
- -- if (!iopte_leaf(pte, lvl, iop->fmt)) {
- -- /* Also flush any partial walks */
- -- io_pgtable_tlb_flush_walk(iop, iova, size,
- -- ARM_LPAE_GRANULE(data));
- -- ptep = iopte_deref(pte, data);
- -- __arm_lpae_free_pgtable(data, lvl + 1, ptep);
- -- } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
- -- /*
- -- * Order the PTE update against queueing the IOVA, to
- -- * guarantee that a flush callback from a different CPU
- -- * has observed it before the TLBIALL can be issued.
- -- */
- -- smp_wmb();
- -- } else {
- -- io_pgtable_tlb_add_page(iop, gather, iova, size);
+ ++ max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
+ ++ num_entries = min_t(int, pgcount, max_entries);
+ ++
+ ++ while (i < num_entries) {
+ ++ pte = READ_ONCE(*ptep);
+ ++ if (WARN_ON(!pte))
+ ++ break;
+ ++
+ ++ __arm_lpae_clear_pte(ptep, &iop->cfg);
+ ++
+ ++ if (!iopte_leaf(pte, lvl, iop->fmt)) {
+ ++ /* Also flush any partial walks */
+ ++ io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
+ ++ ARM_LPAE_GRANULE(data));
+ ++ __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
- } else if (!gather->queued) {
++++++ } else if (!iommu_iotlb_gather_queued(gather)) {
+ ++ io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
+ ++ }
+ ++
+ ++ ptep++;
+ ++ i++;
}
- -- return size;
+ ++ return i * size;
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
/*
* Insert a table at the next level to map the old region,
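The reworked block-level path clears up to num_entries leaf PTEs per call and returns i * size, i.e. the number of bytes actually unmapped, so callers can detect a partial unmap. An illustrative use of the resulting unmap_pages() callback, assuming an io_pgtable_ops pointer 'ops' and an initialised 'gather' from the surrounding, elided context:

	size_t unmapped;

	/* Tear down sixteen contiguous 4KiB leaf entries with one call */
	unmapped = ops->unmap_pages(ops, iova, SZ_4K, 16, &gather);
	WARN_ON(unmapped != 16 * SZ_4K);	/* partial unmap is unexpected here */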
#define pr_fmt(fmt) "iommu: " fmt
#include <linux/device.h>
+++++ #include <linux/dma-iommu.h>
#include <linux/kernel.h>
+ ++ #include <linux/bits.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/init.h>
static DEFINE_IDA(iommu_group_ida);
static unsigned int iommu_def_domain_type __read_mostly;
- -- static bool iommu_dma_strict __read_mostly = true;
- - static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_STRICT);
+++++ static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;
struct iommu_group {
(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
"(set via kernel command line)" : "");
- - pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
- - iommu_dma_strict ? "strict" : "lazy",
- - (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
- - "(set via kernel command line)" : "");
+++++ if (!iommu_default_passthrough())
+++++ pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+++++ iommu_dma_strict ? "strict" : "lazy",
+++++ (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
+++++ "(set via kernel command line)" : "");
+ ++
return 0;
}
subsys_initcall(iommu_subsys_init);
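For illustration only, with the default-domain pr_info() that precedes this hunk (its exact wording is assumed here), a lazy translated default would boot with messages along the lines of:

	iommu: Default domain type: Translated
	iommu: DMA domain TLB invalidation policy: lazy mode

while a passthrough default now suppresses the second line entirely.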
}
early_param("iommu.strict", iommu_dma_setup);
- -- void iommu_set_dma_strict(bool strict)
- - {
- - if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
- - iommu_dma_strict = strict;
- - }
- -
- - bool iommu_get_dma_strict(struct iommu_domain *domain)
+ ++ void iommu_set_dma_strict(void)
{
- - /* only allow lazy flushing for DMA domains */
- - if (domain->type == IOMMU_DOMAIN_DMA)
- - return iommu_dma_strict;
- - return true;
+ ++ iommu_dma_strict = true;
+++++ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
+++++ iommu_def_domain_type = IOMMU_DOMAIN_DMA;
}
- - EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
static ssize_t iommu_group_attr_show(struct kobject *kobj,
struct attribute *__attr, char *buf)
{
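iommu_set_dma_strict() is now a one-way switch: drivers call it when deferred invalidation would be unsafe, and it also demotes a lazy IOMMU_DOMAIN_DMA_FQ default back to a plain (strict) DMA domain. A hypothetical caller sketch (the capability helper is invented for illustration):

	/*
	 * Sketch: an IOMMU driver whose hardware must observe unmaps
	 * immediately forces strict invalidation during its init path.
	 */
	if (iommu_hw_requires_strict_invalidation(iommu))	/* hypothetical */
		iommu_set_dma_strict();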
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
- -- static size_t iommu_pgsize(struct iommu_domain *domain,
- -- unsigned long addr_merge, size_t size)
+ ++ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
+ ++ phys_addr_t paddr, size_t size, size_t *count)
{
- -- unsigned int pgsize_idx;
- -- size_t pgsize;
+ ++ unsigned int pgsize_idx, pgsize_idx_next;
+ ++ unsigned long pgsizes;
+ ++ size_t offset, pgsize, pgsize_next;
+ ++ unsigned long addr_merge = paddr | iova;
- -- /* Max page size that still fits into 'size' */
- -- pgsize_idx = __fls(size);
+ ++ /* Page sizes supported by the hardware and small enough for @size */
+ ++ pgsizes = domain->pgsize_bitmap & GENMASK(__fls(size), 0);
- -- /* need to consider alignment requirements ? */
- -- if (likely(addr_merge)) {
- -- /* Max page size allowed by address */
- -- unsigned int align_pgsize_idx = __ffs(addr_merge);
- -- pgsize_idx = min(pgsize_idx, align_pgsize_idx);
- -- }
+ ++ /* Constrain the page sizes further based on the maximum alignment */
+ ++ if (likely(addr_merge))
+ ++ pgsizes &= GENMASK(__ffs(addr_merge), 0);
+ +
- /* build a mask of acceptable page sizes */
- pgsize = (1UL << (pgsize_idx + 1)) - 1;
+ ++ /* Make sure we have at least one suitable page size */
+ ++ BUG_ON(!pgsizes);
+
- /* throw away page sizes not supported by the hardware */
- pgsize &= domain->pgsize_bitmap;
+ ++ /* Pick the biggest page size remaining */
+ ++ pgsize_idx = __fls(pgsizes);
+ ++ pgsize = BIT(pgsize_idx);
+ ++ if (!count)
+ ++ return pgsize;
- /* make sure we're still sane */
- BUG_ON(!pgsize);
+ ++ /* Find the next biggest supported page size, if it exists */
+ ++ pgsizes = domain->pgsize_bitmap & ~GENMASK(pgsize_idx, 0);
+ ++ if (!pgsizes)
+ ++ goto out_set_count;
- /* pick the biggest page */
- pgsize_idx = __fls(pgsize);
- pgsize = 1UL << pgsize_idx;
+ ++ pgsize_idx_next = __ffs(pgsizes);
+ ++ pgsize_next = BIT(pgsize_idx_next);
+
+ ++ /*
+ ++ * There's no point trying a bigger page size unless the virtual
+ ++ * and physical addresses are similarly offset within the larger page.
+ ++ */
+ ++ if ((iova ^ paddr) & (pgsize_next - 1))
+ ++ goto out_set_count;
+
+ ++ /* Calculate the offset to the next page size alignment boundary */
+ ++ offset = pgsize_next - (addr_merge & (pgsize_next - 1));
+
+ ++ /*
+ ++ * If size is big enough to accommodate the larger page, reduce
+ ++ * the number of smaller pages.
+ ++ */
+ ++ if (offset + pgsize_next <= size)
+ ++ size = offset;
+ ++
+ ++ out_set_count:
+ ++ *count = size >> pgsize_idx;
return pgsize;
}
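The new count output lets the map path cover a whole run of equally-sized, equally-aligned pages with a single driver call. A simplified sketch of the calling loop, modelled on __iommu_map() and assuming a map_pages() driver callback that reports how many bytes it mapped (domain, ops, prot and gfp come from the elided surrounding context):

	while (size) {
		size_t mapped = 0, count;
		size_t pgsize = iommu_pgsize(domain, iova, paddr, size, &count);

		ret = ops->map_pages(domain, iova, paddr, pgsize, count,
				     prot, gfp, &mapped);
		if (ret)
			break;

		iova  += mapped;
		paddr += mapped;
		size  -= mapped;
	}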
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirks);
- -- void iommu_set_dma_strict(bool val);
- -- bool iommu_get_dma_strict(struct iommu_domain *domain);
+ ++ void iommu_set_dma_strict(void);
extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
unsigned long iova, int flags);
* a different granularity, then sync the TLB so that the gather
* structure can be rewritten.
*/
---- -- if (gather->pgsize != size ||
---- -- end + 1 < gather->start || start > gather->end + 1) {
---- -- if (gather->pgsize)
---- -- iommu_iotlb_sync(domain, gather);
---- -- gather->pgsize = size;
---- -- }
-
- if (gather->end < end)
- gather->end = end;
++++ ++ if ((gather->pgsize && gather->pgsize != size) ||
++++ ++ iommu_iotlb_gather_is_disjoint(gather, iova, size))
++++ ++ iommu_iotlb_sync(domain, gather);
- if (gather->start > start)
- gather->start = start;
++++ ++ gather->pgsize = size;
++++ ++ iommu_iotlb_gather_add_range(gather, iova, size);
++++ + }
++++++ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
++++++ {
++++++ return gather && gather->queued;
+ }
+
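The queued flag is what io-pgtable-arm now tests via iommu_iotlb_gather_queued(): when invalidation will be deferred to a flush queue there is no point gathering per-page TLB entries during unmap. A sketch of the unmap side, loosely modelled on the DMA API path (the use_flush_queue flag is assumed context):

	struct iommu_iotlb_gather gather;

	iommu_iotlb_gather_init(&gather);
	/* Tell the page-table code a flush queue will handle invalidation */
	gather.queued = use_flush_queue;

	iommu_unmap_fast(domain, iova, size, &gather);
	if (!iommu_iotlb_gather_queued(&gather))
		iommu_iotlb_sync(domain, &gather);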
/* PCI device grouping function */
extern struct iommu_group *pci_device_group(struct device *dev);
/* Generic device grouping function */