iommu/arm-smmu-v3: Support IOMMU_HWPT_INVALIDATE using a VIOMMU object
authorNicolin Chen <nicolinc@nvidia.com>
Thu, 31 Oct 2024 00:20:56 +0000 (21:20 -0300)
committerJason Gunthorpe <jgg@nvidia.com>
Tue, 12 Nov 2024 18:11:03 +0000 (14:11 -0400)
Implement the vIOMMU's cache_invalidate op so that user space can
invalidate the IOTLB entries, device ATC entries, and CD entries that are
cached by the hardware.

Add struct iommu_viommu_arm_smmuv3_invalidate, which defines invalidation
entries in the native format of a 128-bit TLBI command. Check each command
against the permitted command list and rewrite its VMID/SID fields to
match what is stored in the vIOMMU.
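
As a rough usage sketch (not part of this patch), user space can drive
this path through the existing IOMMU_HWPT_INVALIDATE ioctl, passing the
vIOMMU object ID via @hwpt_id. The helper name, the iommufd fd, the
viommu_id, and the raw 0x30 opcode (CMDQ_OP_TLBI_NSNH_ALL) below are
illustrative assumptions:

	#include <endian.h>
	#include <err.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/iommufd.h>

	/* Issue a single guest-format invalidation through a vIOMMU object */
	static void vsmmu_tlbi_nsnh_all(int iommufd, uint32_t viommu_id)
	{
		struct iommu_viommu_arm_smmuv3_invalidate inv = {
			/* CMDQ_OP_TLBI_NSNH_ALL: opcode 0x30 in CMD[0] bits 7:0 */
			.cmd = { htole64(0x30), htole64(0) },
		};
		struct iommu_hwpt_invalidate cmd = {
			.size = sizeof(cmd),
			.hwpt_id = viommu_id,	/* a vIOMMU ID, not an HWPT ID */
			.data_uptr = (uintptr_t)&inv,
			.data_type = IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3,
			.entry_len = sizeof(inv),
			.entry_num = 1,
		};

		/* On error, cmd.entry_num reports how many entries completed */
		if (ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &cmd))
			err(1, "IOMMU_HWPT_INVALIDATE");
	}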

Link: https://patch.msgid.link/r/12-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com
Co-developed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
include/uapi/linux/iommufd.h

index 84c8a21..c96cab6 100644
@@ -215,8 +215,134 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
        return &nested_domain->domain;
 }
 
+static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
+{
+       struct arm_smmu_master *master;
+       struct device *dev;
+       int ret = 0;
+
+       xa_lock(&vsmmu->core.vdevs);
+       dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
+       if (!dev) {
+               ret = -EIO;
+               goto unlock;
+       }
+       master = dev_iommu_priv_get(dev);
+
+       /* At this moment, iommufd only supports PCI devices that have one SID */
+       if (sid)
+               *sid = master->streams[0].id;
+unlock:
+       xa_unlock(&vsmmu->core.vdevs);
+       return ret;
+}
+
+/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
+struct arm_vsmmu_invalidation_cmd {
+       union {
+               u64 cmd[2];
+               struct iommu_viommu_arm_smmuv3_invalidate ucmd;
+       };
+};
+
+/*
+ * Convert, in place, the raw invalidation command into an internal format that
+ * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
+ * stored in CPU endian.
+ *
+ * Enforce the VMID or SID on the command.
+ */
+static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
+                                     struct arm_vsmmu_invalidation_cmd *cmd)
+{
+       /* Commands are le64 stored in u64 */
+       cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
+       cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
+
+       switch (cmd->cmd[0] & CMDQ_0_OP) {
+       case CMDQ_OP_TLBI_NSNH_ALL:
+               /* Convert to NH_ALL */
+               cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
+                             FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
+               cmd->cmd[1] = 0;
+               break;
+       case CMDQ_OP_TLBI_NH_VA:
+       case CMDQ_OP_TLBI_NH_VAA:
+       case CMDQ_OP_TLBI_NH_ALL:
+       case CMDQ_OP_TLBI_NH_ASID:
+               cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
+               cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
+               break;
+       case CMDQ_OP_ATC_INV:
+       case CMDQ_OP_CFGI_CD:
+       case CMDQ_OP_CFGI_CD_ALL: {
+               u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
+
+               if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
+                       return -EIO;
+               cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
+               cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
+               break;
+       }
+       default:
+               return -EIO;
+       }
+       return 0;
+}
+
+static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
+                                     struct iommu_user_data_array *array)
+{
+       struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
+       struct arm_smmu_device *smmu = vsmmu->smmu;
+       struct arm_vsmmu_invalidation_cmd *last;
+       struct arm_vsmmu_invalidation_cmd *cmds;
+       struct arm_vsmmu_invalidation_cmd *cur;
+       struct arm_vsmmu_invalidation_cmd *end;
+       int ret;
+
+       cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
+       if (!cmds)
+               return -ENOMEM;
+       cur = cmds;
+       end = cmds + array->entry_num;
+
+       static_assert(sizeof(*cmds) == 2 * sizeof(u64));
+       ret = iommu_copy_struct_from_full_user_array(
+               cmds, sizeof(*cmds), array,
+               IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
+       if (ret)
+               goto out;
+
+       last = cmds;
+       while (cur != end) {
+               ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
+               if (ret)
+                       goto out;
+
+               /* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
+               cur++;
+               if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
+                       continue;
+
+               /* FIXME always uses the main cmdq rather than trying to group by type */
+               ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
+                                                 cur - last, true);
+               if (ret) {
+                       cur--;
+                       goto out;
+               }
+               last = cur;
+       }
+out:
+       array->entry_num = cur - cmds;
+       kfree(cmds);
+       return ret;
+}
+
 static const struct iommufd_viommu_ops arm_vsmmu_ops = {
        .alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
+       .cache_invalidate = arm_vsmmu_cache_invalidate,
 };
 
 struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
@@ -239,6 +365,14 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
        if (s2_parent->smmu != master->smmu)
                return ERR_PTR(-EINVAL);
 
+       /*
+        * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
+        * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
+        * any change to remove this.
+        */
+       if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
+               return ERR_PTR(-EOPNOTSUPP);
+
        /*
         * Must support some way to prevent the VM from bypassing the cache
         * because VFIO currently does not do any cache maintenance. canwbs
index b47f802..2a9f2d1 100644
@@ -766,9 +766,9 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
  *   insert their own list of commands then all of the commands from one
  *   CPU will appear before any of the commands from the other CPU.
  */
-static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
-                                      struct arm_smmu_cmdq *cmdq,
-                                      u64 *cmds, int n, bool sync)
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+                               struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+                               bool sync)
 {
        u64 cmd_sync[CMDQ_ENT_DWORDS];
        u32 prod;
index 01c1d16..af25f09 100644
@@ -529,6 +529,7 @@ struct arm_smmu_cmdq_ent {
                #define CMDQ_OP_TLBI_NH_ALL     0x10
                #define CMDQ_OP_TLBI_NH_ASID    0x11
                #define CMDQ_OP_TLBI_NH_VA      0x12
+               #define CMDQ_OP_TLBI_NH_VAA     0x13
                #define CMDQ_OP_TLBI_EL2_ALL    0x20
                #define CMDQ_OP_TLBI_EL2_ASID   0x21
                #define CMDQ_OP_TLBI_EL2_VA     0x22
@@ -951,6 +952,10 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state);
 void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
                                  const struct arm_smmu_ste *target);
 
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+                               struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
+                               bool sync);
+
 #ifdef CONFIG_ARM_SMMU_V3_SVA
 bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
 bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
index a66eb03..747d3d9 100644
@@ -713,9 +713,11 @@ struct iommu_hwpt_get_dirty_bitmap {
  * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
  *                                        Data Type
  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
+ * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
  */
 enum iommu_hwpt_invalidate_data_type {
        IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
+       IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
 };
 
 /**
@@ -754,6 +756,28 @@ struct iommu_hwpt_vtd_s1_invalidate {
        __u32 __reserved;
 };
 
+/**
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
+ *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
+ * @cmd: 128-bit cache invalidation command that runs in the SMMU CMDQ.
+ *       Must be little-endian.
+ *
+ * Supported command list only when passing in a vIOMMU via @hwpt_id:
+ *     CMDQ_OP_TLBI_NSNH_ALL
+ *     CMDQ_OP_TLBI_NH_VA
+ *     CMDQ_OP_TLBI_NH_VAA
+ *     CMDQ_OP_TLBI_NH_ALL
+ *     CMDQ_OP_TLBI_NH_ASID
+ *     CMDQ_OP_ATC_INV
+ *     CMDQ_OP_CFGI_CD
+ *     CMDQ_OP_CFGI_CD_ALL
+ *
+ * -EIO will be returned if the command is not supported.
+ */
+struct iommu_viommu_arm_smmuv3_invalidate {
+       __aligned_le64 cmd[2];
+};
+
 /**
  * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
  * @size: sizeof(struct iommu_hwpt_invalidate)