x86/mmu: Allocate/free a PASID
authorFenghua Yu <fenghua.yu@intel.com>
Tue, 15 Sep 2020 16:30:13 +0000 (09:30 -0700)
committerBorislav Petkov <bp@suse.de>
Thu, 17 Sep 2020 18:22:15 +0000 (20:22 +0200)
A PASID is allocated for an "mm" the first time any thread binds to an
SVA-capable device and is freed from the "mm" when the SVA is unbound
by the last thread. It's possible for the "mm" to have different PASID
values in different binding/unbinding SVA cycles.

The mm's PASID (non-zero for a valid PASID or 0 for an invalid PASID) is
propagated to the per-thread PASID MSR for all threads within the mm
through IPI, context switch, or inheritance. This is done to ensure that
a running thread always has the PASID in its MSR matching the mm's PASID.

 [ bp: s/SVM/SVA/g; massage. ]

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-10-git-send-email-fenghua.yu@intel.com
arch/x86/include/asm/fpu/api.h
arch/x86/include/asm/fpu/internal.h
arch/x86/kernel/fpu/xstate.c
drivers/iommu/intel/svm.c

index b774c52..dcd9503 100644 (file)
@@ -62,4 +62,16 @@ extern void switch_fpu_return(void);
  */
 extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
 
+/*
+ * Tasks that are not using SVA have mm->pasid set to zero to note that they
+ * will not have the valid bit set in MSR_IA32_PASID while they are running.
+ */
+#define PASID_DISABLED 0
+
+#ifdef CONFIG_IOMMU_SUPPORT
+/* Update current's PASID MSR/state by mm's PASID. */
+void update_pasid(void);
+#else
+static inline void update_pasid(void) { }
+#endif
 #endif /* _ASM_X86_FPU_API_H */
index 0a460f2..341d00e 100644 (file)
@@ -583,6 +583,13 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
                        pkru_val = pk->pkru;
        }
        __write_pkru(pkru_val);
+
+       /*
+        * The expensive PASID MSR write is avoided in update_pasid() because
+        * TIF_NEED_FPU_LOAD is set here. The PASID state is also only
+        * rewritten when it differs from mm->pasid, to reduce overhead.
+        */
+       update_pasid();
 }
 
 /*
index 67f1a03..5d80474 100644 (file)
@@ -1402,3 +1402,60 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
        return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_IOMMU_SUPPORT
+void update_pasid(void)
+{
+       u64 pasid_state;
+       u32 pasid;
+
+       if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+               return;
+
+       if (!current->mm)
+               return;
+
+       pasid = READ_ONCE(current->mm->pasid);
+       /* Set the valid bit in the PASID MSR/state only for a valid pasid. */
+       pasid_state = pasid == PASID_DISABLED ?
+                     pasid : pasid | MSR_IA32_PASID_VALID;
+
+       /*
+        * fregs_lock() is not needed here: the task's fpstate cannot be
+        * changed by others (e.g. ptrace) while this task is being
+        * switched to or is running the IPI handler itself.
+        */
+       if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+               /* The MSR is live on this CPU and can be updated directly. */
+               wrmsrl(MSR_IA32_PASID, pasid_state);
+       } else {
+               struct fpu *fpu = &current->thread.fpu;
+               struct ia32_pasid_state *ppasid_state;
+               struct xregs_state *xsave;
+
+               /*
+                * The CPU's xstate registers are not currently active. Just
+                * update the PASID state in the memory buffer here. The
+                * PASID MSR will be loaded when returning to user mode.
+                */
+               xsave = &fpu->state.xsave;
+               xsave->header.xfeatures |= XFEATURE_MASK_PASID;
+               ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
+               /*
+                * XFEATURE_MASK_PASID was just set in xfeatures above, so
+                * get_xsave_addr() cannot return NULL here.
+                *
+                * Only write the buffer when the value actually changes,
+                * to avoid invalidating the fpregs needlessly.
+                */
+               if (ppasid_state->pasid != pasid_state) {
+                       /*
+                        * Invalidate the fpregs so that the next state
+                        * restore picks up the new PASID state.
+                        */
+                       __fpu_invalidate_fpregs_state(fpu);
+                       ppasid_state->pasid = pasid_state;
+               }
+       }
+}
+#endif /* CONFIG_IOMMU_SUPPORT */
index fc90a07..60ffe08 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/mm_types.h>
 #include <linux/ioasid.h>
 #include <asm/page.h>
+#include <asm/fpu/api.h>
 
 #include "pasid.h"
 
@@ -444,6 +445,24 @@ out:
        return ret;
 }
 
+static void _load_pasid(void *unused)
+{
+       update_pasid();
+}
+
+static void load_pasid(struct mm_struct *mm, u32 pasid)
+{
+       mutex_lock(&mm->context.lock);
+
+       /* Pairs with the READ_ONCE() of mm->pasid in update_pasid(). */
+       smp_store_release(&mm->pasid, pasid);
+
+       /* Refresh the PASID MSR/state on all CPUs running the mm's tasks. */
+       on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
+
+       mutex_unlock(&mm->context.lock);
+}
+
 /* Caller must hold pasid_mutex, mm reference */
 static int
 intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -591,6 +610,10 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
                }
 
                list_add_tail(&svm->list, &global_svm_list);
+               if (mm) {
+                       /* The newly allocated pasid is loaded to the mm. */
+                       load_pasid(mm, svm->pasid);
+               }
        } else {
                /*
                 * Binding a new device with existing PASID, need to setup
@@ -654,8 +677,11 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 
                        if (list_empty(&svm->devs)) {
                                ioasid_free(svm->pasid);
-                               if (svm->mm)
+                               if (svm->mm) {
                                        mmu_notifier_unregister(&svm->notifier, svm->mm);
+                                       /* Clear mm's pasid. */
+                                       load_pasid(svm->mm, PASID_DISABLED);
+                               }
                                list_del(&svm->list);
                                /* We mandate that no page faults may be outstanding
                                 * for the PASID when intel_svm_unbind_mm() is called.