#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-/* free ( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
+ #define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
pkru_val = pk->pkru;
}
__write_pkru(pkru_val);
+
+ /*
+ * Expensive PASID MSR write will be avoided in update_pasid() because
+ * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
+ * unless it's different from mm->pasid to reduce overhead.
+ */
+ update_pasid();
}
/*
{
u32 eax, edx;
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
return eax + ((u64)edx << 32);
}
u32 eax = value;
u32 edx = value >> 32;
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
#define nop() asm volatile ("nop")
+static inline void serialize(void)
+{
+ /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
+ /* The dst parameter must be 64-bytes aligned */
+ static inline void movdir64b(void *dst, const void *src)
+ {
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+
+ /*
+ * MOVDIR64B %(rdx), rax.
+ *
+ * Both __src and __dst must be memory constraints in order to tell the
+ * compiler that no other memory accesses should be reordered around
+ * this one.
+ *
+ * Also, both must be supplied as lvalues because this tells
+ * the compiler what the object is (its size) the instruction accesses.
+ * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+ */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+ }
+
+ /**
+ * enqcmds - Enqueue a command in supervisor (CPL0) mode
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: 512 bits memory operand
+ *
+ * The ENQCMDS instruction allows software to write a 512-bit command to
+ * a 512-bit-aligned special MMIO region that supports the instruction.
+ * A return status is loaded into the ZF flag in the RFLAGS register.
+ * ZF = 0 equates to success, and ZF = 1 indicates retry or error.
+ *
+ * This function issues the ENQCMDS instruction to submit data from
+ * kernel space to MMIO space, in a unit of 512 bits. Order of data access
+ * is not guaranteed, nor is a memory barrier performed afterwards. It
+ * returns 0 on success and -EAGAIN on failure.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the
+ * ENQCMDS instruction is supported on the platform and the device accepts
+ * ENQCMDS.
+ */
+ static inline int enqcmds(void __iomem *dst, const void *src)
+ {
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+ int zf;
+
+ /*
+ * ENQCMDS %(rdx), rax
+ *
+ * See movdir64b()'s comment on operand specification.
+ */
+ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
+ CC_SET(z)
+ : CC_OUT(z) (zf), "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+
+ /* Submission failure is indicated via EFLAGS.ZF=1 */
+ if (zf)
+ return -EAGAIN;
+
+ return 0;
+ }
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
- unsigned int pasid, unsigned int vmid);
+ u32 pasid, unsigned int vmid);
static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
}
static int
- set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
+ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->sched_running = false;
dqm_unlock(dqm);
+ pm_release_ib(&dqm->packets);
+
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
pm_uninit(&dqm->packets, hanging);
if (q->properties.is_active) {
increment_queue_count(dqm, q->properties.type);
- retval = execute_queues_cpsch(dqm,
+ execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
}
kfree(dqm);
}
- int kfd_process_vm_fault(struct device_queue_manager *dqm,
- unsigned int pasid)
+ int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
{
struct kfd_process_device *pdd;
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
struct device *dev = iommu->iommu.dev;
- int type, devid, pasid, flags, tag;
+ int type, devid, flags, tag;
volatile u32 *event = __evt;
int count = 0;
u64 address;
+ u32 pasid;
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
- static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid,
u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
}
- static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
int qdep, u64 address, bool size)
{
memset(cmd, 0, sizeof(*cmd));
CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
}
- static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+ static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
int status, int tag, bool gn)
{
memset(cmd, 0, sizeof(*cmd));
}
EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
- static int __flush_pasid(struct protection_domain *domain, int pasid,
+ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
struct iommu_dev_data *dev_data;
return ret;
}
- static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+ static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid,
u64 address)
{
return __flush_pasid(domain, pasid, address, false);
}
- int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address)
{
struct protection_domain *domain = to_pdomain(dom);
}
EXPORT_SYMBOL(amd_iommu_flush_page);
- static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+ static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
{
return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
true);
}
- int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long flags;
}
EXPORT_SYMBOL(amd_iommu_flush_tlb);
- static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
{
int index;
u64 *pte;
return pte;
}
- static int __set_gcr3(struct protection_domain *domain, int pasid,
+ static int __set_gcr3(struct protection_domain *domain, u32 pasid,
unsigned long cr3)
{
struct domain_pgtable pgtable;
return __amd_iommu_flush_tlb(domain, pasid);
}
- static int __clear_gcr3(struct protection_domain *domain, int pasid)
+ static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
{
struct domain_pgtable pgtable;
u64 *pte;
return __amd_iommu_flush_tlb(domain, pasid);
}
- int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3)
{
struct protection_domain *domain = to_pdomain(dom);
}
EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
- int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long flags;
}
EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
- int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag)
{
struct iommu_dev_data *dev_data;
{
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ u64 valid;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
!entry || entry->lo.fields_vapic.guest_mode)
return 0;
+ valid = entry->lo.fields_vapic.valid;
+
entry->lo.val = 0;
entry->hi.val = 0;
+ entry->lo.fields_vapic.valid = valid;
entry->lo.fields_vapic.guest_mode = 1;
entry->lo.fields_vapic.ga_log_intr = 1;
entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
struct irq_cfg *cfg = ir_data->cfg;
- u64 valid = entry->lo.fields_remap.valid;
+ u64 valid;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
!entry || !entry->lo.fields_vapic.guest_mode)
return 0;
+ valid = entry->lo.fields_remap.valid;
+
entry->lo.val = 0;
entry->hi.val = 0;
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
- int pasid)
+ u32 pasid)
{
int flags = PASID_FLAG_SUPERVISOR_MODE;
struct dma_pte *pgd = domain->pgd;
}
/* Setup the PASID entry for requests without PASID: */
- spin_lock(&iommu->lock);
+ spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
- spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
return -ENODEV;
if (domain->default_pasid <= 0) {
- int pasid;
+ u32 pasid;
/* No private data needed for the default pasid */
pasid = ioasid_alloc(NULL, PASID_MIN,
*/
atomic_t mm_count;
+ /**
+ * @has_pinned: Whether this mm has pinned any pages. This can
+ * be either replaced in the future by @pinned_vm when it
+ * becomes stable, or grow into a counter on its own. We're
+ * aggresive on this bit now - even if the pinned pages were
+ * unpinned later on, we'll still keep this bit set for the
+ * lifecycle of this mm just for simplicity.
+ */
+ atomic_t has_pinned;
+
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;
+
+ #ifdef CONFIG_IOMMU_SUPPORT
+ u32 pasid;
+ #endif
} __randomize_layout;
/*