drm/amdkfd: Add sanity checks in IRQ handlers
author: Felix Kuehling <Felix.Kuehling@amd.com>
Tue, 1 May 2018 21:56:12 +0000 (17:56 -0400)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Tue, 1 May 2018 21:56:12 +0000 (17:56 -0400)
Only accept interrupts from KFD VMIDs. Just checking for a PASID may
not be enough because amdgpu started using PASIDs to map VM faults
to processes.

Warn if an IRQ doesn't have a valid PASID (indicating a firmware bug).

Suggested-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Suggested-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

index 3d5ccb3..49df6c7 100644 (file)
 static bool cik_event_interrupt_isr(struct kfd_dev *dev,
                                        const uint32_t *ih_ring_entry)
 {
-       unsigned int pasid;
        const struct cik_ih_ring_entry *ihre =
                        (const struct cik_ih_ring_entry *)ih_ring_entry;
+       unsigned int vmid, pasid;
+
+       /* Only handle interrupts from KFD VMIDs */
+       vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
+       if (vmid < dev->vm_info.first_vmid_kfd ||
+           vmid > dev->vm_info.last_vmid_kfd)
+               return 0;
 
+       /* If there is no valid PASID, it's likely a firmware bug */
        pasid = (ihre->ring_id & 0xffff0000) >> 16;
+       if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+               return 0;
 
-       /* Do not process in ISR, just request it to be forwarded to WQ. */
-       return (pasid != 0) &&
-               (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+       /* Interrupt types we care about: various signals and faults.
+        * They will be forwarded to a work queue (see below).
+        */
+       return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
                ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
                ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
-               ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
+               ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
 }
 
 static void cik_event_interrupt_wq(struct kfd_dev *dev,
index 39d4115..37029ba 100644 (file)
@@ -29,27 +29,35 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
                                        const uint32_t *ih_ring_entry)
 {
        uint16_t source_id, client_id, pasid, vmid;
+       const uint32_t *data = ih_ring_entry;
 
-       source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
-       client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
-       pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+       /* Only handle interrupts from KFD VMIDs */
        vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+       if (vmid < dev->vm_info.first_vmid_kfd ||
+           vmid > dev->vm_info.last_vmid_kfd)
+               return 0;
+
+       /* If there is no valid PASID, it's likely a firmware bug */
+       pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+       if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+               return 0;
 
-       if (pasid) {
-               const uint32_t *data = ih_ring_entry;
+       source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+       client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
 
-               pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
-                        client_id, source_id, pasid);
-               pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
-                        data[0], data[1], data[2], data[3],
-                        data[4], data[5], data[6], data[7]);
-       }
+       pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
+                client_id, source_id, pasid);
+       pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+                data[0], data[1], data[2], data[3],
+                data[4], data[5], data[6], data[7]);
 
-       return (pasid != 0) &&
-               (source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
-                source_id == SOC15_INTSRC_SDMA_TRAP ||
-                source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
-                source_id == SOC15_INTSRC_CP_BAD_OPCODE);
+       /* Interrupt types we care about: various signals and faults.
+        * They will be forwarded to a work queue (see below).
+        */
+       return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+               source_id == SOC15_INTSRC_SDMA_TRAP ||
+               source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+               source_id == SOC15_INTSRC_CP_BAD_OPCODE;
 }
 
 static void event_interrupt_wq_v9(struct kfd_dev *dev,