drm/amdkfd: Add KFD support for soc21 v3
author Mukul Joshi <mukul.joshi@amd.com>
Tue, 26 Apr 2022 17:00:11 +0000 (13:00 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 4 May 2022 14:43:54 +0000 (10:43 -0400)
Add initial support for soc21 in KFD compute
driver (Mukul)
- Add new definition for soc21 device.
- Add new file for amdgpu-kfd interface for GFX11 family.
- Add new file for queue management, interrupt handling,
  mqd management for GFX11 family in KFD driver.
- Related changes/updates for soc21 device in
  KFD driver.
- Repurpose last 2 entries of SDMA MQD for driver use.

v2: Add an optional argument into update queue operation (Mukul)

v3: Switch to ip version check, replace kgd_dev with
    amdgpu_device (Hawking)

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
20 files changed:
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/Makefile
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/amdkfd/soc15_int.h
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index 6caf239..6a67210 100644 (file)
@@ -215,7 +215,8 @@ amdgpu-y += \
        amdgpu_amdkfd_arcturus.o \
        amdgpu_amdkfd_aldebaran.o \
        amdgpu_amdkfd_gfx_v10.o \
-       amdgpu_amdkfd_gfx_v10_3.o
+       amdgpu_amdkfd_gfx_v10_3.o \
+       amdgpu_amdkfd_gfx_v11.o
 
 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
 amdgpu-y += amdgpu_amdkfd_gfx_v7.o
index 64c6664..1f8161c 100644 (file)
@@ -100,7 +100,18 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
         * The first num_doorbells are used by amdgpu.
         * amdkfd takes whatever's left in the aperture.
         */
-       if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+       if (adev->enable_mes) {
+               /*
+                * With MES enabled, only the doorbell base address is
+                * passed to KFD. The size and start offset are set to
+                * zero because AMDGPU manages the whole doorbell
+                * space.
+                */
+               *aperture_base = adev->doorbell.base;
+               *aperture_size = 0;
+               *start_offset = 0;
+       } else if (adev->doorbell.size > adev->doorbell.num_doorbells *
+                                               sizeof(u32)) {
                *aperture_base = adev->doorbell.base;
                *aperture_size = adev->doorbell.size;
                *start_offset = adev->doorbell.num_doorbells * sizeof(u32);
@@ -128,7 +139,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
                                          AMDGPU_GMC_HOLE_START),
                        .drm_render_minor = adev_to_drm(adev)->render->index,
                        .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
-
+                       .enable_mes = adev->enable_mes,
                };
 
                /* this is going to have a few of the MSBs set that we need to
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
new file mode 100644 (file)
index 0000000..0b0a72c
--- /dev/null
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "soc15d.h"
+#include "v11_structs.h"
+#include "soc21.h"
+
+/*
+ * Dequeue request types. hqd_destroy_v11() writes one of these values to
+ * CP_HQD_DEQUEUE_REQUEST.
+ */
+enum hqd_dequeue_request_type {
+       NO_ACTION   = 0,
+       DRAIN_PIPE  = 1,
+       RESET_WAVES = 2,
+       SAVE_WAVES  = 3
+};
+
+/*
+ * Take adev->srbm_mutex, then select the given mec/pipe/queue/vmid through
+ * soc21_grbm_select(). The mutex stays held on return; release it with
+ * unlock_srbm().
+ */
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+                       uint32_t queue, uint32_t vmid)
+{
+       mutex_lock(&adev->srbm_mutex);
+       soc21_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+/*
+ * Reset the selection made by lock_srbm() to all zeros and drop
+ * adev->srbm_mutex.
+ */
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+       soc21_grbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
+/*
+ * Lock SRBM and select a compute queue addressed by a flat pipe index.
+ *
+ * pipe_id is split into a 1-based MEC number and a pipe within that MEC
+ * using adev->gfx.mec.num_pipe_per_mec; VMID 0 is selected. Undo with
+ * release_queue().
+ */
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+                               uint32_t queue_id)
+{
+       uint32_t pipe_sel = pipe_id % adev->gfx.mec.num_pipe_per_mec;
+       uint32_t mec_sel = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+
+       lock_srbm(adev, mec_sel, pipe_sel, queue_id, 0);
+}
+
+/*
+ * Return a 64-bit mask with a single bit set for the given queue. The bit
+ * index is pipe_id * num_queue_per_pipe + queue_id.
+ */
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+                              uint32_t pipe_id, uint32_t queue_id)
+{
+       unsigned int shift = queue_id +
+                       pipe_id * adev->gfx.mec.num_queue_per_pipe;
+
+       return 1ull << shift;
+}
+
+/* Counterpart of acquire_queue(): simply calls unlock_srbm(). */
+static void release_queue(struct amdgpu_device *adev)
+{
+       unlock_srbm(adev);
+}
+
+/*
+ * Program the shader memory settings for one VMID.
+ *
+ * Selects vmid through lock_srbm() and writes sh_mem_config to
+ * SH_MEM_CONFIG and sh_mem_bases to SH_MEM_BASES. sh_mem_ape1_base and
+ * sh_mem_ape1_limit are accepted but not used by this implementation.
+ */
+static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
+                                       uint32_t sh_mem_config,
+                                       uint32_t sh_mem_ape1_base,
+                                       uint32_t sh_mem_ape1_limit,
+                                       uint32_t sh_mem_bases)
+{
+       lock_srbm(adev, 0, 0, 0, vmid);
+
+       WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
+
+       unlock_srbm(adev);
+}
+
+/*
+ * Record the vmid -> pasid association in the IH block by writing the
+ * shifted pasid into the IH_VMID_0_LUT entry indexed by vmid.
+ * Always returns 0.
+ */
+static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
+                                       unsigned int vmid)
+{
+       uint32_t lut_entry;
+
+       lut_entry = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+       /* The IH block needs the vmid to pasid mapping as well */
+       pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+                       vmid, pasid);
+       WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, lut_entry);
+
+       return 0;
+}
+
+/*
+ * Enable the time-stamp and opcode-error interrupts in CPC_INT_CNTL for the
+ * pipe addressed by the flat index pipe_id. Always returns 0.
+ */
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
+{
+       uint32_t mec_sel = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+       uint32_t pipe_sel = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+       lock_srbm(adev, mec_sel, pipe_sel, 0, 0);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
+               CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+               CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+       unlock_srbm(adev);
+
+       return 0;
+}
+
+/*
+ * Compute the register offset to add to an SDMA0_QUEUE0_* register index
+ * in order to address the same register of queue queue_id on SDMA engine
+ * engine_id. Only engines 0 and 1 are handled; any other engine_id hits
+ * BUG().
+ */
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+                               unsigned int engine_id,
+                               unsigned int queue_id)
+{
+       uint32_t engine_base = 0;
+       uint32_t queue_offset;
+
+       if (engine_id == 0)
+               engine_base = SOC15_REG_OFFSET(SDMA0, 0,
+                               regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+       else if (engine_id == 1)
+               engine_base = SOC15_REG_OFFSET(SDMA1, 0,
+                               regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+       else
+               BUG();
+
+       /* Consecutive queues are spaced by the QUEUE0 -> QUEUE1 distance */
+       queue_offset = engine_base
+               + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+       pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+                       queue_id, queue_offset);
+
+       return queue_offset;
+}
+
+/* View an opaque MQD pointer as a v11 compute MQD. */
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+       struct v11_compute_mqd *compute_mqd = mqd;
+
+       return compute_mqd;
+}
+
+/* View an opaque MQD pointer as a v11 SDMA MQD. */
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+       struct v11_sdma_mqd *sdma_mqd = mqd;
+
+       return sdma_mqd;
+}
+
+/*
+ * Load a compute MQD into the hardware queue descriptor (HQD) registers of
+ * the queue selected by pipe_id/queue_id and activate the queue.
+ *
+ * Steps, all performed between acquire_queue() and release_queue():
+ *  - if the MQD's cp_hqd_vmid is 0, program RLC_CP_SCHEDULERS.scheduler1
+ *    with the mec/pipe/queue of this queue;
+ *  - copy the MQD register image into the HQD registers;
+ *  - enable the doorbell;
+ *  - if wptr is non-NULL, program a guessed write pointer and trigger a
+ *    one-shot WPTR poll from that address;
+ *  - start the EOP fetcher and set CP_HQD_ACTIVE.
+ *
+ * wptr_shift, wptr_mask and mm are not used by this implementation.
+ * Always returns 0.
+ */
+static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+                       uint32_t queue_id, uint32_t __user *wptr,
+                       uint32_t wptr_shift, uint32_t wptr_mask,
+                       struct mm_struct *mm)
+{
+       struct v11_compute_mqd *m;
+       uint32_t *mqd_hqd;
+       uint32_t reg, hqd_base, data;
+
+       m = get_mqd(mqd);
+
+       pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+       acquire_queue(adev, pipe_id, queue_id);
+
+       /* HIQ is set during driver init period with vmid set to 0 */
+       if (m->cp_hqd_vmid == 0) {
+               uint32_t value, mec, pipe;
+
+               mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+               pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+               pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+                       mec, pipe, queue_id);
+               value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
+               value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+                       ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+               WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
+       }
+
+       /*
+        * Copy the MQD image, starting at cp_mqd_base_addr_lo, into the
+        * HQD registers CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI. This
+        * relies on the MQD fields being laid out in register order.
+        */
+       mqd_hqd = &m->cp_mqd_base_addr_lo;
+       hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+
+       for (reg = hqd_base;
+            reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+               WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+       /* Activate doorbell logic before triggering WPTR poll. */
+       data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+                            CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+       if (wptr) {
+               /* Don't read wptr with get_user because the user
+                * context may not be accessible (if this function
+                * runs in a work queue). Instead trigger a one-shot
+                * polling read from memory in the CP. This assumes
+                * that wptr is GPU-accessible in the queue's VMID via
+                * ATC or SVM. WPTR==RPTR before starting the poll so
+                * the CP starts fetching new commands from the right
+                * place.
+                *
+                * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+                * tricky. Assume that the queue didn't overflow. The
+                * number of valid bits in the 32-bit RPTR depends on
+                * the queue size. The remaining bits are taken from
+                * the saved 64-bit WPTR. If the WPTR wrapped, add the
+                * queue size.
+                */
+               uint32_t queue_size =
+                       2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+                                          CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+               uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+               if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+                       guessed_wptr += queue_size;
+               guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+               guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+               WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
+                      lower_32_bits(guessed_wptr));
+               WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
+                      upper_32_bits(guessed_wptr));
+               WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+                      lower_32_bits((uint64_t)wptr));
+               WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+                      upper_32_bits((uint64_t)wptr));
+               pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+                        (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+               WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
+                      (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+       }
+
+       /* Start the EOP fetcher */
+       WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
+              REG_SET_FIELD(m->cp_hqd_eop_rptr,
+                            CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+       /* Setting ACTIVE is the last register write before releasing the queue */
+       data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
+
+       release_queue(adev);
+
+       return 0;
+}
+
+/*
+ * Map the HIQ by submitting a PACKET3_MAP_QUEUES packet on the KIQ ring.
+ *
+ * The packet carries the queue's vmid, mec/pipe/queue selection, the
+ * doorbell offset, the MQD base address and the WPTR poll address taken
+ * from the MQD. The ring is written under adev->gfx.kiq.ring_lock, inside
+ * an acquire_queue()/release_queue() pair.
+ *
+ * Returns 0 on success or the error from amdgpu_ring_alloc().
+ */
+static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
+                             uint32_t pipe_id, uint32_t queue_id,
+                             uint32_t doorbell_off)
+{
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+       struct v11_compute_mqd *m;
+       uint32_t mec, pipe;
+       int r;
+
+       m = get_mqd(mqd);
+
+       acquire_queue(adev, pipe_id, queue_id);
+
+       mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+       pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+       pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+                mec, pipe, queue_id);
+
+       spin_lock(&adev->gfx.kiq.ring_lock);
+       /* 7 dwords: one packet header plus the six payload dwords below */
+       r = amdgpu_ring_alloc(kiq_ring, 7);
+       if (r) {
+               pr_err("Failed to alloc KIQ (%d).\n", r);
+               goto out_unlock;
+       }
+
+       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+                         PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+                         PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+                         PACKET3_MAP_QUEUES_PIPE(pipe) |
+                         PACKET3_MAP_QUEUES_ME((mec - 1)) |
+                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+                         PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+                         PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+       amdgpu_ring_write(kiq_ring,
+                       PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+       amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+       amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+       /* Shared exit: reached on success and on ring allocation failure */
+       spin_unlock(&adev->gfx.kiq.ring_lock);
+       release_queue(adev);
+
+       return r;
+}
+
+/*
+ * Dump the HQD registers CP_MQD_BASE_ADDR through CP_HQD_PQ_WPTR_HI of the
+ * selected compute queue.
+ *
+ * On success *dump points to a kmalloc()ed array of {address, value}
+ * pairs, where the address is the register index shifted left by 2, and
+ * *n_regs holds the number of pairs written. The buffer is not freed here;
+ * presumably the caller owns it - confirm against the callers.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ *
+ * NOTE: HQD_N_REGS and DUMP_REG are defined inside this function and stay
+ * defined afterwards; hqd_sdma_dump_v11() reuses DUMP_REG.
+ */
+static int hqd_dump_v11(struct amdgpu_device *adev,
+                       uint32_t pipe_id, uint32_t queue_id,
+                       uint32_t (**dump)[2], uint32_t *n_regs)
+{
+       uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do {                            \
+               if (WARN_ON_ONCE(i >= HQD_N_REGS))      \
+                       break;                          \
+               (*dump)[i][0] = (addr) << 2;            \
+               (*dump)[i++][1] = RREG32(addr);         \
+       } while (0)
+
+       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+       if (*dump == NULL)
+               return -ENOMEM;
+
+       acquire_queue(adev, pipe_id, queue_id);
+
+       for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+            reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+               DUMP_REG(reg);
+
+       release_queue(adev);
+
+       /* Warn if the register range no longer matches HQD_N_REGS */
+       WARN_ON_ONCE(i != HQD_N_REGS);
+       *n_regs = i;
+
+       return 0;
+}
+
+/*
+ * Load an SDMA MQD into the registers of the SDMA queue named by the MQD's
+ * sdma_engine_id/sdma_queue_id and enable its ring buffer.
+ *
+ * The ring buffer is first disabled and the queue is polled for up to
+ * 2000 ms until CONTEXT_STATUS reports idle. Doorbell, read pointer, write
+ * pointer, ring base and RPTR write-back address are then programmed and
+ * RB_ENABLE is set last.
+ *
+ * The write pointer is read from user memory at wptr (as a 64-bit value)
+ * through read_user_wptr(); if that fails, the saved read pointer from the
+ * MQD is used instead.
+ *
+ * Returns 0 on success or -ETIME if the queue does not go idle in time.
+ */
+static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
+                            uint32_t __user *wptr, struct mm_struct *mm)
+{
+       struct v11_sdma_mqd *m;
+       uint32_t sdma_rlc_reg_offset;
+       unsigned long end_jiffies;
+       uint32_t data;
+       uint64_t data64;
+       uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+       m = get_sdma_mqd(mqd);
+       sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+                                           m->sdma_queue_id);
+
+       /* Program RB_CNTL from the MQD with the ring buffer disabled */
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+               m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
+
+       end_jiffies = msecs_to_jiffies(2000) + jiffies;
+       while (true) {
+               data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+               if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+                       break;
+               if (time_after(jiffies, end_jiffies)) {
+                       pr_err("SDMA RLC not idle in %s\n", __func__);
+                       return -ETIME;
+               }
+               usleep_range(500, 1000);
+       }
+
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
+              m->sdmax_rlcx_doorbell_offset);
+
+       data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
+                            ENABLE, 1);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
+                               m->sdmax_rlcx_rb_rptr);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
+                               m->sdmax_rlcx_rb_rptr_hi);
+
+       /* The WPTR writes are bracketed by MINOR_PTR_UPDATE = 1 ... 0 */
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
+       if (read_user_wptr(mm, wptr64, data64)) {
+               WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+                      lower_32_bits(data64));
+               WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+                      upper_32_bits(data64));
+       } else {
+               /* User WPTR not readable: start with WPTR == saved RPTR */
+               WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+                      m->sdmax_rlcx_rb_rptr);
+               WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+                      m->sdmax_rlcx_rb_rptr_hi);
+       }
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
+
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
+                       m->sdmax_rlcx_rb_base_hi);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
+                       m->sdmax_rlcx_rb_rptr_addr_lo);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
+                       m->sdmax_rlcx_rb_rptr_addr_hi);
+
+       /* Enable the ring buffer only after everything else is programmed */
+       data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+                            RB_ENABLE, 1);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);
+
+       return 0;
+}
+
+/*
+ * Dump five register ranges of one SDMA queue.
+ *
+ * On success *dump points to a kmalloc()ed array of {address, value}
+ * pairs filled in through the DUMP_REG macro, and *n_regs holds the number
+ * of pairs written. The buffer is not freed here; presumably the caller
+ * owns it - confirm against the callers.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ *
+ * NOTE: relies on the DUMP_REG macro defined inside hqd_dump_v11() and
+ * redefines HQD_N_REGS, which DUMP_REG uses as its bound.
+ */
+static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
+                            uint32_t engine_id, uint32_t queue_id,
+                            uint32_t (**dump)[2], uint32_t *n_regs)
+{
+       uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+                       engine_id, queue_id);
+       uint32_t i = 0, reg;
+#undef HQD_N_REGS
+/* One term per loop below - presumably each range's register count; verify */
+#define HQD_N_REGS (7+11+1+12+12)
+
+       *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+       if (*dump == NULL)
+               return -ENOMEM;
+
+       for (reg = regSDMA0_QUEUE0_RB_CNTL;
+            reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
+               DUMP_REG(sdma_rlc_reg_offset + reg);
+       for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
+            reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
+               DUMP_REG(sdma_rlc_reg_offset + reg);
+       for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
+            reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
+               DUMP_REG(sdma_rlc_reg_offset + reg);
+       for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
+            reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
+               DUMP_REG(sdma_rlc_reg_offset + reg);
+       for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
+            reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
+               DUMP_REG(sdma_rlc_reg_offset + reg);
+
+       /* Warn if the dumped ranges no longer add up to HQD_N_REGS */
+       WARN_ON_ONCE(i != HQD_N_REGS);
+       *n_regs = i;
+
+       return 0;
+}
+
+/*
+ * Report whether the selected HQD is active and bound to the ring buffer
+ * at queue_address.
+ *
+ * Returns true only if CP_HQD_ACTIVE is non-zero and CP_HQD_PQ_BASE /
+ * CP_HQD_PQ_BASE_HI equal the low/high halves of queue_address >> 8.
+ */
+static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
+                               uint32_t pipe_id, uint32_t queue_id)
+{
+       bool occupied = false;
+
+       acquire_queue(adev, pipe_id, queue_id);
+
+       /* Registers are read in this order and only as far as needed */
+       if (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE)) &&
+           lower_32_bits(queue_address >> 8) ==
+                       RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
+           upper_32_bits(queue_address >> 8) ==
+                       RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
+               occupied = true;
+
+       release_queue(adev);
+
+       return occupied;
+}
+
+/*
+ * Report whether the SDMA queue named by the MQD's
+ * sdma_engine_id/sdma_queue_id has RB_ENABLE set in its RB_CNTL register.
+ */
+static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
+{
+       struct v11_sdma_mqd *sdma_mqd = get_sdma_mqd(mqd);
+       uint32_t reg_base;
+       uint32_t rb_cntl;
+
+       reg_base = get_sdma_rlc_reg_offset(adev, sdma_mqd->sdma_engine_id,
+                                          sdma_mqd->sdma_queue_id);
+       rb_cntl = RREG32(reg_base + regSDMA0_QUEUE0_RB_CNTL);
+
+       return (rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK) != 0;
+}
+
+/*
+ * Request dequeue of a compute HQD and wait for it to become inactive.
+ *
+ * mqd:        compute MQD of the queue; a cp_hqd_vmid of 0 additionally
+ *             clears RLC_CP_SCHEDULERS.scheduler1.
+ * reset_type: KFD_PREEMPT_TYPE_WAVEFRONT_RESET requests RESET_WAVES; every
+ *             other value requests DRAIN_PIPE.
+ * utimeout:   how long to wait, in milliseconds.
+ *
+ * Returns 0 once CP_HQD_ACTIVE reports the queue inactive, or -ETIME if it
+ * is still active when the timeout expires.
+ */
+static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
+                               enum kfd_preempt_type reset_type,
+                               unsigned int utimeout, uint32_t pipe_id,
+                               uint32_t queue_id)
+{
+       enum hqd_dequeue_request_type type;
+       unsigned long end_jiffies;
+       uint32_t temp;
+       int ret = 0;
+       struct v11_compute_mqd *m = get_mqd(mqd);
+
+       acquire_queue(adev, pipe_id, queue_id);
+
+       if (m->cp_hqd_vmid == 0)
+               WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+       switch (reset_type) {
+       case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+               type = DRAIN_PIPE;
+               break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+               type = RESET_WAVES;
+               break;
+       default:
+               type = DRAIN_PIPE;
+               break;
+       }
+
+       WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
+
+       /*
+        * msecs_to_jiffies() avoids the 32-bit overflow and the truncation
+        * to zero jiffies that an open-coded utimeout * HZ / 1000 can
+        * produce, and matches the conversion in hqd_sdma_load_v11().
+        */
+       end_jiffies = msecs_to_jiffies(utimeout) + jiffies;
+       while (true) {
+               temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+               if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+                       break;
+               if (time_after(jiffies, end_jiffies)) {
+                       pr_err("cp queue pipe %d queue %d preemption failed\n",
+                                       pipe_id, queue_id);
+                       ret = -ETIME;
+                       break;
+               }
+               usleep_range(500, 1000);
+       }
+
+       /* Single exit so the queue selection is always released */
+       release_queue(adev);
+       return ret;
+}
+
+/*
+ * Stop the SDMA queue named by the MQD's sdma_engine_id/sdma_queue_id and
+ * save its read pointer back into the MQD.
+ *
+ * RB_ENABLE is cleared and the queue is polled until CONTEXT_STATUS
+ * reports idle or utimeout (milliseconds) expires. Once idle, the doorbell
+ * register is cleared, RB_ENABLE is set again in RB_CNTL, and RB_RPTR /
+ * RB_RPTR_HI are stored in the MQD.
+ *
+ * Returns 0 on success or -ETIME if the queue does not go idle in time; in
+ * the timeout case nothing after the poll loop is executed.
+ */
+static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
+                               unsigned int utimeout)
+{
+       struct v11_sdma_mqd *m;
+       uint32_t sdma_rlc_reg_offset;
+       uint32_t temp;
+       /*
+        * msecs_to_jiffies() avoids the 32-bit overflow and the truncation
+        * to zero jiffies that an open-coded utimeout * HZ / 1000 can
+        * produce, and matches the conversion in hqd_sdma_load_v11().
+        */
+       unsigned long end_jiffies = msecs_to_jiffies(utimeout) + jiffies;
+
+       m = get_sdma_mqd(mqd);
+       sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+                                           m->sdma_queue_id);
+
+       temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+       temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
+
+       while (true) {
+               temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+               if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+                       break;
+               if (time_after(jiffies, end_jiffies)) {
+                       pr_err("SDMA RLC not idle in %s\n", __func__);
+                       return -ETIME;
+               }
+               usleep_range(500, 1000);
+       }
+
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
+       WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+               RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
+               SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+
+       /* Save the final read pointer so a later load can resume from it */
+       m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
+       m->sdmax_rlcx_rb_rptr_hi =
+               RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
+
+       return 0;
+}
+
+/*
+ * Issue an SQ command to the target selected by gfx_index_val.
+ *
+ * Under adev->grbm_idx_mutex: write gfx_index_val to GRBM_GFX_INDEX, write
+ * sq_cmd to SQ_CMD, then write GRBM_GFX_INDEX again with the instance, SA
+ * and SE broadcast-write fields set. Always returns 0.
+ */
+static int wave_control_execute_v11(struct amdgpu_device *adev,
+                                       uint32_t gfx_index_val,
+                                       uint32_t sq_cmd)
+{
+       uint32_t broadcast = 0;
+
+       /* Pure bit arithmetic, so it can be prepared before taking the lock */
+       broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+               INSTANCE_BROADCAST_WRITES, 1);
+       broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+               SA_BROADCAST_WRITES, 1);
+       broadcast = REG_SET_FIELD(broadcast, GRBM_GFX_INDEX,
+               SE_BROADCAST_WRITES, 1);
+
+       mutex_lock(&adev->grbm_idx_mutex);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+       WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), broadcast);
+       mutex_unlock(&adev->grbm_idx_mutex);
+
+       return 0;
+}
+
+/*
+ * Set the page table base for a KFD-owned VMID through the gfxhub
+ * setup_vm_pt_regs() callback. A VMID rejected by
+ * amdgpu_amdkfd_is_kfd_vmid() is logged and otherwise ignored.
+ */
+static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
+               uint32_t vmid, uint64_t page_table_base)
+{
+       if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+               /* SDMA is on gfxhub as well for gfx11 adapters */
+               adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid,
+                                                    page_table_base);
+               return;
+       }
+
+       pr_err("trying to set page table base for wrong VMID %u\n",
+              vmid);
+}
+
+/*
+ * kfd2kgd callback table for GFX11, wiring each hook to the *_v11
+ * implementation in this file. get_atc_vmid_pasid_mapping_info is
+ * explicitly left NULL.
+ */
+const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
+       /* VMID / memory setup */
+       .program_sh_mem_settings = program_sh_mem_settings_v11,
+       .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
+       .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+       .get_atc_vmid_pasid_mapping_info = NULL,
+
+       /* Interrupts and wave control */
+       .init_interrupts = init_interrupts_v11,
+       .wave_control_execute = wave_control_execute_v11,
+
+       /* Compute queues */
+       .hqd_load = hqd_load_v11,
+       .hiq_mqd_load = hiq_mqd_load_v11,
+       .hqd_dump = hqd_dump_v11,
+       .hqd_is_occupied = hqd_is_occupied_v11,
+       .hqd_destroy = hqd_destroy_v11,
+
+       /* SDMA queues */
+       .hqd_sdma_load = hqd_sdma_load_v11,
+       .hqd_sdma_dump = hqd_sdma_dump_v11,
+       .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
+       .hqd_sdma_destroy = hqd_sdma_destroy_v11,
+};
index 19cfbf9..e758c2a 100644 (file)
@@ -37,6 +37,7 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
                $(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
                $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
                $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
+               $(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
                $(AMDKFD_PATH)/kfd_kernel_queue.o \
                $(AMDKFD_PATH)/kfd_packet_manager.o \
                $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -47,10 +48,12 @@ AMDKFD_FILES        := $(AMDKFD_PATH)/kfd_module.o \
                $(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
                $(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
                $(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
+               $(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
                $(AMDKFD_PATH)/kfd_interrupt.o \
                $(AMDKFD_PATH)/kfd_events.o \
                $(AMDKFD_PATH)/cik_event_interrupt.o \
                $(AMDKFD_PATH)/kfd_int_process_v9.o \
+               $(AMDKFD_PATH)/kfd_int_process_v11.o \
                $(AMDKFD_PATH)/kfd_smi_events.o \
                $(AMDKFD_PATH)/kfd_crat.o
 
index 51e1c98..e9d79fa 100644 (file)
@@ -1315,6 +1315,8 @@ static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
        return 1;
 }
 
+#define KFD_MAX_CACHE_TYPES 6
+
 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
                                                   struct kfd_gpu_cache_info *pcache_info)
 {
@@ -1408,6 +1410,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
                        int *num_of_entries)
 {
        struct kfd_gpu_cache_info *pcache_info;
+       struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
        int num_of_cache_types = 0;
        int i, j, k;
        int ct = 0;
@@ -1516,6 +1519,11 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
                        pcache_info = yellow_carp_cache_info;
                        num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
                        break;
+               case IP_VERSION(11, 0, 0):
+                       pcache_info = cache_info;
+                       num_of_cache_types =
+                               kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+                       break;
                default:
                        return -EINVAL;
                }
index ed33e95..3d4faa6 100644 (file)
@@ -53,6 +53,7 @@ extern const struct kfd2kgd_calls arcturus_kfd2kgd;
 extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
 extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
 extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
                                unsigned int chunk_size);
@@ -60,7 +61,7 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
 static int kfd_resume(struct kfd_dev *kfd);
 
-static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
 {
        uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
 
@@ -85,6 +86,7 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
        case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
        case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
        case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+       case IP_VERSION(6, 0, 0):
                kfd->device_info.num_sdma_queues_per_engine = 8;
                break;
        default:
@@ -93,6 +95,17 @@ static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)
                        sdma_version);
                kfd->device_info.num_sdma_queues_per_engine = 8;
        }
+
+       switch (sdma_version) {
+       case IP_VERSION(6, 0, 0):
+               /* Reserve 1 for paging and 1 for gfx */
+               kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+               /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+               kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
+               break;
+       default:
+               break;
+       }
 }
 
 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
@@ -121,6 +134,9 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
        case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
                kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
                break;
+       case IP_VERSION(11, 0, 0):
+               kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+               break;
        default:
                dev_warn(kfd_device, "v9 event interrupt handler is set due to "
                        "mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
@@ -145,7 +161,7 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
                kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
                kfd->device_info.supports_cwsr = true;
 
-               kfd_device_info_set_sdma_queue_num(kfd);
+               kfd_device_info_set_sdma_info(kfd);
 
                kfd_device_info_set_event_interrupt_class(kfd);
 
@@ -346,6 +362,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        if (!vf)
                                f2g = &gfx_v10_3_kfd2kgd;
                        break;
+               case IP_VERSION(11, 0, 0):
+                       gfx_target_version = 110000;
+                       f2g = &gfx_v11_kfd2kgd;
+                       break;
                default:
                        break;
                }
index 1986722..e9c9a3a 100644 (file)
@@ -35,6 +35,7 @@
 #include "cik_regs.h"
 #include "kfd_kernel_queue.h"
 #include "amdgpu_amdkfd.h"
+#include "mes_api_def.h"
 
 /* Size of the per-pipe EOP queue */
 #define CIK_HPD_EOP_BYTES_LOG2 11
@@ -118,6 +119,11 @@ unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
                dqm->dev->device_info.num_sdma_queues_per_engine;
 }
 
+/* Fetch the reserved-SDMA-queue bitmap stored in the device info of @dqm. */
+static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
+{
+       struct kfd_dev *kdev = dqm->dev;
+
+       return kdev->device_info.reserved_sdma_queues_bitmap;
+}
+
 void program_sh_mem_settings(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd)
 {
@@ -129,6 +135,151 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
                                                qpd->sh_mem_bases);
 }
 
+/*
+ * Mark the hardware scheduler (HWS) as hung and, unless a GPU reset is
+ * already under way, schedule dqm->hw_exception_work so that a GPU reset
+ * gets issued.
+ */
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+       /* Record the hang unconditionally, even while a reset is running. */
+       dqm->is_hws_hang = true;
+
+       /*
+        * The hang may have been detected in the middle of a GPU reset;
+        * scheduling another reset would be redundant in that case.
+        */
+       if (dqm->is_resetting)
+               return;
+
+       schedule_work(&dqm->hw_exception_work);
+}
+
+/*
+ * Translate a KFD queue type into the matching MES queue type.
+ *
+ * Returns MES_QUEUE_TYPE_COMPUTE or MES_QUEUE_TYPE_SDMA for the two
+ * supported KFD types. Any other value triggers a WARN and yields -EINVAL.
+ */
+static int convert_to_mes_queue_type(int queue_type)
+{
+       if (queue_type == KFD_QUEUE_TYPE_COMPUTE)
+               return MES_QUEUE_TYPE_COMPUTE;
+
+       if (queue_type == KFD_QUEUE_TYPE_SDMA)
+               return MES_QUEUE_TYPE_SDMA;
+
+       WARN(1, "Invalid queue type %d", queue_type);
+       return -EINVAL;
+}
+
+/*
+ * Hand a user queue to the MES scheduler.
+ *
+ * Fills a mes_add_queue_input from the queue, its per-process device data
+ * (qpd/pdd) and the device VM limit, then invokes the MES add_hw_queue
+ * callback while holding the MES lock.
+ *
+ * Returns 0 on success, -EIO if the scheduler is already flagged as hung,
+ * -EINVAL if the queue type has no MES equivalent, or the error returned
+ * by add_hw_queue (in which case kfd_hws_hang() is called).
+ */
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+                        struct qcm_process_device *qpd)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+       struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+       struct mes_add_queue_input queue_input;
+       int queue_type;
+       int r;
+
+       if (dqm->is_hws_hang)
+               return -EIO;
+
+       /*
+        * Validate the converted type in a signed local before storing it.
+        * Testing the struct field for "< 0" would never fire if that field
+        * is unsigned, letting an invalid type reach the firmware.
+        */
+       queue_type = convert_to_mes_queue_type(q->properties.type);
+       if (queue_type < 0) {
+               pr_err("Queue type not supported with MES, queue:%d\n",
+                               q->properties.type);
+               return -EINVAL;
+       }
+
+       memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+       queue_input.process_id = qpd->pqm->process->pasid;
+       queue_input.page_table_base_addr = qpd->page_table_base;
+       queue_input.process_va_start = 0;
+       queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+       /* MES unit for quantum is 100ns */
+       queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
+       queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+       queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+       queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+       queue_input.inprocess_gang_priority = q->properties.priority;
+       queue_input.gang_global_priority_level =
+                                       AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+       queue_input.doorbell_offset = q->properties.doorbell_off;
+       queue_input.mqd_addr = q->gart_mqd_addr;
+       queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+       queue_input.paging = false;
+       queue_input.tba_addr = qpd->tba_addr;
+       queue_input.tma_addr = qpd->tma_addr;
+       queue_input.queue_type = queue_type;
+
+       /* GWS fields stay zero (from the memset) unless the queue uses GWS. */
+       if (q->gws) {
+               queue_input.gws_base = 0;
+               queue_input.gws_size = qpd->num_gws;
+       }
+
+       amdgpu_mes_lock(&adev->mes);
+       r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+       amdgpu_mes_unlock(&adev->mes);
+       if (r) {
+               pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+                       q->properties.doorbell_off);
+               pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+               kfd_hws_hang(dqm);
+       }
+
+       return r;
+}
+
+/*
+ * Ask the MES scheduler to drop a queue, identified by its doorbell offset
+ * and gang context address. @qpd is not used by this function.
+ *
+ * Returns 0 on success, -EIO if the scheduler is already flagged as hung,
+ * or the error from remove_hw_queue (after calling kfd_hws_hang()).
+ */
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+                       struct qcm_process_device *qpd)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+       struct mes_remove_queue_input queue_input;
+       int ret;
+
+       if (dqm->is_hws_hang)
+               return -EIO;
+
+       memset(&queue_input, 0x0, sizeof(queue_input));
+       queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+       queue_input.doorbell_offset = q->properties.doorbell_off;
+
+       amdgpu_mes_lock(&adev->mes);
+       ret = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+       amdgpu_mes_unlock(&adev->mes);
+       if (!ret)
+               return 0;
+
+       pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+               q->properties.doorbell_off);
+       pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+       kfd_hws_hang(dqm);
+
+       return ret;
+}
+
+/*
+ * Remove every active queue of every process on @dqm from MES.
+ *
+ * Stops at the first failure and returns that error; returns 0 when all
+ * active queues were removed (or there were none).
+ */
+static int remove_all_queues_mes(struct device_queue_manager *dqm)
+{
+       struct device_process_node *node;
+       struct queue *q;
+       int ret;
+
+       list_for_each_entry(node, &dqm->queues, list) {
+               struct qcm_process_device *qpd = node->qpd;
+
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       /* Inactive queues are skipped. */
+                       if (!q->properties.is_active)
+                               continue;
+
+                       ret = remove_queue_mes(dqm, q, qpd);
+                       if (!ret)
+                               continue;
+
+                       pr_err("%s: Failed to remove queue %d for dev %d",
+                               __func__,
+                               q->properties.queue_id,
+                               dqm->dev->id);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 static void increment_queue_count(struct device_queue_manager *dqm,
                                  struct qcm_process_device *qpd,
                                  struct queue *q)
@@ -659,6 +810,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
        struct mqd_manager *mqd_mgr;
        struct kfd_process_device *pdd;
        bool prev_active = false;
+       bool add_queue = false;
 
        dqm_lock(dqm);
        pdd = kfd_get_process_device_data(q->device, q->process);
@@ -674,8 +826,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
 
        /* Make sure the queue is unmapped before updating the MQD */
        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
-               retval = unmap_queues_cpsch(dqm,
-                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+               if (!dqm->dev->shared_resources.enable_mes)
+                       retval = unmap_queues_cpsch(dqm,
+                                                   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+               else if (prev_active)
+                       retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
                if (retval) {
                        pr_err("unmap queue failed\n");
                        goto out_unlock;
@@ -727,9 +883,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
                q->properties.is_gws = false;
        }
 
-       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-               retval = map_queues_cpsch(dqm);
-       else if (q->properties.is_active &&
+       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+               if (!dqm->dev->shared_resources.enable_mes)
+                       retval = map_queues_cpsch(dqm);
+               else if (add_queue)
+                       retval = add_queue_mes(dqm, q, &pdd->qpd);
+       } else if (q->properties.is_active &&
                 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
                  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
@@ -822,12 +981,22 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 
                q->properties.is_active = false;
                decrement_queue_count(dqm, qpd, q);
+
+               if (dqm->dev->shared_resources.enable_mes) {
+                       retval = remove_queue_mes(dqm, q, qpd);
+                       if (retval) {
+                               pr_err("Failed to evict queue %d\n",
+                                       q->properties.queue_id);
+                               goto out;
+                       }
+               }
        }
        pdd->last_evict_timestamp = get_jiffies_64();
-       retval = execute_queues_cpsch(dqm,
-                               qpd->is_debug ?
-                               KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
-                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+       if (!dqm->dev->shared_resources.enable_mes)
+               retval = execute_queues_cpsch(dqm,
+                                             qpd->is_debug ?
+                                             KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 
 out:
        dqm_unlock(dqm);
@@ -951,9 +1120,19 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 
                q->properties.is_active = true;
                increment_queue_count(dqm, &pdd->qpd, q);
+
+               if (dqm->dev->shared_resources.enable_mes) {
+                       retval = add_queue_mes(dqm, q, qpd);
+                       if (retval) {
+                               pr_err("Failed to restore queue %d\n",
+                                       q->properties.queue_id);
+                               goto out;
+                       }
+               }
        }
-       retval = execute_queues_cpsch(dqm,
-                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+       if (!dqm->dev->shared_resources.enable_mes)
+               retval = execute_queues_cpsch(dqm,
+                                             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
        qpd->evicted = 0;
        eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
        atomic64_add(eviction_duration, &pdd->evict_duration_counter);
@@ -1081,6 +1260,9 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
        memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
 
        dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
+       dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+       pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
        dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
        return 0;
@@ -1277,6 +1459,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
        else
                dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
 
+       dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
+       pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);
+
        num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
        if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
                dqm->xgmi_sdma_bitmap = ULLONG_MAX;
@@ -1295,14 +1480,16 @@ static int start_cpsch(struct device_queue_manager *dqm)
        retval = 0;
 
        dqm_lock(dqm);
-       retval = pm_init(&dqm->packet_mgr, dqm);
-       if (retval)
-               goto fail_packet_manager_init;
 
-       retval = set_sched_resources(dqm);
-       if (retval)
-               goto fail_set_sched_resources;
+       if (!dqm->dev->shared_resources.enable_mes) {
+               retval = pm_init(&dqm->packet_mgr, dqm);
+               if (retval)
+                       goto fail_packet_manager_init;
 
+               retval = set_sched_resources(dqm);
+               if (retval)
+                       goto fail_set_sched_resources;
+       }
        pr_debug("Allocating fence memory\n");
 
        /* allocate fence memory on the gart */
@@ -1321,13 +1508,15 @@ static int start_cpsch(struct device_queue_manager *dqm)
        dqm->is_hws_hang = false;
        dqm->is_resetting = false;
        dqm->sched_running = true;
-       execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+       if (!dqm->dev->shared_resources.enable_mes)
+               execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
        dqm_unlock(dqm);
 
        return 0;
 fail_allocate_vidmem:
 fail_set_sched_resources:
-       pm_uninit(&dqm->packet_mgr, false);
+       if (!dqm->dev->shared_resources.enable_mes)
+               pm_uninit(&dqm->packet_mgr, false);
 fail_packet_manager_init:
        dqm_unlock(dqm);
        return retval;
@@ -1343,15 +1532,22 @@ static int stop_cpsch(struct device_queue_manager *dqm)
                return 0;
        }
 
-       if (!dqm->is_hws_hang)
-               unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+       if (!dqm->is_hws_hang) {
+               if (!dqm->dev->shared_resources.enable_mes)
+                       unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+               else
+                       remove_all_queues_mes(dqm);
+       }
+
        hanging = dqm->is_hws_hang || dqm->is_resetting;
        dqm->sched_running = false;
 
-       pm_release_ib(&dqm->packet_mgr);
+       if (!dqm->dev->shared_resources.enable_mes)
+               pm_release_ib(&dqm->packet_mgr);
 
        kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
-       pm_uninit(&dqm->packet_mgr, hanging);
+       if (!dqm->dev->shared_resources.enable_mes)
+               pm_uninit(&dqm->packet_mgr, hanging);
        dqm_unlock(dqm);
 
        return 0;
@@ -1469,8 +1665,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        if (q->properties.is_active) {
                increment_queue_count(dqm, qpd, q);
 
-               execute_queues_cpsch(dqm,
-                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+               if (!dqm->dev->shared_resources.enable_mes) {
+                       retval = execute_queues_cpsch(dqm,
+                                            KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+               } else {
+                       retval = add_queue_mes(dqm, q, qpd);
+                       if (retval)
+                               goto cleanup_queue;
+               }
        }
 
        /*
@@ -1485,6 +1687,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
        dqm_unlock(dqm);
        return retval;
 
+cleanup_queue:
+       qpd->queue_count--;
+       list_del(&q->list);
+       if (q->properties.is_active)
+               decrement_queue_count(dqm, qpd, q);
+       mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+       dqm_unlock(dqm);
 out_deallocate_doorbell:
        deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
@@ -1572,13 +1781,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                                queue_preemption_timeout_ms);
        if (retval) {
                pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
-               dqm->is_hws_hang = true;
-               /* It's possible we're detecting a HWS hang in the
-                * middle of a GPU reset. No need to schedule another
-                * reset in this case.
-                */
-               if (!dqm->is_resetting)
-                       schedule_work(&dqm->hw_exception_work);
+               kfd_hws_hang(dqm);
                return retval;
        }
 
@@ -1683,11 +1886,15 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        list_del(&q->list);
        qpd->queue_count--;
        if (q->properties.is_active) {
-               decrement_queue_count(dqm, qpd, q);
-               retval = execute_queues_cpsch(dqm,
-                               KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-               if (retval == -ETIME)
-                       qpd->reset_wavefronts = true;
+               if (!dqm->dev->shared_resources.enable_mes) {
+                       decrement_queue_count(dqm, qpd, q);
+                       retval = execute_queues_cpsch(dqm,
+                                                     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+                       if (retval == -ETIME)
+                               qpd->reset_wavefronts = true;
+               } else {
+                       retval = remove_queue_mes(dqm, q, qpd);
+               }
        }
 
        /*
@@ -1941,9 +2148,17 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                        deallocate_sdma_queue(dqm, q);
 
-               if (q->properties.is_active)
+               if (q->properties.is_active) {
                        decrement_queue_count(dqm, qpd, q);
 
+                       if (dqm->dev->shared_resources.enable_mes) {
+                               retval = remove_queue_mes(dqm, q, qpd);
+                               if (retval)
+                                       pr_err("Failed to remove queue %d\n",
+                                               q->properties.queue_id);
+                       }
+               }
+
                dqm->total_queue_count--;
        }
 
@@ -1958,7 +2173,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                }
        }
 
-       retval = execute_queues_cpsch(dqm, filter, 0);
+       if (!dqm->dev->shared_resources.enable_mes)
+               retval = execute_queues_cpsch(dqm, filter, 0);
+
        if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
                pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
                dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
@@ -2133,7 +2350,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
                break;
 
        default:
-               if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+               if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+                       device_queue_manager_init_v11(&dqm->asic_ops);
+               else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
                        device_queue_manager_init_v10_navi10(&dqm->asic_ops);
                else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
                        device_queue_manager_init_v9(&dqm->asic_ops);
index 3d539d6..a537b9e 100644 (file)
@@ -35,6 +35,9 @@
 
 #define VMID_NUM 16
 
+#define KFD_MES_PROCESS_QUANTUM                100000  /* in 100ns MES units: 10ms */
+#define KFD_MES_GANG_QUANTUM           10000   /* in 100ns MES units: 1ms */
+
 struct device_process_node {
        struct qcm_process_device *qpd;
        struct list_head list;
@@ -267,6 +270,8 @@ void device_queue_manager_init_v9(
                struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_v10_navi10(
                struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v11(
+               struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
 unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
new file mode 100644 (file)
index 0000000..2e129da
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "soc21_enum.h"
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+                        struct qcm_process_device *qpd);
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+                           struct qcm_process_device *qpd);
+
+/*
+ * Populate @asic_ops with the v11 callbacks: update_qpd and init_sdma_vm
+ * come from this file, mqd_manager_init_v11 is defined elsewhere.
+ */
+void device_queue_manager_init_v11(
+       struct device_queue_manager_asic_ops *asic_ops)
+{
+       asic_ops->update_qpd = update_qpd_v11;
+       asic_ops->init_sdma_vm = init_sdma_vm_v11;
+       asic_ops->mqd_manager_init = mqd_manager_init_v11;
+}
+
+/*
+ * Build the SH_MEM_BASES value for a process device: bits 63:48 of the LDS
+ * base go into the SHARED_BASE field, bits 63:48 of the scratch base are
+ * OR-ed into the low bits.
+ */
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+       uint32_t bases;
+
+       bases = (uint32_t)(pdd->lds_base >> 48) <<
+               SH_MEM_BASES__SHARED_BASE__SHIFT;
+       bases |= (uint32_t)(pdd->scratch_base >> 48);
+
+       return bases;
+}
+
+/*
+ * Refresh the SH_MEM settings cached in @qpd.
+ *
+ * sh_mem_config and the APE1 fields are written only while sh_mem_config
+ * is still zero; sh_mem_bases is recomputed on every call. @dqm is not
+ * used. Always returns 0.
+ */
+static int update_qpd_v11(struct device_queue_manager *dqm,
+                        struct qcm_process_device *qpd)
+{
+       struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+       /* A zero sh_mem_config is treated as "not configured yet". */
+       if (!qpd->sh_mem_config) {
+               qpd->sh_mem_config =
+                       (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+                               SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+                       (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+               qpd->sh_mem_ape1_base = 0;
+               qpd->sh_mem_ape1_limit = 0;
+       }
+
+       qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+       pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+       return 0;
+}
+
+/* Clear the queue's sdma_vm_addr; @dqm and @qpd are not used. */
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+                           struct qcm_process_device *qpd)
+{
+       /* Not needed on SDMAv4 onwards any more */
+       q->properties.sdma_vm_addr = 0;
+}
index 5401b63..cb3d2cc 100644 (file)
 /* # of doorbell bytes allocated for each process. */
 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
 {
-       return roundup(kfd->device_info.doorbell_size *
-                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
-                       PAGE_SIZE);
+       if (!kfd->shared_resources.enable_mes)
+               return roundup(kfd->device_info.doorbell_size *
+                               KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+                               PAGE_SIZE);
+       else
+               return amdgpu_mes_doorbell_process_slice(
+                                       (struct amdgpu_device *)kfd->adev);
 }
 
 /* Doorbell calculations for device init. */
@@ -61,6 +65,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
        size_t doorbell_aperture_size;
        size_t doorbell_process_limit;
 
+       /*
+        * With MES enabled, just set the doorbell base as it is needed
+        * to calculate doorbell physical address.
+        */
+       if (kfd->shared_resources.enable_mes) {
+               kfd->doorbell_base =
+                       kfd->shared_resources.doorbell_physical_address;
+               return 0;
+       }
+
        /*
         * We start with calculations in bytes because the input data might
         * only be byte-aligned.
@@ -237,10 +251,16 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
         * the process's doorbells. The offset returned is in dword
         * units regardless of the ASIC-dependent doorbell size.
         */
-       return kfd->doorbell_base_dw_offset +
-               pdd->doorbell_index
-               * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
-               doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
+       if (!kfd->shared_resources.enable_mes)
+               return kfd->doorbell_base_dw_offset +
+                       pdd->doorbell_index
+                       * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+                       doorbell_id *
+                       kfd->device_info.doorbell_size / sizeof(u32);
+       else
+               return amdgpu_mes_get_doorbell_dw_offset_in_bar(
+                               (struct amdgpu_device *)kfd->adev,
+                               pdd->doorbell_index, doorbell_id);
 }
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
@@ -261,8 +281,16 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 
 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
 {
-       int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
-                               GFP_KERNEL);
+       int r = 0;
+
+       if (!kfd->shared_resources.enable_mes)
+               r = ida_simple_get(&kfd->doorbell_ida, 1,
+                                  kfd->max_doorbell_slices, GFP_KERNEL);
+       else
+               r = amdgpu_mes_alloc_process_doorbells(
+                               (struct amdgpu_device *)kfd->adev,
+                               doorbell_index);
+
        if (r > 0)
                *doorbell_index = r;
 
@@ -271,6 +299,12 @@ int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_inde
 
 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
 {
-       if (doorbell_index)
-               ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+       if (doorbell_index) {
+               if (!kfd->shared_resources.enable_mes)
+                       ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+               else
+                       amdgpu_mes_free_process_doorbells(
+                                       (struct amdgpu_device *)kfd->adev,
+                                       doorbell_index);
+       }
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
new file mode 100644 (file)
index 0000000..c3919aa
--- /dev/null
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "kfd_smi_events.h"
+
+/*
+ * GFX11 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[26]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[24:0]
+ * 25-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG.  The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0])
+ * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0])
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors.  These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING {
+       SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
+       SQ_INTERRUPT_WORD_ENCODING_INST,
+       SQ_INTERRUPT_WORD_ENCODING_ERROR,
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE {
+       SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0,
+       SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST,
+       SQ_INTERRUPT_ERROR_TYPE_MEMVIOL,
+       SQ_INTERRUPT_ERROR_TYPE_EDC_FED,
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT             0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT                      1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL__SHIFT    2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP__SHIFT            3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP__SHIFT            4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW__SHIFT                5
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW__SHIFT                6
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW__SHIFT           7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT   8
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT                 6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK               0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK                                0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL_MASK      0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP_MASK              0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP_MASK              0x00000010
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW_MASK          0x00000020
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW_MASK          0x00000040
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW_MASK             0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK     0x00000100
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK                   0x000000c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT     0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID__SHIFT    25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT     26
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT  27
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT  0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT   2
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK       0x00ffffff /* [23:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID_MASK      0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK       0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK    0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK    0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK     0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK   0x000000c0 /* [39:38] */
+
+/* SQ_INTERRUPT_WORD_ERROR_CTXID */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT  0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT    21
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID__SHIFT   25
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT    26
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT 27
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT  2
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT        6
+
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK    0x001fffff /* [20:0] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK      0x01e00000 /* [24:21] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID_MASK     0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK      0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK   0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK   0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK    0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK  0x000000c0 /* [39:38] */
+
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CTXID0_TRAP_CODE_SHIFT     10
+#define KFD_CTXID0_TRAP_CODE_MASK      0xfffc00 /* [23:10] */
+#define KFD_CTXID0_CP_BAD_OP_ECODE_MASK        0x3ffffff /* [25:0] */
+#define KFD_CTXID0_DOORBELL_ID_MASK    0x0003ff /* [9:0] */
+
+#define KFD_CTXID0_TRAP_CODE(ctxid0)           (((ctxid0) &  \
+                               KFD_CTXID0_TRAP_CODE_MASK) >> \
+                               KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0)     (((ctxid0) &        \
+                               KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \
+                               KFD_CTXID0_TRAP_CODE_SHIFT) /* yields bits [25:10]; reuses the trap-code shift */
+#define KFD_CTXID0_DOORBELL_ID(ctxid0)         ((ctxid0) & \
+                               KFD_CTXID0_DOORBELL_ID_MASK)
+
+/*
+ * Log the status bits of an "auto" encoded SQ interrupt at debug level.
+ * Only context_id0 is decoded here; context_id1 is not read.
+ */
+static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+{
+       uint32_t ttrace, wlt, ttrace_buf_full, reg_tms, cmd_tms;
+       uint32_t host_cmd_ovf, host_reg_ovf, immed_ovf, ttrace_utc_err;
+
+       ttrace = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE);
+       wlt = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT);
+       ttrace_buf_full = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL);
+       reg_tms = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP);
+       cmd_tms = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP);
+       host_cmd_ovf = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW);
+       host_reg_ovf = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW);
+       immed_ovf = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW);
+       ttrace_utc_err = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR);
+
+       pr_debug(
+               "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
+               ttrace, wlt, ttrace_buf_full, reg_tms, cmd_tms,
+               host_cmd_ovf, host_reg_ovf, immed_ovf, ttrace_utc_err);
+}
+
+/*
+ * Log the payload and wave coordinates of a "wave" (instruction) encoded
+ * SQ interrupt at debug level.  DATA/SH_ID/PRIV/WAVE_ID are taken from
+ * context_id0, SIMD_ID/WGP_ID from context_id1.
+ */
+static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+{
+       uint32_t data, sh, priv, wave_id, simd_id, wgp_id;
+
+       data = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA);
+       sh = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID);
+       priv = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+       wave_id = REG_GET_FIELD(context_id0,
+                       SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID);
+       simd_id = REG_GET_FIELD(context_id1,
+                       SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID);
+       wgp_id = REG_GET_FIELD(context_id1,
+                       SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID);
+
+       pr_debug(
+               "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+               data, sh, priv, wave_id, simd_id, wgp_id);
+}
+
+/*
+ * Log the details of an "error" encoded SQ interrupt at warning level.
+ *
+ * DETAIL/TYPE/SH_ID/PRIV/WAVE_ID live in context_id0.  SIMD_ID and WGP_ID
+ * are CTXID1 fields, so they must be extracted from context_id1; decoding
+ * them from context_id0 would report unrelated low bits of the error detail.
+ */
+static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+{
+       pr_warn(
+               "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+               REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
+               REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
+               REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
+               REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
+               REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
+               REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
+               REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+}
+
+/*
+ * Handle a RAS poison-consumption interrupt for the process owning @pasid.
+ *
+ * The process is flagged as poisoned exactly once: the flag is claimed with
+ * an atomic compare-and-exchange so that two handlers racing on the same
+ * process cannot both proceed past the check.  For SQ interrupts the
+ * process queues are reset through dqm->ops.reset_queues when that hook is
+ * provided; for every other source (including SDMA ECC) no reset is
+ * attempted and ret keeps its -EINVAL default.  A poison-consumed event is
+ * then signalled and the RAS handler is invoked, requesting a GPU reset
+ * only when ret is non-zero.
+ */
+static void event_interrupt_poison_consumption_v11(struct kfd_dev *dev,
+                               uint16_t pasid, uint16_t source_id)
+{
+       int old_poison, ret = -EINVAL;
+       struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+       if (!p)
+               return;
+
+       /* all queues of a process will be unmapped in one time */
+       old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+       kfd_unref_process(p);
+       if (old_poison)
+               return;
+
+       switch (source_id) {
+       case SOC15_INTSRC_SQ_INTERRUPT_MSG:
+               if (dev->dqm->ops.reset_queues)
+                       ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+               break;
+       case SOC21_INTSRC_SDMA_ECC:
+       default:
+               break;
+       }
+
+       kfd_signal_poison_consumed_event(dev, pasid);
+
+       /*
+        * resetting queue passes, do page retirement without gpu reset
+        * resetting queue fails, fallback to gpu reset solution
+        */
+       if (!ret)
+               amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+       else
+               amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+}
+
+/*
+ * Top-half filter for GFX11 interrupts.
+ *
+ * Returns true when the IH ring entry should be forwarded to the work
+ * queue handler, false when it should be dropped.  Entries whose VMID is
+ * outside the KFD range, CP end-of-pipe entries carrying the MES queue
+ * fence flag, and entries without a PASID are dropped.  patched_ihre and
+ * patched_flag are not touched.
+ */
+static bool event_interrupt_isr_v11(struct kfd_dev *dev,
+                                       const uint32_t *ih_ring_entry,
+                                       uint32_t *patched_ihre,
+                                       bool *patched_flag)
+{
+       const uint32_t *data = ih_ring_entry;
+       uint16_t source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+       uint16_t client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+       uint16_t vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+       uint16_t pasid;
+       uint32_t context_id0;
+
+       /*
+        * Only handle interrupts from KFD VMIDs.
+        * (A KFD_IRQ_IS_FENCE(client_id, source_id) exemption is not
+        * enabled here.)
+        */
+       if (vmid < dev->vm_info.first_vmid_kfd)
+               return false;
+       if (vmid > dev->vm_info.last_vmid_kfd)
+               return false;
+
+       pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+       context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+       if (source_id == SOC15_INTSRC_CP_END_OF_PIPE &&
+           (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
+               return false;
+
+       pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+                client_id, source_id, vmid, pasid);
+       pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+                data[0], data[1], data[2], data[3],
+                data[4], data[5], data[6], data[7]);
+
+       /* If there is no valid PASID, it's likely a bug */
+       if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+               return false;
+
+       /*
+        * Interrupt types we care about: various signals and faults.
+        * They will be forwarded to a work queue.
+        */
+       if (client_id == SOC21_IH_CLIENTID_VMC)
+               return true;
+       if (client_id == SOC21_IH_CLIENTID_GFX &&
+           source_id == UTCL2_1_0__SRCID__FAULT)
+               return true;
+
+       /* KFD_IRQ_IS_FENCE(client_id, source_id) is not forwarded here. */
+       return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+               source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+               source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+               source_id == SOC21_INTSRC_SDMA_TRAP;
+}
+
+/*
+ * Bottom-half handler for GFX11 interrupts forwarded by the ISR filter.
+ *
+ * VM faults (VMC client, or GFX client with the UTCL2 fault source) are
+ * decoded and reported through kfd_smi_event_update_vmfault().  For the
+ * GRBM_CP and GFX clients, CP end-of-pipe, SDMA trap and SQ message
+ * interrupts are turned into KFD event signals, while SDMA ECC and
+ * SQ errors other than illegal-instruction/memviol go to the poison
+ * consumption path.  Anything else is ignored.
+ */
+static void event_interrupt_wq_v11(struct kfd_dev *dev,
+                                       const uint32_t *ih_ring_entry)
+{
+       uint16_t source_id, client_id, ring_id, pasid, vmid;
+       uint32_t context_id0, context_id1;
+       uint8_t sq_int_enc, sq_int_errtype, sq_int_priv;
+       struct kfd_vm_fault_info info = {0};
+       struct kfd_hsa_memory_exception_data exception_data;
+       bool is_vm_fault;
+
+       source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+       client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+       ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+       pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+       vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+       context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+       context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+       /* VMC, UTCL2 */
+       is_vm_fault = client_id == SOC21_IH_CLIENTID_VMC ||
+                     (client_id == SOC21_IH_CLIENTID_GFX &&
+                      source_id == UTCL2_1_0__SRCID__FAULT);
+
+       if (is_vm_fault) {
+               info.vmid = vmid;
+               info.mc_id = client_id;
+               info.page_addr = ih_ring_entry[4] |
+                       (uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+               info.prot_valid = ring_id & 0x08;
+               info.prot_read  = ring_id & 0x10;
+               info.prot_write = ring_id & 0x20;
+
+               memset(&exception_data, 0, sizeof(exception_data));
+               exception_data.gpu_id = dev->id;
+               exception_data.va = (info.page_addr) << PAGE_SHIFT;
+               exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+               exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+               exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+               exception_data.failure.imprecise = 0;
+
+               /*kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
+                                             KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+                                             &exception_data, sizeof(exception_data));*/
+               kfd_smi_event_update_vmfault(dev, pasid);
+               return;
+       }
+
+       /* GRBM, SDMA, SE, PMM */
+       if (client_id != SOC21_IH_CLIENTID_GRBM_CP &&
+           client_id != SOC21_IH_CLIENTID_GFX) {
+               /* else if (KFD_IRQ_IS_FENCE(client_id, source_id))
+                       kfd_process_close_interrupt_drain(pasid); */
+               return;
+       }
+
+       /* CP */
+       if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
+               kfd_signal_event_interrupt(pasid, context_id0, 32);
+               return;
+       }
+       /*else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+               kfd_set_dbg_ev_from_interrupt(dev, pasid,
+                       KFD_CTXID0_DOORBELL_ID(context_id0),
+                       KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
+                       NULL, 0);*/
+
+       /* SDMA */
+       if (source_id == SOC21_INTSRC_SDMA_TRAP) {
+               kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+               return;
+       }
+       if (source_id == SOC21_INTSRC_SDMA_ECC) {
+               event_interrupt_poison_consumption_v11(dev, pasid, source_id);
+               return;
+       }
+
+       /* SQ */
+       if (source_id != SOC15_INTSRC_SQ_INTERRUPT_MSG)
+               return;
+
+       sq_int_enc = REG_GET_FIELD(context_id1,
+                       SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+       switch (sq_int_enc) {
+       case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+               print_sq_intr_info_auto(context_id0, context_id1);
+               break;
+       case SQ_INTERRUPT_WORD_ENCODING_INST:
+               print_sq_intr_info_inst(context_id0, context_id1);
+               sq_int_priv = REG_GET_FIELD(context_id0,
+                               SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+               if (sq_int_priv /*&& (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+                               KFD_CTXID0_DOORBELL_ID(context_id0),
+                               KFD_CTXID0_TRAP_CODE(context_id0),
+                               NULL, 0))*/)
+                       return;
+               break;
+       case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+               print_sq_intr_info_error(context_id0, context_id1);
+               sq_int_errtype = REG_GET_FIELD(context_id0,
+                               SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
+               if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+                   sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+                       event_interrupt_poison_consumption_v11(
+                                       dev, pasid, source_id);
+                       return;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /* Low 24 bits of context_id0 carry the event payload. */
+       kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+}
+
+/* Interrupt callbacks for GFX11: ISR-time filter plus work-queue handler. */
+const struct kfd_event_interrupt_class event_interrupt_class_v11 = {
+       .interrupt_wq   = event_interrupt_wq_v11,
+       .interrupt_isr  = event_interrupt_isr_v11,
+};
index f27fe02..0b75a37 100644 (file)
@@ -90,7 +90,7 @@ enum SQ_INTERRUPT_ERROR_TYPE {
 #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
 
-static void event_interrupt_poison_consumption(struct kfd_dev *dev,
+static void event_interrupt_poison_consumption_v9(struct kfd_dev *dev,
                                uint16_t pasid, uint16_t client_id)
 {
        int old_poison, ret = -EINVAL;
@@ -316,7 +316,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
                                        sq_intr_err);
                                if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
                                        sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
-                                       event_interrupt_poison_consumption(dev, pasid, client_id);
+                                       event_interrupt_poison_consumption_v9(dev, pasid, client_id);
                                        return;
                                }
                                break;
@@ -337,7 +337,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
                if (source_id == SOC15_INTSRC_SDMA_TRAP) {
                        kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
                } else if (source_id == SOC15_INTSRC_SDMA_ECC) {
-                       event_interrupt_poison_consumption(dev, pasid, client_id);
+                       event_interrupt_poison_consumption_v9(dev, pasid, client_id);
                        return;
                }
        } else if (client_id == SOC15_IH_CLIENTID_VMC ||
@@ -348,7 +348,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
 
                if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
                    amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
-                       event_interrupt_poison_consumption(dev, pasid, client_id);
+                       event_interrupt_poison_consumption_v9(dev, pasid, client_id);
                        return;
                }
 
index 5ac2092..49a283b 100644 (file)
@@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 {
        struct kfd_cu_info cu_info;
        uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
-       int i, se, sh, cu;
+       int i, se, sh, cu, cu_bitmap_sh_mul;
 
        amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
 
@@ -120,6 +120,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
                        cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
                return;
        }
+
+       cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
+                           KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+
        /* Count active CUs per SH.
         *
         * Some CUs in an SH may be disabled.   HW expects disabled CUs to be
@@ -129,10 +133,12 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
         * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
         *
         * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+        * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
         */
        for (se = 0; se < cu_info.num_shader_engines; se++)
                for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
-                       cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]);
+                       cu_per_sh[se][sh] = hweight32(
+                               cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
 
        /* Symmetrically map cu_mask to all SEs & SHs:
         * se_mask programs up to 2 SH in the upper and lower 16 bits.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
new file mode 100644 (file)
index 0000000..4e0387f
--- /dev/null
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v11_structs.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+/* Reinterpret an opaque MQD pointer as a v11 compute MQD. */
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+       struct v11_compute_mqd *compute_mqd = mqd;
+
+       return compute_mqd;
+}
+
+/* Reinterpret an opaque MQD pointer as a v11 SDMA MQD. */
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+       struct v11_sdma_mqd *sdma_mqd = mqd;
+
+       return sdma_mqd;
+}
+
+/*
+ * Program the per-shader-engine static thread management masks of a
+ * compute MQD from a user CU mask.
+ *
+ * Nothing is changed unless @minfo is present, its update_flag equals
+ * UPDATE_FLAG_CU_MASK and it carries a CU mask pointer.
+ */
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+                          struct mqd_update_info *minfo)
+{
+       uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+       struct v11_compute_mqd *m;
+
+       if (!minfo)
+               return;
+       if (minfo->update_flag != UPDATE_FLAG_CU_MASK)
+               return;
+       if (!minfo->cu_mask.ptr)
+               return;
+
+       mqd_symmetrically_map_cu_mask(mm, minfo->cu_mask.ptr,
+                                     minfo->cu_mask.count, se_mask);
+
+       m = get_mqd(mqd);
+       m->compute_static_thread_mgmt_se0 = se_mask[0];
+       m->compute_static_thread_mgmt_se1 = se_mask[1];
+       m->compute_static_thread_mgmt_se2 = se_mask[2];
+       m->compute_static_thread_mgmt_se3 = se_mask[3];
+       m->compute_static_thread_mgmt_se4 = se_mask[4];
+       m->compute_static_thread_mgmt_se5 = se_mask[5];
+       m->compute_static_thread_mgmt_se6 = se_mask[6];
+       m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+       pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+                m->compute_static_thread_mgmt_se0,
+                m->compute_static_thread_mgmt_se1,
+                m->compute_static_thread_mgmt_se2,
+                m->compute_static_thread_mgmt_se3,
+                m->compute_static_thread_mgmt_se4,
+                m->compute_static_thread_mgmt_se5,
+                m->compute_static_thread_mgmt_se6,
+                m->compute_static_thread_mgmt_se7);
+}
+
+/*
+ * Copy the queue priority into the MQD and derive the pipe priority from
+ * it through pipe_priority_map.
+ */
+static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q)
+{
+       m->cp_hqd_queue_priority = q->priority;
+       m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+}
+
+/*
+ * Allocate GTT sub-allocator memory for a compute MQD.
+ *
+ * Returns the memory object, or NULL when kfd_gtt_sa_allocate() reports
+ * failure.  @q is not used.
+ */
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+               struct queue_properties *q)
+{
+       struct kfd_mem_obj *mqd_mem_obj;
+       /*
+        * MES writes to areas beyond the MQD size, so allocate a full
+        * PAGE_SIZE for the MQD if MES is enabled.
+        */
+       int size = kfd->shared_resources.enable_mes ?
+                       PAGE_SIZE : sizeof(struct v11_compute_mqd);
+
+       return kfd_gtt_sa_allocate(kfd, size, &mqd_mem_obj) ?
+                       NULL : mqd_mem_obj;
+}
+
+/*
+ * Initialise a freshly allocated compute MQD.
+ *
+ * The backing memory (a whole page when MES is enabled, otherwise the size
+ * of struct v11_compute_mqd) is cleared, the static defaults are filled
+ * in, and the queue-dependent fields are then programmed through
+ * mm->update_mqd().
+ *
+ * @mqd receives the CPU pointer of the MQD; @gart_addr, when non-NULL,
+ * receives its GPU address.
+ */
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+                       struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+                       struct queue_properties *q)
+{
+       uint64_t addr;
+       struct v11_compute_mqd *m;
+       int size;
+
+       m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+       addr = mqd_mem_obj->gpu_addr;
+
+       if (mm->dev->shared_resources.enable_mes)
+               size = PAGE_SIZE;
+       else
+               size = sizeof(struct v11_compute_mqd);
+
+       memset(m, 0, size);
+
+       m->header = 0xC0310800;
+       m->compute_pipelinestat_enable = 1;
+
+       /*
+        * Default to every CU enabled on every shader engine.  All eight
+        * per-SE masks are set, matching the set that update_cu_mask()
+        * programs; leaving se4..se7 at zero would keep those engines
+        * masked off until a CU mask is explicitly applied.
+        */
+       m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+       m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+       m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+                       0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+       m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+       m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+       m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+       m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+                       1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+                       1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+       if (q->format == KFD_QUEUE_FORMAT_AQL) {
+               m->cp_hqd_aql_control =
+                       1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+       }
+
+       /* With CWSR enabled, describe the context save/restore area. */
+       if (mm->dev->cwsr_enabled) {
+               m->cp_hqd_persistent_state |=
+                       (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+               m->cp_hqd_ctx_save_base_addr_lo =
+                       lower_32_bits(q->ctx_save_restore_area_address);
+               m->cp_hqd_ctx_save_base_addr_hi =
+                       upper_32_bits(q->ctx_save_restore_area_address);
+               m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+               m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+               m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+               m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+       }
+
+       *mqd = m;
+       if (gart_addr)
+               *gart_addr = addr;
+       mm->update_mqd(mm, m, q, NULL);
+}
+
+/*
+ * Load a compute MQD into the hardware queue slot (@pipe_id, @queue_id)
+ * through the kfd2kgd hqd_load hook and return that hook's result.
+ */
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+                       uint32_t pipe_id, uint32_t queue_id,
+                       struct queue_properties *p, struct mm_struct *mms)
+{
+       struct kfd_dev *kfd = mm->dev;
+       /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+       uint32_t wptr_shift = 0;
+
+       if (p->format == KFD_QUEUE_FORMAT_AQL)
+               wptr_shift = 4;
+
+       return kfd->kfd2kgd->hqd_load(kfd->adev, mqd, pipe_id, queue_id,
+                                     (uint32_t __user *)p->write_ptr,
+                                     wptr_shift, 0, mms);
+}
+
+/*
+ * Load the HIQ MQD through the kfd2kgd hiq_mqd_load hook, passing the
+ * queue's doorbell offset.  @mms is not used.
+ */
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+                           uint32_t pipe_id, uint32_t queue_id,
+                           struct queue_properties *p, struct mm_struct *mms)
+{
+       struct kfd_dev *kfd = mm->dev;
+
+       return kfd->kfd2kgd->hiq_mqd_load(kfd->adev, mqd, pipe_id, queue_id,
+                                         p->doorbell_off);
+}
+
+/*
+ * Refresh the queue-dependent fields of a compute MQD from @q: ring base
+ * and size, read/write pointer addresses, doorbell, EOP buffer, VMID,
+ * AQL-specific control bits, CU mask (when @minfo requests it) and
+ * priority.  q->is_active is recomputed at the end.
+ */
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+                      struct queue_properties *q,
+                      struct mqd_update_info *minfo)
+{
+       struct v11_compute_mqd *m = get_mqd(mqd);
+       const uint64_t pq_base = (uint64_t)q->queue_address >> 8;
+       const uint64_t rptr_addr = (uint64_t)q->read_ptr;
+       const uint64_t wptr_addr = (uint64_t)q->write_ptr;
+
+       /* Ring size is encoded as log2(size in dwords) - 1. */
+       m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+       m->cp_hqd_pq_control |=
+                       ffs(q->queue_size / sizeof(unsigned int)) - 2;
+       pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+       m->cp_hqd_pq_base_lo = lower_32_bits(pq_base);
+       m->cp_hqd_pq_base_hi = upper_32_bits(pq_base);
+
+       m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits(rptr_addr);
+       m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits(rptr_addr);
+       m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits(wptr_addr);
+       m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wptr_addr);
+
+       m->cp_hqd_pq_doorbell_control = q->doorbell_off <<
+                       CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+       pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+                m->cp_hqd_pq_doorbell_control);
+
+       m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+       /*
+        * HW does not clamp this field correctly. Maximum EOP queue size
+        * is constrained by per-SE EOP done signal count, which is 8-bit.
+        * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+        * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+        * is safe, giving a maximum field value of 0xA.
+        */
+       m->cp_hqd_eop_control = min(0xA,
+               ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+       m->cp_hqd_eop_base_addr_lo =
+                       lower_32_bits(q->eop_ring_buffer_address >> 8);
+       m->cp_hqd_eop_base_addr_hi =
+                       upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+       m->cp_hqd_iq_timer = 0;
+       m->cp_hqd_vmid = q->vmid;
+
+       if (q->format == KFD_QUEUE_FORMAT_AQL) {
+               /* GC 10 removed WPP_CLAMP from PQ Control */
+               m->cp_hqd_pq_control |=
+                       CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+                       2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+                       1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+               m->cp_hqd_pq_doorbell_control |=
+                       1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+       }
+
+       if (mm->dev->cwsr_enabled)
+               m->cp_hqd_ctx_save_control = 0;
+
+       update_cu_mask(mm, mqd, minfo);
+       set_priority(m, q);
+
+       q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+/* Return the queue_doorbell_id0 field of a v11 compute MQD. */
+static uint32_t read_doorbell_id(void *mqd)
+{
+       const struct v11_compute_mqd *compute_mqd = mqd;
+
+       return compute_mqd->queue_doorbell_id0;
+}
+
+/*
+ * Forward a destroy request for a compute queue (preempt type, timeout and
+ * pipe/queue slot) to the kfd2kgd hqd_destroy() callback and return its
+ * result.
+ */
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+                      enum kfd_preempt_type type,
+                      unsigned int timeout, uint32_t pipe_id,
+                      uint32_t queue_id)
+{
+       struct kfd_dev *kdev = mm->dev;
+
+       return kdev->kfd2kgd->hqd_destroy(kdev->adev, mqd, type, timeout,
+                                         pipe_id, queue_id);
+}
+
+/*
+ * Release the memory object backing an MQD via kfd_gtt_sa_free().
+ * The mqd pointer itself is not used.
+ */
+static void free_mqd(struct mqd_manager *mm, void *mqd,
+                    struct kfd_mem_obj *mqd_mem_obj)
+{
+       struct kfd_dev *kdev = mm->dev;
+
+       kfd_gtt_sa_free(kdev, mqd_mem_obj);
+}
+
+/*
+ * Pass the queue address and pipe/queue slot to the kfd2kgd
+ * hqd_is_occupied() callback and return its answer. The mqd pointer is
+ * not used.
+ */
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+                       uint64_t queue_address, uint32_t pipe_id,
+                       uint32_t queue_id)
+{
+       struct kfd_dev *kdev = mm->dev;
+
+       return kdev->kfd2kgd->hqd_is_occupied(kdev->adev, queue_address,
+                                             pipe_id, queue_id);
+}
+
+/*
+ * Report how much of the control stack and of the wave save area a queue
+ * has used, based on the sizes and offsets recorded in its MQD.
+ *
+ * Nothing is written to @ctl_stack: for GFXv11 the control stack is part
+ * of the context save area, which is already accessible to user mode, so
+ * no header or stack contents are copied out here.
+ *
+ * Always returns 0.
+ */
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+                         void __user *ctl_stack,
+                         u32 *ctl_stack_used_size,
+                         u32 *save_area_used_size)
+{
+       struct v11_compute_mqd *m = get_mqd(mqd);
+
+       /*
+        * Both regions start at m->cp_hqd_cntl_stack_size: the control stack
+        * is written backwards down to m->cp_hqd_cntl_stack_offset, while the
+        * workgroup context data is written forwards up to
+        * m->cp_hqd_wg_state_offset.
+        */
+       *ctl_stack_used_size =
+               m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset;
+       *save_area_used_size =
+               m->cp_hqd_wg_state_offset - m->cp_hqd_cntl_stack_size;
+
+       return 0;
+}
+
+/*
+ * Initialise an MQD exactly like init_mqd() does, then additionally set
+ * the PRIV_STATE and KMD_QUEUE bits in its PQ control word.
+ */
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+                       struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+                       struct queue_properties *q)
+{
+       struct v11_compute_mqd *hiq_mqd;
+
+       /* Common compute-queue setup first; this also fills in *mqd. */
+       init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+       hiq_mqd = get_mqd(*mqd);
+
+       hiq_mqd->cp_hqd_pq_control |=
+               (1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT) |
+               (1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT);
+}
+
+/*
+ * Set up an SDMA MQD in the CPU mapping of mqd_mem_obj: zero it, publish
+ * it through *mqd (and its GPU address through *gart_addr when that
+ * pointer is non-NULL), then let the manager's update_mqd hook populate
+ * it from the queue properties.
+ */
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+               struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+               struct queue_properties *q)
+{
+       struct v11_sdma_mqd *sdma_mqd =
+               (struct v11_sdma_mqd *)mqd_mem_obj->cpu_ptr;
+
+       memset(sdma_mqd, 0, sizeof(*sdma_mqd));
+       *mqd = sdma_mqd;
+
+       if (gart_addr != NULL)
+               *gart_addr = mqd_mem_obj->gpu_addr;
+
+       mm->update_mqd(mm, sdma_mqd, q, NULL);
+}
+
+/*
+ * Load an SDMA MQD through the kfd2kgd hqd_sdma_load() callback, passing
+ * the queue's write pointer as a user-space pointer together with mms.
+ * pipe_id and queue_id are not used.
+ */
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+               uint32_t pipe_id, uint32_t queue_id,
+               struct queue_properties *p, struct mm_struct *mms)
+{
+       struct kfd_dev *kdev = mm->dev;
+       uint32_t __user *wptr = (uint32_t __user *)p->write_ptr;
+
+       return kdev->kfd2kgd->hqd_sdma_load(kdev->adev, mqd, wptr, mms);
+}
+
+/*
+ * Value written to sdmax_rlcx_dummy_reg on every SDMA MQD update.
+ * NOTE(review): the hardware meaning of 0xf is not visible here - confirm.
+ */
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+/*
+ * Refresh a v11 SDMA MQD from the current queue properties: ring-buffer
+ * control (size, VMID, rptr writeback), ring base, read-pointer address,
+ * doorbell offset and the engine/queue ids, then recompute q->is_active.
+ * @minfo is not used for SDMA queues.
+ */
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+               struct queue_properties *q,
+               struct mqd_update_info *minfo)
+{
+       struct v11_sdma_mqd *m;
+
+       m = get_sdma_mqd(mqd);
+       /*
+        * RB_SIZE takes ffs() - 1 of the ring size in dwords.
+        * NOTE(review): presumably expects a power-of-two, non-zero
+        * queue_size - confirm against the callers.
+        */
+       m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+               << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+               q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+               1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+               6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+       /* The ring base address is stored shifted right by 8 bits. */
+       m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+       m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+       m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+       m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+       m->sdmax_rlcx_doorbell_offset =
+               q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+       /* Engine and queue ids are kept in the MQD alongside the registers. */
+       m->sdma_engine_id = q->sdma_engine_id;
+       m->sdma_queue_id = q->sdma_queue_id;
+       m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+       /* Re-evaluate the active state now that the properties are applied. */
+       q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+/*
+ * Tear down an SDMA queue through the kfd2kgd hqd_sdma_destroy() callback
+ * and return its result.
+ *
+ * The preempt type is ignored because there is only one way to preempt an
+ * SDMA queue; pipe_id and queue_id are not used either.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+               enum kfd_preempt_type type,
+               unsigned int timeout, uint32_t pipe_id,
+               uint32_t queue_id)
+{
+       struct kfd_dev *kdev = mm->dev;
+
+       return kdev->kfd2kgd->hqd_sdma_destroy(kdev->adev, mqd, timeout);
+}
+
+/*
+ * Pass the SDMA MQD to the kfd2kgd hqd_sdma_is_occupied() callback and
+ * return its answer. queue_address, pipe_id and queue_id are not used.
+ */
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+               uint64_t queue_address, uint32_t pipe_id,
+               uint32_t queue_id)
+{
+       struct kfd_dev *kdev = mm->dev;
+
+       return kdev->kfd2kgd->hqd_sdma_is_occupied(kdev->adev, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/*
+ * debugfs helper: hex-dump sizeof(struct v11_compute_mqd) bytes starting
+ * at @data into the seq_file, with row size 32 and group size 4.
+ * Always returns 0.
+ */
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+       const size_t mqd_bytes = sizeof(struct v11_compute_mqd);
+
+       seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+                    false);
+       return 0;
+}
+
+/*
+ * debugfs helper: hex-dump sizeof(struct v11_sdma_mqd) bytes starting at
+ * @data into the seq_file, with row size 32 and group size 4.
+ * Always returns 0.
+ */
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+       const size_t mqd_bytes = sizeof(struct v11_sdma_mqd);
+
+       seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4, data, mqd_bytes,
+                    false);
+       return 0;
+}
+
+#endif
+
+/*
+ * Allocate an mqd_manager for @dev and wire up the v11 callbacks that
+ * match the requested MQD @type (CP, HIQ, DIQ or SDMA).
+ *
+ * Returns the new manager, or NULL when @type is out of range (after a
+ * WARN), when the allocation fails, or when @type is not handled by the
+ * switch below (the manager is freed again in that case).
+ */
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+               struct kfd_dev *dev)
+{
+       struct mqd_manager *mqd;
+
+       if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+               return NULL;
+
+       /* Zero-allocated, so callbacks not assigned below stay NULL. */
+       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+       if (!mqd)
+               return NULL;
+
+       mqd->dev = dev;
+
+       switch (type) {
+       /* Compute queues: the only type here that sets get_wave_state. */
+       case KFD_MQD_TYPE_CP:
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               mqd->allocate_mqd = allocate_mqd;
+               mqd->init_mqd = init_mqd;
+               mqd->free_mqd = free_mqd;
+               mqd->load_mqd = load_mqd;
+               mqd->update_mqd = update_mqd;
+               mqd->destroy_mqd = destroy_mqd;
+               mqd->is_occupied = is_occupied;
+               mqd->mqd_size = sizeof(struct v11_compute_mqd);
+               mqd->get_wave_state = get_wave_state;
+#if defined(CONFIG_DEBUG_FS)
+               mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               break;
+       /*
+        * HIQ: compute MQD layout with HIQ-specific allocate/init/free/load
+        * hooks; the only type here that sets read_doorbell_id.
+        */
+       case KFD_MQD_TYPE_HIQ:
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               mqd->allocate_mqd = allocate_hiq_mqd;
+               mqd->init_mqd = init_mqd_hiq;
+               mqd->free_mqd = free_mqd_hiq_sdma;
+               mqd->load_mqd = hiq_load_mqd_kiq;
+               mqd->update_mqd = update_mqd;
+               mqd->destroy_mqd = destroy_mqd;
+               mqd->is_occupied = is_occupied;
+               mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+               mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+               mqd->read_doorbell_id = read_doorbell_id;
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               break;
+       /* DIQ: same hooks as CP except that it is initialised like the HIQ. */
+       case KFD_MQD_TYPE_DIQ:
+               mqd->allocate_mqd = allocate_mqd;
+               mqd->init_mqd = init_mqd_hiq;
+               mqd->free_mqd = free_mqd;
+               mqd->load_mqd = load_mqd;
+               mqd->update_mqd = update_mqd;
+               mqd->destroy_mqd = destroy_mqd;
+               mqd->is_occupied = is_occupied;
+               mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+               mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+               break;
+       /* SDMA: uses the SDMA MQD layout and the *_sdma hooks throughout. */
+       case KFD_MQD_TYPE_SDMA:
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               mqd->allocate_mqd = allocate_sdma_mqd;
+               mqd->init_mqd = init_mqd_sdma;
+               mqd->free_mqd = free_mqd_hiq_sdma;
+               mqd->load_mqd = load_mqd_sdma;
+               mqd->update_mqd = update_mqd_sdma;
+               mqd->destroy_mqd = destroy_mqd_sdma;
+               mqd->is_occupied = is_occupied_sdma;
+               mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+#if defined(CONFIG_DEBUG_FS)
+               mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+               pr_debug("%s@%i\n", __func__, __LINE__);
+               break;
+       /* Any other in-range type is unsupported: undo the allocation. */
+       default:
+               kfree(mqd);
+               return NULL;
+       }
+
+       return mqd;
+}
index b9ca957..91e5fa5 100644 (file)
@@ -228,6 +228,8 @@ struct kfd_device_info {
        bool needs_pci_atomics;
        uint32_t no_atomic_fw_version;
        unsigned int num_sdma_queues_per_engine;
+       unsigned int num_reserved_sdma_queues_per_engine;
+       uint64_t reserved_sdma_queues_bitmap;
 };
 
 unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
@@ -564,6 +566,10 @@ struct queue {
 
        /* procfs */
        struct kobject kobj;
+
+       void *gang_ctx_bo;
+       uint64_t gang_ctx_gpu_addr;
+       void *gang_ctx_cpu_ptr;
 };
 
 enum KFD_MQD_TYPE {
@@ -779,6 +785,10 @@ struct kfd_process_device {
         * checkpointed node to refer to this device.
         */
        uint32_t user_gpu_id;
+
+       void *proc_ctx_bo;
+       uint64_t proc_ctx_gpu_addr;
+       void *proc_ctx_cpu_ptr;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1170,6 +1180,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
                struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
                struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+               struct kfd_dev *dev);
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -1292,6 +1304,7 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
 
 extern const struct kfd_device_global_init_class device_global_init_class_cik;
 
index cb8f4a4..e3d64ec 100644 (file)
@@ -1041,6 +1041,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
                kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
 
+               if (pdd->dev->shared_resources.enable_mes)
+                       amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+                                                  pdd->proc_ctx_bo);
                /*
                 * before destroying pdd, make sure to report availability
                 * for auto suspend
@@ -1484,6 +1487,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                                                        struct kfd_process *p)
 {
        struct kfd_process_device *pdd = NULL;
+       int retval = 0;
 
        if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
                return NULL;
@@ -1516,6 +1520,21 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        pdd->sdma_past_activity_counter = 0;
        pdd->user_gpu_id = dev->id;
        atomic64_set(&pdd->evict_duration_counter, 0);
+
+       if (dev->shared_resources.enable_mes) {
+               retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+                                               AMDGPU_MES_PROC_CTX_SIZE,
+                                               &pdd->proc_ctx_bo,
+                                               &pdd->proc_ctx_gpu_addr,
+                                               &pdd->proc_ctx_cpu_ptr,
+                                               false);
+               if (retval) {
+                       pr_err("failed to allocate process context bo\n");
+                       goto err_free_pdd;
+               }
+               memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+       }
+
        p->pdds[p->n_pdds++] = pdd;
 
        /* Init idr used for memory handle translation */
index 4f58e67..dc00484 100644 (file)
@@ -198,8 +198,26 @@ static int init_user_queue(struct process_queue_manager *pqm,
        (*q)->device = dev;
        (*q)->process = pqm->process;
 
+       if (dev->shared_resources.enable_mes) {
+               retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+                                               AMDGPU_MES_GANG_CTX_SIZE,
+                                               &(*q)->gang_ctx_bo,
+                                               &(*q)->gang_ctx_gpu_addr,
+                                               &(*q)->gang_ctx_cpu_ptr,
+                                               false);
+               if (retval) {
+                       pr_err("failed to allocate gang context bo\n");
+                       goto cleanup;
+               }
+               memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+       }
+
        pr_debug("PQM After init queue");
+       return 0;
 
+cleanup:
+       if (dev->shared_resources.enable_mes)
+               uninit_queue(*q);
        return retval;
 }
 
@@ -418,6 +436,9 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
                        pdd->qpd.num_gws = 0;
                }
 
+               if (dev->shared_resources.enable_mes)
+                       amdgpu_amdkfd_free_gtt_mem(dev->adev,
+                                                  pqn->q->gang_ctx_bo);
                uninit_queue(pqn->q);
        }
 
index 05089f1..2e20f54 100644 (file)
@@ -1412,7 +1412,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
        dev->node_props.num_sdma_xgmi_engines =
                                        kfd_get_num_xgmi_sdma_engines(gpu);
        dev->node_props.num_sdma_queues_per_engine =
-                               gpu->device_info.num_sdma_queues_per_engine;
+                               gpu->device_info.num_sdma_queues_per_engine -
+                               gpu->device_info.num_reserved_sdma_queues_per_engine;
        dev->node_props.num_gws = (dev->gpu->gws &&
                dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
                dev->gpu->adev->gds.gws_size : 0;
index daf3c44..e3f3b0b 100644 (file)
@@ -31,7 +31,8 @@
 #define SOC15_INTSRC_VMC_FAULT         0
 #define SOC15_INTSRC_SDMA_TRAP         224
 #define SOC15_INTSRC_SDMA_ECC          220
-
+#define SOC21_INTSRC_SDMA_TRAP         49
+#define SOC21_INTSRC_SDMA_ECC          62
 
 #define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
 #define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
index 2f60cf3..e85364d 100644 (file)
@@ -152,6 +152,7 @@ struct kgd2kfd_shared_resources {
        /* Minor device number of the render node */
        int drm_render_minor;
 
+       bool enable_mes;
 };
 
 struct tile_config {