drm/amdkfd: CWSR with software scheduler
authorMukul Joshi <mukul.joshi@amd.com>
Mon, 9 Aug 2021 19:52:50 +0000 (15:52 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 11 Aug 2021 21:19:54 +0000 (17:19 -0400)
This patch adds support to program trap handler settings
when loading driver with software scheduler (sched_policy=2).

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index 491acdf..960acf6 100644 (file)
@@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+               type = SAVE_WAVES;
+               break;
        default:
                type = DRAIN_PIPE;
                break;
@@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
        adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings(struct kgd_dev *kgd,
+               uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+       lock_srbm(kgd, 0, 0, 0, vmid);
+
+       /*
+        * Program TBA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+                       lower_32_bits(tba_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+                       upper_32_bits(tba_addr >> 8) |
+                       (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+       /*
+        * Program TMA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+                       lower_32_bits(tma_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+                       upper_32_bits(tma_addr >> 8));
+
+       unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
        .program_sh_mem_settings = kgd_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
        .get_atc_vmid_pasid_mapping_info =
                        get_atc_vmid_pasid_mapping_info,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
+       .program_trap_handler_settings = program_trap_handler_settings,
 };
index 1f5620c..dac0d75 100644 (file)
@@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd,
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+               type = SAVE_WAVES;
+               break;
        default:
                type = DRAIN_PIPE;
                break;
@@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
        adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
 }
 
+static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
+                       uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+       lock_srbm(kgd, 0, 0, 0, vmid);
+
+       /*
+        * Program TBA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+                       lower_32_bits(tba_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+                       upper_32_bits(tba_addr >> 8) |
+                       (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+       /*
+        * Program TMA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+                       lower_32_bits(tma_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+                        upper_32_bits(tma_addr >> 8));
+
+       unlock_srbm(kgd);
+}
+
 #if 0
 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
                                uint32_t trap_debug_wave_launch_mode,
@@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
        .address_watch_get_offset = address_watch_get_offset_v10_3,
        .get_atc_vmid_pasid_mapping_info = NULL,
        .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+       .program_trap_handler_settings = program_trap_handler_settings_v10_3,
 #if 0
        .enable_debug_trap = enable_debug_trap_v10_3,
        .disable_debug_trap = disable_debug_trap_v10_3,
index ed3014f..1542449 100644 (file)
@@ -42,7 +42,8 @@
 enum hqd_dequeue_request_type {
        NO_ACTION = 0,
        DRAIN_PIPE,
-       RESET_WAVES
+       RESET_WAVES,
+       SAVE_WAVES
 };
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
                type = RESET_WAVES;
                break;
+       case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+               type = SAVE_WAVES;
+               break;
        default:
                type = DRAIN_PIPE;
                break;
@@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,
                                adev->gfx.cu_info.max_waves_per_simd;
 }
 
+static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
+                        uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+       lock_srbm(kgd, 0, 0, 0, vmid);
+
+       /*
+        * Program TBA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+                        lower_32_bits(tba_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+                        upper_32_bits(tba_addr >> 8));
+
+       /*
+        * Program TMA registers
+        */
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+                       lower_32_bits(tma_addr >> 8));
+       WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+                       upper_32_bits(tma_addr >> 8));
+
+       unlock_srbm(kgd);
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
        .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
        .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
                        kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
        .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+       .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
index a972ef5..f8fce9d 100644 (file)
@@ -211,6 +211,15 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
        WARN_ON(!old);
 }
 
+static void program_trap_handler_settings(struct device_queue_manager *dqm,
+                               struct qcm_process_device *qpd)
+{
+       if (dqm->dev->kfd2kgd->program_trap_handler_settings)
+               dqm->dev->kfd2kgd->program_trap_handler_settings(
+                                               dqm->dev->kgd, qpd->vmid,
+                                               qpd->tba_addr, qpd->tma_addr);
+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
                        struct qcm_process_device *qpd,
                        struct queue *q)
@@ -241,6 +250,10 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 
        program_sh_mem_settings(dqm, qpd);
 
+       if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
+           dqm->dev->cwsr_enabled)
+               program_trap_handler_settings(dqm, qpd);
+
        /* qpd->page_table_base is set earlier when register_process()
         * is called, i.e. when the first queue is created.
         */
@@ -582,7 +595,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
                }
 
                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-                               KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+                               (dqm->dev->cwsr_enabled?
+                                KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+                               KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
                if (retval) {
                        pr_err("destroy mqd failed\n");
@@ -675,7 +690,9 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                        continue;
 
                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
-                               KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
+                               (dqm->dev->cwsr_enabled?
+                                KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+                               KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
                if (retval && !ret)
                        /* Return the first error, but keep going to
index 95c656d..c84bd7b 100644 (file)
@@ -44,6 +44,7 @@ struct kgd_mem;
 enum kfd_preempt_type {
        KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
        KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+       KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
 };
 
 struct kfd_vm_fault_info {
@@ -298,6 +299,8 @@ struct kfd2kgd_calls {
 
        void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt,
                        int *max_waves_per_cu);
+       void (*program_trap_handler_settings)(struct kgd_dev *kgd,
+                       uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
 };
 
 #endif /* KGD_KFD_INTERFACE_H_INCLUDED */