From 35ba8850b673050b71b17c1421079031f4fda319 Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Thu, 7 Jul 2022 20:57:34 +0800 Subject: [PATCH] drm/amdgpu/mes: fix mes submission in atomic context For some cases (accessing registers, unmap legacy queue), it needs access mes in atomic context. Use spinlock to protect agaist mes ring buffer race condition. Signed-off-by: Jack Xiao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 16 +------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 55 +++++++++------------ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 63 ++++++++++--------------- 4 files changed, 50 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index ca44aa123a1e..db2138b7a858 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -150,6 +150,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); + spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; @@ -794,8 +795,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct mes_unmap_legacy_queue_input queue_input; int r; - amdgpu_mes_lock(&adev->mes); - queue_input.action = action; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; @@ -808,7 +807,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, if (r) DRM_ERROR("failed to unmap legacy queue\n"); - amdgpu_mes_unlock(&adev->mes); return r; } @@ -817,8 +815,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) struct mes_misc_op_input op_input; int r, val = 0; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_READ_REG; op_input.read_reg.reg_offset = reg; op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; @@ -835,7 +831,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) val = *(adev->mes.read_val_ptr); error: - amdgpu_mes_unlock(&adev->mes); return val; } @@ -845,8 +840,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRITE_REG; op_input.write_reg.reg_offset = reg; op_input.write_reg.reg_value = val; @@ -862,7 +855,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, DRM_ERROR("failed to write reg (0x%x)\n", reg); error: - amdgpu_mes_unlock(&adev->mes); return r; } @@ -873,8 +865,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; op_input.wrm_reg.reg0 = reg0; op_input.wrm_reg.reg1 = reg1; @@ -892,7 +882,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, DRM_ERROR("failed to reg_write_reg_wait\n"); error: - amdgpu_mes_unlock(&adev->mes); return r; } @@ -902,8 +891,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, struct mes_misc_op_input op_input; int r; - amdgpu_mes_lock(&adev->mes); - op_input.op = MES_MISC_OP_WRM_REG_WAIT; op_input.wrm_reg.reg0 = reg; op_input.wrm_reg.ref = val; @@ -920,7 +907,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, DRM_ERROR("failed to reg_write_reg_wait\n"); error: - amdgpu_mes_unlock(&adev->mes); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 3cec87e023b3..1133c361e7a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -83,6 +83,7 @@ struct amdgpu_mes { uint64_t default_gang_quantum; struct amdgpu_ring ring; + spinlock_t ring_lock; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 18a129f36215..75cf92d38d41 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -87,21 +87,32 @@ static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = { }; static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -166,13 +177,9 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, @@ -189,13 +196,9 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -227,13 +230,9 @@ static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, @@ -258,13 +257,9 @@ static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) @@ -313,13 +308,9 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } static const struct amdgpu_mes_funcs mes_v10_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 6b07a8b23d67..b78e09910c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -86,21 +86,32 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { }; static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -173,13 +184,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, @@ -196,13 +203,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -233,13 +236,9 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, @@ -264,13 +263,9 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } static int mes_v11_0_misc_op(struct amdgpu_mes *mes, @@ -316,13 +311,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - misc_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - misc_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &misc_pkt, sizeof(misc_pkt)); + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); } static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) @@ -372,13 +363,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } static const struct amdgpu_mes_funcs mes_v11_0_funcs = { -- 2.20.1