Don't use the HWS if it's known to be hanging. In a reset also
don't try to destroy the HIQ because that may hang on SRIOV if the
KIQ is unresponsive.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Tested-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-by: shaoyunl <shaoyun.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
static int stop_nocpsch(struct device_queue_manager *dqm)
{
if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
static int stop_nocpsch(struct device_queue_manager *dqm)
{
if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, false);
dqm->sched_running = false;
return 0;
dqm->sched_running = false;
return 0;
return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, false);
fail_packet_manager_init:
return retval;
}
static int stop_cpsch(struct device_queue_manager *dqm)
{
fail_packet_manager_init:
return retval;
}
static int stop_cpsch(struct device_queue_manager *dqm)
{
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ if (!dqm->is_hws_hang)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
dqm_unlock(dqm);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
dqm->sched_running = false;
dqm_unlock(dqm);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, hanging);
}
/* Uninitialize a kernel queue and free all its memory usages. */
}
/* Uninitialize a kernel queue and free all its memory usages. */
-static void kq_uninitialize(struct kernel_queue *kq)
+static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
-void kernel_queue_uninit(struct kernel_queue *kq)
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
+ kq_uninitialize(kq, hanging);
-void pm_uninit(struct packet_manager *pm)
+void pm_uninit(struct packet_manager *pm, bool hanging)
{
mutex_destroy(&pm->lock);
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue);
+ kernel_queue_uninit(pm->priv_queue, hanging);
}
int pm_send_set_resources(struct packet_manager *pm,
}
int pm_send_set_resources(struct packet_manager *pm,
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq);
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
/* Process Queue Manager */
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
/* Process Queue Manager */
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm);
+void pm_uninit(struct packet_manager *pm, bool hanging);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq);
+ kernel_queue_uninit(pqn->kq, false);