From 44e21ea6dcd5bc9cf7a1c4e3281779987c070373 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko
Date: Tue, 5 Nov 2024 21:45:57 +0100
Subject: [PATCH] drm/xe/guc: Don't treat GuC generic CAT error as protocol error

GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any
context. We shouldn't treat that as G2H protocol error that would
justify a GT reset, as it may happen due to some VF activity.

Signed-off-by: Michal Wajdeczko
Cc: Matthew Brost
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20241105204557.1991-1-michal.wajdeczko@intel.com
---
 drivers/gpu/drm/xe/xe_guc_fwif.h   | 1 +
 drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 08ffe59f22fa..057153f89b30 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -17,6 +17,7 @@
 #define G2H_LEN_DW_TLB_INVALIDATE	3
 
 #define GUC_ID_MAX			65535
+#define GUC_ID_UNKNOWN			0xffffffff
 
 #define GUC_CONTEXT_DISABLE		0
 #define GUC_CONTEXT_ENABLE		1
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 37d4ad8e4f5c..9e0f86f3778b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -2021,6 +2021,15 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
 
 	guc_id = msg[0];
 
+	if (guc_id == GUC_ID_UNKNOWN) {
+		/*
+		 * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF
+		 * context. In such case only PF will be notified about that fault.
+		 */
+		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
+		return 0;
+	}
+
 	q = g2h_exec_queue_lookup(guc, guc_id);
 	if (unlikely(!q))
 		return -EPROTO;
-- 
2.20.1