1 // SPDX-License-Identifier: MIT
3 * Copyright © 2021 Intel Corporation
6 #include "xe_execlist.h"
8 #include <drm/drm_managed.h>
10 #include "instructions/xe_mi_commands.h"
11 #include "regs/xe_engine_regs.h"
12 #include "regs/xe_gpu_commands.h"
13 #include "regs/xe_gt_regs.h"
14 #include "regs/xe_lrc_layout.h"
15 #include "xe_assert.h"
17 #include "xe_device.h"
18 #include "xe_exec_queue.h"
20 #include "xe_hw_fence.h"
22 #include "xe_macros.h"
25 #include "xe_ring_ops_types.h"
26 #include "xe_sched_job.h"
/*
 * NOTE(review): the drm_sched_init() call in execlist_exec_queue_init()
 * passes XE_SCHED_HANG_LIMIT rather than this constant - confirm whether
 * this define is still needed.
 */
28 #define XE_EXECLIST_HANG_LIMIT 1
/*
 * Bit offset and width of the software context ID field that __start_lrc()
 * ORs into the LRC descriptor. The XEHP_ variants are the ones selected
 * when GRAPHICS_VERx100() >= 1250; the plain variants are used otherwise.
 * The masks themselves are built with GENMASK_ULL() from width + shift - 1.
 */
30 #define SW_CTX_ID_SHIFT 37
31 #define SW_CTX_ID_WIDTH 11
32 #define XEHP_SW_CTX_ID_SHIFT 39
33 #define XEHP_SW_CTX_ID_WIDTH 16
36 GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
39 #define XEHP_SW_CTX_ID \
40 GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
/*
 * Hand @lrc to the hardware engine @hwe through the execlist submit-queue
 * registers.
 *
 * Starts from the descriptor returned by xe_lrc_descriptor() and ORs ctx_id
 * into its software context ID field, publishes the current ring tail in the
 * context image, then programs the engine registers and writes the
 * descriptor as two 32-bit halves.
 */
44 static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
47 struct xe_gt *gt = hwe->gt;
48 struct xe_device *xe = gt_to_xe(gt);
51 lrc_desc = xe_lrc_descriptor(lrc);
/*
 * The context ID field uses the XEHP layout when GRAPHICS_VERx100() >= 1250
 * and the older layout otherwise; the asserts check that ctx_id fits the
 * chosen field before it is merged in.
 */
53 if (GRAPHICS_VERx100(xe) >= 1250) {
54 xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
55 lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
57 xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
58 lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
/* Compute-class engines additionally get RCU_MODE_CCS_ENABLE set in RCU_MODE. */
61 if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
62 xe_mmio_write32(hwe->gt, RCU_MODE,
63 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
/*
 * Store the ring tail in the context image and remember it in old_tail;
 * xe_execlist_is_idle() compares the two to detect newly queued work.
 */
65 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
66 lrc->ring.old_tail = lrc->ring.tail;
69 * Make sure the context image is complete before we submit it to HW.
71 * Ostensibly, writes (including the WCB) should be flushed prior to
72 * an uncached write such as our mmio register access, the empirical
73 * evidence (esp. on Braswell) suggests that the WC write into memory
74 * may not be visible to the HW prior to the completion of the UC
75 * register write and that we may begin execution from the context
76 * before its image is complete leading to invalid PD chasing.
/*
 * Program the GGTT address of the engine's hwsp object into RING_HWS_PGA and
 * read the register back, then set GFX_DISABLE_LEGACY_MODE in RING_MODE.
 */
80 xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
81 xe_bo_ggtt_addr(hwe->hwsp));
82 xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
83 xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
84 _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
/* Descriptor goes in as low then high 32 bits, followed by the control write. */
86 xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
87 lower_32_bits(lrc_desc));
88 xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
89 upper_32_bits(lrc_desc));
90 xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
/*
 * Start exec queue @exl on @port: submit exl->q->lrc with the port's current
 * context ID via __start_lrc() and record @exl as the running queue.
 *
 * Runs under xe_execlist_port_assert_held().
 */
94 static void __xe_execlist_port_start(struct xe_execlist_port *port,
95 struct xe_execlist_exec_queue *exl)
97 struct xe_device *xe = gt_to_xe(port->hwe->gt);
/* Largest context ID that fits the descriptor field on this platform. */
98 int max_ctx = FIELD_MAX(SW_CTX_ID);
100 if (GRAPHICS_VERx100(xe) >= 1250)
101 max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
103 xe_execlist_port_assert_held(port);
/* Taken when a different queue takes over the port or @exl has never run. */
105 if (port->running_exl != exl || !exl->has_run) {
108 /* 0 is reserved for the kernel context */
109 if (port->last_ctx_id > max_ctx)
110 port->last_ctx_id = 1;
113 __start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
114 port->running_exl = exl;
/*
 * Switch @port over to the engine's kernel context: append two MI_NOOP
 * dwords to the kernel LRC ring, submit that LRC with context ID 0 and clear
 * port->running_exl.
 *
 * Runs under xe_execlist_port_assert_held().
 */
118 static void __xe_execlist_port_idle(struct xe_execlist_port *port)
120 u32 noop[2] = { MI_NOOP, MI_NOOP };
122 xe_execlist_port_assert_held(port);
/* Checks whether any exec queue is currently recorded as running. */
124 if (!port->running_exl)
127 xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
128 __start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
129 port->running_exl = NULL;
/*
 * Report whether @exl has nothing new to submit: true when the ring tail of
 * its LRC equals old_tail, the value __start_lrc() recorded at the last
 * submission.
 */
132 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
134 struct xe_lrc *lrc = exl->q->lrc;
136 return lrc->ring.tail == lrc->ring.old_tail;
/*
 * Walk the per-priority active lists of @port from the highest index down
 * and hand queues to __xe_execlist_port_start(); __xe_execlist_port_idle()
 * is the fallback call at the end of the function.
 *
 * Runs under xe_execlist_port_assert_held().
 */
139 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
141 struct xe_execlist_exec_queue *exl = NULL;
144 xe_execlist_port_assert_held(port);
/* NOTE(review): the i >= 0 condition needs i to be a signed type - confirm. */
146 for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
147 while (!list_empty(&port->active[i])) {
/* Pop the head of this priority list. */
148 exl = list_first_entry(&port->active[i],
149 struct xe_execlist_exec_queue,
151 list_del(&exl->active_link);
/* A queue with no pending work is marked as having no active priority. */
153 if (xe_execlist_is_idle(exl)) {
154 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
/* Otherwise re-queue it at the tail of the same list and start it. */
158 list_add_tail(&exl->active_link, &port->active[i]);
159 __xe_execlist_port_start(port, exl);
164 __xe_execlist_port_idle(port);
/*
 * Read the execlist status of @hwe as one 64-bit value: the
 * RING_EXECLIST_STATUS_LO register supplies bits 31:0 and
 * RING_EXECLIST_STATUS_HI supplies bits 63:32. The two halves are read with
 * separate 32-bit MMIO accesses, low half first.
 */
167 static u64 read_execlist_status(struct xe_hw_engine *hwe)
169 struct xe_gt *gt = hwe->gt;
172 lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
173 hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
175 return lo | (u64)hi << 32;
/*
 * Interrupt work for @port, to be run under xe_execlist_port_assert_held():
 * samples the engine's execlist status and moves on to
 * __xe_execlist_port_start_next_active().
 */
178 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
182 xe_execlist_port_assert_held(port);
184 status = read_execlist_status(port->hwe);
188 __xe_execlist_port_start_next_active(port);
/*
 * Engine interrupt callback; xe_execlist_port_create() installs it as
 * hwe->irq_handler. Looks up the port through hwe->exl_port and runs the
 * locked handler under port->lock.
 *
 * NOTE(review): this takes the lock with plain spin_lock() while the other
 * paths in this file use the _irq/_irqsave variants; presumably it is only
 * called with interrupts already disabled - confirm.
 */
191 static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
194 struct xe_execlist_port *port = hwe->exl_port;
196 spin_lock(&port->lock);
197 xe_execlist_port_irq_handler_locked(port);
198 spin_unlock(&port->lock);
/*
 * Re-evaluate what @port should run after work of the given @priority became
 * active. Compares @priority with the active priority of the currently
 * running queue, if any, and calls __xe_execlist_port_start_next_active().
 *
 * Runs under xe_execlist_port_assert_held().
 */
201 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
202 enum xe_exec_queue_priority priority)
204 xe_execlist_port_assert_held(port);
/* Tests whether the running queue is already at @priority or higher. */
206 if (port->running_exl && port->running_exl->active_priority >= priority)
209 __xe_execlist_port_start_next_active(port);
/*
 * Put @exl on the active list of its port that matches its priority and wake
 * the port, all under port->lock with interrupts disabled.
 *
 * NOTE(review): priority is initialised from exl->active_priority and then
 * compared against exl->active_priority below. Unless that field changes
 * between the read and the lock, the "priority changed" branch cannot be
 * taken, and a queue whose active_priority is still
 * XE_EXEC_QUEUE_PRIORITY_UNSET trips the first warning and is then used as
 * the port->active[] index. The queue's requested priority was presumably
 * intended here - confirm.
 */
212 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
214 struct xe_execlist_port *port = exl->port;
215 enum xe_exec_queue_priority priority = exl->active_priority;
/* The value is used to index port->active[], so it must be a valid slot. */
217 XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
218 XE_WARN_ON(priority < 0);
219 XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
221 spin_lock_irq(&port->lock);
223 if (exl->active_priority != priority &&
224 exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
225 /* Priority changed, move it to the right list */
226 list_del(&exl->active_link);
227 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
/* Not on any list: record the priority and append to that list's tail. */
230 if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
231 exl->active_priority = priority;
232 list_add_tail(&exl->active_link, &port->active[priority]);
235 xe_execlist_port_wake_locked(exl->port, priority);
237 spin_unlock_irq(&port->lock);
/*
 * Callback of the port's irq_fail timer. Recovers the port from the embedded
 * timer, runs the same locked handler as the engine interrupt does, and
 * re-arms the timer to fire again 1000 ms later, so it keeps running until
 * the timer is deleted.
 */
240 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
242 struct xe_execlist_port *port =
243 container_of(timer, struct xe_execlist_port, irq_fail);
245 spin_lock_irq(&port->lock);
246 xe_execlist_port_irq_handler_locked(port);
247 spin_unlock_irq(&port->lock);
/* Self re-arm: next expiry one second from now. */
249 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
250 add_timer(&port->irq_fail);
/*
 * Allocate and initialise the execlist port for hardware engine @hwe.
 *
 * The port is allocated with drmm_kzalloc() against the DRM device of @xe.
 * Its lock and per-priority active lists are initialised, the context ID
 * counter starts at 1, no queue is marked running, the engine's irq_handler
 * is pointed at xe_execlist_port_irq_handler() and the irq_fail timer is
 * armed for 1000 ms.
 *
 * ERR_PTR(-ENOMEM) is returned on the error path, presumably when the
 * allocation fails.
 */
253 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
254 struct xe_hw_engine *hwe)
256 struct drm_device *drm = &xe->drm;
257 struct xe_execlist_port *port;
260 port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
262 return ERR_PTR(-ENOMEM);
266 spin_lock_init(&port->lock);
267 for (i = 0; i < ARRAY_SIZE(port->active); i++)
268 INIT_LIST_HEAD(&port->active[i]);
/* Starts at 1; __xe_execlist_port_idle() submits the kernel context with ID 0. */
270 port->last_ctx_id = 1;
271 port->running_exl = NULL;
273 hwe->irq_handler = xe_execlist_port_irq_handler;
275 /* TODO: Fix the interrupt code so it doesn't race like mad */
276 timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
277 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
278 add_timer(&port->irq_fail);
283 void xe_execlist_port_destroy(struct xe_execlist_port *port)
285 del_timer(&port->irq_fail);
287 /* Prevent an interrupt while we're destroying */
288 spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock);
289 port->hwe->irq_handler = NULL;
290 spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock);
/*
 * run_job callback of drm_sched_ops. Converts the scheduler job to its
 * xe_sched_job, has the queue's ring_ops emit it, marks the queue's execlist
 * state active on its port and returns job->fence with a reference taken
 * through dma_fence_get().
 */
293 static struct dma_fence *
294 execlist_run_job(struct drm_sched_job *drm_job)
296 struct xe_sched_job *job = to_xe_sched_job(drm_job);
297 struct xe_exec_queue *q = job->q;
298 struct xe_execlist_exec_queue *exl = job->q->execlist;
/* Emit first, then activate: activation may submit the ring to hardware. */
300 q->ring_ops->emit_job(job);
301 xe_execlist_make_active(exl);
303 return dma_fence_get(job->fence);
/*
 * free_job callback of drm_sched_ops: drops a reference on the xe_sched_job
 * that wraps @drm_job via xe_sched_job_put().
 */
306 static void execlist_job_free(struct drm_sched_job *drm_job)
308 struct xe_sched_job *job = to_xe_sched_job(drm_job);
310 xe_sched_job_put(job);
/*
 * Scheduler backend callbacks handed to drm_sched_init() in
 * execlist_exec_queue_init(): job submission and job release.
 */
313 static const struct drm_sched_backend_ops drm_sched_ops = {
314 .run_job = execlist_run_job,
315 .free_job = execlist_job_free,
/*
 * init hook of execlist_exec_queue_ops: set up the execlist backend state of
 * exec queue @q.
 *
 * Asserts that the uC is not enabled, logs that execlist submission is in
 * use, allocates the per-queue execlist state with kzalloc() and initialises
 * a DRM scheduler plus one scheduler entity for it. The queue is then bound
 * to its engine's execlist port, marked as never run and with no active
 * priority, and gets its name assigned from the lowest set bit of
 * q->logical_mask.
 */
318 static int execlist_exec_queue_init(struct xe_exec_queue *q)
320 struct drm_gpu_scheduler *sched;
321 struct xe_execlist_exec_queue *exl;
322 struct xe_device *xe = gt_to_xe(q->gt);
325 xe_assert(xe, !xe_device_uc_enabled(xe));
327 drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
329 exl = kzalloc(sizeof(*exl), GFP_KERNEL);
/*
 * The scheduler is sized from the ring: ring size of the first LRC divided
 * by MAX_JOB_SIZE_BYTES. It is named after the hardware engine.
 */
335 err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
336 q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
337 XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
338 NULL, NULL, q->hwe->name,
339 gt_to_xe(q->gt)->drm.dev);
344 err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
348 exl->port = q->hwe->exl_port;
349 exl->has_run = false;
350 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
352 q->entity = &exl->entity;
354 xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
/* Error unwind: undo drm_sched_init(). */
359 drm_sched_fini(&exl->sched);
/*
 * Work item queued by execlist_exec_queue_fini(): releases the execlist
 * backend state of an exec queue and then finalises the queue itself with
 * xe_exec_queue_fini().
 *
 * NOTE(review): ee (from container_of) and exl (q->execlist) presumably refer
 * to the same object - confirm.
 */
365 static void execlist_exec_queue_fini_async(struct work_struct *w)
367 struct xe_execlist_exec_queue *ee =
368 container_of(w, struct xe_execlist_exec_queue, fini_async);
369 struct xe_exec_queue *q = ee->q;
370 struct xe_execlist_exec_queue *exl = q->execlist;
371 struct xe_device *xe = gt_to_xe(q->gt);
374 xe_assert(xe, !xe_device_uc_enabled(xe));
/*
 * The queue is expected to be off the port's active lists by now; if it still
 * has an active priority, warn and unlink it.
 */
376 spin_lock_irqsave(&exl->port->lock, flags);
377 if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
378 list_del(&exl->active_link);
379 spin_unlock_irqrestore(&exl->port->lock, flags);
381 if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT)
382 xe_device_remove_persistent_exec_queues(xe, q);
/* Tear down the entity before the scheduler it is attached to. */
383 drm_sched_entity_fini(&exl->entity);
384 drm_sched_fini(&exl->sched);
387 xe_exec_queue_fini(q);
390 static void execlist_exec_queue_kill(struct xe_exec_queue *q)
/*
 * fini hook of execlist_exec_queue_ops: defers the actual teardown to
 * execlist_exec_queue_fini_async() by queueing the queue's fini_async work
 * item on system_unbound_wq.
 */
395 static void execlist_exec_queue_fini(struct xe_exec_queue *q)
397 INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
398 queue_work(system_unbound_wq, &q->execlist->fini_async);
401 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
402 enum xe_exec_queue_priority priority)
408 static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
414 static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
415 u32 preempt_timeout_us)
421 static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
428 static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
434 static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
440 static void execlist_exec_queue_resume(struct xe_exec_queue *q)
445 static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
/*
 * Exec queue operations of the execlist submission backend;
 * xe_execlist_init() installs this table as gt->exec_queue_ops.
 */
451 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
452 .init = execlist_exec_queue_init,
453 .kill = execlist_exec_queue_kill,
454 .fini = execlist_exec_queue_fini,
455 .set_priority = execlist_exec_queue_set_priority,
456 .set_timeslice = execlist_exec_queue_set_timeslice,
457 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
458 .set_job_timeout = execlist_exec_queue_set_job_timeout,
459 .suspend = execlist_exec_queue_suspend,
460 .suspend_wait = execlist_exec_queue_suspend_wait,
461 .resume = execlist_exec_queue_resume,
462 .reset_status = execlist_exec_queue_reset_status,
/*
 * Select the execlist backend for @gt. The xe_device_uc_enabled() check
 * covers the case where GuC submission is in use and there is nothing to set
 * up here; otherwise gt->exec_queue_ops is pointed at
 * execlist_exec_queue_ops.
 */
465 int xe_execlist_init(struct xe_gt *gt)
467 /* GuC submission enabled, nothing to do */
468 if (xe_device_uc_enabled(gt_to_xe(gt)))
471 gt->exec_queue_ops = &execlist_exec_queue_ops;